Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3markupsafe 

4~~~~~~~~~~ 

5 

6Implements an escape function and a Markup string to replace HTML 

7special characters with safe representations. 

8 

9:copyright: 2010 Pallets 

10:license: BSD-3-Clause 

11""" 

12import re 

13import string 

14 

15from ._compat import int_types 

16from ._compat import iteritems 

17from ._compat import Mapping 

18from ._compat import PY2 

19from ._compat import string_types 

20from ._compat import text_type 

21from ._compat import unichr 

22 

# Version string of this package.
__version__ = "1.1.1"

# Public names exported by a star-import of this module.
__all__ = ["Markup", "soft_unicode", "escape", "escape_silent"]

26 

# Matches either a complete HTML comment or a single tag. ``re.DOTALL`` lets
# the ``.*?`` in the comment alternative cross line breaks; without it a
# multi-line comment containing ``>`` is only partially removed, because the
# tag alternative stops at the first ``>`` and the rest of the comment leaks
# into the stripped text.
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)", re.DOTALL)
# Matches ``&name;`` style references; group 1 is the text between ``&`` and
# ``;`` and may not contain ``&``, a space, or ``;``.
_entity_re = re.compile(r"&([^& ;]+);")

29 

30 

class Markup(text_type):
    """A string that is ready to be safely inserted into an HTML or XML
    document, either because it was escaped or because it was marked
    safe.

    Passing an object to the constructor converts it to text and wraps
    it to mark it safe without escaping. To escape the text, use the
    :meth:`escape` class method instead.

    >>> Markup('Hello, <em>World</em>!')
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape('Hello, <em>World</em>!')
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    This implements the ``__html__()`` interface that some frameworks
    use. Passing an object that implements ``__html__()`` will wrap the
    output of that method, marking it safe.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    This is a subclass of the text type (``str`` in Python 3,
    ``unicode`` in Python 2). It has the same methods as that type, but
    all methods escape their arguments and return a ``Markup`` instance.

    >>> Markup('<em>%s</em>') % 'foo & bar'
    Markup('<em>foo &amp; bar</em>')
    >>> Markup('<em>Hello</em> ') + '<foo>'
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(cls, base=u"", encoding=None, errors="strict"):
        """Create a new instance from ``base`` without escaping it.

        If ``base`` has an ``__html__`` method, its return value is
        wrapped instead. ``encoding`` and ``errors`` are only forwarded
        to the text type constructor when ``encoding`` is given.
        """
        if hasattr(base, "__html__"):
            base = base.__html__()
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return ``self``; the value is already considered safe."""
        return self

    def __add__(self, other):
        """Concatenate with ``other`` after escaping it.

        Returns ``NotImplemented`` unless ``other`` is a string or
        defines ``__html__``.
        """
        if isinstance(other, string_types) or hasattr(other, "__html__"):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Reflected concatenation: escape ``other`` and append ``self``."""
        if hasattr(other, "__html__") or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup ``num`` times, keeping the result marked safe."""
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented

    __rmul__ = __mul__

    def __mod__(self, arg):
        """``%``-format the markup, escaping the arguments.

        Each argument is wrapped in a helper that applies
        :meth:`escape` when the formatting machinery converts it to
        text.
        """
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, text_type.__repr__(self))

    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))

    join.__doc__ = text_type.join.__doc__

    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))

    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))

    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))

    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        """Convert escaped markup back into a text string. This replaces
        HTML entities with the characters they represent.

        References that are not known entity names and cannot be
        converted to a character (malformed or out-of-range numeric
        references) are left unchanged.

        >>> Markup('Main &raquo; <em>About</em>').unescape()
        'Main » <em>About</em>'
        """
        from ._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ("#x", "#X"):
                    return unichr(int(name[2:], 16))
                elif name.startswith("#"):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or a code point outside the
                # valid range. OverflowError: the number does not even
                # fit the integer type unichr() accepts.
                pass
            # Don't modify unexpected input.
            return m.group()

        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        """:meth:`unescape` the markup, remove tags, and normalize
        whitespace to single spaces.

        >>> Markup('Main &raquo;\t<em>About</em>').striptags()
        'Main » About'
        """
        stripped = u" ".join(_striptags_re.sub("", self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape a string. Calls :func:`escape` and ensures that for
        subclasses the correct type is returned.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_simple_escaping_wrapper(name):  # noqa: B902
        # Class-body helper (deleted at the end of the class): build a
        # method that escapes string-like arguments, delegates to the
        # text type method called ``name``, and re-wraps the result.
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
            return self.__class__(orig(self, *args, **kwargs))

        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in (
        "__getitem__",
        "capitalize",
        "title",
        "lower",
        "upper",
        "replace",
        "ljust",
        "rjust",
        "lstrip",
        "rstrip",
        "center",
        "strip",
        "translate",
        "expandtabs",
        "swapcase",
        "zfill",
    ):
        # Inside a class body, locals() is the class namespace, so this
        # installs the wrapper as a method of the class.
        locals()[method] = make_simple_escaping_wrapper(method)

    def partition(self, sep):
        """Like the text type's ``partition`` with ``sep`` escaped first;
        every part of the returned tuple is wrapped in this class.
        """
        return tuple(map(self.__class__, text_type.partition(self, self.escape(sep))))

    def rpartition(self, sep):
        """Like the text type's ``rpartition`` with ``sep`` escaped first;
        every part of the returned tuple is wrapped in this class.
        """
        return tuple(map(self.__class__, text_type.rpartition(self, self.escape(sep))))

    def format(self, *args, **kwargs):
        """Format the markup with an :class:`EscapeFormatter`, so that
        substituted values are escaped, and return the result wrapped in
        this class.
        """
        formatter = EscapeFormatter(self.escape)
        kwargs = _MagicFormatMapping(args, kwargs)
        return self.__class__(formatter.vformat(self, args, kwargs))

    def __html_format__(self, format_spec):
        """Return ``self`` for an empty ``format_spec``.

        :raises ValueError: if a non-empty format specification is given.
        """
        if format_spec:
            raise ValueError("Unsupported format specification " "for Markup.")
        return self

    # not in python 3
    if hasattr(text_type, "__getslice__"):
        __getslice__ = make_simple_escaping_wrapper("__getslice__")

    # Remove the class-body helpers so they do not become attributes.
    del method, make_simple_escaping_wrapper

225 

226 

class _MagicFormatMapping(Mapping):
    """Mapping wrapper handed to the formatter in place of the plain
    keyword arguments, working around a bug in the standard library's
    string formatting.

    See http://bugs.python.org/issue13598 for information about why
    this is necessary.
    """

    def __init__(self, args, kwargs):
        self._args = args
        self._kwargs = kwargs
        # Position handed out for the next empty-string key.
        self._last_index = 0

    def __getitem__(self, key):
        # Any non-empty key is a plain keyword lookup.
        if key != "":
            return self._kwargs[key]

        # An empty key consumes the next positional argument.
        position = self._last_index
        self._last_index = position + 1
        try:
            return self._args[position]
        except LookupError:
            # No such positional argument: fall back to a keyword whose
            # name is the position written as a string.
            key = str(position)
        return self._kwargs[key]

    def __iter__(self):
        return iter(self._kwargs)

    def __len__(self):
        return len(self._kwargs)

256 

257 

if hasattr(text_type, "format"):

    class EscapeFormatter(string.Formatter):
        """A ``string.Formatter`` that passes every formatted field
        through the ``escape`` callable given at construction.
        """

        def __init__(self, escape):
            self.escape = escape

        def format_field(self, value, format_spec):
            """Format one field and return it as escaped text.

            ``__html_format__`` is preferred when the value defines it;
            a value with only ``__html__`` is accepted without a format
            spec; anything else goes through the default formatting.
            """
            if hasattr(value, "__html_format__"):
                formatted = value.__html_format__(format_spec)
            elif not hasattr(value, "__html__"):
                # We need to make sure the format spec is unicode here as
                # otherwise the wrong callback methods are invoked. For
                # instance a byte string there would invoke __str__ and
                # not __unicode__.
                spec = text_type(format_spec)
                formatted = string.Formatter.format_field(self, value, spec)
            elif format_spec:
                raise ValueError(
                    "Format specifier {0} given, but {1} does not"
                    " define __html_format__. A class that defines"
                    " __html__ must define __html_format__ to work"
                    " with format specifiers.".format(format_spec, type(value))
                )
            else:
                formatted = value.__html__()
            escaped = self.escape(formatted)
            return text_type(escaped)

283 

284 

def _escape_argspec(obj, iterable, escape):
    """Escape the string-like entries of ``obj`` in place.

    ``iterable`` yields ``(key, value)`` pairs. Each value that defines
    ``__html__`` or is a string is passed to ``escape`` and stored back
    at ``obj[key]``. The same ``obj`` is returned.
    """
    for slot, candidate in iterable:
        needs_escaping = hasattr(candidate, "__html__") or isinstance(
            candidate, string_types
        )
        if not needs_escaping:
            continue
        obj[slot] = escape(candidate)
    return obj

291 

292 

class _MarkupEscapeHelper(object):
    """Wrapper used by ``Markup.__mod__``: pairs an object with an
    ``escape`` callable and applies the callable whenever the object is
    converted to text.
    """

    def __init__(self, obj, escape):
        self.obj = obj
        self.escape = escape

    def __getitem__(self, item):
        # Wrap the looked-up value too, so it is escaped the same way.
        looked_up = self.obj[item]
        return _MarkupEscapeHelper(looked_up, self.escape)

    def __str__(self):
        escaped = self.escape(self.obj)
        return text_type(escaped)

    __unicode__ = __str__

    def __repr__(self):
        raw_repr = repr(self.obj)
        return str(self.escape(raw_repr))

    def __int__(self):
        # Numeric conversions use the wrapped object directly, unescaped.
        return int(self.obj)

    def __float__(self):
        return float(self.obj)

316 

317 

# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.
try:
    from ._speedups import escape, escape_silent, soft_unicode
except ImportError:
    # Fall back to the native module when the speedups module cannot be
    # imported.
    from ._native import escape, escape_silent, soft_unicode

if not PY2:
    # When PY2 is false, also expose ``soft_unicode`` under the name
    # ``soft_str`` and export it.
    soft_str = soft_unicode
    __all__.append("soft_str")