Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/mako/filters.py : 48%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# mako/filters.py
# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
8import codecs
9import re
11from mako import compat
12from mako.compat import codepoint2name
13from mako.compat import name2codepoint
14from mako.compat import quote_plus
15from mako.compat import unquote_plus
# Replacement table used by xml_escape(): each markup-significant character
# maps to the character reference that is emitted in its place.
xml_escapes = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    '"': "&#34;",  # also &quot; in html-only
    "'": "&#39;",  # also &apos; in html-only
}
# XXX: &quot; is valid in HTML and XML
#      &apos; is not valid HTML, but is valid XML
def legacy_html_escape(s):
    """legacy HTML escape for non-unicode mode.

    Replaces ``&``, ``>``, ``<``, ``"`` and ``'`` in *s* with character
    references and returns the resulting string.
    """
    # "&" must be handled first so the ampersands introduced by the
    # replacements below are not escaped a second time.
    s = s.replace("&", "&amp;")
    s = s.replace(">", "&gt;")
    s = s.replace("<", "&lt;")
    s = s.replace('"', "&#34;")
    s = s.replace("'", "&#39;")
    return s
# Use markupsafe's escape() when the package can be imported; otherwise fall
# back to legacy_html_escape() from this module.
try:
    import markupsafe

    html_escape = markupsafe.escape
except ImportError:
    html_escape = legacy_html_escape
def xml_escape(string):
    """Return *string* with ``&``, ``<``, ``>``, ``"`` and ``'`` escaped.

    Every occurrence of one of those characters is replaced by the value
    stored for it in ``xml_escapes``.
    """
    return re.sub(r'([&<"\'>])', lambda m: xml_escapes[m.group()], string)
def url_escape(string):
    """Encode *string* as UTF-8 and return it passed through ``quote_plus``."""
    # convert into a list of octets
    string = string.encode("utf8")
    return quote_plus(string)
def legacy_url_escape(string):
    """Return *string* passed through ``quote_plus`` without re-encoding it.

    Unlike ``url_escape`` this does not call ``.encode("utf8")`` first.
    """
    return quote_plus(string)
def url_unescape(string):
    """Return *string* passed through ``unquote_plus``, as text.

    If ``unquote_plus`` hands back a non-ASCII byte string it is decoded as
    UTF-8; text results are returned unchanged.
    """
    text = unquote_plus(string)
    # Only byte strings have a .decode() method.  The previous unconditional
    # call raised AttributeError whenever unquote_plus() returned a text
    # string containing non-ASCII characters.
    if isinstance(text, bytes) and not is_ascii_str(text):
        text = text.decode("utf8")
    return text
def trim(string):
    """Return *string* with leading and trailing whitespace removed."""
    return string.strip()
class Decode(object):
    """Namespace whose attribute names are treated as encoding names.

    Accessing ``Decode().<encoding>`` returns a one-argument function that
    converts its argument to text using that encoding.
    """

    def __getattr__(self, key):
        # ``key`` is the attribute name that was looked up; it is used as the
        # ``encoding`` argument when a byte string has to be decoded.
        def decode(x):
            if isinstance(x, compat.text_type):
                # Already text: nothing to do.
                return x
            elif not isinstance(x, compat.binary_type):
                # Neither text nor bytes: stringify, then run through the
                # same checks again.
                return decode(str(x))
            else:
                return compat.text_type(x, encoding=key)

        return decode


decode = Decode()
# Matches strings made up solely of code points 0x00-0x7F (including "").
_ASCII_re = re.compile(r"\A[\x00-\x7f]*\Z")


def is_ascii_str(text):
    """Return a truthy value if *text* is a ``str`` containing only ASCII.

    The result is the regex match object on success, ``None`` when a ``str``
    contains a non-ASCII character, and ``False`` when *text* is not a
    ``str`` at all, so it should only be used in a boolean context.
    """
    return isinstance(text, str) and _ASCII_re.match(text)
96################################################################
class XMLEntityEscaper(object):
    """Convert between characters and XML/HTML character references.

    :param codepoint2name: mapping of integer code point to entity name.
    :param name2codepoint: mapping of entity name to integer code point.
    """

    def __init__(self, codepoint2name, name2codepoint):
        # Pre-render "&name;" for every known code point so the table can be
        # handed straight to str.translate().
        self.codepoint2entity = {
            c: compat.text_type("&%s;" % n)
            for c, n in codepoint2name.items()
        }
        self.name2codepoint = name2codepoint

    def escape_entities(self, text):
        """Replace characters with their character entity references.

        Only characters corresponding to a named entity are replaced.
        """
        return compat.text_type(text).translate(self.codepoint2entity)

    def __escape(self, m):
        # Substitution callback for escape(): prefer the named entity and
        # fall back to a hexadecimal numeric reference.
        codepoint = ord(m.group())
        try:
            return self.codepoint2entity[codepoint]
        except (KeyError, IndexError):
            return "&#x%X;" % codepoint

    __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')

    def escape(self, text):
        """Replace characters with their character references.

        Replace characters by their named entity references.
        Non-ASCII characters, if they do not have a named entity reference,
        are replaced by numerical character references.

        The return value is guaranteed to be ASCII.
        """
        return self.__escapable.sub(
            self.__escape, compat.text_type(text)
        ).encode("ascii")

    # XXX: This regexp will not match all valid XML entity names__.
    # (It punts on details involving CombiningChars and Extenders.)
    #
    # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
    #
    # The hexadecimal alternative accepts both letter cases: escape() above
    # emits upper-case digits ("&#x%X;"), which must be readable here.
    __characterrefs = re.compile(
        r"""& (?:
                  \#(\d+)
                | \#x([\da-fA-F]+)
                | ( (?!\d) [:\w] [-.:\w]+ )
              ) ;""",
        re.X | re.UNICODE,
    )

    def __unescape(self, m):
        # Substitution callback for unescape(): exactly one of the three
        # groups (decimal, hexadecimal, name) is set for any match.
        dval, hval, name = m.groups()
        if dval:
            codepoint = int(dval)
        elif hval:
            codepoint = int(hval, 16)
        else:
            codepoint = self.name2codepoint.get(name, 0xFFFD)
            # U+FFFD = "REPLACEMENT CHARACTER"
        try:
            return chr(codepoint)
        except (ValueError, OverflowError):
            # A numeric reference outside the valid code point range is
            # treated like an unknown entity name.
            return chr(0xFFFD)

    def unescape(self, text):
        """Unescape character references.

        All character references (both entity references and numerical
        character references) are unescaped.
        """
        return self.__characterrefs.sub(self.__unescape, text)
# Module-wide escaper built from the entity tables imported at the top of
# this file; the two public filter callables are its bound methods.
_html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint)

html_entities_escape = _html_entities_escaper.escape_entities
html_entities_unescape = _html_entities_escaper.unescape
def htmlentityreplace_errors(ex):
    """An encoding error handler.

    This python codecs error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references::

        >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
        'The cost was &euro;12.'

    :param ex: the exception instance passed in by the codec machinery.
    :return: a ``(replacement_text, resume_position)`` tuple.
    :raises: *ex* itself when it is not a ``UnicodeEncodeError``.
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start : ex.end]
        text = _html_entities_escaper.escape(bad_text)
        # escape() ends with .encode("ascii"); converting a bytes object to
        # text without an encoding yields its repr ("b'...'") rather than
        # its content, so decode explicitly first.
        if isinstance(text, bytes):
            text = text.decode("ascii")
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error("htmlentityreplace", htmlentityreplace_errors)
# TODO: options to make this dynamic per-compilation will be added in a later
# release
# Maps each filter name to the expression string associated with it.
DEFAULT_ESCAPES = {
    "x": "filters.xml_escape",
    "h": "filters.html_escape",
    "u": "filters.url_escape",
    "trim": "filters.trim",
    "entity": "filters.html_entities_escape",
    "unicode": "unicode",
    "decode": "decode",
    "str": "str",
    "n": "n",
}
# When compat.py3k is set, the "unicode" filter is pointed at ``str``.
if compat.py3k:
    DEFAULT_ESCAPES.update({"unicode": "str"})

# Variant table that swaps in the legacy HTML and URL escape functions.
NON_UNICODE_ESCAPES = DEFAULT_ESCAPES.copy()
NON_UNICODE_ESCAPES["h"] = "filters.legacy_html_escape"
NON_UNICODE_ESCAPES["u"] = "filters.legacy_url_escape"