Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3 pygments.util 

4 ~~~~~~~~~~~~~ 

5 

6 Utility functions. 

7 

8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. 

9 :license: BSD, see LICENSE for details. 

10""" 

11 

12import re 

13from io import TextIOWrapper 

14 

15 

# Separators used to break a shebang line into pieces: a forward slash,
# a backslash, or a space.
split_path_re = re.compile(r'[/\\ ]')
# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the root name
# together with the optional ``keyword "quoted string"`` part following it;
# anything else up to the closing ``>`` is skipped.  The pattern is compiled
# in verbose mode, so the whitespace and the ``#`` comment inside it are not
# part of the match.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Loose tag detector: an opening tag (name in group 1, optional attribute
# text in group 2), any content, then any closing tag.  The closing tag's
# name is not required to equal the opening one.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.UNICODE | re.IGNORECASE | re.DOTALL | re.MULTILINE)
# An ``<?xml ... ?>`` declaration, optionally preceded by whitespace;
# matched case-insensitively.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)

29 

30 

class ClassNotFound(ValueError):
    """Signal that a lookup function found no class matching the request."""

33 

34 

class OptionError(Exception):
    """Report an option whose value has an unusable type or content."""

37 

38 

def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """Return the value of option *optname*, which must be one of *allowed*.

    :param options: mapping of option names to values (only ``get`` is used).
    :param optname: name of the option to look up.
    :param allowed: container of acceptable values.
    :param default: value used when *optname* is missing from *options*.
    :param normcase: if true, lower-case string values before checking them.
    :return: the (possibly lower-cased) option value.
    :raises OptionError: if the value is not contained in *allowed*.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised.  Any other value (including the
    # ``None`` default of a missing option) is checked as it is, so an
    # unacceptable one ends in OptionError rather than in an AttributeError
    # raised by the missing ``lower`` method.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value

47 

48 

def get_bool_opt(options, optname, default=None):
    """Interpret option *optname* as a boolean and return it.

    Booleans and integers are converted with ``bool()``.  Strings are
    compared case-insensitively against ``1/yes/true/on`` (true) and
    ``0/no/false/off`` (false).

    :param options: mapping of option names to values (only ``get`` is used).
    :param optname: name of the option to look up.
    :param default: value used when *optname* is missing from *options*.
    :raises OptionError: for values of any other type and for strings that
        are not one of the recognised words.
    """
    value = options.get(optname, default)
    # ``bool`` is a subclass of ``int``, so this one check covers both.
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))

67 

68 

def get_int_opt(options, optname, default=None):
    """Interpret option *optname* as an integer and return it.

    :param options: mapping of option names to values (only ``get`` is used).
    :param optname: name of the option to look up.
    :param default: value used when *optname* is missing from *options*.
    :return: the result of calling ``int()`` on the option value.
    :raises OptionError: if ``int()`` rejects the value, either because of
        its type or because of its content.
    """
    value = options.get(optname, default)
    try:
        number = int(value)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              value, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              value, optname))
    else:
        return number

81 

82 

def get_list_opt(options, optname, default=None):
    """Interpret option *optname* as a list and return it.

    A string is split on whitespace; a list or tuple is copied into a new
    list.

    :param options: mapping of option names to values (only ``get`` is used).
    :param optname: name of the option to look up.
    :param default: value used when *optname* is missing from *options*.
    :raises OptionError: if the value is neither a string, a list nor a tuple.
    """
    value = options.get(optname, default)
    if isinstance(value, str):
        return value.split()
    if isinstance(value, (list, tuple)):
        return list(value)
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          value, optname))

93 

94 

def docstring_headline(obj):
    """Return the first paragraph of *obj*'s docstring as a single line.

    The lines up to the first blank line are stripped and joined with single
    spaces.  An empty string is returned when *obj* has no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)

105 

106 

def make_analysator(f):
    """Wrap *f* into a static text-analysis function that returns a float.

    The wrapper calls ``f(text)`` and returns its result converted to a
    float and clamped to the range 0.0 .. 1.0.  It returns 0.0 when *f*
    raises an exception, when the result is falsy, or when the result cannot
    be converted to a float.  The wrapper carries the docstring of *f* and is
    returned as a ``staticmethod`` object.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            return 0.0
        if not score:
            return 0.0
        try:
            number = float(score)
        except (ValueError, TypeError):
            return 0.0
        # Clamp into the 0.0 .. 1.0 range.
        return min(1.0, max(0.0, number))

    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)

122 

123 

def shebang_matches(text, regex):
    r"""Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically matches the whole word (i.e. the
    regular expression is wrapped in ``'^$'``).  The expression is treated
    as one unit, so a top-level alternation is anchored on both sides too::

        >>> shebang_matches('#!/usr/bin/python-ruby', r'python|ruby')
        False

    :param text: the source text; only its first line is inspected.
    :param regex: regular expression source (a string) for the program name.
    :return: ``True`` if the first line is a shebang whose last non-option
        component matches *regex*, optionally followed by ``.exe``,
        ``.cmd``, ``.bat`` or ``.bin``; ``False`` otherwise.
    """
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Break the command line into path components and arguments, dropping
    # empty pieces and anything that looks like an option.
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False
    # The non-capturing group keeps ``^`` and the extension/``$`` suffix
    # attached to the whole expression even when it contains a top-level
    # ``|``; without it ``a|b`` would become ``^a`` OR ``b(...)?$``.
    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
                         re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None

167 

168 

def doctype_matches(text, regex):
    """Tell whether the DOCTYPE declared in *text* matches *regex*.

    Returns ``False`` when *text* contains no DOCTYPE declaration.

    Only the first part of the declaration is checked, for example
    ``html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"``.  The expression is
    applied case-insensitively at the start of that part.
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    declared = found.group(1).strip()
    return re.match(regex, declared, re.I) is not None

180 

181 

def html_doctype_matches(text):
    """Tell whether *text* declares a DOCTYPE that starts with ``html``."""
    html_pattern = r'html'
    return doctype_matches(text, html_pattern)

185 

186 

# Remembers the outcome of the tag scan in looks_like_xml(), keyed by the
# hash of the scanned text.
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether *text* has an XML declaration, a DOCTYPE or some tags.

    An XML declaration or a DOCTYPE gives ``True`` straight away.  Otherwise
    the first 1000 characters are searched for a tag pair, and that result is
    cached under the hash of *text*.
    """
    if xml_decl_re.match(text):
        return True
    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]
    if doctype_lookup_re.search(text) is not None:
        # DOCTYPE hits are returned without being cached.
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags

204 

205 

def surrogatepair(c):
    """Split a code point wider than 16 bits into its UTF-16 surrogate pair.

    Returns a ``(high, low)`` tuple of 16 bit code units.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 equals 0xd800 - (0x10000 >> 10), which folds the subtraction of
    # 0x10000 into the base of the high surrogate.
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)

213 

214 

def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render *seq* as the source text of a tuple assigned to *var_name*.

    :param var_name: name on the left-hand side of the assignment.
    :param seq: iterable of strings, one per tuple item.
    :param raw: if true, the items are already formatted source text (for
        example reprs of tuples) and are written out unchanged; otherwise
        each item is written as a single-quoted string literal.
    :param indent_level: nesting depth; each level indents by four spaces.
    :return: the assignment as one string, with one item per line.
    """
    base_indent = ' ' * indent_level * 4
    inner_indent = ' ' * (indent_level + 1) * 4
    header = base_indent + var_name + ' = ('
    if raw:
        entries = [inner_indent + item + ',' for item in seq]
    else:
        entries = []
        for item in seq:
            # Appending a double quote makes repr() choose single quotes;
            # the extra character is cut out again while the closing quote
            # is kept.
            quoted = repr(item + '"')
            entries.append(inner_indent + quoted[:-2] + quoted[-1] + ',')
    footer = base_indent + ')'
    return '\n'.join([header] + entries + [footer])

232 

233 

def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list without duplicates.

    Items contained in `already_seen` are left out as well.  The order of
    first occurrence is preserved.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique

248 

249 

class Future:
    """Base class for work that is carried out later.

    RegexLexerMeta treats instances specially so that regex strings can be
    built on first use.
    """

    def get(self):
        """Produce the deferred value; this base version is not implemented."""
        raise NotImplementedError

258 

259 

def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :return: a ``(decoded_text, encoding_name)`` tuple, where
        *encoding_name* is the encoding that was actually used.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Pass the encoding explicitly: a bare ``decode()`` would retry
        # UTF-8, which has just failed, so the locale encoding would never
        # be tried.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # Either the bytes are not valid in the locale encoding or Python
        # does not know a codec of that name.
        return text.decode('latin1'), 'latin1'

279 

280 

def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    :param text: the bytes to decode.
    :param term: object that may have an ``encoding`` attribute.
    :return: a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except (UnicodeDecodeError, LookupError):
            # The bytes are not valid in the terminal encoding, or its name
            # is unknown to Python.  Either way continue with the generic
            # guessing below instead of failing.
            pass
    return guess_decode(text)

296 

297 

def terminal_encoding(term):
    """Return the most plausible encoding for the given *term*.

    This is the ``encoding`` attribute of *term* when it exists and is not
    empty, and the preferred locale encoding otherwise.
    """
    declared = getattr(term, 'encoding', None)
    if not declared:
        import locale
        return locale.getpreferredencoding()
    return declared

304 

305 

class UnclosingTextIOWrapper(TextIOWrapper):
    """Text wrapper whose ``close()`` only flushes pending output."""

    def close(self):
        # Flush instead of closing, so that the underlying buffer is not
        # closed when this wrapper is destroyed.
        self.flush()