Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# mako/lexer.py 

2# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file> 

3# 

4# This module is part of Mako and is released under 

5# the MIT License: http://www.opensource.org/licenses/mit-license.php 

6 

7"""provides the Lexer class for parsing template strings into parse trees.""" 

8 

9import codecs 

10import re 

11 

12from mako import compat 

13from mako import exceptions 

14from mako import parsetree 

15from mako.pygen import adjust_whitespace 

16 

# module-level cache of compiled regular expressions, keyed by the
# (regexp, flags) tuple; consulted and filled by Lexer.match().
_regexp_cache = {}

18 

19 

class Lexer(object):
    """scan template source text and build a tree of ``parsetree`` nodes.

    The lexer keeps a cursor (``match_position``) into ``text`` and a
    running line counter; each ``match_*`` method tries one construct at
    the cursor, appends the resulting node(s) and reports whether it
    consumed anything.  ``parse()`` drives those methods until the text
    is exhausted and returns the root ``TemplateNode``.

    """

    def __init__(
        self,
        text,
        filename=None,
        disable_unicode=False,
        input_encoding=None,
        preprocessor=None,
    ):
        """set up lexer state for the given template source.

        :param text: template source; decoded / preprocessed by ``parse()``.
        :param filename: stored on nodes and passed to raised exceptions.
        :param disable_unicode: when true, ``parse()`` leaves ``text``
         undecoded; rejected when ``compat.py3k`` is set.
        :param input_encoding: fallback encoding used when the source has
         no magic encoding comment.
        :param preprocessor: a callable, or an iterable of callables, each
         applied to the text before lexing.

        :raises exceptions.UnsupportedError: ``disable_unicode`` was
         requested while ``compat.py3k`` is set.

        """
        self.text = text
        self.filename = filename
        self.template = parsetree.TemplateNode(self.filename)
        # line / column at which the most recent match *started*
        self.matched_lineno = 1
        self.matched_charpos = 0
        # line number at the current cursor position
        self.lineno = 1
        self.match_position = 0
        # stacks of currently open tags, control lines, and the ternary
        # control lines (one list per open control line)
        self.tag = []
        self.control_line = []
        self.ternary_stack = []
        self.disable_unicode = disable_unicode
        self.encoding = input_encoding

        if compat.py3k and disable_unicode:
            raise exceptions.UnsupportedError(
                "Mako for Python 3 does not " "support disabling Unicode"
            )

        # normalize the preprocessor argument to an iterable of callables
        if preprocessor is None:
            self.preprocessor = []
        elif not hasattr(preprocessor, "__iter__"):
            self.preprocessor = [preprocessor]
        else:
            self.preprocessor = preprocessor

    @property
    def exception_kwargs(self):
        """source-location keyword arguments for exceptions raised here."""
        return {
            "source": self.text,
            "lineno": self.matched_lineno,
            "pos": self.matched_charpos,
            "filename": self.filename,
        }

    def match(self, regexp, flags=None):
        """compile the given regexp, cache the reg, and call match_reg()."""

        cache_key = (regexp, flags)
        try:
            reg = _regexp_cache[cache_key]
        except KeyError:
            if flags:
                reg = re.compile(regexp, flags)
            else:
                reg = re.compile(regexp)
            _regexp_cache[cache_key] = reg

        return self.match_reg(reg)

    def match_reg(self, reg):
        """match the given regular expression object to the current text
        position.

        if a match occurs, update the current text and line position.

        Returns the match object, or ``None`` when the pattern does not
        match at the cursor (in which case no state is changed).

        """

        mp = self.match_position

        match = reg.match(self.text, mp)
        if match:
            (start, end) = match.span()
            # a zero-width match still advances the cursor by one so that
            # the caller cannot loop forever on the same position
            if end == start:
                self.match_position = end + 1
            else:
                self.match_position = end
            self.matched_lineno = self.lineno

            # locate the newline preceding the match start; -1 means the
            # match is on the first line.  len(self.text) is used rather
            # than self.textlength because the latter is only assigned
            # inside parse().
            cp = mp - 1
            if 0 <= cp < len(self.text):
                cp = self.text.rfind("\n", 0, cp + 1)
            self.matched_charpos = mp - cp
            self.lineno += self.text.count("\n", mp, self.match_position)
        return match

    def parse_until_text(self, watch_nesting, *text):
        """consume text up to the first of the given terminator patterns.

        ``#`` comments and quoted strings are skipped whole, so a
        terminator inside them is not seen.  When ``watch_nesting`` is
        true, a terminator is ignored while ``{}``, ``()`` or ``[]`` are
        unbalanced in the text consumed so far.

        :param watch_nesting: whether to track bracket nesting.
        :param text: one or more regular expression strings, any of which
         ends the scan.
        :return: ``(consumed_text, terminator)`` where ``consumed_text``
         excludes the terminator itself.
        :raises exceptions.SyntaxException: no terminator was found.

        """
        startpos = self.match_position
        text_re = r"|".join(text)
        brace_level = 0
        paren_level = 0
        bracket_level = 0
        while True:
            # skip a comment through end of line
            match = self.match(r"#.*\n")
            if match:
                continue
            # skip a complete single- or triple-quoted string
            match = self.match(
                r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
            )
            if match:
                continue
            match = self.match(r"(%s)" % text_re)
            nested = watch_nesting and (
                brace_level > 0 or paren_level > 0 or bracket_level > 0
            )
            if match and not nested:
                terminator = match.group(1)
                return (
                    self.text[
                        startpos : self.match_position - len(terminator)
                    ],
                    terminator,
                )
            elif not match:
                match = self.match(r"(.*?)(?=\"|\'|#|%s)" % text_re, re.S)
            if match:
                # reached either with plain text, or with a terminator
                # that was seen while nested; in both cases its brackets
                # adjust the nesting counters
                chunk = match.group(1)
                brace_level += chunk.count("{")
                brace_level -= chunk.count("}")
                paren_level += chunk.count("(")
                paren_level -= chunk.count(")")
                bracket_level += chunk.count("[")
                bracket_level -= chunk.count("]")
                continue
            raise exceptions.SyntaxException(
                "Expected: %s" % ",".join(text), **self.exception_kwargs
            )

    def append_node(self, nodecls, *args, **kwargs):
        """construct a node and attach it to the tree and lexer stacks.

        ``source``, ``lineno`` and ``pos`` default to the current match
        location unless given in ``kwargs``; ``filename`` is always set
        from the lexer.  The node is added to the innermost open tag (or
        the template root), and the tag / control-line stacks are pushed
        or popped according to the node type.

        :raises exceptions.SyntaxException: a non-primary control line is
         not a legal ternary for the open control line.

        """
        kwargs.setdefault("source", self.text)
        kwargs.setdefault("lineno", self.matched_lineno)
        kwargs.setdefault("pos", self.matched_charpos)
        kwargs["filename"] = self.filename
        node = nodecls(*args, **kwargs)
        if self.tag:
            self.tag[-1].nodes.append(node)
        else:
            self.template.nodes.append(node)
        # build a set of child nodes for the control line
        # (used for loop variable detection)
        # also build a set of child nodes on ternary control lines
        # (used for determining if a pass needs to be auto-inserted)
        if self.control_line:
            control_frame = self.control_line[-1]
            control_frame.nodes.append(node)
            is_ternary_line = isinstance(
                node, parsetree.ControlLine
            ) and control_frame.is_ternary(node.keyword)
            if not is_ternary_line:
                if self.ternary_stack and self.ternary_stack[-1]:
                    self.ternary_stack[-1][-1].nodes.append(node)
        if isinstance(node, parsetree.Tag):
            if self.tag:
                node.parent = self.tag[-1]
            self.tag.append(node)
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                self.control_line.pop()
                self.ternary_stack.pop()
            elif node.is_primary:
                self.control_line.append(node)
                self.ternary_stack.append([])
            elif self.control_line and self.control_line[-1].is_ternary(
                node.keyword
            ):
                self.ternary_stack[-1].append(node)
            elif self.control_line and not self.control_line[-1].is_ternary(
                node.keyword
            ):
                raise exceptions.SyntaxException(
                    "Keyword '%s' not a legal ternary for keyword '%s'"
                    % (node.keyword, self.control_line[-1].keyword),
                    **self.exception_kwargs
                )

    _coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

    def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
        """given string/unicode or bytes/string, determine encoding
        from magic encoding comment, return body as unicode
        or raw if decode_raw=False

        :param text: template source, text or bytes.
        :param decode_raw: whether a bytes source should be decoded.
        :param known_encoding: encoding to use when no magic comment is
         present; falls back to ``"utf-8"`` when empty.
        :param filename: passed to raised exceptions.
        :return: ``(encoding, text)``.
        :raises exceptions.CompileException: a UTF-8 BOM conflicts with
         the magic comment, or decoding fails.

        """
        if isinstance(text, compat.text_type):
            # already text: only report which encoding applies
            m = self._coding_re.match(text)
            if m:
                encoding = m.group(1)
            else:
                encoding = known_encoding or "utf-8"
            return encoding, text

        if text.startswith(codecs.BOM_UTF8):
            text = text[len(codecs.BOM_UTF8) :]
            parsed_encoding = "utf-8"
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            if m is not None and m.group(1) != "utf-8":
                raise exceptions.CompileException(
                    "Found utf-8 BOM in file, with conflicting "
                    "magic encoding comment of '%s'" % m.group(1),
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )
        else:
            m = self._coding_re.match(text.decode("utf-8", "ignore"))
            if m:
                parsed_encoding = m.group(1)
            else:
                parsed_encoding = known_encoding or "utf-8"

        if decode_raw:
            try:
                text = text.decode(parsed_encoding)
            except UnicodeDecodeError:
                raise exceptions.CompileException(
                    "Unicode decode operation of encoding '%s' failed"
                    % parsed_encoding,
                    text.decode("utf-8", "ignore"),
                    0,
                    0,
                    filename,
                )

        return parsed_encoding, text

    def parse(self):
        """lex the whole template and return the root ``TemplateNode``.

        Decodes the source, runs the preprocessors, skips the magic
        encoding comment, then repeatedly tries each ``match_*`` method
        at the cursor until the end of the text.

        :raises exceptions.SyntaxException: a tag or control keyword is
         still open at the end of the text.
        :raises exceptions.CompileException: nothing matched at a
         position inside the text.

        """
        self.encoding, self.text = self.decode_raw_stream(
            self.text, not self.disable_unicode, self.encoding, self.filename
        )

        for preproc in self.preprocessor:
            self.text = preproc(self.text)

        # push the match marker past the
        # encoding comment.
        self.match_reg(self._coding_re)

        self.textlength = len(self.text)

        while True:
            if self.match_position > self.textlength:
                break

            if self.match_end():
                break
            if self.match_expression():
                continue
            if self.match_control_line():
                continue
            if self.match_comment():
                continue
            if self.match_tag_start():
                continue
            if self.match_tag_end():
                continue
            if self.match_python_block():
                continue
            if self.match_text():
                continue

            if self.match_position > self.textlength:
                break
            # CompileException is given message, source, lineno, pos and
            # filename at every other call site in this file, so the
            # location is supplied here as well.
            raise exceptions.CompileException(
                "assertion failed",
                self.text,
                self.matched_lineno,
                self.matched_charpos,
                self.filename,
            )

        if self.tag:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs
            )
        if self.control_line:
            unterminated = self.control_line[-1]
            raise exceptions.SyntaxException(
                "Unterminated control keyword: '%s'" % unterminated.keyword,
                self.text,
                unterminated.lineno,
                unterminated.pos,
                self.filename,
            )
        return self.template

    def match_tag_start(self):
        """match an opening ``<%keyword attr="value" ...>`` tag.

        Appends a ``parsetree.Tag``; a self-closing tag is popped from the
        tag stack right away.  For ``<%text>`` the body up to the closing
        tag is taken verbatim as a ``parsetree.Text`` node.

        :return: true when a tag was consumed, ``False`` otherwise.
        :raises exceptions.SyntaxException: a ``<%text>`` tag has no
         closing tag.

        """
        match = self.match(
            r"""
            \<%     # opening tag

            ([\w\.\:]+)   # keyword

            ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*)  # attrname, = \
                                               #        sign, string expression

            \s*     # more whitespace

            (/)?>   # closing

            """,
            re.I | re.S | re.X,
        )

        if not match:
            return False

        keyword, attr, isend = match.groups()
        self.keyword = keyword
        attributes = {}
        if attr:
            for key, val1, val2 in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            ):
                # exactly one of the two quote styles captured a value
                value = val1 or val2
                attributes[key] = value.replace("\r\n", "\n")
        self.append_node(parsetree.Tag, keyword, attributes)
        if isend:
            self.tag.pop()
        elif keyword == "text":
            match = self.match(r"(.*?)(?=\</%text>)", re.S)
            if not match:
                raise exceptions.SyntaxException(
                    "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                    **self.exception_kwargs
                )
            self.append_node(parsetree.Text, match.group(1))
            return self.match_tag_end()
        return True

    def match_tag_end(self):
        """match a closing ``</%keyword>`` tag and pop the tag stack.

        :return: ``True`` when a closing tag was consumed, else ``False``.
        :raises exceptions.SyntaxException: no tag is open, or the
         closing keyword differs from the innermost open tag.

        """
        match = self.match(r"\</%[\t ]*(.+?)[\t ]*>")
        if not match:
            return False

        closing = match.group(1)
        if not self.tag:
            raise exceptions.SyntaxException(
                "Closing tag without opening tag: </%%%s>" % closing,
                **self.exception_kwargs
            )
        elif self.tag[-1].keyword != closing:
            raise exceptions.SyntaxException(
                "Closing tag </%%%s> does not match tag: <%%%s>"
                % (closing, self.tag[-1].keyword),
                **self.exception_kwargs
            )
        self.tag.pop()
        return True

    def match_end(self):
        """return ``True`` when the cursor is at the end of the text."""
        # \Z can only produce an empty match, so the result is a plain
        # boolean
        return self.match(r"\Z", re.S) is not None

    def match_text(self):
        """match plain text up to the next template construct.

        Appends a ``parsetree.Text`` node when the matched text is
        non-empty.

        :return: ``True`` when the pattern matched, else ``False``.

        """
        match = self.match(
            r"""
                (.*?)         # anything, followed by:
                (
                 (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
                                             # comment preceded by a
                                             # consumed newline and whitespace
                 |
                 (?=\${)      # an expression
                 |
                 (?=</?[%&])  # a substitution or block or call start or end
                              # - don't consume
                 |
                 (\\\r?\n)    # an escaped newline  - throw away
                 |
                 \Z           # end of string
                )""",
            re.X | re.S,
        )

        if not match:
            return False

        text = match.group(1)
        if text:
            self.append_node(parsetree.Text, text)
        return True

    def match_python_block(self):
        """match a ``<% ... %>`` or ``<%! ... %>`` block of Python code.

        Appends a ``parsetree.Code`` node whose second argument is true
        for the ``<%!`` form.

        :return: ``True`` when a block was consumed, else ``False``.

        """
        match = self.match(r"<%(!)?")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(False, r"%>")
        # the trailing newline helps
        # compiler.parse() not complain about indentation
        text = adjust_whitespace(text) + "\n"
        self.append_node(
            parsetree.Code,
            text,
            match.group(1) == "!",
            lineno=line,
            pos=pos,
        )
        return True

    def match_expression(self):
        """match a ``${expression | escapes}`` substitution.

        Appends a ``parsetree.Expression`` node carrying the expression
        text and the stripped escapes string (empty when no ``|`` part is
        present).

        :return: ``True`` when an expression was consumed, else ``False``.

        """
        match = self.match(r"\${")
        if not match:
            return False

        line, pos = self.matched_lineno, self.matched_charpos
        text, end = self.parse_until_text(True, r"\|", r"}")
        if end == "|":
            escapes, end = self.parse_until_text(True, r"}")
        else:
            escapes = ""
        text = text.replace("\r\n", "\n")
        self.append_node(
            parsetree.Expression,
            text,
            escapes.strip(),
            lineno=line,
            pos=pos,
        )
        return True

    def match_control_line(self):
        """match a ``% keyword ...`` control line or a ``##`` comment line.

        A ``%`` line appends a ``parsetree.ControlLine``; a ``##`` line
        appends a ``parsetree.Comment``.

        :return: ``True`` when a line was consumed, else ``False``.
        :raises exceptions.SyntaxException: the control line is malformed,
         or an ``end<keyword>`` has no matching open control line.

        """
        # a backslash directly before the line break continues the line;
        # the break may be LF or CRLF, the same form match_text() uses
        # for an escaped newline
        match = self.match(
            r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
            r"(?:\r?\n|\Z)",
            re.M,
        )
        if not match:
            return False

        operator = match.group(1)
        text = match.group(2)
        if operator == "%":
            m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
            if not m2:
                raise exceptions.SyntaxException(
                    "Invalid control line: '%s'" % text,
                    **self.exception_kwargs
                )
            isend, keyword = m2.group(1, 2)
            isend = isend is not None

            if isend:
                if not self.control_line:
                    raise exceptions.SyntaxException(
                        "No starting keyword '%s' for '%s'"
                        % (keyword, text),
                        **self.exception_kwargs
                    )
                elif self.control_line[-1].keyword != keyword:
                    raise exceptions.SyntaxException(
                        "Keyword '%s' doesn't match keyword '%s'"
                        % (text, self.control_line[-1].keyword),
                        **self.exception_kwargs
                    )
            self.append_node(parsetree.ControlLine, keyword, isend, text)
        else:
            self.append_node(parsetree.Comment, text)
        return True

    def match_comment(self):
        """matches the multiline version of a comment"""
        match = self.match(r"<%doc>(.*?)</%doc>", re.S)
        if not match:
            return False

        self.append_node(parsetree.Comment, match.group(1))
        return True