Coverage for src/midgy/render.py: 97%

148 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-14 15:49 -0800

1"""render builds the machinery to translate markdown documents to code.""" 

2 

3from dataclasses import dataclass, field 

4from functools import partial 

5from io import StringIO 

6from re import compile 

7 

8__all__ = () 

9 

# sentinel character ordinals scanned for in raw source lines:
# ">" (doctest prompt), "\" (line continuation), ":" (block colon),
# and the two quote marks.
DOCTEST_CHAR = ord(">")
CONTINUATION_CHAR = ord("\\")
COLON_CHAR = ord(":")
QUOTES_CHARS = {ord("'"), ord('"')}

# token type names used throughout the renderer
BLOCK, FENCE, PYCON = "code_block", "fence", "pycon"

# map each quote character to its backslash-escaped form
ESCAPE = {quote: "\\" + quote for quote in ("'", '"')}
ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]")
# replace every quote in a string with its escaped form
escape = partial(ESCAPE_PATTERN.sub, lambda match: ESCAPE.get(match.group(0)))

# a single space and the two triple-quote delimiters
SP, QUOTES = " ", ('"""', "'''")

16 

17 

# the Renderer is special markdown renderer designed to produce
# line for line transformations of markdown to the converted code.
# not all languages require this, but for python it matters.
@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    # the markdown-it parser; always rebuilt by get_parser() in __post_init__
    parser: object = None
    # an hr whose markup has more than this many non-space characters splits cells
    cell_hr_length: int = 9
    include_code: bool = True  # the nuclear option
    # fence info strings whose contents are treated as code
    include_code_fences: set = field(default_factory=set)
    include_indented_code: bool = True
    include_doctest: bool = False
    # front-matter key whose mapping overrides this dataclass's fields
    config_key: str = "py"
    # optional extra initial state merged into each render environment
    env: dict = None

    def __post_init__(self):
        # build a parser for this instance regardless of what was passed in
        self.parser = self.get_parser()

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer

        with no ``stop``, drain the whole source buffer; otherwise read line
        by line until ``env["last_line"]`` reaches the ``stop`` line number.
        """
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def get_cells(self, tokens, *, env=None, include_hr=True):
        """walk cells separated by mega-hrs

        yields ``(block_tokens, separator)`` pairs; the separator is the hr
        token that closed the cell, or None for the final cell.
        """
        block = []
        for token in tokens:
            if token.type == "hr":
                # only an hr longer than cell_hr_length acts as a cell separator;
                # shorter hr tokens are neither yielded nor collected here
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                    if include_hr:
                        block.append(token)
                    elif env is not None:
                        # consume the separator's source lines so they are skipped
                        # NOTE(review): get_block compares last_line to ``stop``
                        # numerically, but a Token is passed here — confirm intended
                        list(self.get_block(env, token))
            else:
                block.append(token)
        if block:
            # the trailing cell has no separator token
            yield block, None

    def get_front_matter(self, tokens):
        """return the parsed front matter from the leading tokens, or None"""
        for token in tokens:
            if token.type == "shebang":
                continue
            if token.type == "front_matter":
                from .front_matter import load

                # cache the parsed data on the token so it is only loaded once
                if "data" in token.meta:
                    return token.meta["data"]
                return token.meta.setdefault("data", load(token.content))
            # any other token ends the front-matter window at the document head
            return

    def get_initial_env(self, src, tokens):
        """initialize the parser environment indents"""
        env = dict(**self.env or dict(), source=StringIO(src), last_line=0, last_indent=0)
        for token in filter(self.is_code_block, tokens):  # iterate through the tokens
            # track the smallest indent across code blocks; 9999 is a sentinel
            env["min_indent"] = min(env.get("min_indent", 9999), token.meta["min_indent"])
        env.setdefault("min_indent", 0)
        return env

    def get_parser(self):
        """build a markdown-it parser carrying midgy's extra conventions"""
        from markdown_it import MarkdownIt

        parser = MarkdownIt("gfm-like", options_update=dict(inline_definitions=True, langPrefix=""))
        return self.set_parser_defaults(parser)

    def get_updated_env(self, token, env, **kwargs):
        """update the state of the environment"""
        left = token.content.rstrip()
        env.update(
            # the logical line continues when it ends with a backslash
            continued=left.endswith("\\"),
            # a trailing colon opens an indented block
            colon_block=left.endswith(":"),
            # the line ends with a triple quote of either kind
            quoted_block=left.endswith(QUOTES),
        )
        env.update(kwargs)

    def is_code_block(self, token):
        """is the token a code block entry"""
        if self.include_code:
            if token.type == BLOCK:
                # indented code; doctests count only when explicitly opted in
                if token.meta["is_doctest"]:
                    return self.include_doctest
                return self.include_indented_code
            elif token.type == FENCE:
                if token.info in self.include_code_fences:
                    return True
                # pycon fences hold doctest sessions
                if token.info == PYCON:
                    return self.include_doctest
        return False

    def non_code(self, env, next=None):
        """yield buffered non-code lines up to the next token (or buffer end)"""
        yield from self.get_block(env, next.map[0] if next else None)
        if next:
            env.update(last_indent=next.meta.get("last_indent", 0))

    def parse(self, src, env=None):
        """tokenize a markdown source string"""
        return self.parser.parse(src, env)

    def parse_cells(self, body, *, include_hr=True):
        """yield the token block for each cell, dropping the separator tokens"""
        yield from (x[0] for x in self.get_cells(self.parse(body), include_hr=include_hr))

    def print(self, iter, io):
        """write an iterable of strings to a buffer with no separator or newline"""
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        """read one line from the source buffer, advancing the line counter"""
        try:
            return env["source"].readline()
        finally:
            env["last_line"] += 1

    def render(self, src):
        """translate a markdown string to code"""
        return self.render_tokens(self.parse(src), src=src)

    def render_cells(self, src, *, include_hr=True):
        # cells allow different parsers in a single pass
        tokens = self.parse(src)
        self = self.renderer_from_tokens(tokens)
        prior = self.get_initial_env(src, tokens)
        prior_token = None
        # a single shared buffer is threaded through every cell's environment
        source = prior.pop("source")

        for block, next_token in self.get_cells(tokens, env=prior, include_hr=include_hr):
            env = self.get_initial_env(src, block)
            # resume reading where the previous cell stopped
            env["source"], env["last_line"] = source, prior["last_line"]
            # prepend the previous cell's separator token to this block
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_token(self, token, env):
        """dispatch a token to the method named after its type, if one exists"""
        if token:
            method = getattr(self, token.type, None)
            if method:
                yield from method(token, env) or ()

    def render_tokens(self, tokens, env=None, src=None, stop=None, target=None):
        """render parsed markdown tokens"""
        if target is None:
            target = StringIO()
        # front matter may reconfigure the renderer before any output is written
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self.get_initial_env(src, tokens)
        for token in tokens:
            if self.is_code_block(token):
                env["next_code"] = token
            self.print(self.render_token(token, env), target)
        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)
        return target.getvalue()  # return the value of the target, a format string.

    def renderer_from_tokens(self, tokens):
        """return a renderer configured by the document's front matter, or self"""
        front_matter = self.get_front_matter(tokens)
        if front_matter:
            # front matter can reconfigure the parser and make a new one
            config = {k: getattr(self, k) for k in self.__dataclass_fields__}
            config.update(front_matter.get(self.config_key, {}))
            if config:
                return type(self)(**config)
        return self

    def set_parser_defaults(self, parser):
        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front-matter at the beginning of document of following shebangs
        from mdit_py_plugins import deflist, footnote
        from .front_matter import _front_matter_lexer, _shebang_lexer
        from .lexers import code_fence_lexer, doctest_lexer, code_lexer

        parser.block.ruler.before("code", "doctest", doctest_lexer)
        parser.block.ruler.disable("code")
        # our indented code captures doctests in indented blocks
        parser.block.ruler.after("doctest", "code", code_lexer)
        parser.disable(FENCE)
        # our code fence captures indent information
        parser.block.ruler.after("code", FENCE, code_fence_lexer)
        # shebang because this markdown is code
        parser.block.ruler.before("table", "shebang", _shebang_lexer)
        parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)
        # NOTE(review): footnote_tail disabled — presumably to preserve the
        # line-for-line mapping; confirm against the footnote plugin's output
        parser.disable("footnote_tail")
        return parser