Coverage for src/midgy/render.py: 97%
148 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-14 15:49 -0800
1"""render builds the machinery to translate markdown documents to code."""
3from dataclasses import dataclass, field
4from functools import partial
5from io import StringIO
6from re import compile
8__all__ = ()
10DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34}
11BLOCK, FENCE, PYCON = "code_block", "fence", "pycon"
12ESCAPE = {x: "\\" + x for x in "'\""}
13ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]")
14escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0)))
15SP, QUOTES = chr(32), (chr(34) * 3, chr(39) * 3)
18# the Renderer is special markdown renderer designed to produce
19# line for line transformations of markdown to the converted code.
20# not all languages require this, but for python it matters.
21@dataclass
22class Renderer:
23 """the base render system for markdown to code.
25 * tokenize & render markdown as code
26 * line-for-line rendering
27 * use indented code as fiducial markers for translation
28 * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
29 * a reusable base class that underlies the python translation
30 """
32 parser: object = None
33 cell_hr_length: int = 9
34 include_code: bool = True # the nuclear option
35 include_code_fences: set = field(default_factory=set)
36 include_indented_code: bool = True
37 include_doctest: bool = False
38 config_key: str = "py"
39 env: dict = None
41 def __post_init__(self):
42 self.parser = self.get_parser()
44 @classmethod
45 def code_from_string(cls, body, **kwargs):
46 """render a string"""
47 return cls(**kwargs).render(body)
49 def get_block(self, env, stop=None):
50 """iterate through the lines in a buffer"""
51 if stop is None:
52 yield from env["source"]
53 else:
54 while env["last_line"] < stop:
55 yield self.readline(env)
57 def get_cells(self, tokens, *, env=None, include_hr=True):
58 """walk cells separated by mega-hrs"""
59 block = []
60 for token in tokens:
61 if token.type == "hr":
62 if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
63 yield (list(block), token)
64 block.clear()
65 if include_hr:
66 block.append(token)
67 elif env is not None:
68 list(self.get_block(env, token))
69 else:
70 block.append(token)
71 if block:
72 yield block, None
74 def get_front_matter(self, tokens):
75 for token in tokens:
76 if token.type == "shebang":
77 continue
78 if token.type == "front_matter":
79 from .front_matter import load
81 if "data" in token.meta:
82 return token.meta["data"]
83 return token.meta.setdefault("data", load(token.content))
84 return
86 def get_initial_env(self, src, tokens):
87 """initialize the parser environment indents"""
88 env = dict(**self.env or dict(), source=StringIO(src), last_line=0, last_indent=0)
89 for token in filter(self.is_code_block, tokens): # iterate through the tokens
90 env["min_indent"] = min(env.get("min_indent", 9999), token.meta["min_indent"])
91 env.setdefault("min_indent", 0)
92 return env
94 def get_parser(self):
95 from markdown_it import MarkdownIt
97 parser = MarkdownIt("gfm-like", options_update=dict(inline_definitions=True, langPrefix=""))
98 return self.set_parser_defaults(parser)
100 def get_updated_env(self, token, env, **kwargs):
101 """update the state of the environment"""
102 left = token.content.rstrip()
103 env.update(
104 continued=left.endswith("\\"),
105 colon_block=left.endswith(":"),
106 quoted_block=left.endswith(QUOTES),
107 )
108 env.update(kwargs)
110 def is_code_block(self, token):
111 """is the token a code block entry"""
112 if self.include_code:
113 if token.type == BLOCK:
114 if token.meta["is_doctest"]:
115 return self.include_doctest
116 return self.include_indented_code
117 elif token.type == FENCE:
118 if token.info in self.include_code_fences:
119 return True
120 if token.info == PYCON:
121 return self.include_doctest
122 return False
124 def non_code(self, env, next=None):
125 yield from self.get_block(env, next.map[0] if next else None)
126 if next:
127 env.update(last_indent=next.meta.get("last_indent", 0))
129 def parse(self, src, env=None):
130 return self.parser.parse(src, env)
132 def parse_cells(self, body, *, include_hr=True):
133 yield from (x[0] for x in self.get_cells(self.parse(body), include_hr=include_hr))
135 def print(self, iter, io):
136 return print(*iter, file=io, sep="", end="")
138 def readline(self, env):
139 try:
140 return env["source"].readline()
141 finally:
142 env["last_line"] += 1
144 def render(self, src):
145 return self.render_tokens(self.parse(src), src=src)
147 def render_cells(self, src, *, include_hr=True):
148 # cells allow different parsers in a single pass
149 tokens = self.parse(src)
150 self = self.renderer_from_tokens(tokens)
151 prior = self.get_initial_env(src, tokens)
152 prior_token = None
153 source = prior.pop("source")
155 for block, next_token in self.get_cells(tokens, env=prior, include_hr=include_hr):
156 env = self.get_initial_env(src, block)
157 env["source"], env["last_line"] = source, prior["last_line"]
158 prior_token and block.insert(0, prior_token)
159 yield self.render_tokens(block, env=env, stop=next_token)
160 prior, prior_token = env, next_token
162 def render_token(self, token, env):
163 if token:
164 method = getattr(self, token.type, None)
165 if method:
166 yield from method(token, env) or ()
168 def render_tokens(self, tokens, env=None, src=None, stop=None, target=None):
169 """render parsed markdown tokens"""
170 if target is None:
171 target = StringIO()
172 self = self.renderer_from_tokens(tokens)
173 if env is None:
174 env = self.get_initial_env(src, tokens)
175 for token in tokens:
176 if self.is_code_block(token):
177 env["next_code"] = token
178 self.print(self.render_token(token, env), target)
179 # handle anything left in the buffer
180 self.print(self.non_code(env, stop), target)
181 return target.getvalue() # return the value of the target, a format string.
183 def renderer_from_tokens(self, tokens):
184 front_matter = self.get_front_matter(tokens)
185 if front_matter:
186 # front matter can reconfigure the parser and make a new one
187 config = {k: getattr(self, k) for k in self.__dataclass_fields__}
188 config.update(front_matter.get(self.config_key, {}))
189 if config:
190 return type(self)(**config)
191 return self
193 def set_parser_defaults(self, parser):
194 # our tangling system adds extra conventions to commonmark:
195 ## extend indented code to recognize doctest syntax in-line
196 ## replace the indented code lexer to recognize doctests and append metadata.
197 ## recognize shebang lines at the beginning of a document.
198 ## recognize front-matter at the beginning of document of following shebangs
199 from mdit_py_plugins import deflist, footnote
200 from .front_matter import _front_matter_lexer, _shebang_lexer
201 from .lexers import code_fence_lexer, doctest_lexer, code_lexer
203 parser.block.ruler.before("code", "doctest", doctest_lexer)
204 parser.block.ruler.disable("code")
205 # our indented code captures doctests in indented blocks
206 parser.block.ruler.after("doctest", "code", code_lexer)
207 parser.disable(FENCE)
208 # our code fence captures indent information
209 parser.block.ruler.after("code", FENCE, code_fence_lexer)
210 # shebang because this markdown is code
211 parser.block.ruler.before("table", "shebang", _shebang_lexer)
212 parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
213 parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)
214 parser.disable("footnote_tail")
215 return parser