Coverage for pymend\docstring_parser\epydoc.py: 99%

153 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-04-20 19:09 +0200

1"""Epyoc-style docstring parsing. 

2 

3.. seealso:: http://epydoc.sourceforge.net/manual-fields.html 

4""" 

5 

6import inspect 

7import re 

8from typing import NamedTuple, Optional 

9 

10from .common import ( 

11 Docstring, 

12 DocstringMeta, 

13 DocstringParam, 

14 DocstringRaises, 

15 DocstringReturns, 

16 DocstringStyle, 

17 DocstringYields, 

18 ParseError, 

19 RenderingStyle, 

20 append_description, 

21 clean_str, 

22 split_description, 

23) 

24 

25 

class SectionPattern(NamedTuple):
    """Compiled regex patterns, one per kind of epydoc docstring section."""

    # Matches "@param"/"@keyword"/"@type" fields with an argument name.
    param: re.Pattern[str]
    # Matches "@raise" fields (the exception name is optional).
    raises: re.Pattern[str]
    # Matches "@return"/"@rtype"/"@yield"/"@ytype" fields.
    returns: re.Pattern[str]
    # Generic fallback matching any other "@key args:" meta field.
    meta: re.Pattern[str]

33 

34 

class SectionMatch(NamedTuple):
    """Results of searching one chunk with each section pattern."""

    # Match of the param/keyword/type pattern, or None.
    param: Optional[re.Match[str]]
    # Match of the raise pattern, or None.
    raises: Optional[re.Match[str]]
    # Match of the return/rtype/yield/ytype pattern, or None.
    returns: Optional[re.Match[str]]
    # Match of the generic meta pattern, or None.
    meta: Optional[re.Match[str]]

42 

43 

def _get_matches_for_chunk(chunk: str, patterns: SectionPattern) -> SectionMatch:
    """Search the chunk with every section pattern and collect the results.

    Parameters
    ----------
    chunk : str
        Docstring chunk to search.
    patterns : SectionPattern
        Compiled patterns, one per section kind.

    Returns
    -------
    SectionMatch
        The (possibly None) match of each pattern against the chunk.
    """
    param_match, raises_match, returns_match, meta_match = (
        pattern.search(chunk)
        for pattern in (
            patterns.param,
            patterns.raises,
            patterns.returns,
            patterns.meta,
        )
    )
    return SectionMatch(
        param=param_match,
        raises=raises_match,
        returns=returns_match,
        meta=meta_match,
    )

65 

66 

class StreamToken(NamedTuple):
    """One entry of the stream list."""

    # Section kind: 'param', 'raise', 'return', 'yield' or 'meta'.
    base: str
    # The matched field keyword (e.g. 'param', 'type', 'rtype').
    key: str
    # Arguments following the keyword (e.g. the parameter name), if any.
    args: list[str]
    # Free-form description text that follows the field marker.
    desc: str

74 

75 

def _tokenize(
    meta_chunk: str,
    patterns: SectionPattern,
) -> list[StreamToken]:
    """Split the meta chunk of a docstring into a stream of tokens.

    Each "@..." field of the chunk becomes one token.

    Parameters
    ----------
    meta_chunk : str
        Chunk to tokenize.
    patterns : SectionPattern
        Compiled patterns for the different section kinds.

    Returns
    -------
    list[StreamToken]
        One (base, key, args, desc) token per "@..." field, where
        base is one of 'param', 'raise', 'return', 'yield', 'meta'.

    Raises
    ------
    ParseError
        If none of the patterns match a chunk, or if a specific keyword
        was only caught by the generic meta pattern.
    """
    tokens: list[StreamToken] = []
    # Each chunk runs from one line starting with "@" up to the next such
    # line (or the end of the string).
    for piece in re.finditer(r"(^@.*?)(?=^@|\Z)", meta_chunk, flags=re.S | re.M):
        chunk = piece.group(0)
        if not chunk:
            continue

        matches = _get_matches_for_chunk(chunk, patterns)
        match = matches.param or matches.raises or matches.returns or matches.meta
        if not match:
            msg = f'Error parsing meta information near "{chunk}".'
            raise ParseError(msg)

        # Every pattern captures the field keyword as its first group.
        key: str = match.group(1)
        if matches.param:
            base = "param"
            args = [match.group(2).strip()]
        elif matches.raises:
            base = "raise"
            exc_name = match.group(2)
            args = [] if exc_name is None else [exc_name.strip()]
        elif matches.returns:
            base = "return" if key in ("return", "rtype") else "yield"
            args = []
        else:
            base = "meta"
            cleaned = clean_str(match.group(2).strip())
            args = [] if cleaned is None else re.split(r"\s+", cleaned)

            # A known keyword reaching the generic meta pattern means its
            # specific pattern failed to match -> malformed field.
            if key in {
                "param",
                "keyword",
                "type",
                "return",
                "rtype",
                "yield",
                "ytype",
            }:
                msg = f'Error parsing meta information near "{chunk}".'
                raise ParseError(msg)

        desc = chunk[match.end() :].strip()
        if "\n" in desc:
            first_line, rest = desc.split("\n", 1)
            desc = first_line + "\n" + inspect.cleandoc(rest)
        tokens.append(StreamToken(base, key, args, desc))
    return tokens

157 

158 

def _combine_params(stream: list[StreamToken]) -> dict[str, dict[str, Optional[str]]]:
    """Group the token stream by target and by the information it carries.

    Parameters
    ----------
    stream : list[StreamToken]
        List of tokens to group.

    Returns
    -------
    dict[str, dict[str, Optional[str]]]
        Mapping from target (a parameter name, "return", or "yield") to a
        dict holding its "type_name" and/or "description" text.
    """
    combined: dict[str, dict[str, Optional[str]]] = {}
    for base, key, args, desc in stream:
        # Raise and meta sections carry no param/return/yield information.
        if base in ("param", "return", "yield"):
            target = args[0] if base == "param" else base
            entry = combined.setdefault(target, {})
            entry["type_name" if "type" in key else "description"] = desc
    return combined

183 

184 

def _add_meta_information(
    stream: list[StreamToken],
    params: dict[str, dict[str, Optional[str]]],
    ret: Docstring,
) -> None:
    """Add the meta information into the docstring instance.

    Parameters
    ----------
    stream : list[StreamToken]
        Stream of tokens of the docstring.
    params : dict[str, dict[str, Optional[str]]]
        Grouped information about each section.
    ret : Docstring
        Docstring instance to add the information to.

    Raises
    ------
    ParseError
        If an unexpected section is encountered.
    """
    # Track which targets were already emitted so that companion tokens
    # (e.g. the "@type" for an already-handled "@param") are not duplicated.
    is_done: dict[str, bool] = {}
    for token in stream:
        if token.base == "param" and not is_done.get(token.args[0], False):
            (arg_name,) = token.args
            info = params[arg_name]
            type_name = info.get("type_name")

            # A trailing "?" on the type name marks the parameter optional.
            if type_name and type_name.endswith("?"):
                is_optional = True
                type_name = type_name[:-1]
            else:
                is_optional = False

            # Extract a default value from a "... defaults to X" phrase.
            match = re.match(r".*defaults to (.+)", token.desc, flags=re.DOTALL)
            default = match[1].rstrip(".") if match else None

            meta_item = DocstringParam(
                args=[token.key, arg_name],
                description=info.get("description"),
                arg_name=arg_name,
                type_name=type_name,
                is_optional=is_optional,
                default=default,
            )
            is_done[arg_name] = True
        elif token.base == "return" and not is_done.get("return", False):
            info = params["return"]
            meta_item = DocstringReturns(
                args=[token.key],
                description=info.get("description"),
                type_name=info.get("type_name"),
                is_generator=False,
            )
            is_done["return"] = True
        elif token.base == "yield" and not is_done.get("yield", False):
            info = params["yield"]
            meta_item = DocstringYields(
                args=[token.key],
                description=info.get("description"),
                type_name=info.get("type_name"),
                is_generator=True,
            )
            is_done["yield"] = True
        elif token.base == "raise":
            # "@raise:" without an exception name produces empty args.
            (type_name,) = token.args or (None,)
            meta_item = DocstringRaises(
                args=[token.key, *token.args],
                description=token.desc,
                type_name=type_name,
            )
        elif token.base == "meta":
            meta_item = DocstringMeta(
                args=[token.key, *token.args],
                description=token.desc,
            )
        else:
            # Only secondary tokens for an already-emitted target should
            # reach here; anything else indicates a parsing problem.
            arg_key = token.args[0] if token.args else token.base
            if not is_done.get(arg_key, False):
                msg = (
                    "Error building meta information. "
                    f"Encountered unexpected section {arg_key}."
                )
                raise ParseError(msg)
            continue  # don't append

        ret.meta.append(meta_item)

272 

273 

def parse(text: Optional[str]) -> Docstring:
    """Parse the epydoc-style docstring into its components.

    Parameters
    ----------
    text : Optional[str]
        docstring to parse

    Returns
    -------
    Docstring
        parsed docstring
    """
    ret = Docstring(style=DocstringStyle.EPYDOC)
    if not text:
        return ret

    text = inspect.cleandoc(text)
    # Everything before the first line starting with "@" is description;
    # the remainder holds the meta fields.
    first_field = re.search("^@", text, flags=re.M)
    if first_field is None:
        desc_chunk, meta_chunk = text, ""
    else:
        desc_chunk = text[: first_field.start()]
        meta_chunk = text[first_field.start() :]

    split_description(ret, desc_chunk)

    patterns = SectionPattern(
        param=re.compile(r"(param|keyword|type)(\s+[_A-z][_A-z0-9]*\??):"),
        raises=re.compile(r"(raise)(\s+[_A-z][_A-z0-9]*\??)?:"),
        returns=re.compile(r"(return|rtype|yield|ytype):"),
        meta=re.compile(r"([_A-z][_A-z0-9]+)((\s+[_A-z][_A-z0-9]*\??)*):"),
    )

    # Tokenize, then merge type/description info per target, then build
    # the meta entries on the docstring instance.
    stream = _tokenize(meta_chunk, patterns)
    params = _combine_params(stream)
    _add_meta_information(stream, params, ret)
    return ret

317 

318 

def compose(
    docstring: Docstring,
    rendering_style: RenderingStyle = RenderingStyle.COMPACT,
    indent: str = "    ",
) -> str:
    """Render a parsed docstring into epydoc-style docstring text.

    Parameters
    ----------
    docstring : Docstring
        parsed docstring representation
    rendering_style : RenderingStyle
        the style to render docstrings (Default value = RenderingStyle.COMPACT)
    indent : str
        the characters used as indentation in the
        docstring string (Default value = '    ')

    Returns
    -------
    str
        docstring text
    """

    def process_desc(desc: Optional[str], *, is_type: bool) -> str:
        """Format a (possibly multi-line) description for its field.

        Parameters
        ----------
        desc : Optional[str]
            Description text to format.
        is_type : bool
            Whether the text is type information.

        Returns
        -------
        str
            The rendered description, '' if there is none.
        """
        if not desc:
            return ""

        first, *rest = desc.splitlines()
        continuation = [indent + line for line in rest]
        # Expanded (and clean, for non-type text) styles start the
        # description on its own indented line.
        on_own_line = rendering_style == RenderingStyle.EXPANDED or (
            rendering_style == RenderingStyle.CLEAN and not is_type
        )
        head = "\n" + indent + first if on_own_line else f" {first}"
        return "\n".join([head, *continuation])

    parts: list[str] = []
    append_description(docstring, parts)

    for meta in docstring.meta:
        if isinstance(meta, DocstringParam):
            if meta.type_name:
                type_name = (
                    f"{meta.type_name}?" if meta.is_optional else meta.type_name
                )
                parts.append(
                    f"@type {meta.arg_name}:" + process_desc(type_name, is_type=True)
                )
            parts.append(
                f"@param {meta.arg_name}:"
                + process_desc(meta.description, is_type=False)
            )
        elif isinstance(meta, (DocstringReturns, DocstringYields)):
            if isinstance(meta, DocstringYields):
                arg_key, type_key = "yield", "ytype"
            else:
                arg_key, type_key = "return", "rtype"
            if meta.type_name:
                parts.append(
                    f"@{type_key}:" + process_desc(meta.type_name, is_type=True)
                )
            if meta.description:
                parts.append(
                    f"@{arg_key}:" + process_desc(meta.description, is_type=False)
                )
        elif isinstance(meta, DocstringRaises):
            prefix = f"@raise {meta.type_name}:" if meta.type_name else "@raise:"
            parts.append(prefix + process_desc(meta.description, is_type=False))
        else:
            parts.append(
                f'@{" ".join(meta.args)}:'
                + process_desc(meta.description, is_type=False)
            )
    return "\n".join(parts)