Coverage for pymend\pymend.py: 63%
210 statements
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
« prev ^ index » next coverage.py v7.3.2, created at 2024-04-20 19:09 +0200
1"""Module for general management of writing docstrings of multiple files."""
3import ast
4import platform
5import sys
6import tempfile
7import traceback
8from dataclasses import dataclass
9from pathlib import Path
10from typing import NamedTuple
12from click import echo
14import pymend.docstring_parser as dsp
16from .file_parser import AstAnalyzer
17from .output import diff
18from .report import Changed
19from .types import ElementDocstring, FixerSettings
21__author__ = "J-E. Nitschke"
22__copyright__ = "Copyright 2012-2021 A. Daouzli"
23__licence__ = "GPL3"
24__version__ = "1.0.10"
25__maintainer__ = "J-E. Nitschke"
@dataclass
class FileContentRepresentation:
    """Hold the contents of a file in two parallel forms.

    Attributes
    ----------
    lst : list[str]
        The contents as a list of individual lines.
    lines : str
        The contents as one single string.
    """

    lst: list[str]
    lines: str
class Styles(NamedTuple):
    """Pair of docstring styles: the one read from and the one written to.

    Attributes
    ----------
    input_style : dsp.DocstringStyle
        Style the existing docstrings are parsed as.
    output_style : dsp.DocstringStyle
        Style the generated docstrings are written in.
    """

    input_style: dsp.DocstringStyle
    output_style: dsp.DocstringStyle
43class PyComment:
44 """Manage several python scripts docstrings.
46 It is used to parse and rewrite in a Pythonic way all the
47 functions', methods' and classes' docstrings.
48 The changes are then provided in a patch file.
49 """
def __init__(
    self,
    input_file: Path,
    *,
    fixer_settings: FixerSettings,
    output_style: dsp.DocstringStyle = dsp.DocstringStyle.NUMPYDOC,
    input_style: dsp.DocstringStyle = dsp.DocstringStyle.AUTO,
    proceed_directly: bool = True,
) -> None:
    """Read the source file and store the configuration for fixing it.

    Parameters
    ----------
    input_file : Path
        Path of the file whose contents are read (as UTF-8) and processed.
    fixer_settings : FixerSettings
        Settings for which fixes should be performed.
    output_style : dsp.DocstringStyle
        Output style to use for the docstrings.
        (Default value = dsp.DocstringStyle.NUMPYDOC)
    input_style : dsp.DocstringStyle
        Input docstring style.
        Auto means that the style is detected automatically. Can cause
        issues when styles are mixed in examples or descriptions.
        (Default value = dsp.DocstringStyle.AUTO)
    proceed_directly : bool
        Whether the file should be processed right away as part of
        construction. (Default value = True)
    """
    self.input_file = input_file
    self.style = Styles(input_style=input_style, output_style=output_style)

    # Keep the original text both as one string and split into lines
    # (line endings preserved) so later steps can use whichever they need.
    source_text = self.input_file.read_text(encoding="utf-8")
    self._input = FileContentRepresentation(
        lst=source_text.splitlines(keepends=True), lines=source_text
    )
    self._output = FileContentRepresentation(lst=[], lines="")

    self.settings = fixer_settings
    self._changed = []
    self.docs_list = []
    self.fixed = False

    if not proceed_directly:
        return
    self.proceed()
def proceed(self) -> None:
    """Parse the input and compute the rewritten output.

    Runs the parsing step first and then the before/after computation,
    which also performs the stability and equality checks.
    """
    self._parse()
    self._compute_before_after()
def _parse(self) -> list[ElementDocstring]:
    """Analyze the input text and collect its documented elements.

    The elements found by the analyzer are stored in ``self.docs_list``
    sorted by their ``lines`` attribute.

    Returns
    -------
    list[ElementDocstring]
        The sorted list that was stored in ``self.docs_list``.
    """
    analyzer = AstAnalyzer(self._input.lines, settings=self.settings)
    elements = analyzer.parse_from_ast()
    self.docs_list = sorted(elements, key=lambda element: element.lines)
    return self.docs_list
def _compute_before_after(self) -> tuple[list[str], list[str], list[str]]:
    """Compute the rewritten file and verify it before marking it as fixed.

    The rewritten lines are stored as the output, then two checks run:
    a second pass over the output must not change anything, and the
    output must parse to the same AST as the input once docstrings are
    ignored. ``self.fixed`` is only set after both checks passed.

    Returns
    -------
    tuple[list[str], list[str], list[str]]
        Lines before, lines after, and the names of changed elements.
    """
    before, after, changed_names = self._get_changes()

    # Publish the result before running the checks; they read from it.
    self._output.lst = after
    self._output.lines = "".join(after)
    self._changed = changed_names

    self.assert_stability(before, after)
    self.assert_equality(self._input.lines, self._output.lines)
    self.fixed = True
    return before, after, changed_names
135 def _get_changes(self) -> tuple[list[str], list[str], list[str]]:
136 r"""Compute the list of lines before and after the proposed docstring changes.
138 Elements of the list already contain '\n' at the end.
140 Returns
141 -------
142 list_from : list[str]
143 Original file as list of lines.
144 list_to : list[str]
145 Modified content as list of lines.
146 list_changed : list[str]
147 List of names of elements that were changed.
149 Raises
150 ------
151 ValueError
152 If the endline of a docstring was parsed as None.
153 """
154 list_from = self._input.lst
155 list_to: list[str] = []
156 list_changed: list[str] = []
157 last = 0
158 # Loop over all found docstrings and replace the lines where they used to
159 # (or ought to) be with the new docstring.
160 for e in self.docs_list:
161 start, end = e.lines
162 if end is None: 162 ↛ 163line 162 didn't jump to line 163, because the condition on line 162 was never true
163 log = self.dump_to_file(
164 "INTERNAL ERROR: End of docstring is None."
165 " Not sure what to do with this yet.",
166 "Original file:.\n",
167 "".join(list_from),
168 "Problematic element:\n",
169 repr(e),
170 )
171 msg = (
172 "INTERNAL ERROR: End of docstring is None."
173 " Not sure what to do with this yet."
174 " Please report a bug on"
175 " https://github.com/JanEricNitschke/pymend/issues."
176 f" This diff might be helpful: {log}"
177 )
178 raise ValueError(msg)
179 # e.line are line number starting at one.
180 # We are now using them to index into a list starting at 0.
181 start, end = start - 1, end - 1
183 # Grab output docstring and add quotes, indentation and modifiers
184 in_docstring = e.docstring
185 # Do not need to worry about start being out of range
186 # if there was a docstring then it points to that.
187 # If there wasnt then there should still be at least one line
188 # after the function/class definition. Otherwise that would
189 # already have raised an error earlier.
190 old_line = list_from[start]
191 leading_whitespace = old_line[: -len(old_line.lstrip())]
192 trailing_comment = self._get_trailing_comment(list_from[end])
193 out_docstring = self._finalizes(
194 docstring=e.output_docstring(
195 output_style=self.style.output_style,
196 input_style=self.style.input_style,
197 settings=self.settings,
198 ),
199 indentation=leading_whitespace,
200 modifier=e.modifier,
201 trailing=trailing_comment,
202 )
203 # Check if the docstring changed and if so, add it to the list of changed
204 # We can not directly compare with the original out_docstring
205 # because that is missing indentation.
206 # And it is easiest to add the quotes, modifiers, trailings
207 # in one go with the indentation. So for this comparison we have to
208 # strip them away again.
209 if (
210 in_docstring
211 != out_docstring.strip()[
212 3 + len(e.modifier) : -(3 + len(trailing_comment))
213 ]
214 ):
215 list_changed.append(e.name)
217 # Add all the unchanged things between last and current docstring
218 list_to.extend(list_from[last:start])
219 # Add the new docstring
220 list_to.extend(out_docstring.splitlines(keepends=True))
221 # If there was no old docstring then we need to make sure we
222 # do not remove the content that was originally on the first line
223 # of element.
224 if not in_docstring:
225 list_to.append(old_line)
226 last = end + 1
227 # Add the rest of the file.
228 if last < len(list_from):
229 list_to.extend(list_from[last:])
230 return list_from, list_to, list_changed
232 def _get_trailing_comment(self, line: str) -> str:
233 """Grab any trailing comment that was potentially at the last line.
235 Parameters
236 ----------
237 line : str
238 The last line of the docstring.
240 Returns
241 -------
242 str
243 The trailing comment
244 """
245 # This might need some work in the future if there are both
246 # types in the same line.
247 line = line.strip()
248 closing_quotes = max(line.rfind('"""'), line.rfind("'''"))
249 if closing_quotes == -1:
250 return ""
251 return line[closing_quotes + 3 :]
253 def _finalizes(
254 self,
255 docstring: str,
256 quotes: str = '"""',
257 indentation: str = " ",
258 modifier: str = "",
259 trailing: str = "",
260 ) -> str:
261 r"""Add quotes, indentation and modifiers to the docstring.
263 Parameters
264 ----------
265 docstring : str
266 The raw docstring to complete.
267 quotes : str
268 Quotes to use for the docstring. (Default value = '\"\"\"')
269 indentation : str
270 How much to indent the docstring lines (Default value = ' ')
271 modifier : str
272 Modifier to put before the opening triple quotes.
273 Any combination of ("r", "f", "u") (Default value = '')
274 trailing : str
275 Any trailing comment was after the original docstring but on
276 the same line. (Default value = '')
278 Returns
279 -------
280 str
281 The properly indented docstring, wrapped in triple quotes
282 and preceded by the desired modifier.
283 """
284 split = f"{modifier}{quotes}{docstring}".splitlines()
285 # One line docstring get the quotes on the same line
286 if len(split) > 1:
287 split.append(quotes)
288 # Multi-line get them on the next
289 else:
290 split[0] += quotes
291 for index, line in enumerate(split):
292 if line.strip():
293 split[index] = indentation + line
294 return "\n".join(split) + trailing + "\n"
def assert_stability(self, src: list[str], dst: list[str]) -> None:
    """Check that a second pass over the produced output changes nothing.

    Parameters
    ----------
    src : list[str]
        List of lines from the input file.
    dst : list[str]
        List of lines that pymend produced.

    Raises
    ------
    AssertionError
        If a second run of pymend produces a different output than the first.
    """
    # pylint: disable=protected-access
    second_pass = self.__copy_from_output()
    second_pass._parse()  # noqa: SLF001
    before, after, changed = second_pass._get_changes()  # noqa: SLF001

    # Stable means: nothing reported as changed and the second pass
    # both started from and ended at exactly the first pass' output.
    if not changed and dst == before and dst == after:
        return

    log = self.dump_to_file(
        "INTERNAL ERROR: PyMend produced different "
        "docstrings on the second pass.\n"
        "Changed:\n",
        "\n".join(changed),
        "".join(diff(src, dst, "source", "first pass")),
        "".join(diff(dst, after, "first pass", "second pass")),
    )
    msg = (
        "INTERNAL ERROR:"
        " PyMend produced different docstrings on the second pass."
        " Please report a bug on"
        " https://github.com/JanEricNitschke/pymend/issues."
        f" This diff might be helpful: {log}"
    )
    raise AssertionError(msg)
333 def assert_equality(self, src_lines: str, dst_lines: str) -> None:
334 """Assert that running pymend does not change functional ast.
336 Done by comparing the asts for the original and produced outputs
337 while ignoring the docstrings themselves.
339 Parameters
340 ----------
341 src_lines : str
342 Lines from the input file.
343 dst_lines : str
344 Lines that pymend produced.
346 Raises
347 ------
348 AssertionError
349 If the content of the input file could not be parsed into an ast.
350 AssertionError
351 If the output from pymend could not be parsed into an ast.
352 AssertionError
353 If the output from pymend produces a different (reduced) ast
354 than the input.
355 """
356 try:
357 src_ast = ast.parse(src_lines)
358 except Exception as exc: # noqa: BLE001
359 msg = f"Failed to parse source file AST: {exc}\n"
360 raise AssertionError(msg) from exc
361 try:
362 dst_ast = ast.parse(dst_lines)
363 except Exception as exc: # noqa: BLE001
364 log = self.dump_to_file(
365 "INTERNAL ERROR: PyMend produced invalid code:\n",
366 "".join(traceback.format_tb(exc.__traceback__)),
367 dst_lines,
368 )
369 msg = (
370 f"INTERNAL ERROR: PyMend produced invalid code: {exc}. "
371 "Please report a bug on"
372 " https://github.com/JanEricNitschke/pymend/issues."
373 f" This invalid output might be helpful: {log}"
374 )
375 raise AssertionError(msg) from None
376 src_ast_list = self._stringify_ast(src_ast)
377 dst_ast_list = self._stringify_ast(dst_ast)
378 if src_ast_list != dst_ast_list: 378 ↛ 379line 378 didn't jump to line 379, because the condition on line 378 was never true
379 log = self.dump_to_file(
380 "INTERNAL ERROR: PyMend produced code "
381 "that is not equivalent to the source\n",
382 "".join(diff(src_ast_list, dst_ast_list, "src", "dst")),
383 )
384 msg = (
385 "INTERNAL ERROR: PyMend produced code that is not equivalent to the"
386 " source. Please report a bug on "
387 "https://github.com/JanEricNitschke/pymend/issues."
388 f" This diff might be helpful: {log}"
389 )
390 raise AssertionError(msg) from None
def __copy_from_output(self) -> "PyComment":
    """Create a new PyComment whose input is this instance's output.

    The new instance is created without running ``__init__``. It shares
    this instance's settings and styles, starts with an empty output and
    an empty element list, and gets a copy of the output lines as input.

    Returns
    -------
    'PyComment'
        The new instance, ready to be parsed.
    """
    # pylint: disable=protected-access
    clone = PyComment.__new__(PyComment)
    clone.settings = self.settings
    clone.style = self.style
    clone.docs_list = []
    # The line list is copied so that the clone does not share it.
    clone._input = FileContentRepresentation(  # noqa: SLF001
        self._output.lst.copy(), self._output.lines
    )
    clone._output = FileContentRepresentation([], "")  # noqa: SLF001
    return clone
418 def _strip_ast(self, ast_node: ast.AST) -> None:
419 """Remove all docstrings from the ast.
421 Parameters
422 ----------
423 ast_node : ast.AST
424 Node representing the full ast.
425 """
426 for node in ast.walk(ast_node):
427 # let's work only on functions & classes definitions
428 if not isinstance(
429 node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef, ast.Module)
430 ):
431 continue
433 if not node.body: 433 ↛ 434line 433 didn't jump to line 434, because the condition on line 433 was never true
434 continue
436 if not isinstance(first_element := node.body[0], ast.Expr):
437 continue
439 if not isinstance(docnode := first_element.value, ast.Constant):
440 continue
442 if not isinstance(docnode.value, str):
443 continue
445 node.body = node.body[1:]
447 def _stringify_ast(self, node: ast.AST) -> list[str]:
448 """Turn ast into string representation with all docstrings removed.
450 Parameters
451 ----------
452 node : ast.AST
453 Node to turn into a reduced string representation.
455 Returns
456 -------
457 list[str]
458 List of lines making up the reduced string representation.
459 """
460 self._strip_ast(node)
461 return ast.dump(node, indent=1).splitlines(keepends=True)
def dump_to_file(self, *output: str, ensure_final_newline: bool = True) -> str:
    """Write `output` to a new temporary file and return the file's path.

    The file is not deleted when it is closed.

    Parameters
    ----------
    *output : str
        Strings to write to the file, in order.
    ensure_final_newline : bool
        Whether to make sure that every non-empty dumped string
        ends in a new line. (Default value = True)

    Returns
    -------
    str
        Path to the produced temp file.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", prefix="blk_", suffix=".log", delete=False, encoding="utf8"
    ) as handle:
        for chunk in output:
            handle.write(chunk)
            # Empty chunks never get a newline of their own.
            missing_newline = bool(chunk) and not chunk.endswith("\n")
            if ensure_final_newline and missing_newline:
                handle.write("\n")
    return handle.name
def _docstring_diff(self) -> list[str]:
    """Diff the original file lines against the produced file lines.

    The two sides are labelled with the input file path prefixed by
    ``a/`` and ``b/`` respectively.

    Returns
    -------
    list[str]
        The resulting diff
    """
    before_label = f"a/{self.input_file}"
    after_label = f"b/{self.input_file}"
    return diff(self._input.lst, self._output.lst, before_label, after_label)
def output_patch(self) -> Changed:
    """Emit the patch if any element changed.

    The patch goes to stdout when the input file's name is "-" and to a
    patch file otherwise. Nothing is emitted when nothing changed.

    Returns
    -------
    Changed
        Whether there were any changes.
    """
    if not self.fixed:
        self.proceed()

    if not self._changed:
        return Changed.NO

    patch_lines = self._get_patch_lines()
    if self.input_file.name == "-":
        sys.stdout.writelines(patch_lines)
    else:
        self._write_patch_file(patch_lines)
    return Changed.YES
def output_fix(self) -> Changed:
    """Emit the fixed file contents.

    The contents go to stdout when the input file's name is "-".
    Otherwise the source file is overwritten, but only if the contents
    actually differ from the original.

    Returns
    -------
    Changed
        Whether there were any changes.

    Raises
    ------
    AssertionError
        If whether the text changed and whether any element is reported
        as changed do not agree with each other.
    """
    if not self.fixed:
        self.proceed()

    text_unchanged = self._input.lines == self._output.lines
    nothing_reported = not self._changed
    # Both views on "did anything change" have to agree.
    if text_unchanged != nothing_reported:
        log = self.dump_to_file(
            "INTERNAL ERROR: "
            "Elements having changed does not line up with list of changed "
            "elements.\n",
            "List of changed elements:\n",
            "\n".join(self._changed),
            "Diff\n",
            "".join(self._docstring_diff()),
        )
        msg = (
            "INTERNAL ERROR: "
            "Elements having changed does not line up with list of changed"
            " elements."
            " Please report a bug on"
            " https://github.com/JanEricNitschke/pymend/issues."
            f" This invalid output might be helpful: {log}"
        )
        raise AssertionError(msg)

    if self.input_file.name == "-":
        sys.stdout.writelines(self._output.lst)
    elif not text_unchanged:
        echo(
            "Modified docstrings of element"
            f'{"s" if len(self._changed) > 1 else ""} '
            f'({", ".join(self._changed)}) in file {self.input_file}.'
        )
        self._overwrite_source_file()

    return Changed.YES if self._changed else Changed.NO
def _get_patch_lines(self) -> list[str]:
    r"""Return the lines of the patch: a header followed by the diff.

    Returns
    -------
    list[str]
        A header line naming the Pymend version, followed by the lines
        of the diff between the original and the produced file.
    """
    patch = [f"# Patch generated by Pymend v{__version__}\n\n"]
    patch.extend(self._docstring_diff())
    return patch
def _write_patch_file(self, lines_to_write: list[str]) -> None:
    r"""Write `lines_to_write` to the patch file of the input file.

    The patch file is named after the input file's name with ".patch"
    appended. Only the name of the input file is used, not its directory.

    Parameters
    ----------
    lines_to_write : list[str]
        lines to write to the file - they should be \n terminated
    """
    # Change this if pathlib ever gets a `append_suffix` method
    # To Path(self.input_file).append_suffix(".patch")
    source_name = Path(self.input_file).name
    patch_path = Path(f"{source_name}.patch")
    with patch_path.open("w", encoding="utf-8") as patch_file:
        patch_file.writelines(lines_to_write)
603 def _overwrite_source_file(self) -> None:
604 r"""Overwrite the file with line_to_write.
606 Parameters
607 ----------
608 lines_to_write : list[str]
609 lines to write to the file - they should be \n terminated
610 """
611 tmp_filename = Path(f"{self.input_file}.writing")
612 ok = False
613 try:
614 with tmp_filename.open("w", encoding="utf-8") as file:
615 file.writelines(self._output.lines)
616 ok = True
617 finally:
618 if ok:
619 if platform.system() == "Windows":
620 self._windows_rename(tmp_filename)
621 else:
622 tmp_filename.rename(self.input_file)
623 else:
624 tmp_filename.unlink()
626 def _windows_rename(self, tmp_filename: Path) -> None:
627 """Workaround the fact that os.rename raises an OSError on Windows.
629 Parameters
630 ----------
631 tmp_filename : Path
632 The file to rename
633 """
634 input_file = Path(self.input_file)
635 if input_file.is_file(): 635 ↛ 637line 635 didn't jump to line 637, because the condition on line 635 was never false
636 input_file.unlink()
637 tmp_filename.rename(input_file)
def report_issues(self) -> tuple[int, str]:
    """Collect the issue reports of all elements in the file.

    Returns
    -------
    tuple[int, str]
        The number of elements that had issues as well as
        a string representation of those. ``(0, "")`` if no
        element had any issue.
    """
    element_reports = (elem.report_issues() for elem in self.docs_list)
    # Keep only the reports of elements that actually have issues.
    issues: list[str] = [
        report for n_issues, report in element_reports if n_issues
    ]
    if not issues:
        return 0, ""
    header = (
        f"{'*'*50}\nThe following issues were found in file {self.input_file}:\n"
    )
    return len(issues), header + "\n".join(issues)