Coverage for lmcat\processors.py: 38%
82 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-29 16:57 -0700
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-29 16:57 -0700
1import json
2from typing import Callable, Sequence
3from pathlib import Path
# type defs
# ==================================================

# Registry keys are plain strings (the registered function's `__name__`).
ProcessorName = str
DeciderName = str

# A processor takes a path and returns a string; a decider takes a path and
# returns a bool.
ProcessorFunc = Callable[[Path], str]
DeciderFunc = Callable[[Path], bool]


# global dicts of processors and deciders
# ==================================================

# Filled in by `register_processor` / `register_decider`.
PROCESSORS: dict[ProcessorName, ProcessorFunc] = {}

DECIDERS: dict[DeciderName, DeciderFunc] = {}
24# register functions
25# ==================================================
def register_processor(func: ProcessorFunc) -> ProcessorFunc:
    """Store `func` in the global `PROCESSORS` dict under its `__name__`.

    The function is handed back untouched, so this works as a decorator.
    Registering a second function with the same name replaces the first.
    """
    key: ProcessorName = ProcessorName(func.__name__)
    PROCESSORS[key] = func
    return func
def register_decider(func: DeciderFunc) -> DeciderFunc:
    """Store `func` in the global `DECIDERS` dict under its `__name__`.

    The function is handed back untouched, so this works as a decorator.
    Registering a second function with the same name replaces the first.
    """
    key: DeciderName = DeciderName(func.__name__)
    DECIDERS[key] = func
    return func
40# default deciders
41# ==================================================
@register_decider
def is_over_10kb(path: Path) -> bool:
    """Check if file is over 10KB.

    # Parameters:
     - `path : Path`
       File whose on-disk size (`path.stat().st_size`) is inspected

    # Returns:
     - `bool`
       `True` when the file is strictly larger than 10 * 1024 bytes

    # Raises:
     - `OSError` : propagated from `path.stat()` (e.g. the file is missing)
    """
    # The threshold used to be `2**1` (two bytes), which flagged practically
    # every non-empty file; 10 KiB matches the function's name and docstring.
    size_limit_bytes: int = 10 * 1024
    return path.stat().st_size > size_limit_bytes
@register_decider
def is_documentation(path: Path) -> bool:
    """Check if file is documentation.

    The decision is based purely on the final suffix of `path`; the file is
    never opened. Matching ignores case, so `README.MD` counts as well.

    # Parameters:
     - `path : Path`
       Path whose suffix is examined

    # Returns:
     - `bool`
       `True` when the suffix is `.md`, `.rst` or `.txt`
    """
    # lower-case first so upper/mixed-case extensions are recognised too
    return path.suffix.lower() in {".md", ".rst", ".txt"}
54# default processors
55# ==================================================
@register_processor
def remove_comments(path: Path) -> str:
    """Remove single-line comments from code.

    A line is dropped when its first non-whitespace character is `#`.
    Comments that trail code on the same line are kept. The remaining lines
    are re-joined with `"\\n"`, so the original line endings and any final
    newline are not preserved.

    # Parameters:
     - `path : Path`
       File to read (decoded as UTF-8)

    # Returns:
     - `str`
       File content without the comment-only lines
    """
    # explicit UTF-8 keeps the result independent of the platform's default
    # encoding and matches the other processors in this module
    text: str = path.read_text(encoding="utf-8")
    kept: list[str] = [
        line for line in text.splitlines() if not line.lstrip().startswith("#")
    ]
    return "\n".join(kept)
@register_processor
def compress_whitespace(path: Path) -> str:
    """Compress multiple whitespace characters into single spaces.

    Every run of whitespace (spaces, tabs, newlines, ...) becomes one space,
    and leading/trailing whitespace disappears, so the result is a single
    line.

    # Parameters:
     - `path : Path`
       File to read (decoded as UTF-8)

    # Returns:
     - `str`
       The file's whitespace-separated tokens joined by single spaces
    """
    # explicit UTF-8 keeps the result independent of the platform's default
    # encoding and matches the other processors in this module
    text: str = path.read_text(encoding="utf-8")
    return " ".join(text.split())
@register_processor
def to_relative_path(path: Path) -> str:
    """Give back `path` itself, rendered as a forward-slash string.

    The file is not opened, and the path is not re-based against any
    directory: it is returned exactly as passed in, only with POSIX
    separators.
    """
    posix_form: str = path.as_posix()
    return posix_form
@register_processor
def ipynb_to_md(path: Path) -> str:
    """Render the cells of a notebook file as one markdown string.

    The file is parsed as UTF-8 JSON and its top-level `"cells"` list is
    walked in order:

    - `markdown` cells contribute their source followed by a blank line
    - `code` cells contribute their source wrapped in a ```` ```python ````
      fence followed by a blank line
    - cells of any other type are skipped

    # Parameters:
     - `path : Path`
       Path to the `.ipynb` file

    # Returns:
     - `str`
       Concatenated markdown text
    """
    notebook: dict = json.loads(path.read_text(encoding="utf-8"))

    chunks: list[str] = []
    for cell in notebook["cells"]:
        kind = cell["cell_type"]
        if kind == "markdown":
            chunks += [*cell["source"], "\n\n"]
        elif kind == "code":
            chunks += ["```python\n", *cell["source"], "\n```\n\n"]

    return "".join(chunks)
@register_processor
def makefile_recipes(path: Path) -> str:
    """Process a Makefile to show only target descriptions and basic structure.

    Preserves:
    - Comments above .PHONY targets up to first empty line
    - The .PHONY line and target line
    - First line after target if it starts with @echo

    Everything else is dropped. After each kept target a `...` placeholder
    line and an empty line are emitted.

    # Parameters:
     - `path : Path`
       Path to the Makefile to process (decoded as UTF-8)

    # Returns:
     - `str`
       Processed Makefile content
    """
    # explicit UTF-8 keeps the result independent of the platform's default
    # encoding and matches the other processors in this module
    lines: Sequence[str] = path.read_text(encoding="utf-8").splitlines()
    output_lines: list[str] = []

    # manual index: a matched target line is consumed together with its
    # .PHONY line, so the loop sometimes advances by two
    i: int = 0
    while i < len(lines):
        line: str = lines[i]

        # Look for .PHONY lines
        if line.strip().startswith(".PHONY:"):
            # Store target name for later matching (text between the first
            # and second colon)
            target_name: str = line.split(":")[1].strip()

            # Collect comments above until empty line; the scan runs upward,
            # so collect in reverse and flip once at the end
            comment_lines: list[str] = []
            look_back: int = i - 1
            while look_back >= 0 and lines[look_back].strip():
                if lines[look_back].strip().startswith("#"):
                    comment_lines.append(lines[look_back])
                look_back -= 1
            comment_lines.reverse()

            # Add collected comments
            output_lines.extend(comment_lines)

            # Add .PHONY line
            output_lines.append(line)

            # Add target line (should be next)
            # NOTE(review): the nesting of the @echo check and placeholder
            # lines under this branch is reconstructed -- confirm.
            if i + 1 < len(lines) and lines[i + 1].startswith(f"{target_name}:"):
                output_lines.append(lines[i + 1])
                i += 1

                # Check for @echo on next line
                if i + 1 < len(lines) and lines[i + 1].strip().startswith("@echo"):
                    output_lines.append(lines[i + 1])

                # placeholder for the rest of the recipe, then a separator
                output_lines.append(" ...")
                output_lines.append("")

        i += 1

    return "\n".join(output_lines)
@register_processor
def csv_preview_5_lines(path: Path) -> str:
    """Preview first few lines of a CSV file (up to 5)

    Reads only the first 1024 characters (the file is opened in text mode as
    UTF-8) and splits them into lines. Does not attempt to parse CSV
    structure. When the file holds more than those 1024 characters, a
    `... (truncated)` marker line is appended.

    # Parameters:
     - `path : Path`
       Path to CSV file

    # Returns:
     - `str`
       First few lines of the file, or an `Error previewing CSV: ...`
       message if the file could not be read
    """
    max_chars: int = 1024
    max_lines: int = 5
    try:
        with path.open("r", encoding="utf-8") as f:
            content: str = f.read(max_chars)
            # A full-length read alone does not prove truncation: a file of
            # exactly `max_chars` characters was read completely. Peek one
            # more character to find out whether anything was left behind.
            truncated: bool = len(content) == max_chars and bool(f.read(1))

        lines: list[str] = content.splitlines()[:max_lines]
        if truncated:
            lines.append("... (truncated)")

        return "\n".join(lines)
    except Exception as e:
        # deliberate best-effort: a preview failure is reported in the
        # returned text instead of being raised to the caller
        return f"Error previewing CSV: {e}"