lmcat.processors
"""Registries of path processors and deciders, plus the default implementations.

A *processor* maps a file path to the text that should stand in for that file;
a *decider* maps a file path to a yes/no answer. Both kinds are registered by
function name in the module-level `PROCESSORS` / `DECIDERS` dictionaries.
"""

import json
from typing import Callable, Sequence
from pathlib import Path


# type defs
# ==================================================

ProcessorName = str
DeciderName = str

ProcessorFunc = Callable[[Path], str]
DeciderFunc = Callable[[Path], bool]


# global dicts of processors and deciders
# ==================================================

PROCESSORS: dict[ProcessorName, ProcessorFunc] = dict()

DECIDERS: dict[DeciderName, DeciderFunc] = dict()


# register functions
# ==================================================


def register_processor(func: ProcessorFunc) -> ProcessorFunc:
    """Register a function as a path processor

    The function is stored in `PROCESSORS` under its `__name__`; registering a
    second function with the same name replaces the first.

    # Parameters:
     - `func : ProcessorFunc`
        function to register

    # Returns:
     - `ProcessorFunc`
        `func` itself, so this can be used as a decorator
    """
    PROCESSORS[ProcessorName(func.__name__)] = func
    return func


def register_decider(func: DeciderFunc) -> DeciderFunc:
    """Register a function as a decider

    The function is stored in `DECIDERS` under its `__name__`; registering a
    second function with the same name replaces the first.

    # Parameters:
     - `func : DeciderFunc`
        function to register

    # Returns:
     - `DeciderFunc`
        `func` itself, so this can be used as a decorator
    """
    DECIDERS[DeciderName(func.__name__)] = func
    return func


# default deciders
# ==================================================
@register_decider
def is_over_10kb(path: Path) -> bool:
    """Check if file is over 10KB (strictly more than 10 * 1024 bytes).

    # Parameters:
     - `path : Path`
        file to inspect; `path.stat()` raises `OSError` if it cannot be stat-ed

    # Returns:
     - `bool`
        `True` if the file size reported by `stat` exceeds 10240 bytes
    """
    # 10 KiB threshold; `2**1` would flag every file larger than two bytes
    return path.stat().st_size > 10 * 1024


@register_decider
def is_documentation(path: Path) -> bool:
    """Check if file is documentation.

    Only the final suffix is inspected, and the comparison is case-sensitive.

    # Parameters:
     - `path : Path`
        path whose suffix is checked (the file is not opened)

    # Returns:
     - `bool`
        `True` if the suffix is one of `.md`, `.rst`, `.txt`
    """
    return path.suffix in {".md", ".rst", ".txt"}


# default processors
# ==================================================


@register_processor
def remove_comments(path: Path) -> str:
    """Remove single-line comments from code.

    A line is dropped when its first non-whitespace character is `#`.
    Trailing comments on code lines are left untouched.

    # Parameters:
     - `path : Path`
        file to read (decoded with the platform default encoding)

    # Returns:
     - `str`
        remaining lines joined with `\\n`
    """
    lines = path.read_text().splitlines()
    processed = [line for line in lines if not line.strip().startswith("#")]
    return "\n".join(processed)


@register_processor
def compress_whitespace(path: Path) -> str:
    """Compress multiple whitespace characters into single spaces.

    Newlines count as whitespace, so the result is a single line with no
    leading or trailing whitespace.

    # Parameters:
     - `path : Path`
        file to read (decoded with the platform default encoding)

    # Returns:
     - `str`
        whitespace-separated tokens of the file joined by single spaces
    """
    return " ".join(path.read_text().split())


@register_processor
def to_relative_path(path: Path) -> str:
    """Return the path to the file as a string with forward slashes.

    The path is passed through `Path.as_posix()` unchanged; no relativization
    happens in this function.

    # Parameters:
     - `path : Path`
        path to convert

    # Returns:
     - `str`
        `path.as_posix()`
    """
    return path.as_posix()


@register_processor
def ipynb_to_md(path: Path) -> str:
    """Convert an IPython notebook to markdown.

    Markdown cells are emitted verbatim followed by a blank line; code cells
    are wrapped in a fenced block tagged `python`. Cells of any other type
    are skipped.

    # Parameters:
     - `path : Path`
        notebook file, read as UTF-8 JSON with a top-level `"cells"` list

    # Returns:
     - `str`
        concatenated markdown text

    # Raises:
     - `json.JSONDecodeError` : if the file is not valid JSON
     - `KeyError` : if `"cells"`, `"cell_type"` or `"source"` is missing
    """
    nb_contents: dict = json.loads(path.read_text(encoding="utf-8"))

    output: list[str] = []

    for cell in nb_contents["cells"]:
        if cell["cell_type"] == "markdown":
            # `extend` works for a list of lines and for a single string
            # (character by character); the final join yields the same text
            output.extend(cell["source"])
            output.append("\n\n")
        elif cell["cell_type"] == "code":
            output.append("```python\n")
            output.extend(cell["source"])
            output.append("\n```\n\n")

    return "".join(output)


@register_processor
def makefile_recipes(path: Path) -> str:
    """Process a Makefile to show only target descriptions and basic structure.

    Preserves:
    - Comments above .PHONY targets up to first empty line
    - The .PHONY line and target line
    - First line after target if it starts with @echo

    # Parameters:
     - `path : Path`
        Path to the Makefile to process

    # Returns:
     - `str`
        Processed Makefile content
    """
    lines: Sequence[str] = path.read_text().splitlines()
    output_lines: list[str] = []

    i: int = 0
    while i < len(lines):
        line: str = lines[i]

        # Look for .PHONY lines
        if line.strip().startswith(".PHONY:"):
            # Store target name for later matching.
            # NOTE: a `.PHONY:` line listing several targets yields a name
            # containing spaces, which will not match the target line below.
            target_name: str = line.split(":")[1].strip()

            # Collect comments above until empty line; non-comment lines in
            # between are skipped but do not stop the walk
            comment_lines: list[str] = []
            look_back: int = i - 1
            while look_back >= 0 and lines[look_back].strip():
                if lines[look_back].strip().startswith("#"):
                    comment_lines.insert(0, lines[look_back])
                look_back -= 1

            # Add collected comments
            output_lines.extend(comment_lines)

            # Add .PHONY line
            output_lines.append(line)

            # Add target line (should be next)
            if i + 1 < len(lines) and lines[i + 1].startswith(f"{target_name}:"):
                output_lines.append(lines[i + 1])
                i += 1

                # Check for @echo on next line
                # NOTE(review): this check and the placeholder are taken to
                # belong to the matched-target branch - confirm nesting
                if i + 1 < len(lines) and lines[i + 1].strip().startswith("@echo"):
                    output_lines.append(lines[i + 1])

                # NOTE(review): confirm the placeholder's leading whitespace
                # (a Makefile recipe line would normally start with a tab)
                output_lines.append(" ...")
                output_lines.append("")

        i += 1

    return "\n".join(output_lines)


@register_processor
def csv_preview_5_lines(path: Path) -> str:
    """Preview first few lines of a CSV file (up to 5)

    Reads only the first 1024 characters (the file is opened in text mode as
    UTF-8) and splits them into lines. Does not attempt to parse CSV structure.
    A `... (truncated)` marker is appended when the read filled the whole
    1024-character window.

    # Parameters:
     - `path : Path`
        Path to CSV file

    # Returns:
     - `str`
        First few lines of the file, or an `Error previewing CSV: ...`
        message if the file could not be read or decoded
    """
    # best-effort preview: any failure is reported as text instead of raising
    try:
        with path.open("r", encoding="utf-8") as f:
            content = f.read(1024)

        lines = content.splitlines()[:5]
        if len(content) == 1024:
            lines.append("... (truncated)")

        return "\n".join(lines)
    except Exception as e:
        return f"Error previewing CSV: {str(e)}"
ProcessorName =
<class 'str'>
DeciderName =
<class 'str'>
ProcessorFunc =
typing.Callable[[pathlib.Path], str]
DeciderFunc =
typing.Callable[[pathlib.Path], bool]
PROCESSORS: dict[str, typing.Callable[[pathlib.Path], str]] =
{'remove_comments': <function remove_comments>, 'compress_whitespace': <function compress_whitespace>, 'to_relative_path': <function to_relative_path>, 'ipynb_to_md': <function ipynb_to_md>, 'makefile_recipes': <function makefile_recipes>, 'csv_preview_5_lines': <function csv_preview_5_lines>}
DECIDERS: dict[str, typing.Callable[[pathlib.Path], bool]] =
{'is_over_10kb': <function is_over_10kb>, 'is_documentation': <function is_documentation>}
def
register_processor(func: Callable[[pathlib.Path], str]) -> Callable[[pathlib.Path], str]:
def register_processor(func: ProcessorFunc) -> ProcessorFunc:
    """Add `func` to the global `PROCESSORS` registry under its `__name__`.

    Usable as a decorator: the function is handed back unchanged. An existing
    entry with the same name is overwritten.

    # Parameters:
     - `func : ProcessorFunc`
        callable to register

    # Returns:
     - `ProcessorFunc`
        the very same `func` object
    """
    key: ProcessorName = ProcessorName(func.__name__)
    PROCESSORS[key] = func
    return func
Register a function as a path processor
def
register_decider(func: Callable[[pathlib.Path], bool]) -> Callable[[pathlib.Path], bool]:
def register_decider(func: DeciderFunc) -> DeciderFunc:
    """Add `func` to the global `DECIDERS` registry under its `__name__`.

    Usable as a decorator: the function is handed back unchanged. An existing
    entry with the same name is overwritten.

    # Parameters:
     - `func : DeciderFunc`
        callable to register

    # Returns:
     - `DeciderFunc`
        the very same `func` object
    """
    key: DeciderName = DeciderName(func.__name__)
    DECIDERS[key] = func
    return func
Register a function as a decider
@register_decider
def
is_over_10kb(path: pathlib.Path) -> bool:
@register_decider
def is_over_10kb(path: Path) -> bool:
    """Check if file is over 10KB (strictly more than 10 * 1024 bytes).

    # Parameters:
     - `path : Path`
        file to inspect; `path.stat()` raises `OSError` if it cannot be stat-ed

    # Returns:
     - `bool`
        `True` if the file size reported by `stat` exceeds 10240 bytes
    """
    # 10 KiB threshold; `2**1` would flag every file larger than two bytes
    return path.stat().st_size > 10 * 1024
Check if file is over 10KB.
@register_decider
def
is_documentation(path: pathlib.Path) -> bool:
@register_decider
def is_documentation(path: Path) -> bool:
    """Tell whether `path` carries a documentation-style suffix.

    Only `path.suffix` is examined (case-sensitively); the file is not opened.

    # Parameters:
     - `path : Path`
        path to classify

    # Returns:
     - `bool`
        `True` when the suffix is `.md`, `.rst` or `.txt`
    """
    doc_suffixes: tuple[str, ...] = (".md", ".rst", ".txt")
    return any(path.suffix == suffix for suffix in doc_suffixes)
Check if file is documentation.
@register_processor
def
remove_comments(path: pathlib.Path) -> str:
@register_processor
def remove_comments(path: Path) -> str:
    """Drop every line whose first non-whitespace character is `#`.

    Trailing comments on code lines are kept as they are.

    # Parameters:
     - `path : Path`
        file to read

    # Returns:
     - `str`
        the surviving lines joined with `\\n`
    """
    kept: list[str] = []
    for raw_line in path.read_text().splitlines():
        if raw_line.strip().startswith("#"):
            continue
        kept.append(raw_line)
    return "\n".join(kept)
Remove single-line comments from code.
@register_processor
def
compress_whitespace(path: pathlib.Path) -> str:
@register_processor
def compress_whitespace(path: Path) -> str:
    """Collapse every run of whitespace in the file into one space.

    Newlines count as whitespace, so the result is a single line without
    leading or trailing whitespace.

    # Parameters:
     - `path : Path`
        file to read

    # Returns:
     - `str`
        the file's whitespace-separated tokens joined by single spaces
    """
    text: str = path.read_text()
    tokens: list[str] = text.split()
    return " ".join(tokens)
Compress multiple whitespace characters into single spaces.
@register_processor
def
to_relative_path(path: pathlib.Path) -> str:
@register_processor
def to_relative_path(path: Path) -> str:
    """Give the path back as a forward-slash string.

    The value is exactly `path.as_posix()`; the path is not relativized here.

    # Parameters:
     - `path : Path`
        path to convert

    # Returns:
     - `str`
        POSIX-style string form of `path`
    """
    posix_form: str = path.as_posix()
    return posix_form
Return the path to the file as a POSIX-style string.
@register_processor
def
ipynb_to_md(path: pathlib.Path) -> str:
@register_processor
def ipynb_to_md(path: Path) -> str:
    """Render a Jupyter notebook file as markdown text.

    Markdown cells are copied through followed by a blank line; code cells are
    wrapped in a fenced block tagged `python`. Other cell types are ignored.

    # Parameters:
     - `path : Path`
        notebook file, read as UTF-8 JSON with a top-level `"cells"` list

    # Returns:
     - `str`
        the concatenated markdown
    """
    raw: str = path.read_text(encoding="utf-8")
    notebook: dict = json.loads(raw)

    chunks: list[str] = []
    for cell in notebook["cells"]:
        kind = cell["cell_type"]
        if kind == "markdown":
            chunks += [*cell["source"], "\n\n"]
        elif kind == "code":
            chunks += ["```python\n", *cell["source"], "\n```\n\n"]

    return "".join(chunks)
Convert an IPython notebook to markdown.
@register_processor
def
makefile_recipes(path: pathlib.Path) -> str:
@register_processor
def makefile_recipes(path: Path) -> str:
    """Process a Makefile to show only target descriptions and basic structure.

    Scans the file line by line for `.PHONY:` declarations and, for each one,
    emits a condensed summary of the target that follows it.

    Preserves:
    - Comments above .PHONY targets up to first empty line
    - The .PHONY line and target line
    - First line after target if it starts with @echo

    Everything else is dropped.

    # Parameters:
     - `path : Path`
        Path to the Makefile to process

    # Returns:
     - `str`
        Processed Makefile content
    """
    lines: Sequence[str] = path.read_text().splitlines()
    output_lines: list[str] = []

    # manual index: `i` is advanced an extra step when a target line is consumed
    i: int = 0
    while i < len(lines):
        line: str = lines[i]

        # Look for .PHONY lines
        if line.strip().startswith(".PHONY:"):
            # Store target name for later matching (text after the first colon).
            # NOTE: a `.PHONY:` line listing several targets yields a name
            # containing spaces, which will not match the target line below.
            target_name: str = line.split(":")[1].strip()

            # Collect comments above until empty line; non-comment lines in
            # between are skipped but do not stop the walk
            comment_lines: list[str] = []
            look_back: int = i - 1
            while look_back >= 0 and lines[look_back].strip():
                if lines[look_back].strip().startswith("#"):
                    # walking upwards, so prepend to keep file order
                    comment_lines.insert(0, lines[look_back])
                look_back -= 1

            # Add collected comments
            output_lines.extend(comment_lines)

            # Add .PHONY line
            output_lines.append(line)

            # Add target line (should be next)
            if i + 1 < len(lines) and lines[i + 1].startswith(f"{target_name}:"):
                output_lines.append(lines[i + 1])
                i += 1

                # Check for @echo on next line
                # NOTE(review): this check and the placeholder are taken to
                # belong to the matched-target branch - confirm nesting
                if i + 1 < len(lines) and lines[i + 1].strip().startswith("@echo"):
                    output_lines.append(lines[i + 1])

                # placeholder standing in for the rest of the recipe, then a
                # blank separator line
                # NOTE(review): confirm the placeholder's leading whitespace
                # (a Makefile recipe line would normally start with a tab)
                output_lines.append(" ...")
                output_lines.append("")

        i += 1

    return "\n".join(output_lines)
Process a Makefile to show only target descriptions and basic structure.
Preserves:
- Comments above .PHONY targets up to first empty line
- The .PHONY line and target line
- First line after target if it starts with @echo
Parameters:
path : Path
Path to the Makefile to process
Returns:
str
Processed Makefile content
@register_processor
def
csv_preview_5_lines(path: pathlib.Path) -> str:
@register_processor
def csv_preview_5_lines(path: Path) -> str:
    """Show up to the first 5 lines of a CSV file.

    Only one `read(1024)` call is made on the file (opened in text mode as
    UTF-8), and the result is split into lines without any CSV parsing. When
    that read returns exactly 1024 characters, a `... (truncated)` line is
    appended.

    # Parameters:
     - `path : Path`
        Path to CSV file

    # Returns:
     - `str`
        the preview lines joined with `\\n`, or an `Error previewing CSV: ...`
        message when any exception is raised along the way
    """
    read_limit: int = 1024
    max_lines: int = 5
    try:
        with path.open("r", encoding="utf-8") as handle:
            head: str = handle.read(read_limit)

        window_filled: bool = len(head) == read_limit
        marker: list[str] = ["... (truncated)"] if window_filled else []
        preview: list[str] = head.splitlines()[:max_lines] + marker

        return "\n".join(preview)
    except Exception as e:
        return f"Error previewing CSV: {str(e)}"
Preview first few lines of a CSV file (up to 5)
Reads only the first 1024 characters and splits them into lines. Does not attempt to parse CSV structure.
Parameters:
path : Path
Path to CSV file
Returns:
str
First few lines of the file