Coverage for src/paperap/plugins/collect_test_data.py: 51%
112 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-11 21:37 -0400
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-11 21:37 -0400
1"""
2Usage example:
3 test_dir = Path(__file__).parent.parent.parent.parent / "tests/sample_data"
4 collector = TestDataCollector(client, test_dir)
6----------------------------------------------------------------------------
8 METADATA:
10 File: collect_test_data.py
11 Project: paperap
12 Created: 2025-03-04
13 Version: 0.0.5
14 Author: Jess Mann
15 Email: jess@jmann.me
16 Copyright (c) 2025 Jess Mann
18----------------------------------------------------------------------------
20 LAST MODIFIED:
22 2025-03-04 By Jess Mann
24"""
26from __future__ import annotations
28import datetime
29import json
30import logging
31import re
32from decimal import Decimal
33from pathlib import Path
34from typing import TYPE_CHECKING, Any, override
36from faker import Faker
38from paperap.models import BaseModel
39from paperap.plugins.base import Plugin
40from paperap.signals import SignalPriority, registry
42if TYPE_CHECKING:
43 from paperap.client import PaperlessClient
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches every character that is NOT an ASCII letter, digit, underscore or hyphen.
# Used to turn endpoint and parameter names into safe file-name fragments.
sanitize_pattern = re.compile(r"[^a-zA-Z0-9_-]")

# Response keys whose values may contain personal information. Values stored under
# these keys are replaced with generated dummy data before a response is saved.
# Each key is listed exactly once.
SANITIZE_KEYS = [
    "email",
    "first_name",
    "last_name",
    "name",
    "phone",
    "username",
    "content",
    "filename",
    "title",
    "slug",
    "original_file_name",
    "archived_file_name",
    "task_file_name",
]
class TestDataCollector(Plugin):
    """
    Plugin to collect test data from API responses.

    The signal handlers registered in `setup` write sanitized copies of API
    responses as JSON files into `test_dir`. A file that already exists is
    never overwritten, so each distinct file name is captured at most once.
    """

    name = "test_data_collector"
    description = "Collects sample data from API responses for testing purposes"
    version = "0.0.2"
    # Shared Faker instance used to generate replacement values for sensitive fields.
    fake = Faker()
    # Directory that receives the generated JSON files.
    test_dir: Path

    def __init__(self, client: "PaperlessClient", test_dir: Path | str | None = None, **kwargs: Any) -> None:
        """
        Create the collector and make sure the output directory exists.

        Args:
            client: Client instance handed on to `Plugin.__init__`.
            test_dir: Directory to write sample files to, as a `Path` or string.
                When omitted (or empty), the "test_dir" config entry is used if
                available, otherwise "tests/sample_data".
            **kwargs: Passed through unchanged to `Plugin.__init__`.

        """
        if not test_dir:
            # NOTE(review): `self.config` is presumably populated by `Plugin.__init__`, which has
            # not run yet at this point. Read it defensively so a missing attribute falls back to
            # the default directory instead of raising AttributeError - confirm against the base class.
            config = getattr(self, "config", None) or {}
            test_dir = config.get("test_dir", "tests/sample_data")

        # Path() accepts both strings and Path objects, so no separate isinstance check is needed.
        self.test_dir = Path(test_dir)
        self.test_dir.mkdir(parents=True, exist_ok=True)
        super().__init__(client, **kwargs)

    @override
    def setup(self) -> None:
        """Register signal handlers."""
        registry.connect("resource._handle_response:after", self.save_list_response, SignalPriority.LOW)
        registry.connect("resource._handle_results:before", self.save_first_item, SignalPriority.LOW)
        registry.connect("client.request:after", self.save_parsed_response, SignalPriority.LOW)

    @override
    def teardown(self) -> None:
        """Unregister signal handlers."""
        registry.disconnect("resource._handle_response:after", self.save_list_response)
        registry.disconnect("resource._handle_results:before", self.save_first_item)
        registry.disconnect("client.request:after", self.save_parsed_response)

    @staticmethod
    def _json_serializer(obj: Any) -> Any:
        """
        Serialize objects that are not natively serializable.

        Intended as the `default=` hook of `json.dump` / `json.dumps`.

        Args:
            obj: The object the JSON encoder could not handle.

        Returns:
            A JSON-compatible representation of `obj`.

        Raises:
            TypeError: If `obj` is of an unsupported type.

        """
        # datetime.datetime is a subclass of datetime.date, so this covers dates, datetimes and times.
        if isinstance(obj, (datetime.date, datetime.time)):
            return obj.isoformat()
        if isinstance(obj, Path):
            return str(obj)
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, BaseModel):
            return obj.to_dict()
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        if isinstance(obj, bytes):
            # Replace undecodable bytes rather than raising UnicodeDecodeError while saving sample data.
            return obj.decode("utf-8", errors="replace")
        raise TypeError(f"Type {type(obj).__name__} is not JSON serializable")

    def _sanitize_response(self, **response: Any) -> dict[str, Any]:
        """
        Sanitize the response data to replace any strings with potentially personal information with dummy data.

        Args:
            **response: The top-level key/value pairs of the response.

        Returns:
            A new dict with sensitive values replaced. The input is not modified.

        """
        sanitized = {key: self._sanitize_value_recursive(key, value) for key, value in response.items()}

        # "next" and "previous" are presumably pagination links containing the real server
        # address; replace the scheme and host with a placeholder domain.
        for link_key in ("next", "previous"):
            link = response.get(link_key)
            if isinstance(link, str):
                sanitized[link_key] = re.sub(r"https?://.*?/", "https://example.com/", link)

        return sanitized

    def _sanitize_value_recursive(self, key: str, value: Any) -> Any:
        """
        Recursively sanitize the value to replace any strings with potentially personal information with dummy data.

        Args:
            key: The key the value is stored under; decides whether it is sensitive.
            value: The value to sanitize.

        Returns:
            The sanitized value. Dicts and lists are rebuilt, other values are returned as-is
            unless they are sensitive strings.

        """
        if isinstance(value, dict):
            return {k: self._sanitize_value_recursive(k, v) for k, v in value.items()}

        if key in SANITIZE_KEYS:
            if isinstance(value, str):
                return self.fake.word()
            if isinstance(value, list):
                # Every element of a sensitive list is replaced, whatever its type.
                return [self.fake.word() for _ in value]
            return value

        if isinstance(value, list):
            # Descend into lists stored under non-sensitive keys (e.g. a list of result dicts)
            # so that sensitive fields nested inside them are sanitized as well.
            return [self._sanitize_value_recursive(key, item) for item in value]

        return value

    def save_response(self, filepath: Path, response: dict[str, Any], **kwargs: Any) -> None:
        """
        Save the response to a JSON file.

        The response is sanitized first. Nothing is written if `filepath` already exists.
        Failures are logged and swallowed.

        Args:
            filepath: Destination file.
            response: The response data to sanitize and save.
            **kwargs: Accepted for call compatibility; not used.

        """
        if filepath.exists():
            return

        try:
            sanitized = self._sanitize_response(**response)
            # Serialize fully before touching the file system: a serialization error must not leave
            # a truncated file behind, because the exists() guard above would then block any retry.
            payload = json.dumps(
                sanitized,
                indent=4,
                sort_keys=True,
                ensure_ascii=False,
                default=self._json_serializer,
            )
            # ensure_ascii=False may emit non-ASCII characters, so the encoding is fixed explicitly
            # rather than depending on the platform default.
            filepath.write_text(payload, encoding="utf-8")
        except (TypeError, ValueError, OverflowError, OSError) as e:
            # Don't allow the plugin to interfere with normal operations in the event of failure
            logger.error("Error saving response to file: %s", e)

    def save_list_response(self, sender: Any, response: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
        """
        Save the list response to a JSON file.

        Connected to the "resource._handle_response:after" signal. The file is named
        "<resource>_list.json", where <resource> is taken from the "resource" keyword argument.

        Args:
            sender: The signal sender; not used.
            response: The response data.
            **kwargs: Additional signal arguments; only "resource" is read.

        Returns:
            `response`, unchanged.

        """
        resource_name = kwargs.get("resource")
        if response and resource_name:
            self.save_response(self.test_dir / f"{resource_name}_list.json", response)

        return response

    def save_first_item(self, sender: Any, item: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
        """
        Save the first item from a list to a JSON file.

        Connected to the "resource._handle_results:before" signal. The file is named
        "<resource>_item.json", where <resource> is taken from the "resource" keyword argument.

        Args:
            sender: The signal sender; not used.
            item: The item data.
            **kwargs: Additional signal arguments; only "resource" is read.

        Returns:
            `item`, unchanged.

        """
        resource_name = kwargs.get("resource")
        if not resource_name:
            return item

        self.save_response(self.test_dir / f"{resource_name}_item.json", item)

        # Disable this handler after saving the first item
        # NOTE(review): this uses registry.disable (not disconnect); whether and when the handler
        # is re-enabled is not visible from this file - confirm against the signal registry.
        registry.disable("resource._handle_results:before", self.save_first_item)

        return item

    def save_parsed_response(
        self,
        parsed_response: dict[str, Any],
        method: str,
        params: dict[str, Any] | None,
        json_response: bool,
        endpoint: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Save the request data to a JSON file.

        Connects to client.request:after signal.

        Only JSON responses of requests that carried parameters are saved. The file name has the
        form "[<method>__]<resource>__<params>.json"; the method prefix is omitted for GET requests.

        Args:
            parsed_response: The parsed response data.
            method: The HTTP method of the request.
            params: The request parameters; only the keys are used for the file name.
            json_response: Whether the response was parsed as JSON.
            endpoint: The requested endpoint.
            **kwargs: Additional signal arguments; not used.

        Returns:
            `parsed_response`, unchanged.

        """
        if not json_response or not params:
            return parsed_response

        # Name the resource after the last two path segments of the endpoint. The joining "."
        # (like any other unsafe character) is then replaced with "_" by sanitize_pattern.
        resource_name = sanitize_pattern.sub("_", ".".join(endpoint.split("/")[-2:]))

        # Only parameter names go into the file name, never their values.
        params_str = sanitize_pattern.sub("_", "|".join(params))

        method_name = method.lower()
        filename_prefix = "" if method_name == "get" else f"{method_name}__"
        filename = f"{filename_prefix}{resource_name}__{params_str}.json"

        self.save_response(self.test_dir / filename, parsed_response)

        return parsed_response

    @override
    @classmethod
    def get_config_schema(cls) -> dict[str, Any]:
        """Define the configuration schema for this plugin."""
        return {
            "test_dir": {
                "type": "string",
                "description": "Directory to save test data files",
                "required": False,
            }
        }