Coverage for src/paperap/plugins/collect_test_data.py: 98%
121 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-18 12:26 -0400
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-18 12:26 -0400
1"""
3----------------------------------------------------------------------------
5 METADATA:
7 File: collect_test_data.py
8 Project: paperap
9 Created: 2025-03-04
10 Version: 0.0.7
11 Author: Jess Mann
12 Email: jess@jmann.me
13 Copyright (c) 2025 Jess Mann
15----------------------------------------------------------------------------
17 LAST MODIFIED:
19 2025-03-04 By Jess Mann
21"""
23from __future__ import annotations
25import datetime
26import json
27import logging
28import re
29from decimal import Decimal
30from pathlib import Path
31from typing import TYPE_CHECKING, Any, override
33from faker import Faker
34from pydantic import field_validator
36from paperap.exceptions import ModelValidationError
37from paperap.models import StandardModel
38from paperap.plugins.base import Plugin
39from paperap.signals import SignalPriority, registry
logger = logging.getLogger(__name__)

# Any character NOT matched by this class (letters, digits, "|", ".", "=", "_", "-")
# is replaced with "_" when a file name is derived from an endpoint and its params.
sanitize_pattern = re.compile(r"[^a-zA-Z0-9|.=_-]")

# Keys whose values are replaced with fake words before a response is written to disk.
# Kept as a list (order preserved, no duplicates) so existing importers see the same type.
SANITIZE_KEYS = [
    "email",
    "first_name",
    "last_name",
    "name",
    "phone",
    "username",
    "content",
    "filename",
    "title",
    "slug",
    "original_file_name",
    "archived_file_name",
    "task_file_name",
]
class SampleDataCollector(Plugin):
    """
    Plugin to collect test data from API responses.

    The plugin connects handlers to resource and client signals. Each handler
    sanitizes the response it receives (values stored under any key listed in
    ``SANITIZE_KEYS`` are replaced with fake words) and writes it as JSON into
    ``test_dir``. A file that already exists is never overwritten.
    """

    name = "test_data_collector"
    description = "Collects sample data from API responses for testing purposes"
    version = "0.0.3"
    fake: Faker = Faker()
    test_dir: Path = Path("tests/sample_data")

    @field_validator("test_dir", mode="before")
    @classmethod
    def validate_test_dir(cls, value: Any) -> Path:
        """
        Validate the test directory path and make sure the directory exists.

        Args:
            value: A string or ``Path``. Any falsy value selects the default
                ``tests/sample_data``.

        Returns:
            The directory as a ``Path``; it is created (with parents) if missing.

        Raises:
            ModelValidationError: If ``value`` is neither a string nor a ``Path``.
        """
        # Fall back to the default location when nothing usable was supplied
        if not value:
            value = Path("tests/sample_data")

        # Convert string path to Path object if needed
        if isinstance(value, str):
            value = Path(value)

        if not isinstance(value, Path):
            raise ModelValidationError("Test directory must be a string or Path object")

        value.mkdir(parents=True, exist_ok=True)
        return value

    @override
    def setup(self) -> None:
        """Register signal handlers."""
        registry.connect("resource._handle_response:after", self.save_list_response, SignalPriority.LOW)
        registry.connect("resource._handle_results:before", self.save_first_item, SignalPriority.LOW)
        registry.connect("client.request:after", self.save_parsed_response, SignalPriority.LOW)

    @override
    def teardown(self) -> None:
        """Unregister signal handlers."""
        registry.disconnect("resource._handle_response:after", self.save_list_response)
        registry.disconnect("resource._handle_results:before", self.save_first_item)
        registry.disconnect("client.request:after", self.save_parsed_response)

    @staticmethod
    def _json_serializer(obj: Any) -> Any:
        """
        Serialize objects that are not natively serializable.

        Passed as the ``default`` hook to ``json.dump`` by ``save_response``.

        Args:
            obj: The object the JSON encoder could not handle.

        Returns:
            A JSON-compatible representation of ``obj``.

        Raises:
            TypeError: If ``obj`` is of an unsupported type.
        """
        # datetime.datetime subclasses datetime.date, so it is covered here too
        if isinstance(obj, (datetime.date, datetime.time)):
            return obj.isoformat()
        if isinstance(obj, Path):
            return str(obj)
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, StandardModel):
            return obj.to_dict()
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        if isinstance(obj, bytes):
            # Replace undecodable bytes instead of raising UnicodeDecodeError,
            # so non-UTF-8 payloads cannot abort the dump.
            return obj.decode("utf-8", errors="replace")
        raise TypeError(f"Type {type(obj).__name__} is not JSON serializable")

    def _sanitize_response(self, **response: Any) -> dict[str, Any]:
        """
        Sanitize the response data to replace any strings with potentially personal information with dummy data.

        Args:
            **response: The top-level key/value pairs of the response.

        Returns:
            A new dict with sanitized values. The host of the "next" and
            "previous" links (when they are strings) is replaced with
            ``https://example.com/``.
        """
        sanitized = {key: self._sanitize_value_recursive(key, value) for key, value in response.items()}

        # Replace the domain of pagination links using regex
        for link_key in ("next", "previous"):
            link = response.get(link_key)
            if isinstance(link, str):
                sanitized[link_key] = re.sub(r"https?://.*?/", "https://example.com/", link)

        return sanitized

    def _sanitize_value_recursive(self, key: str, value: Any) -> Any:
        """
        Recursively sanitize the value to replace any strings with potentially personal information with dummy data.

        Args:
            key: The key under which ``value`` is stored.
            value: The value to sanitize.

        Returns:
            The sanitized value. Dicts are rebuilt key by key. Under a key in
            ``SANITIZE_KEYS`` a string becomes one fake word and a list becomes
            a list of fake words of the same length. Lists under other keys are
            sanitized element by element. Everything else is returned unchanged.
        """
        if isinstance(value, dict):
            return {k: self._sanitize_value_recursive(k, v) for k, v in value.items()}

        if key in SANITIZE_KEYS:
            if isinstance(value, str):
                return self.fake.word()
            if isinstance(value, list):
                return [self.fake.word() for _ in value]
            return value

        if isinstance(value, list):
            # Descend into lists (e.g. a list of result dicts) so sensitive keys
            # nested inside their elements are sanitized as well.
            return [self._sanitize_value_recursive(key, item) for item in value]

        return value

    def save_response(self, filepath: Path, response: dict[str, Any], **kwargs: Any) -> None:
        """
        Save the response to a JSON file.

        Does nothing when ``filepath`` already exists. Failures while
        sanitizing, serializing or writing are logged and not re-raised.

        Args:
            filepath: Destination file; parent directories are created as needed.
            response: The response data to sanitize and write.
            **kwargs: Accepted and ignored.
        """
        if filepath.exists():
            return

        try:
            response = self._sanitize_response(**response)
            filepath.parent.mkdir(parents=True, exist_ok=True)
            # ensure_ascii=False emits non-ASCII text verbatim, so the file
            # encoding must be explicit rather than locale-dependent.
            with filepath.open("w", encoding="utf-8") as f:
                json.dump(response, f, indent=4, sort_keys=True, ensure_ascii=False, default=self._json_serializer)
        except (TypeError, ValueError, OverflowError, OSError) as e:
            # Don't allow the plugin to interfere with normal operations in the event of failure
            logger.error("Error saving response to file (%s): %s", filepath.absolute(), e)

    def save_list_response(self, sender: Any, response: dict[str, Any] | None, **kwargs: Any) -> dict[str, Any] | None:
        """
        Save the list response to a JSON file.

        Args:
            sender: The signal sender (unused).
            response: The list response; nothing is saved when it is empty or None.
            **kwargs: Must contain a truthy "resource" for anything to be saved.

        Returns:
            ``response``, unchanged.
        """
        if not response or not (resource_name := kwargs.get("resource")):
            return response

        filepath = self.test_dir / f"{resource_name}_list.json"
        self.save_response(filepath, response)

        return response

    def save_first_item(self, sender: Any, item: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
        """
        Save the first item from a list to a JSON file.

        Args:
            sender: The signal sender (unused).
            item: The item to save.
            **kwargs: Must contain a truthy "resource" for anything to be saved.

        Returns:
            ``item``, unchanged.
        """
        resource_name = kwargs.get("resource")
        if not resource_name:
            return item

        filepath = self.test_dir / f"{resource_name}_item.json"
        self.save_response(filepath, item)

        # Disable this handler after saving the first item
        registry.disable("resource._handle_results:before", self.save_first_item)

        return item

    def save_parsed_response(
        self,
        parsed_response: dict[str, Any],
        method: str,
        params: dict[str, Any] | None,
        json_response: bool,
        endpoint: str,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Save the request data to a JSON file.

        Connects to client.request:after signal.

        Args:
            parsed_response: The parsed response to save.
            method: The HTTP method; non-GET methods are prefixed to the file name.
            params: The request parameters; nothing is saved when empty or None.
            json_response: Nothing is saved unless this is truthy.
            endpoint: The requested endpoint; converted with ``str()`` before use.
            **kwargs: Accepted and ignored.

        Returns:
            ``parsed_response``, unchanged.
        """
        # Coerce once so endpoint objects that are not plain strings are handled uniformly
        endpoint_str = str(endpoint)

        # If endpoint contains "example.com", we're testing, so skip it
        if "example.com" in endpoint_str:
            return parsed_response

        if not json_response or not params:
            return parsed_response

        # Keep only the last two "/"-separated pieces of the url
        resource_name = ".".join(endpoint_str.split("/")[-2:])

        params_str = "|".join(f"{k}={v}" for k, v in params.items())
        filename_prefix = ""
        if method.lower() != "get":
            filename_prefix = f"{method.lower()}__"
        filename = f"{filename_prefix}{resource_name}__{params_str}.json"
        filename = sanitize_pattern.sub("_", filename)

        filepath = self.test_dir / filename
        self.save_response(filepath, parsed_response)

        return parsed_response

    @override
    @classmethod
    def get_config_schema(cls) -> dict[str, Any]:
        """Define the configuration schema for this plugin."""
        return {
            "test_dir": {
                "type": str,
                "description": "Directory to save test data files",
                "required": False,
            }
        }