Coverage for src/paperap/client.py: 95%

232 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-20 13:17 -0400

1""" 

2---------------------------------------------------------------------------- 

3 

4 METADATA: 

5 

6 File: client.py 

7 Project: paperap 

8 Created: 2025-03-04 

9 Version: 0.0.8 

10 Author: Jess Mann 

11 Email: jess@jmann.me 

12 Copyright (c) 2025 Jess Mann 

13 

14---------------------------------------------------------------------------- 

15 

16 LAST MODIFIED: 

17 

18 2025-03-04 By Jess Mann 

19 

20""" 

21 

22from __future__ import annotations 

23 

24import logging 

25from pathlib import Path 

26from string import Template 

27from typing import TYPE_CHECKING, Any, Literal, Unpack, overload 

28 

29import requests 

30from pydantic import HttpUrl 

31 

32from paperap.auth import AuthBase, BasicAuth, TokenAuth 

33from paperap.exceptions import ( 

34 APIError, 

35 AuthenticationError, 

36 BadResponseError, 

37 ConfigurationError, 

38 InsufficientPermissionError, 

39 RequestError, 

40 ResourceNotFoundError, 

41 ResponseParsingError, 

42) 

43from paperap.resources import ( 

44 CorrespondentResource, 

45 CustomFieldResource, 

46 DocumentMetadataResource, 

47 DocumentNoteResource, 

48 DocumentResource, 

49 DocumentSuggestionsResource, 

50 DocumentTypeResource, 

51 DownloadedDocumentResource, 

52 GroupResource, 

53 ProfileResource, 

54 SavedViewResource, 

55 ShareLinksResource, 

56 StoragePathResource, 

57 TagResource, 

58 TaskResource, 

59 UISettingsResource, 

60 UserResource, 

61 WorkflowActionResource, 

62 WorkflowResource, 

63 WorkflowTriggerResource, 

64) 

65from paperap.settings import Settings, SettingsArgs 

66from paperap.signals import registry 

67 

68if TYPE_CHECKING: 

69 from paperap.plugins.base import Plugin 

70 from paperap.plugins.manager import PluginConfig 

71 

72logger = logging.getLogger(__name__) 

73 

74 

class PaperlessClient:
    """
    Client for interacting with the Paperless-NgX API.

    Args:
        settings: Settings object containing client configuration.

    Examples:
        ```python
        # Using token authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                token="40characterslong40characterslong40charac"
            )
        )

        # Using basic authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                username="user",
                password="pass"
            )
        )

        # Loading all settings from environment variables (e.g. PAPERLESS_TOKEN)
        client = PaperlessClient()

        # With context manager
        with PaperlessClient(...) as client:
            docs = client.documents.list()
        ```

    """

    settings: Settings
    auth: AuthBase
    session: requests.Session
    plugins: dict[str, "Plugin"]

    # Resources
    correspondents: CorrespondentResource
    custom_fields: CustomFieldResource
    document_types: DocumentTypeResource
    document_metadata: DocumentMetadataResource
    document_suggestions: DocumentSuggestionsResource
    downloaded_documents: DownloadedDocumentResource
    documents: DocumentResource
    document_notes: DocumentNoteResource
    groups: GroupResource
    profile: ProfileResource
    saved_views: SavedViewResource
    share_links: ShareLinksResource
    storage_paths: StoragePathResource
    tags: TagResource
    tasks: TaskResource
    ui_settings: UISettingsResource
    users: UserResource
    workflow_actions: WorkflowActionResource
    workflow_triggers: WorkflowTriggerResource
    workflows: WorkflowResource

    def __init__(self, settings: Settings | None = None, **kwargs: Unpack[SettingsArgs]) -> None:
        """
        Create the client, pick an auth scheme, and set up resources and plugins.

        Args:
            settings: Ready-made settings. When given, ``kwargs`` are not used.
            **kwargs: Passed to ``Settings`` when ``settings`` is not supplied.

        Raises:
            ValueError: If neither a token nor a username/password pair is configured.

        """
        if settings is None:
            # Any params not provided in kwargs will be loaded from env vars
            settings = Settings(**kwargs)

        self.settings = settings
        # Prioritize username/password over token if both are provided
        if self.settings.username and self.settings.password:
            self.auth = BasicAuth(username=self.settings.username, password=self.settings.password)
        elif self.settings.token:
            self.auth = TokenAuth(token=self.settings.token)
        else:
            raise ValueError("Provide a token, or a username and password")

        self.session = requests.Session()

        # Set default headers
        self.session.headers.update(
            {
                "Accept": "application/json; version=2",
                "Content-Type": "application/json",
            }
        )

        # Initialize resources
        self._init_resources()
        self._initialize_plugins()
        super().__init__()

    @property
    def base_url(self) -> HttpUrl:
        """Get the base URL."""
        return self.settings.base_url

    def __enter__(self) -> PaperlessClient:
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        """Close the client when the ``with`` block ends; exceptions are not suppressed."""
        self.close()

    def _init_resources(self) -> None:
        """Initialize all API resources."""
        self.correspondents = CorrespondentResource(self)
        self.custom_fields = CustomFieldResource(self)
        self.document_types = DocumentTypeResource(self)
        self.document_metadata = DocumentMetadataResource(self)
        self.document_suggestions = DocumentSuggestionsResource(self)
        self.downloaded_documents = DownloadedDocumentResource(self)
        self.documents = DocumentResource(self)
        self.document_notes = DocumentNoteResource(self)
        self.groups = GroupResource(self)
        self.profile = ProfileResource(self)
        self.saved_views = SavedViewResource(self)
        self.share_links = ShareLinksResource(self)
        self.storage_paths = StoragePathResource(self)
        self.tags = TagResource(self)
        self.tasks = TaskResource(self)
        self.ui_settings = UISettingsResource(self)
        self.users = UserResource(self)
        self.workflow_actions = WorkflowActionResource(self)
        self.workflow_triggers = WorkflowTriggerResource(self)
        self.workflows = WorkflowResource(self)

    def _initialize_plugins(self, plugin_config: "PluginConfig | None" = None) -> None:
        """
        Initialize plugins based on configuration.

        Args:
            plugin_config: Optional configuration dictionary for plugins. When it is
                missing (or falsy) a built-in default configuration is used.

        """
        from paperap.plugins.manager import PluginManager  # type: ignore # pylint: disable=import-outside-toplevel

        PluginManager.model_rebuild()

        # Create and configure the plugin manager
        self.manager = PluginManager(client=self)

        # Discover available plugins
        self.manager.discover_plugins()

        # Configure plugins
        # NOTE(review): the default enables SampleDataCollector and points it at a
        # "tests/sample_data" directory resolved relative to this file - confirm
        # this is intended outside of a source checkout.
        plugin_config = plugin_config or {
            "enabled_plugins": ["SampleDataCollector"],
            "settings": {
                "SampleDataCollector": {
                    "test_dir": str(Path(__file__).parents[3] / "tests/sample_data"),
                },
            },
        }
        self.manager.configure(plugin_config)

        # Initialize all enabled plugins
        self.plugins = self.manager.initialize_all_plugins()

    def _get_auth_params(self) -> dict[str, Any]:
        """Get authentication parameters for requests."""
        return self.auth.get_auth_params()

    def get_headers(self) -> dict[str, str]:
        """Get headers for requests."""
        headers: dict[str, str] = {}
        headers.update(self.auth.get_auth_headers())
        return headers

    def close(self) -> None:
        """Close the client and release resources."""
        # The session attribute is absent if __init__ failed before creating it.
        if hasattr(self, "session"):
            self.session.close()

    def _build_url(self, endpoint: str | HttpUrl | Template) -> str:
        """
        Resolve an endpoint to the URL that will be requested.

        Strings starting with ``http://`` or ``https://`` and ``HttpUrl`` objects
        are used as they are. Everything else is treated as a path relative to the
        base URL and joined with exactly one slash, so a base URL without a
        trailing slash (e.g. one that carries a path prefix) still works.
        """
        if isinstance(endpoint, str):
            if endpoint.startswith(("http://", "https://")):
                return endpoint
            path = endpoint
        elif isinstance(endpoint, Template):
            # The raw template text is used; no substitution happens here.
            path = endpoint.template
        elif isinstance(endpoint, HttpUrl):
            return str(endpoint)
        else:
            path = str(endpoint)

        return f"{str(self.base_url).rstrip('/')}/{path.lstrip('/')}"

    def _build_request_headers(self, *, has_files: bool) -> dict[str, str | None]:
        """
        Combine the session default headers with the authentication headers.

        For file uploads ``Content-Type`` is set to ``None`` rather than removed:
        requests merges session-level headers back into every request, and only a
        ``None`` value drops the session default so that the multipart content
        type (including its boundary) can be filled in by requests.
        """
        headers: dict[str, str | None] = {**self.session.headers, **self.get_headers()}
        if has_files:
            headers["Content-Type"] = None
        return headers

    def request_raw(
        self,
        method: str,
        endpoint: str | HttpUrl | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> requests.Response | None:
        """
        Make a request to the Paperless-NgX API.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL, or an absolute URL.
            params: Query parameters for the request.
            data: Request body data. Sent as JSON, or as form data when files are uploaded.
            files: Files to upload.

        Returns:
            Response object or None if no content (HTTP 204).

        Raises:
            AuthenticationError: If authentication fails.
            ResourceNotFoundError: If the requested resource doesn't exist.
            APIError: If the API returns an error.
            PaperapError: For other errors.

        """
        url = self._build_url(endpoint)

        logger.debug("Requesting %s %s", method, url)

        headers = self._build_request_headers(has_files=bool(files))

        # TODO: Temporary hack - callers sometimes wrap the query in {"params": {...}}
        if params:
            params = params.get("params", params)

        try:
            response = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                json=data if not files and data else None,
                data=data if files else None,
                files=files,
                timeout=self.settings.timeout,
                **self._get_auth_params(),
            )
        except requests.exceptions.ConnectionError as ce:
            logger.error(
                "Unable to connect to Paperless server: %s url %s, params %s, data %s, files %s",
                method,
                url,
                params,
                data,
                files,
            )
            raise RequestError(f"Connection error: {str(ce)}") from ce
        except requests.exceptions.RequestException as re:
            raise RequestError(f"Request failed: {str(re)}") from re

        # Handle HTTP errors (always raises)
        if response.status_code >= 400:
            self._handle_request_errors(response, url, params=params, data=data, files=files)

        # No content
        if response.status_code == 204:
            return None

        return response

    def _handle_request_errors(
        self,
        response: requests.Response,
        url: str,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> None:
        """
        Translate an HTTP error response into an exception. Always raises.

        Raises:
            ValueError: On a 400 that reports a missing required field.
            AuthenticationError: On 401.
            ConfigurationError: On a 403 that mentions a CSRF requirement.
            InsufficientPermissionError: On any other 403.
            ResourceNotFoundError: On 404.
            BadResponseError: For every other status code.

        """
        error_message = self._extract_error_message(response)
        status = response.status_code

        if status == 400 and "This field is required" in error_message:
            raise ValueError(f"Required field missing: {error_message}")
        if status == 401:
            raise AuthenticationError(f"Authentication failed: {error_message}")
        if status == 403:
            if "this site requires a CSRF" in error_message:
                raise ConfigurationError(f"Response claims CSRF token required. Is the url correct? {url}")
            raise InsufficientPermissionError(f"Permission denied: {error_message}")
        if status == 404:
            raise ResourceNotFoundError(f"Paperless returned 404 for {url}")

        # All else...
        raise BadResponseError(error_message, status)

    @overload
    def _handle_response(
        self, response: requests.Response, *, json_response: Literal[True] = True
    ) -> dict[str, Any]: ...

    @overload
    def _handle_response(self, response: None, *, json_response: bool = True) -> None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: Literal[False]
    ) -> bytes | None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None: ...

    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None:
        """
        Turn a response into parsed JSON or raw bytes.

        Args:
            response: The response to process, or None.
            json_response: Parse the body as JSON when True, return raw bytes otherwise.

        Returns:
            None when ``response`` is None, otherwise the parsed JSON or the raw content.

        Raises:
            ResponseParsingError: If JSON was requested and the body is not valid JSON.

        """
        # Compare with None explicitly: a requests.Response is falsy whenever its
        # status is not OK, which is not the same thing as "no response".
        if response is None:
            return None

        if not json_response:
            return response.content

        try:
            return response.json()
        except ValueError as e:
            url = getattr(response, "url", "unknown URL")
            logger.error("Failed to parse JSON response: %s -> url %s -> content: %s", e, url, response.content)
            raise ResponseParsingError(f"Failed to parse JSON response: {str(e)} -> url {url}") from e

    @overload
    def request(
        self,
        method: str,
        endpoint: str | HttpUrl | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | HttpUrl | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: Literal[False],
    ) -> bytes | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | HttpUrl | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None: ...

    def request(
        self,
        method: str,
        endpoint: str | HttpUrl | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None:
        """
        Make a request to the Paperless-NgX API.

        Generally, this should be done using resources, not by calling this method directly.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL.
            params: Query parameters for the request.
            data: Request body data.
            files: Files to upload.
            json_response: Whether to parse the response as JSON.

        Returns:
            Parsed response data, or None when the server returned no content.

        """
        kwargs = {
            "client": self,
            "method": method,
            "endpoint": endpoint,
            "params": params,
            "data": data,
            "files": files,
            "json_response": json_response,
        }

        registry.emit(
            "client.request:before", "Before a request is sent to the Paperless server", args=[self], kwargs=kwargs
        )

        response = self.request_raw(method, endpoint, params=params, data=data, files=files)
        if response is None:
            # No content: the response/after signals are not emitted in this case.
            return None

        registry.emit(
            "client.request__response",
            "After a response is received, before it is parsed",
            args=[response],
            kwargs=kwargs,
        )

        parsed_response = self._handle_response(response, json_response=json_response)
        # The value returned by the signal replaces the parsed response.
        parsed_response = registry.emit(
            "client.request:after",
            "After a request is parsed.",
            args=parsed_response,
            kwargs=kwargs,
        )

        return parsed_response

    @staticmethod
    def _join_messages(value: Any) -> str:
        """Render an error value as text; lists and tuples become a comma-separated string."""
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    def _extract_error_message(self, response: requests.Response) -> str:
        """
        Extract error message from response.

        Looks for the keys "detail", "error" and "non_field_errors" (in that order)
        in a JSON object body, and otherwise flattens all fields into
        "key: value; key: value". Bodies that are not JSON fall back to the response
        text, or to "HTTP <status>" when the text is empty. The result is always a
        string, whatever JSON type the server used for the message.
        """
        try:
            error_data = response.json()
        except ValueError:
            return response.text or f"HTTP {response.status_code}"

        if not isinstance(error_data, dict):
            return str(error_data)

        # Try different possible error formats
        for key in ("detail", "error", "non_field_errors"):
            if key in error_data:
                return self._join_messages(error_data[key])

        # Handle nested error messages
        return "; ".join(f"{key}: {self._join_messages(value)}" for key, value in error_data.items())

    def generate_token(
        self,
        base_url: str,
        username: str,
        password: str,
        timeout: int | None = None,
    ) -> str:
        """
        Generate an API token using username and password.

        Args:
            base_url: The base URL of the Paperless-NgX instance. "https://" is
                assumed when no scheme is given.
            username: Username for authentication.
            password: Password for authentication.
            timeout: Request timeout in seconds. Defaults to the client's configured timeout.

        Returns:
            Generated API token.

        Raises:
            AuthenticationError: If authentication fails.
            PaperapError: For other errors.

        """
        if timeout is None:
            timeout = self.settings.timeout

        if not base_url.startswith(("http://", "https://")):
            base_url = f"https://{base_url}"

        url = f"{base_url.rstrip('/')}/api/token/"

        registry.emit(
            "client.generate_token__before",
            "Before a new token is generated",
            kwargs={"url": url, "username": username},
        )

        try:
            response = requests.post(
                url,
                json={"username": username, "password": password},
                headers={"Accept": "application/json"},
                timeout=timeout,
            )

            response.raise_for_status()
            data = response.json()

            registry.emit(
                "client.generate_token__after",
                "After a new token is generated",
                kwargs={"url": url, "username": username, "response": data},
            )

            # The body may be valid JSON without being an object (null, list, string).
            if not isinstance(data, dict) or "token" not in data:
                raise ResponseParsingError("Token not found in response")

            return data["token"]
        except requests.exceptions.HTTPError as he:
            error_message = str(he)
            failed = he.response
            # Compare with None explicitly: an error response is falsy.
            if failed is not None:
                if failed.status_code == 401:
                    raise AuthenticationError("Invalid username or password") from he
                try:
                    error_data = failed.json()
                except ValueError:
                    # Body is not JSON; keep the HTTP error text
                    error_data = None
                if isinstance(error_data, dict):
                    error_message = error_data.get("detail", error_message)

            raise RequestError(f"Failed to generate token: {error_message}") from he
        except requests.exceptions.RequestException as re:
            raise RequestError(f"Error while requesting a new token: {str(re)}") from re
        except (ValueError, KeyError) as ve:
            raise ResponseParsingError(f"Failed to parse response when generating token: {str(ve)}") from ve

    def get_statistics(self) -> dict[str, Any]:
        """
        Get system statistics.

        Returns:
            Dictionary containing system statistics.

        Raises:
            APIError: If the request yields no (or an empty) result.

        """
        if result := self.request("GET", "api/statistics/"):
            return result
        raise APIError("Failed to get statistics")

    def get_system_status(self) -> dict[str, Any]:
        """
        Get system status.

        Returns:
            Dictionary containing system status information.

        Raises:
            APIError: If the request yields no (or an empty) result.

        """
        if result := self.request("GET", "api/status/"):
            return result
        raise APIError("Failed to get system status")

    def get_config(self) -> dict[str, Any]:
        """
        Get system configuration.

        Returns:
            Dictionary containing system configuration.

        Raises:
            APIError: If the request yields no (or an empty) result.

        """
        if result := self.request("GET", "api/config/"):
            return result
        raise APIError("Failed to get system configuration")