Coverage for src/paperap/client.py: 47%

223 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-11 21:37 -0400

1""" 

2---------------------------------------------------------------------------- 

3 

4 METADATA: 

5 

6 File: client.py 

7 Project: paperap 

8 Created: 2025-03-04 

9 Version: 0.0.5 

10 Author: Jess Mann 

11 Email: jess@jmann.me 

12 Copyright (c) 2025 Jess Mann 

13 

14---------------------------------------------------------------------------- 

15 

16 LAST MODIFIED: 

17 

18 2025-03-04 By Jess Mann 

19 

20""" 

21 

22from __future__ import annotations 

23 

24import logging 

25from pathlib import Path 

26from string import Template 

27from typing import Any, Iterator, Literal, Union, Unpack, overload 

28 

29import requests 

30from yarl import URL 

31 

32from paperap.auth import AuthBase, BasicAuth, TokenAuth 

33from paperap.exceptions import ( 

34 APIError, 

35 AuthenticationError, 

36 BadResponseError, 

37 ConfigurationError, 

38 InsufficientPermissionError, 

39 PaperlessError, 

40 RequestError, 

41 ResourceNotFoundError, 

42 ResponseParsingError, 

43) 

44from paperap.plugin_manager import PluginConfig 

45from paperap.plugins.base import Plugin 

46from paperap.resources import ( 

47 CorrespondentResource, 

48 CustomFieldResource, 

49 DocumentNoteResource, 

50 DocumentResource, 

51 DocumentTypeResource, 

52 GroupResource, 

53 ProfileResource, 

54 SavedViewResource, 

55 ShareLinksResource, 

56 StoragePathResource, 

57 TagResource, 

58 TaskResource, 

59 UISettingsResource, 

60 UserResource, 

61 WorkflowActionResource, 

62 WorkflowResource, 

63 WorkflowTriggerResource, 

64) 

65from paperap.settings import Settings, SettingsArgs 

66from paperap.signals import registry 

67 

68logger = logging.getLogger(__name__) 

69 

70 

class PaperlessClient:
    """
    Client for interacting with the Paperless-NgX API.

    Args:
        settings: Settings object containing client configuration.

    Examples:
        ```python
        # Using token authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                token="your-token"
            )
        )

        # Using basic authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                username="user",
                password="pass"
            )
        )

        # Loading all settings from environment variables (e.g. PAPERLESS_TOKEN)
        client = PaperlessClient()

        # With context manager
        with PaperlessClient(...) as client:
            docs = client.documents.list()
        ```

    """

    settings: Settings
    auth: AuthBase
    session: requests.Session
    plugins: dict[str, Plugin]

    # Resources
    correspondents: CorrespondentResource
    custom_fields: CustomFieldResource
    document_types: DocumentTypeResource
    documents: DocumentResource
    document_notes: DocumentNoteResource
    groups: GroupResource
    profile: ProfileResource
    saved_views: SavedViewResource
    share_links: ShareLinksResource
    storage_paths: StoragePathResource
    tags: TagResource
    tasks: TaskResource
    ui_settings: UISettingsResource
    users: UserResource
    workflow_actions: WorkflowActionResource
    workflow_triggers: WorkflowTriggerResource
    workflows: WorkflowResource

    def __init__(self, settings: Settings | None = None, **kwargs: Unpack[SettingsArgs]) -> None:
        """
        Build the client: pick an auth scheme, open a session, and set up resources and plugins.

        Args:
            settings: Pre-built settings. When omitted, settings are created from ``kwargs``.
            **kwargs: Values forwarded to ``Settings`` when ``settings`` is not given.

        Raises:
            ValueError: If neither a token nor a username/password pair is configured.

        """
        if settings is None:
            # Any params not provided in kwargs will be loaded from env vars
            settings = Settings(**kwargs)  # type: ignore # base_url is a URL, but accepts str | URL

        self.settings = settings

        # A token takes precedence over username/password when both are configured.
        if self.settings.token:
            self.auth = TokenAuth(token=self.settings.token)
        elif self.settings.username and self.settings.password:
            self.auth = BasicAuth(username=self.settings.username, password=self.settings.password)
        else:
            raise ValueError("Provide a token, or a username and password")

        self.session = requests.Session()

        # Set default headers
        self.session.headers.update(
            {
                "Accept": "application/json; version=2",
                "Content-Type": "application/json",
            }
        )

        # Initialize resources
        self._init_resources()
        self._initialize_plugins()
        super().__init__()

    @property
    def base_url(self) -> URL:
        """Get the base URL."""
        return self.settings.base_url

    def __enter__(self) -> PaperlessClient:
        """Enter the context manager and return this client."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> bool | None:
        """Close the client on context exit; exceptions are not suppressed."""
        self.close()
        return None

    def _init_resources(self) -> None:
        """Initialize all API resources."""
        self.correspondents = CorrespondentResource(self)
        self.custom_fields = CustomFieldResource(self)
        self.document_types = DocumentTypeResource(self)
        self.documents = DocumentResource(self)
        self.document_notes = DocumentNoteResource(self)
        self.groups = GroupResource(self)
        self.profile = ProfileResource(self)
        self.saved_views = SavedViewResource(self)
        self.share_links = ShareLinksResource(self)
        self.storage_paths = StoragePathResource(self)
        self.tags = TagResource(self)
        self.tasks = TaskResource(self)
        self.ui_settings = UISettingsResource(self)
        self.users = UserResource(self)
        self.workflow_actions = WorkflowActionResource(self)
        self.workflow_triggers = WorkflowTriggerResource(self)
        self.workflows = WorkflowResource(self)

    def _initialize_plugins(self, plugin_config: PluginConfig | None = None) -> None:
        """
        Initialize plugins based on configuration.

        Args:
            plugin_config: Optional configuration dictionary for plugins. When omitted
                (or empty), a default configuration enabling ``TestDataCollector`` is used.

        """
        from paperap.plugin_manager import PluginManager  # type: ignore # pylint: disable=import-outside-toplevel

        # Create and configure the plugin manager
        self.plugin_manager = PluginManager()

        # Discover available plugins
        self.plugin_manager.discover_plugins()

        # Configure plugins
        default_config: PluginConfig = {
            "enabled_plugins": ["TestDataCollector"],
            "settings": {
                "TestDataCollector": {
                    "test_dir": str(Path(__file__).parent.parent.parent / "tests/sample_data"),
                },
            },
        }
        config = plugin_config or default_config
        self.plugin_manager.configure(config)

        # Initialize all enabled plugins
        self.plugins = self.plugin_manager.initialize_all_plugins(self)

    def _get_auth_params(self) -> dict[str, Any]:
        """Get authentication parameters for requests."""
        return self.auth.get_auth_params() if self.auth else {}

    def _get_headers(self) -> dict[str, str]:
        """Get headers for requests."""
        headers: dict[str, str] = {}

        if self.auth:
            headers.update(self.auth.get_auth_headers())

        return headers

    def close(self) -> None:
        """Close the client and release resources."""
        # hasattr guard: __init__ may have raised before the session was created.
        if hasattr(self, "session") and self.session:
            self.session.close()

    def _build_url(self, endpoint: str | URL | Template) -> str:
        """
        Resolve an endpoint into the absolute URL to request.

        Args:
            endpoint: Absolute URL, or a path relative to the base URL.

        Returns:
            The endpoint unchanged if it already starts with ``http``; otherwise the
            base URL joined to the endpoint with exactly one slash between them.

        """
        if isinstance(endpoint, Template):
            # str(Template) is the object repr, not the template text; render it instead.
            # safe_substitute() leaves any unresolved placeholders in place.
            rendered = endpoint.safe_substitute()
        else:
            rendered = str(endpoint)

        if rendered.startswith("http"):
            return rendered

        # Strip the base's trailing slash so the join never produces "//".
        return f"{str(self.base_url).rstrip('/')}/{rendered.lstrip('/')}"

    def _build_headers(self, *, uploading: bool = False) -> dict[str, Any]:
        """
        Combine session default headers with authentication headers.

        Args:
            uploading: True when the request carries files.

        Returns:
            Headers to pass to ``session.request``.

        """
        headers: dict[str, Any] = {**self.session.headers, **self._get_headers()}

        if uploading:
            # Merely dropping the key is not enough: requests merges session-level
            # headers back in. A value of None tells requests to omit the header for
            # this request, letting it generate the multipart Content-Type itself.
            headers["Content-Type"] = None

        return headers

    def _request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> requests.Response | None:
        """
        Make a request to the Paperless-NgX API.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL.
            params: Query parameters for the request.
            data: Request body data.
            files: Files to upload.

        Returns:
            Response object or None if no content.

        Raises:
            ValueError: If the API reports a missing required field (HTTP 400).
            AuthenticationError: If authentication fails.
            InsufficientPermissionError: If the API returns HTTP 403.
            ConfigurationError: If a 403 response mentions a required CSRF token.
            ResourceNotFoundError: If the requested resource doesn't exist.
            BadResponseError: For any other HTTP status >= 400.
            RequestError: If the connection or the request itself fails.

        """
        url = self._build_url(endpoint)

        logger.debug("Requesting %s %s", method, url)

        # Add headers from authentication and session defaults
        headers = self._build_headers(uploading=bool(files))

        try:
            # TODO: Temporary hack
            params = params.get("params", params) if params else params

            response = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                # JSON body for plain requests; form fields alongside multipart files.
                json=data if not files and data else None,
                data=data if files else None,
                files=files,
                timeout=self.settings.timeout,
                **self._get_auth_params(),
            )

            # Handle HTTP errors
            if response.status_code >= 400:
                self._handle_request_errors(response, url, params=params, data=data, files=files)

            # No content
            if response.status_code == 204:
                return None

            return response

        except requests.exceptions.ConnectionError as ce:
            logger.error(
                "Unable to connect to Paperless server: %s url %s, params %s, data %s, files %s",
                method,
                url,
                params,
                data,
                files,
            )
            raise RequestError(f"Connection error: {str(ce)}") from ce
        except requests.exceptions.RequestException as re:
            raise RequestError(f"Request failed: {str(re)}") from re

    def _handle_request_errors(
        self,
        response: requests.Response,
        url: str,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> None:
        """
        Translate an HTTP error response into the matching exception. Always raises.

        Args:
            response: The error response.
            url: The URL that was requested (used in error messages).
            params: Query parameters of the failed request (currently unused).
            data: Body of the failed request (currently unused).
            files: Files of the failed request (currently unused).

        Raises:
            ValueError: HTTP 400 whose message contains "This field is required".
            AuthenticationError: HTTP 401.
            ConfigurationError: HTTP 403 whose message mentions a required CSRF token.
            InsufficientPermissionError: Any other HTTP 403.
            ResourceNotFoundError: HTTP 404.
            BadResponseError: Every other status code.

        """
        error_message = self._extract_error_message(response)
        status = response.status_code

        if status == 400 and "This field is required" in error_message:
            raise ValueError(f"Required field missing: {error_message}")
        if status == 401:
            raise AuthenticationError(f"Authentication failed: {error_message}")
        if status == 403:
            if "this site requires a CSRF" in error_message:
                raise ConfigurationError(f"Response claims CSRF token required. Is the url correct? {url}")
            raise InsufficientPermissionError(f"Permission denied: {error_message}")
        if status == 404:
            raise ResourceNotFoundError(f"Paperless returned 404 for {url}")

        # All else...
        raise BadResponseError(error_message, status)

    @overload
    def _handle_response(
        self, response: requests.Response, *, json_response: Literal[True] = True
    ) -> dict[str, Any]: ...

    @overload
    def _handle_response(self, response: None, *, json_response: bool = True) -> None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: Literal[False]
    ) -> bytes | None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None: ...

    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None:
        """
        Convert a response into parsed JSON or raw bytes.

        Args:
            response: The response to parse, or None.
            json_response: Parse the body as JSON when True; return raw content otherwise.

        Returns:
            None when ``response`` is None, otherwise the parsed JSON or the raw content.

        Raises:
            ResponseParsingError: If JSON parsing was requested and failed.

        """
        # Compare against None explicitly: a requests.Response is falsy whenever its
        # status is not OK, which is unrelated to whether a response exists.
        if response is None:
            return None

        if not json_response:
            return response.content

        try:
            return response.json()
        except ValueError as e:
            logger.error(
                "Failed to parse JSON response: %s -> url %s -> content: %s", e, response.url, response.content
            )
            raise ResponseParsingError(f"Failed to parse JSON response: {str(e)} -> url {response.url}") from e

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: Literal[False],
    ) -> bytes | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None: ...

    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None:
        """
        Make a request to the Paperless-NgX API.

        Generally, this should be done using resources, not by calling this method directly.

        Signals are emitted through the registry before the request is sent, after the raw
        response arrives, and after parsing; the value returned by the final signal is
        what this method returns.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL.
            params: Query parameters for the request.
            data: Request body data.
            files: Files to upload.
            json_response: Whether to parse the response as JSON.

        Returns:
            Parsed response data, or None when the server returned no content.

        """
        kwargs = {
            "client": self,
            "method": method,
            "endpoint": endpoint,
            "params": params,
            "data": data,
            "files": files,
            "json_response": json_response,
        }

        registry.emit(
            "client.request:before", "Before a request is sent to the Paperless server", args=[self], kwargs=kwargs
        )

        response = self._request(method, endpoint, params=params, data=data, files=files)
        if response is None:
            return None

        registry.emit(
            "client.request__response",
            "After a response is received, before it is parsed",
            args=[response],
            kwargs=kwargs,
        )

        parsed_response = self._handle_response(response, json_response=json_response)
        parsed_response = registry.emit(
            "client.request:after",
            "After a request is parsed.",
            args=parsed_response,
            kwargs=kwargs,
        )

        return parsed_response

    @staticmethod
    def _stringify_error(value: Any) -> str:
        """Render one error value as text; lists become a comma-separated string."""
        if isinstance(value, str):
            return value
        if isinstance(value, list):
            return ", ".join(str(item) for item in value)
        return str(value)

    def _extract_error_message(self, response: requests.Response) -> str:
        """
        Extract error message from response.

        Args:
            response: The error response.

        Returns:
            The ``detail``, ``error`` or ``non_field_errors`` entry of a JSON object body
            (checked in that order); otherwise every ``key: value`` pair joined with
            ``"; "``. A non-object JSON body is returned as its string form. When the
            body is not JSON, the response text, or ``HTTP <status>`` if the text is empty.

        """
        try:
            error_data = response.json()
        except ValueError:
            return response.text or f"HTTP {response.status_code}"

        if not isinstance(error_data, dict):
            return str(error_data)

        # Try different possible error formats
        for key in ("detail", "error", "non_field_errors"):
            if key in error_data:
                return self._stringify_error(error_data[key])

        # Handle nested error messages
        return "; ".join(f"{key}: {self._stringify_error(value)}" for key, value in error_data.items())

    def generate_token(
        self,
        base_url: str,
        username: str,
        password: str,
        timeout: int | None = None,
    ) -> str:
        """
        Generate an API token using username and password.

        Args:
            base_url: The base URL of the Paperless-NgX instance. ``https://`` is
                prepended when no scheme is given.
            username: Username for authentication.
            password: Password for authentication.
            timeout: Request timeout in seconds. Defaults to the client's configured timeout.

        Returns:
            Generated API token.

        Raises:
            AuthenticationError: If authentication fails.
            RequestError: If the request fails or the server returns another HTTP error.
            ResponseParsingError: If the response does not contain a token.

        """
        if timeout is None:
            timeout = self.settings.timeout

        if not base_url.startswith(("http://", "https://")):
            base_url = f"https://{base_url}"

        url = f"{base_url.rstrip('/')}/api/token/"

        registry.emit(
            "client.generate_token__before",
            "Before a new token is generated",
            kwargs={"url": url, "username": username},
        )

        try:
            response = requests.post(
                url,
                json={"username": username, "password": password},
                headers={"Accept": "application/json"},
                timeout=timeout,
            )

            response.raise_for_status()
            data = response.json()

            registry.emit(
                "client.generate_token__after",
                "After a new token is generated",
                kwargs={"url": url, "username": username, "response": data},
            )

            # A non-object body cannot carry a token; report it as a parsing failure.
            if not isinstance(data, dict) or "token" not in data:
                raise ResponseParsingError("Token not found in response")

            return data["token"]
        except requests.exceptions.HTTPError as he:
            failed = he.response
            if failed is not None and failed.status_code == 401:
                raise AuthenticationError("Invalid username or password") from he

            error_message = str(he)
            if failed is not None:
                try:
                    error_data = failed.json()
                except ValueError:
                    pass
                else:
                    # Only an object body can carry a "detail" entry.
                    if isinstance(error_data, dict):
                        error_message = error_data.get("detail", error_message)

            raise RequestError(f"Failed to generate token: {error_message}") from he
        except requests.exceptions.RequestException as re:
            raise RequestError(f"Error while requesting a new token: {str(re)}") from re
        except (ValueError, KeyError) as ve:
            raise ResponseParsingError(f"Failed to parse response when generating token: {str(ve)}") from ve

    def get_statistics(self) -> dict[str, Any]:
        """
        Get system statistics.

        Returns:
            Dictionary containing system statistics.

        Raises:
            APIError: If the request yields no (or an empty) result.

        """
        if result := self.request("GET", "api/statistics/"):
            return result
        raise APIError("Failed to get statistics")

    def get_system_status(self) -> dict[str, Any]:
        """
        Get system status.

        Returns:
            Dictionary containing system status information.

        Raises:
            APIError: If the request yields no (or an empty) result.

        """
        if result := self.request("GET", "api/status/"):
            return result
        raise APIError("Failed to get system status")

    def get_config(self) -> dict[str, Any]:
        """
        Get system configuration.

        Returns:
            Dictionary containing system configuration.

        Raises:
            APIError: If the request yields no (or an empty) result.

        """
        if result := self.request("GET", "api/config/"):
            return result
        raise APIError("Failed to get system configuration")