Coverage for src/paperap/client.py: 47%

223 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-12 23:40 -0400

1""" 

2---------------------------------------------------------------------------- 

3 

4 METADATA: 

5 

6 File: client.py 

7 Project: paperap 

8 Created: 2025-03-04 

9 Version: 0.0.7 

10 Author: Jess Mann 

11 Email: jess@jmann.me 

12 Copyright (c) 2025 Jess Mann 

13 

14---------------------------------------------------------------------------- 

15 

16 LAST MODIFIED: 

17 

18 2025-03-04 By Jess Mann 

19 

20""" 

21 

22from __future__ import annotations 

23 

24import logging 

25from pathlib import Path 

26from string import Template 

27from typing import Any, Iterator, Literal, Union, Unpack, overload 

28 

29import requests 

30from yarl import URL 

31 

32from paperap.auth import AuthBase, BasicAuth, TokenAuth 

33from paperap.exceptions import ( 

34 APIError, 

35 AuthenticationError, 

36 BadResponseError, 

37 ConfigurationError, 

38 InsufficientPermissionError, 

39 PaperlessError, 

40 RequestError, 

41 ResourceNotFoundError, 

42 ResponseParsingError, 

43) 

44from paperap.plugin_manager import PluginConfig 

45from paperap.plugins.base import Plugin 

46from paperap.resources import ( 

47 CorrespondentResource, 

48 CustomFieldResource, 

49 DocumentNoteResource, 

50 DocumentResource, 

51 DocumentTypeResource, 

52 GroupResource, 

53 ProfileResource, 

54 SavedViewResource, 

55 ShareLinksResource, 

56 StoragePathResource, 

57 TagResource, 

58 TaskResource, 

59 UISettingsResource, 

60 UserResource, 

61 WorkflowActionResource, 

62 WorkflowResource, 

63 WorkflowTriggerResource, 

64) 

65from paperap.settings import Settings, SettingsArgs 

66from paperap.signals import registry 

67 

# Module-level logger, named after this module so log output can be filtered per module.
logger = logging.getLogger(__name__)

69 

class PaperlessClient:
    """
    Client for interacting with the Paperless-NgX API.

    Args:
        settings: Settings object containing client configuration. When omitted,
            a ``Settings`` object is built from ``kwargs``.
        **kwargs: Individual settings values, only used when ``settings`` is omitted.

    Raises:
        ValueError: If neither a token nor a username/password pair is configured.

    Examples:
        ```python
        # Using token authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                token="your-token"
            )
        )

        # Using basic authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                username="user",
                password="pass"
            )
        )

        # Loading all settings from environment variables (e.g. PAPERLESS_TOKEN)
        client = PaperlessClient()

        # With context manager
        with PaperlessClient(...) as client:
            docs = client.documents.list()
        ```

    """

    settings: Settings
    auth: AuthBase
    session: requests.Session
    plugins: dict[str, Plugin]

    # Resources
    correspondents: CorrespondentResource
    custom_fields: CustomFieldResource
    document_types: DocumentTypeResource
    documents: DocumentResource
    document_notes: DocumentNoteResource
    groups: GroupResource
    profile: ProfileResource
    saved_views: SavedViewResource
    share_links: ShareLinksResource
    storage_paths: StoragePathResource
    tags: TagResource
    tasks: TaskResource
    ui_settings: UISettingsResource
    users: UserResource
    workflow_actions: WorkflowActionResource
    workflow_triggers: WorkflowTriggerResource
    workflows: WorkflowResource

    def __init__(self, settings: Settings | None = None, **kwargs: Unpack[SettingsArgs]) -> None:
        """Build the client: resolve settings, pick an auth scheme, open a session, set up resources and plugins."""
        if settings is None:
            # Any params not provided in kwargs will be loaded from env vars
            settings = Settings(**kwargs)  # type: ignore # base_url is a URL, but accepts str | URL

        self.settings = settings

        # A token takes precedence over username/password when both are configured.
        if self.settings.token:
            self.auth = TokenAuth(token=self.settings.token)
        elif self.settings.username and self.settings.password:
            self.auth = BasicAuth(username=self.settings.username, password=self.settings.password)
        else:
            raise ValueError("Provide a token, or a username and password")

        self.session = requests.Session()

        # Set default headers
        self.session.headers.update(
            {
                "Accept": "application/json; version=2",
                "Content-Type": "application/json",
            }
        )

        # Initialize resources
        self._init_resources()
        self._initialize_plugins()
        super().__init__()

    @property
    def base_url(self) -> URL:
        """Get the base URL."""
        return self.settings.base_url

    def __enter__(self) -> PaperlessClient:
        """Return the client itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: Any) -> bool | None:
        """Close the client on leaving the ``with`` block; exceptions are not suppressed."""
        self.close()

    def _init_resources(self) -> None:
        """Initialize all API resources."""
        # Each resource receives this client instance.
        self.correspondents = CorrespondentResource(self)
        self.custom_fields = CustomFieldResource(self)
        self.document_types = DocumentTypeResource(self)
        self.documents = DocumentResource(self)
        self.document_notes = DocumentNoteResource(self)
        self.groups = GroupResource(self)
        self.profile = ProfileResource(self)
        self.saved_views = SavedViewResource(self)
        self.share_links = ShareLinksResource(self)
        self.storage_paths = StoragePathResource(self)
        self.tags = TagResource(self)
        self.tasks = TaskResource(self)
        self.ui_settings = UISettingsResource(self)
        self.users = UserResource(self)
        self.workflow_actions = WorkflowActionResource(self)
        self.workflow_triggers = WorkflowTriggerResource(self)
        self.workflows = WorkflowResource(self)

    def _initialize_plugins(self, plugin_config: PluginConfig | None = None) -> None:
        """
        Initialize plugins based on configuration.

        Args:
            plugin_config: Optional configuration dictionary for plugins. When
                omitted (or empty), a built-in default configuration is used.

        """
        from paperap.plugin_manager import PluginManager  # type: ignore # pylint: disable=import-outside-toplevel

        # Create and configure the plugin manager
        self.plugin_manager = PluginManager()

        # Discover available plugins
        self.plugin_manager.discover_plugins()

        # Configure plugins
        # NOTE(review): the default enables "TestDataCollector" and points it at a
        # tests/sample_data directory three levels above this file -- confirm this
        # is intended outside of a development checkout.
        default_config: PluginConfig = {
            "enabled_plugins": ["TestDataCollector"],
            "settings": {
                "TestDataCollector": {
                    "test_dir": str(Path(__file__).parent.parent.parent / "tests/sample_data"),
                },
            },
        }
        config = plugin_config or default_config
        self.plugin_manager.configure(config)

        # Initialize all enabled plugins
        self.plugins = self.plugin_manager.initialize_all_plugins(self)

    def _get_auth_params(self) -> dict[str, Any]:
        """Get authentication parameters for requests."""
        return self.auth.get_auth_params() if self.auth else {}

    def _get_headers(self) -> dict[str, str]:
        """Get headers for requests."""
        headers: dict[str, str] = {}

        if self.auth:
            headers.update(self.auth.get_auth_headers())

        return headers

    def close(self) -> None:
        """Close the client and release resources."""
        # hasattr guard: __init__ can raise before the session attribute is assigned.
        if hasattr(self, "session") and self.session:
            self.session.close()

    def _build_url(self, endpoint: str | URL | Template) -> str:
        """
        Resolve an endpoint to the URL that will be requested.

        Args:
            endpoint: Absolute ``http(s)://`` URL, or a path relative to the base URL.

        Returns:
            The endpoint unchanged when it is already absolute, otherwise the
            base URL and the endpoint joined by exactly one slash.

        """
        if isinstance(endpoint, Template):
            # str() on a Template yields its object repr, not the template text.
            endpoint = endpoint.safe_substitute()
        endpoint = str(endpoint)

        if endpoint.startswith(("http://", "https://")):
            return endpoint

        # Strip the slashes on both sides so a trailing slash on base_url cannot produce "//".
        return f"{str(self.base_url).rstrip('/')}/{endpoint.lstrip('/')}"

    def _prepare_headers(self, *, uploading: bool) -> dict[str, str | None]:
        """
        Merge session defaults with authentication headers for a single request.

        Args:
            uploading: Whether the request carries files.

        Returns:
            Header mapping for the request. For uploads, ``Content-Type`` is set
            to ``None``.

        """
        headers: dict[str, str | None] = {**self.session.headers, **self._get_headers()}

        if uploading:
            # A None value tells requests to drop the session-level default when it
            # merges headers; merely omitting the key would let the session's
            # "application/json" be merged back in and override the multipart type.
            headers["Content-Type"] = None

        return headers

    def _request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> requests.Response | None:
        """
        Make a request to the Paperless-NgX API.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL.
            params: Query parameters for the request.
            data: Request body data.
            files: Files to upload.

        Returns:
            Response object or None if no content.

        Raises:
            ValueError: If the API answers 400 with a "This field is required" message.
            AuthenticationError: If authentication fails.
            ConfigurationError: If a 403 response mentions a required CSRF token.
            InsufficientPermissionError: For any other 403 response.
            ResourceNotFoundError: If the requested resource doesn't exist.
            BadResponseError: For any other response with status 400 or above.
            RequestError: If the request could not be completed.

        """
        url = self._build_url(endpoint)

        logger.debug("Requesting %s %s", method, url)

        # Add headers from authentication and session defaults
        headers = self._prepare_headers(uploading=bool(files))

        try:
            # TODO: Temporary hack
            params = params.get("params", params) if params else params

            # With files, data is sent as form fields; otherwise a non-empty data dict is sent as JSON.
            response = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                json=data if not files and data else None,
                data=data if files else None,
                files=files,
                timeout=self.settings.timeout,
                **self._get_auth_params(),
            )

            # Handle HTTP errors
            if response.status_code >= 400:
                self._handle_request_errors(response, url, params=params, data=data, files=files)

            # No content
            if response.status_code == 204:
                return None

            return response

        except requests.exceptions.ConnectionError as ce:
            logger.error(
                "Unable to connect to Paperless server: %s url %s, params %s, data %s, files %s",
                method,
                url,
                params,
                data,
                files,
            )
            raise RequestError(f"Connection error: {str(ce)}") from ce
        except requests.exceptions.RequestException as re:
            raise RequestError(f"Request failed: {str(re)}") from re

    def _handle_request_errors(
        self,
        response: requests.Response,
        url: str,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> None:
        """
        Translate an error response into an exception; this method always raises.

        Args:
            response: The response with a status code of 400 or above.
            url: The requested URL, used in error messages.
            params: Query parameters of the failed request (currently unused).
            data: Body data of the failed request (currently unused).
            files: Uploaded files of the failed request (currently unused).

        Raises:
            ValueError: On 400 when the message contains "This field is required".
            AuthenticationError: On 401.
            ConfigurationError: On 403 when the message mentions a required CSRF token.
            InsufficientPermissionError: On any other 403.
            ResourceNotFoundError: On 404.
            BadResponseError: For every other status code.

        """
        error_message = self._extract_error_message(response)
        status = response.status_code

        if status == 400 and "This field is required" in error_message:
            raise ValueError(f"Required field missing: {error_message}")
        if status == 401:
            raise AuthenticationError(f"Authentication failed: {error_message}")
        if status == 403:
            if "this site requires a CSRF" in error_message:
                raise ConfigurationError(f"Response claims CSRF token required. Is the url correct? {url}")
            raise InsufficientPermissionError(f"Permission denied: {error_message}")
        if status == 404:
            raise ResourceNotFoundError(f"Paperless returned 404 for {url}")

        # All else...
        raise BadResponseError(error_message, status)

    @overload
    def _handle_response(
        self, response: requests.Response, *, json_response: Literal[True] = True
    ) -> dict[str, Any]: ...

    @overload
    def _handle_response(self, response: None, *, json_response: bool = True) -> None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: Literal[False]
    ) -> bytes | None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None: ...

    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None:
        """
        Convert a response into parsed JSON or raw bytes.

        Args:
            response: The response to convert, or None.
            json_response: Parse the body as JSON when True, return raw content otherwise.

        Returns:
            None when there is no response, the decoded JSON body when
            ``json_response`` is True, else the raw response content.

        Raises:
            ResponseParsingError: If JSON was requested but the body is not valid JSON.

        """
        # Compare against None explicitly: the truthiness of a requests.Response
        # reflects its status code, not whether a response object exists.
        if response is None:
            return None

        if not json_response:
            return response.content

        try:
            return response.json()
        except ValueError as e:
            logger.error(
                "Failed to parse JSON response: %s -> url %s -> content: %s", e, response.url, response.content
            )
            raise ResponseParsingError(f"Failed to parse JSON response: {str(e)} -> url {response.url}") from e

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: Literal[False],
    ) -> bytes | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None: ...

    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None:
        """
        Make a request to the Paperless-NgX API.

        Generally, this should be done using resources, not by calling this method directly.

        Signals are emitted before the request is sent, after a response is
        received, and after the response is parsed. The two later signals are
        skipped when the request yields no response (e.g. 204 No Content).

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL.
            params: Query parameters for the request.
            data: Request body data.
            files: Files to upload.
            json_response: Whether to parse the response as JSON.

        Returns:
            Parsed response data, as returned by the "client.request:after" signal,
            or None when there was no response content.

        """
        kwargs = {
            "client": self,
            "method": method,
            "endpoint": endpoint,
            "params": params,
            "data": data,
            "files": files,
            "json_response": json_response,
        }

        registry.emit(
            "client.request:before", "Before a request is sent to the Paperless server", args=[self], kwargs=kwargs
        )

        response = self._request(method, endpoint, params=params, data=data, files=files)
        if response is None:
            return None

        registry.emit(
            "client.request__response",
            "After a response is received, before it is parsed",
            args=[response],
            kwargs=kwargs,
        )

        parsed_response = self._handle_response(response, json_response=json_response)

        # Whatever the registry returns for this signal becomes the final result.
        return registry.emit(
            "client.request:after",
            "After a request is parsed.",
            args=parsed_response,
            kwargs=kwargs,
        )

    @staticmethod
    def _stringify_error_value(value: Any) -> str:
        """Render one error payload value as text, joining list/tuple items with ', '."""
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    def _extract_error_message(self, response: requests.Response) -> str:
        """
        Extract error message from response.

        Args:
            response: The error response.

        Returns:
            Always a string: the "detail", "error" or "non_field_errors" entry of a
            JSON object body (checked in that order), otherwise every entry rendered
            as "key: value" joined by "; ". Non-object JSON is stringified. When the
            body is not JSON, the response text, or "HTTP <status>" if that is empty.

        """
        try:
            error_data = response.json()
        except ValueError:
            return response.text or f"HTTP {response.status_code}"

        if not isinstance(error_data, dict):
            return str(error_data)

        # Try different possible error formats. str() keeps the return type honest
        # when the server nests a structure under these keys.
        for key in ("detail", "error"):
            if key in error_data:
                return str(error_data[key])
        if "non_field_errors" in error_data:
            return self._stringify_error_value(error_data["non_field_errors"])

        # Handle nested error messages
        return "; ".join(f"{key}: {self._stringify_error_value(value)}" for key, value in error_data.items())

    def generate_token(
        self,
        base_url: str,
        username: str,
        password: str,
        timeout: int | None = None,
    ) -> str:
        """
        Generate an API token using username and password.

        Args:
            base_url: The base URL of the Paperless-NgX instance. ``https://`` is
                prepended when no http(s) scheme is present.
            username: Username for authentication.
            password: Password for authentication.
            timeout: Request timeout in seconds. Defaults to the client's configured timeout.

        Returns:
            Generated API token.

        Raises:
            AuthenticationError: If authentication fails.
            RequestError: If the request fails for any other reason.
            ResponseParsingError: If the response is not JSON or contains no token.

        """
        if timeout is None:
            timeout = self.settings.timeout

        if not base_url.startswith(("http://", "https://")):
            base_url = f"https://{base_url}"

        url = f"{base_url.rstrip('/')}/api/token/"

        registry.emit(
            "client.generate_token__before",
            "Before a new token is generated",
            kwargs={"url": url, "username": username},
        )

        try:
            response = requests.post(
                url,
                json={"username": username, "password": password},
                headers={"Accept": "application/json"},
                timeout=timeout,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as he:
            error_response = he.response
            if error_response is not None and error_response.status_code == 401:
                raise AuthenticationError("Invalid username or password") from he
            try:
                error_message = error_response.json().get("detail", str(he))
            except (ValueError, KeyError, AttributeError):
                # Body is not JSON, is not a JSON object, or there is no response at all.
                error_message = str(he)

            raise RequestError(f"Failed to generate token: {error_message}") from he
        except requests.exceptions.RequestException as re:
            raise RequestError(f"Error while requesting a new token: {str(re)}") from re

        # Parse outside the request try-block: the JSON decode error raised by
        # requests is also a RequestException, so it would otherwise be reported
        # as a request failure instead of a parsing failure.
        try:
            data = response.json()
        except ValueError as ve:
            raise ResponseParsingError(f"Failed to parse response when generating token: {str(ve)}") from ve

        registry.emit(
            "client.generate_token__after",
            "After a new token is generated",
            kwargs={"url": url, "username": username, "response": data},
        )

        if not isinstance(data, dict) or "token" not in data:
            raise ResponseParsingError("Token not found in response")

        return data["token"]

    def _get_or_raise(self, endpoint: str, failure_message: str) -> dict[str, Any]:
        """GET ``endpoint`` and return the parsed body, raising APIError when the result is empty or None."""
        if result := self.request("GET", endpoint):
            return result
        raise APIError(failure_message)

    def get_statistics(self) -> dict[str, Any]:
        """
        Get system statistics.

        Returns:
            Dictionary containing system statistics.

        Raises:
            APIError: If the request yields no data.

        """
        return self._get_or_raise("api/statistics/", "Failed to get statistics")

    def get_system_status(self) -> dict[str, Any]:
        """
        Get system status.

        Returns:
            Dictionary containing system status information.

        Raises:
            APIError: If the request yields no data.

        """
        return self._get_or_raise("api/status/", "Failed to get system status")

    def get_config(self) -> dict[str, Any]:
        """
        Get system configuration.

        Returns:
            Dictionary containing system configuration.

        Raises:
            APIError: If the request yields no data.

        """
        return self._get_or_raise("api/config/", "Failed to get system configuration")