Coverage for src/paperap/client.py: 94%

229 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-18 12:26 -0400

1""" 

2---------------------------------------------------------------------------- 

3 

4 METADATA: 

5 

6 File: client.py 

7 Project: paperap 

8 Created: 2025-03-04 

9 Version: 0.0.8 

10 Author: Jess Mann 

11 Email: jess@jmann.me 

12 Copyright (c) 2025 Jess Mann 

13 

14---------------------------------------------------------------------------- 

15 

16 LAST MODIFIED: 

17 

18 2025-03-04 By Jess Mann 

19 

20""" 

21 

22from __future__ import annotations 

23 

24import logging 

25from pathlib import Path 

26from string import Template 

27from typing import TYPE_CHECKING, Any, Iterator, Literal, Union, Unpack, overload 

28 

29import requests 

30from yarl import URL 

31 

32from paperap.auth import AuthBase, BasicAuth, TokenAuth 

33from paperap.exceptions import ( 

34 APIError, 

35 AuthenticationError, 

36 BadResponseError, 

37 ConfigurationError, 

38 InsufficientPermissionError, 

39 PaperlessError, 

40 RequestError, 

41 ResourceNotFoundError, 

42 ResponseParsingError, 

43) 

44from paperap.resources import ( 

45 CorrespondentResource, 

46 CustomFieldResource, 

47 DocumentNoteResource, 

48 DocumentResource, 

49 DocumentTypeResource, 

50 GroupResource, 

51 ProfileResource, 

52 SavedViewResource, 

53 ShareLinksResource, 

54 StoragePathResource, 

55 TagResource, 

56 TaskResource, 

57 UISettingsResource, 

58 UserResource, 

59 WorkflowActionResource, 

60 WorkflowResource, 

61 WorkflowTriggerResource, 

62) 

63from paperap.settings import Settings, SettingsArgs 

64from paperap.signals import registry 

65 

66if TYPE_CHECKING: 

67 from paperap.plugins.base import Plugin 

68 from paperap.plugins.manager import PluginConfig 

69 

70logger = logging.getLogger(__name__) 

71 

72 

class PaperlessClient:
    """
    Client for interacting with the Paperless-NgX API.

    Args:
        settings: Settings object containing client configuration. When omitted
            (or falsy), a Settings object is built from ``kwargs``.
        **kwargs: Individual settings, used only when ``settings`` is not given.
            Any setting not supplied here is loaded from environment variables.

    Raises:
        ValueError: If neither a token nor a username/password pair is configured.

    Examples:
        ```python
        # Using token authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                token="40characterslong40characterslong40charac"
            )
        )

        # Using basic authentication
        client = PaperlessClient(
            Settings(
                base_url="https://paperless.example.com",
                username="user",
                password="pass"
            )
        )

        # Loading all settings from environment variables (e.g. PAPERLESS_TOKEN)
        client = PaperlessClient()

        # With context manager
        with PaperlessClient(...) as client:
            docs = client.documents.list()
        ```

    """

    settings: Settings
    auth: AuthBase
    session: requests.Session
    plugins: dict[str, "Plugin"]

    # Resources
    correspondents: CorrespondentResource
    custom_fields: CustomFieldResource
    document_types: DocumentTypeResource
    documents: DocumentResource
    document_notes: DocumentNoteResource
    groups: GroupResource
    profile: ProfileResource
    saved_views: SavedViewResource
    share_links: ShareLinksResource
    storage_paths: StoragePathResource
    tags: TagResource
    tasks: TaskResource
    ui_settings: UISettingsResource
    users: UserResource
    workflow_actions: WorkflowActionResource
    workflow_triggers: WorkflowTriggerResource
    workflows: WorkflowResource

    def __init__(self, settings: Settings | None = None, **kwargs: Unpack[SettingsArgs]) -> None:
        if not settings:
            # Any params not provided in kwargs will be loaded from env vars
            settings = Settings(**kwargs)  # type: ignore # base_url is a URL, but accepts str | URL

        self.settings = settings
        # Prioritize username/password over token if both are provided
        if self.settings.username and self.settings.password:
            self.auth = BasicAuth(username=self.settings.username, password=self.settings.password)
        elif self.settings.token:
            self.auth = TokenAuth(token=self.settings.token)
        else:
            raise ValueError("Provide a token, or a username and password")

        self.session = requests.Session()

        # Set default headers
        self.session.headers.update(
            {
                "Accept": "application/json; version=2",
                "Content-Type": "application/json",
            }
        )

        # Initialize resources
        self._init_resources()
        self._initialize_plugins()
        super().__init__()

    @property
    def base_url(self) -> URL:
        """Get the base URL."""
        return self.settings.base_url

    def __enter__(self) -> PaperlessClient:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        # Always release the session; exceptions are not suppressed.
        self.close()

    def _init_resources(self) -> None:
        """Initialize all API resources, each bound to this client."""
        self.correspondents = CorrespondentResource(self)
        self.custom_fields = CustomFieldResource(self)
        self.document_types = DocumentTypeResource(self)
        self.documents = DocumentResource(self)
        self.document_notes = DocumentNoteResource(self)
        self.groups = GroupResource(self)
        self.profile = ProfileResource(self)
        self.saved_views = SavedViewResource(self)
        self.share_links = ShareLinksResource(self)
        self.storage_paths = StoragePathResource(self)
        self.tags = TagResource(self)
        self.tasks = TaskResource(self)
        self.ui_settings = UISettingsResource(self)
        self.users = UserResource(self)
        self.workflow_actions = WorkflowActionResource(self)
        self.workflow_triggers = WorkflowTriggerResource(self)
        self.workflows = WorkflowResource(self)

    def _initialize_plugins(self, plugin_config: "PluginConfig | None" = None) -> None:
        """
        Initialize plugins based on configuration.

        Args:
            plugin_config: Optional configuration dictionary for plugins. When
                omitted, a default configuration enabling ``SampleDataCollector``
                is used.

        """
        from paperap.plugins.manager import PluginManager  # type: ignore # pylint: disable=import-outside-toplevel

        PluginManager.model_rebuild()

        # Create and configure the plugin manager
        self.manager = PluginManager(client=self)

        # Discover available plugins
        self.manager.discover_plugins()

        # Configure plugins
        plugin_config = plugin_config or {
            "enabled_plugins": ["SampleDataCollector"],
            "settings": {
                "SampleDataCollector": {
                    "test_dir": str(Path(__file__).parent.parent / "tests/sample_data"),
                },
            },
        }
        self.manager.configure(plugin_config)

        # Initialize all enabled plugins
        self.plugins = self.manager.initialize_all_plugins()

    def _get_auth_params(self) -> dict[str, Any]:
        """Get authentication parameters (extra keyword arguments) for requests."""
        return self.auth.get_auth_params() if self.auth else {}

    def _get_headers(self) -> dict[str, str]:
        """Get authentication headers for requests."""
        headers: dict[str, str] = {}

        if self.auth:
            headers.update(self.auth.get_auth_headers())

        return headers

    def close(self) -> None:
        """Close the client and release resources."""
        # getattr guards against __init__ having failed before the session was created.
        session = getattr(self, "session", None)
        if session:
            session.close()

    def _resolve_url(self, endpoint: str | URL | Template) -> str:
        """
        Turn an endpoint into the full URL string to request.

        Absolute endpoints (``http://`` / ``https://`` strings, or absolute URL
        objects) are used as-is. Anything else is joined onto the base URL with
        exactly one slash between the two parts.
        """
        if isinstance(endpoint, str):
            # Require a real scheme prefix so relative paths that merely begin
            # with the letters "http" are not mistaken for absolute URLs.
            if endpoint.startswith(("http://", "https://")):
                return endpoint
            path = endpoint
        elif isinstance(endpoint, Template):
            # The raw template text is used; no substitution happens here.
            path = endpoint.template
        elif isinstance(endpoint, URL) and endpoint.is_absolute():
            return str(endpoint)
        else:
            path = str(endpoint)

        return f"{str(self.base_url).rstrip('/')}/{path.lstrip('/')}"

    def _build_request_headers(self, files: dict[str, Any] | None = None) -> dict[str, str | None]:
        """
        Combine session default headers with authentication headers.

        When files are uploaded, Content-Type is mapped to None. requests merges
        per-request headers over the session headers and drops keys whose value
        is None, so this is what actually suppresses the session-level JSON
        Content-Type and lets requests generate the multipart one. Merely
        omitting the key would leave the session default in place.
        """
        headers: dict[str, str | None] = {**self.session.headers, **self._get_headers()}

        if files:
            headers = {name: value for name, value in headers.items() if name.lower() != "content-type"}
            headers["Content-Type"] = None

        return headers

    def _request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> requests.Response | None:
        """
        Make a request to the Paperless-NgX API.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL, or an absolute URL.
            params: Query parameters for the request.
            data: Request body data. Sent as JSON, or as form data when files
                are uploaded.
            files: Files to upload.

        Returns:
            Response object or None if no content (HTTP 204).

        Raises:
            AuthenticationError: If authentication fails.
            ResourceNotFoundError: If the requested resource doesn't exist.
            RequestError: If the connection or the request itself fails.
            APIError: If the API returns an error.
            PaperlessError: For other errors.

        """
        url = self._resolve_url(endpoint)

        logger.debug("Requesting %s %s", method, url)

        # Headers from session defaults and authentication (Content-Type
        # suppressed for uploads).
        headers = self._build_request_headers(files)

        try:
            # TODO: Temporary hack
            # Callers sometimes pass {"params": {...}}; unwrap the inner dict.
            params = params.get("params", params) if params else params

            response = self.session.request(
                method=method,
                url=url,
                headers=headers,
                params=params,
                json=data if not files and data else None,
                data=data if files else None,
                files=files,
                timeout=self.settings.timeout,
                **self._get_auth_params(),
            )

            # Handle HTTP errors
            if response.status_code >= 400:
                self._handle_request_errors(response, url, params=params, data=data, files=files)

            # No content
            if response.status_code == 204:
                return None

            return response

        except requests.exceptions.ConnectionError as ce:
            logger.error(
                "Unable to connect to Paperless server: %s url %s, params %s, data %s, files %s",
                method,
                url,
                params,
                data,
                files,
            )
            raise RequestError(f"Connection error: {str(ce)}") from ce
        except requests.exceptions.RequestException as exc:
            raise RequestError(f"Request failed: {str(exc)}") from exc

    def _handle_request_errors(
        self,
        response: requests.Response,
        url: str,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> None:
        """
        Raise the exception matching an error response. This method always raises.

        Args:
            response: The error response (status code >= 400).
            url: The URL that was requested, used in error messages.
            params: Query parameters of the failed request (currently unused).
            data: Body of the failed request (currently unused).
            files: Files of the failed request (currently unused).

        Raises:
            ValueError: On a 400 response reporting a missing required field.
            AuthenticationError: On a 401 response.
            ConfigurationError: On a 403 response that mentions a CSRF requirement.
            InsufficientPermissionError: On any other 403 response.
            ResourceNotFoundError: On a 404 response.
            BadResponseError: For every other error status.

        """
        error_message = self._extract_error_message(response)
        status = response.status_code

        if status == 400 and "This field is required" in error_message:
            raise ValueError(f"Required field missing: {error_message}")
        if status == 401:
            raise AuthenticationError(f"Authentication failed: {error_message}")
        if status == 403:
            if "this site requires a CSRF" in error_message:
                raise ConfigurationError(f"Response claims CSRF token required. Is the url correct? {url}")
            raise InsufficientPermissionError(f"Permission denied: {error_message}")
        if status == 404:
            raise ResourceNotFoundError(f"Paperless returned 404 for {url}")

        # All else...
        raise BadResponseError(error_message, status)

    @overload
    def _handle_response(
        self, response: requests.Response, *, json_response: Literal[True] = True
    ) -> dict[str, Any]: ...

    @overload
    def _handle_response(self, response: None, *, json_response: bool = True) -> None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: Literal[False]
    ) -> bytes | None: ...

    @overload
    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None: ...

    def _handle_response(
        self, response: requests.Response | None, *, json_response: bool = True
    ) -> dict[str, Any] | bytes | None:
        """
        Convert a response into parsed JSON or raw bytes.

        Args:
            response: The response to handle, or None.
            json_response: Parse the body as JSON when True; return the raw
                content bytes when False.

        Returns:
            None for a missing/falsy response, otherwise the parsed JSON or the
            raw content.

        Raises:
            ResponseParsingError: If JSON parsing was requested and failed.

        """
        if not response:
            return None

        if not json_response:
            return response.content

        try:
            return response.json()
        except ValueError as e:
            url = getattr(response, "url", "unknown URL")
            logger.error("Failed to parse JSON response: %s -> url %s -> content: %s", e, url, response.content)
            raise ResponseParsingError(f"Failed to parse JSON response: {str(e)} -> url {url}") from e

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: Literal[False],
    ) -> bytes | None: ...

    @overload
    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None: ...

    def request(
        self,
        method: str,
        endpoint: str | URL | Template,
        *,
        params: dict[str, Any] | None = None,
        data: dict[str, Any] | None = None,
        files: dict[str, Any] | None = None,
        json_response: bool = True,
    ) -> dict[str, Any] | bytes | None:
        """
        Make a request to the Paperless-NgX API.

        Generally, this should be done using resources, not by calling this method directly.

        Signals are emitted before the request, after the raw response arrives,
        and after parsing; the value returned by the final signal is what this
        method returns.

        Args:
            method: HTTP method (GET, POST, PUT, DELETE).
            endpoint: API endpoint relative to base URL.
            params: Query parameters for the request.
            data: Request body data.
            files: Files to upload.
            json_response: Whether to parse the response as JSON.

        Returns:
            Parsed response data, or None when the server returned no content.

        """
        kwargs = {
            "client": self,
            "method": method,
            "endpoint": endpoint,
            "params": params,
            "data": data,
            "files": files,
            "json_response": json_response,
        }

        registry.emit(
            "client.request:before", "Before a request is sent to the Paperless server", args=[self], kwargs=kwargs
        )

        if not (response := self._request(method, endpoint, params=params, data=data, files=files)):
            return None

        registry.emit(
            "client.request__response",
            "After a response is received, before it is parsed",
            args=[response],
            kwargs=kwargs,
        )

        parsed_response = self._handle_response(response, json_response=json_response)
        parsed_response = registry.emit(
            "client.request:after",
            "After a request is parsed.",
            args=parsed_response,
            kwargs=kwargs,
        )

        return parsed_response

    @staticmethod
    def _format_error_value(value: Any) -> str:
        """Render one error payload value as text, joining list items with commas."""
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value)

    def _extract_error_message(self, response: requests.Response) -> str:
        """
        Extract a human-readable error message from an error response.

        The well-known keys ``detail``, ``error`` and ``non_field_errors`` are
        tried in that order; otherwise every field is rendered as
        ``key: value`` and joined with ``; ``. The result is always a string so
        callers can safely run substring checks on it.

        Args:
            response: The error response.

        Returns:
            The extracted message, the raw response text when the body is not
            JSON, or ``HTTP <status>`` when the body is empty.

        """
        try:
            error_data = response.json()
        except ValueError:
            return response.text or f"HTTP {response.status_code}"

        if not isinstance(error_data, dict):
            return str(error_data)

        # Try different possible error formats
        for key in ("detail", "error", "non_field_errors"):
            if key in error_data:
                return self._format_error_value(error_data[key])

        # Handle per-field error messages
        return "; ".join(f"{key}: {self._format_error_value(value)}" for key, value in error_data.items())

    def generate_token(
        self,
        base_url: str,
        username: str,
        password: str,
        timeout: int | None = None,
    ) -> str:
        """
        Generate an API token using username and password.

        Args:
            base_url: The base URL of the Paperless-NgX instance. ``https://`` is
                assumed when no scheme is given.
            username: Username for authentication.
            password: Password for authentication.
            timeout: Request timeout in seconds. Defaults to the client's
                configured timeout.

        Returns:
            Generated API token.

        Raises:
            AuthenticationError: If authentication fails.
            RequestError: If the request fails or the server returns an error.
            ResponseParsingError: If the response does not contain a token.
            PaperlessError: For other errors.

        """
        if timeout is None:
            timeout = self.settings.timeout

        if not base_url.startswith(("http://", "https://")):
            base_url = f"https://{base_url}"

        url = f"{base_url.rstrip('/')}/api/token/"

        registry.emit(
            "client.generate_token__before",
            "Before a new token is generated",
            kwargs={"url": url, "username": username},
        )

        try:
            response = requests.post(
                url,
                json={"username": username, "password": password},
                headers={"Accept": "application/json"},
                timeout=timeout,
            )

            response.raise_for_status()
            data = response.json()

            registry.emit(
                "client.generate_token__after",
                "After a new token is generated",
                kwargs={"url": url, "username": username, "response": data},
            )

            # A non-dict body (list, null, ...) cannot carry a token either.
            if not isinstance(data, dict) or "token" not in data:
                raise ResponseParsingError("Token not found in response")

            return data["token"]
        except requests.exceptions.HTTPError as he:
            if he.response.status_code == 401:
                raise AuthenticationError("Invalid username or password") from he

            error_message = str(he)
            try:
                error_data = he.response.json()
            except ValueError:
                pass  # Non-JSON error body: keep the HTTP error text.
            else:
                # Only dict bodies have a "detail" entry to prefer.
                if isinstance(error_data, dict):
                    error_message = error_data.get("detail", error_message)

            raise RequestError(f"Failed to generate token: {error_message}") from he
        except requests.exceptions.RequestException as exc:
            raise RequestError(f"Error while requesting a new token: {str(exc)}") from exc
        except (ValueError, KeyError) as ve:
            raise ResponseParsingError(f"Failed to parse response when generating token: {str(ve)}") from ve

    def get_statistics(self) -> dict[str, Any]:
        """
        Get system statistics.

        Returns:
            Dictionary containing system statistics.

        Raises:
            APIError: If the server returned no (or an empty) result.

        """
        if result := self.request("GET", "api/statistics/"):
            return result
        raise APIError("Failed to get statistics")

    def get_system_status(self) -> dict[str, Any]:
        """
        Get system status.

        Returns:
            Dictionary containing system status information.

        Raises:
            APIError: If the server returned no (or an empty) result.

        """
        if result := self.request("GET", "api/status/"):
            return result
        raise APIError("Failed to get system status")

    def get_config(self) -> dict[str, Any]:
        """
        Get system configuration.

        Returns:
            Dictionary containing system configuration.

        Raises:
            APIError: If the server returned no (or an empty) result.

        """
        if result := self.request("GET", "api/config/"):
            return result
        raise APIError("Failed to get system configuration")