Coverage for src/paperap/models/document/model.py: 60%

229 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-11 21:37 -0400

1""" 

2 

3 

4 

5---------------------------------------------------------------------------- 

6 

7METADATA: 

8 

9File: model.py 

10 Project: paperap 

11Created: 2025-03-09 

12 Version: 0.0.5 

13Author: Jess Mann 

14Email: jess@jmann.me 

15 Copyright (c) 2025 Jess Mann 

16 

17---------------------------------------------------------------------------- 

18 

19LAST MODIFIED: 

20 

212025-03-09 By Jess Mann 

22 

23""" 

24 

25from __future__ import annotations 

26 

27from datetime import datetime 

28from typing import TYPE_CHECKING, Any, Iterable, Iterator, Optional, TypedDict, cast, override 

29 

30from pydantic import Field, field_serializer, field_validator, model_serializer 

31from typing_extensions import TypeVar 

32from yarl import URL 

33 

34from paperap.models.abstract import FilteringStrategies, StandardModel 

35from paperap.models.document.parser import CustomFieldDict 

36from paperap.models.document.queryset import DocumentQuerySet 

37 

38if TYPE_CHECKING: 

39 from paperap.models.correspondent import Correspondent 

40 from paperap.models.custom_field import CustomField, CustomFieldQuerySet 

41 from paperap.models.document_type import DocumentType 

42 from paperap.models.storage_path import StoragePath 

43 from paperap.models.tag import Tag, TagQuerySet 

44 from paperap.models.user import User 

45 

46 

class DocumentNote(StandardModel):
    """
    A single note attached to a Paperless-NgX document.

    The ``document`` and ``user`` fields hold primary keys; use
    ``get_document()`` / ``get_user()`` to resolve them through the client.
    """

    deleted_at: datetime | None = None
    restored_at: datetime | None = None
    transaction_id: int | None = None
    note: str
    created: datetime
    document: int
    user: int

    class Meta(StandardModel.Meta):
        read_only_fields = {"deleted_at", "restored_at", "transaction_id", "created"}

    @field_serializer("deleted_at", "restored_at", "created")
    def serialize_datetime(self, value: datetime | None):
        """
        Convert a datetime field into its ISO 8601 string form.

        Args:
            value: The datetime to serialize, or None.

        Returns:
            ``value.isoformat()``, or None when no value is set.

        """
        if not value:
            return None
        return value.isoformat()

    def get_document(self) -> "Document":
        """
        Look up the document this note is attached to.

        Returns:
            The document whose primary key is stored in ``self.document``.

        """
        documents = self._client.documents()
        return documents.get(self.document)

    def get_user(self) -> "User":
        """
        Look up the user who wrote this note.

        Returns:
            The user whose primary key is stored in ``self.user``.

        """
        users = self._client.users()
        return users.get(self.user)

96 

97 

class Document(StandardModel):
    """
    Represents a Paperless-NgX document.

    Related objects are stored as primary keys (``correspondent_id``,
    ``document_type_id``, ``storage_path_id``, ``tag_ids``,
    ``custom_field_dicts``) and exposed through properties of the
    API-facing names (``correspondent``, ``document_type``, ``storage_path``,
    ``tags``, ``custom_fields``) that resolve them through the client.

    Attributes:
        added: The timestamp when the document was added to the system.
        archive_serial_number: The serial number of the archive.
        archived_file_name: The name of the archived file.
        content: The content of the document.
        correspondent: The correspondent associated with the document.
        created: The timestamp when the document was created.
        created_date: The date when the document was created.
        updated: The timestamp when the document was last updated.
        custom_fields: Custom fields associated with the document.
        deleted_at: The timestamp when the document was deleted.
        document_type: The document type associated with the document.
        is_shared_by_requester: Whether the document is shared by the requester.
        notes: Notes associated with the document.
        original_file_name: The original file name of the document.
        owner: The owner of the document.
        page_count: The number of pages in the document.
        storage_path: The storage path of the document.
        tags: The tags associated with the document.
        title: The title of the document.
        user_can_change: Whether the user can change the document.

    Examples:
        >>> document = client.documents().get(pk=1)
        >>> document.title = 'Example Document'
        >>> document.save()
        >>> document.title
        'Example Document'

    """

    added: datetime | None = None
    archive_serial_number: int | None = None
    archived_file_name: str | None = None
    content: str = ""
    is_shared_by_requester: bool = False
    notes: "list[DocumentNote]" = Field(default_factory=list)
    original_file_name: str | None = None
    owner: int | None = None
    page_count: int | None = None
    title: str = ""
    user_can_change: bool | None = None

    created: datetime | None = Field(description="Creation timestamp", default=None, alias="created_on")
    created_date: str | None = None
    updated: datetime | None = Field(description="Last update timestamp", default=None, alias="updated_on")
    deleted_at: datetime | None = None

    # Raw primary keys / dicts backing the relationship properties below.
    custom_field_dicts: list[CustomFieldDict] = Field(default_factory=list)
    correspondent_id: int | None = None
    document_type_id: int | None = None
    storage_path_id: int | None = None
    tag_ids: list[int] = Field(default_factory=list)

    # Single-entry caches: (primary key the object was fetched for, object).
    # A cached entry is only served while its key still matches the current *_id.
    _correspondent: tuple[int, Correspondent] | None = None
    _document_type: tuple[int, DocumentType] | None = None
    _storage_path: tuple[int, StoragePath] | None = None

    class Meta(StandardModel.Meta):
        # NOTE: Filtering appears to be disabled by paperless on page_count
        queryset = DocumentQuerySet
        read_only_fields = {"page_count", "deleted_at", "updated", "is_shared_by_requester"}
        filtering_disabled = {"page_count", "deleted_at", "updated", "is_shared_by_requester"}
        filtering_strategies = {FilteringStrategies.WHITELIST}
        # API field name -> model attribute name
        field_map = {
            "tags": "tag_ids",
            "custom_fields": "custom_field_dicts",
            "document_type": "document_type_id",
            "correspondent": "correspondent_id",
            "storage_path": "storage_path_id",
        }
        supported_filtering_params = {
            "id__in",
            "id",
            "title__istartswith",
            "title__iendswith",
            "title__icontains",
            "title__iexact",
            "content__istartswith",
            "content__iendswith",
            "content__icontains",
            "content__iexact",
            "archive_serial_number",
            "archive_serial_number__gt",
            "archive_serial_number__gte",
            "archive_serial_number__lt",
            "archive_serial_number__lte",
            "archive_serial_number__isnull",
            "content__contains",  # maybe?
            "correspondent__isnull",
            "correspondent__id__in",
            "correspondent__id",
            "correspondent__name__istartswith",
            "correspondent__name__iendswith",
            "correspondent__name__icontains",
            "correspondent__name__iexact",
            "correspondent__slug__iexact",  # maybe?
            "created__year",
            "created__month",
            "created__day",
            "created__date__gt",
            "created__gt",
            "created__date__lt",
            "created__lt",
            "added__year",
            "added__month",
            "added__day",
            "added__date__gt",
            "added__gt",
            "added__date__lt",
            "added__lt",
            "modified__year",
            "modified__month",
            "modified__day",
            "modified__date__gt",
            "modified__gt",
            "modified__date__lt",
            "modified__lt",
            "original_filename__istartswith",
            "original_filename__iendswith",
            "original_filename__icontains",
            "original_filename__iexact",
            "checksum__istartswith",
            "checksum__iendswith",
            "checksum__icontains",
            "checksum__iexact",
            "tags__id__in",
            "tags__id",
            "tags__name__istartswith",
            "tags__name__iendswith",
            "tags__name__icontains",
            "tags__name__iexact",
            "document_type__isnull",
            "document_type__id__in",
            "document_type__id",
            "document_type__name__istartswith",
            "document_type__name__iendswith",
            "document_type__name__icontains",
            "document_type__name__iexact",
            "storage_path__isnull",
            "storage_path__id__in",
            "storage_path__id",
            "storage_path__name__istartswith",
            "storage_path__name__iendswith",
            "storage_path__name__icontains",
            "storage_path__name__iexact",
            "owner__isnull",
            "owner__id__in",
            "owner__id",
            "is_tagged",
            "tags__id__all",
            "tags__id__none",
            "correspondent__id__none",
            "document_type__id__none",
            "storage_path__id__none",
            "is_in_inbox",
            "title_content",
            "owner__id__none",
            "custom_fields__icontains",
            "custom_fields__id__all",
            "custom_fields__id__none",  # ??
            "custom_fields__id__in",
            "custom_field_query",  # ??
            "has_custom_fields",
            "shared_by__id",
            "shared_by__id__in",
        }

    @field_serializer("added", "created", "updated", "deleted_at")
    def serialize_datetime(self, value: datetime | None) -> str | None:
        """
        Serialize datetime fields to ISO format.

        Args:
            value: The datetime value to serialize.

        Returns:
            The ISO-formatted string, or None if the value is not set.

        """
        return value.isoformat() if value else None

    @field_serializer("notes")
    def serialize_notes(self, value: list[DocumentNote]):
        """
        Serialize notes by calling ``to_dict()`` on each one.

        Args:
            value: The list of DocumentNote objects to serialize.

        Returns:
            A list with one ``to_dict()`` result per note; an empty list if there are no notes.

        """
        return [note.to_dict() for note in value] if value else []

    @field_validator("tag_ids", mode="before")
    @classmethod
    def validate_tags(cls, value: list[int] | None) -> list[int]:
        """
        Validate and convert tag IDs to a list of integers.

        Args:
            value: The list of tag IDs to validate, or None.

        Returns:
            A list with every entry passed through ``int()``; an empty list for None.

        """
        if value is None:
            return []
        return [int(tag) for tag in value]

    @field_validator("custom_field_dicts", mode="before")
    @classmethod
    def validate_custom_fields(cls, value: list[CustomFieldDict] | None) -> list[CustomFieldDict]:
        """
        Normalize a missing custom field list to an empty list.

        Args:
            value: The list of custom field dictionaries, or None.

        Returns:
            ``value`` unchanged, or an empty list when it is None.

        """
        if value is None:
            return []
        return value

    @field_validator("content", "title", mode="before")
    @classmethod
    def validate_text(cls, value: str | None) -> str:
        """
        Normalize a missing or empty text field to an empty string.

        Args:
            value: The value of the text field to validate.

        Returns:
            ``value``, or ``""`` when it is falsy.

        """
        return value or ""

    @field_validator("notes", mode="before")
    @classmethod
    def validate_notes(cls, value: list[Any] | None) -> list[Any]:
        """
        Normalize a missing or empty notes value to an empty list.

        Args:
            value: The list of notes to validate.

        Returns:
            ``value``, or ``[]`` when it is falsy.

        """
        return value or []

    @field_validator("is_shared_by_requester", mode="before")
    @classmethod
    def validate_is_shared_by_requester(cls, value: bool | None) -> bool:
        """
        Normalize a missing is_shared_by_requester flag to False.

        Args:
            value: The flag to validate.

        Returns:
            ``value``, or False when it is falsy.

        """
        return value or False

    @property
    def custom_field_ids(self) -> list[int]:
        """
        Get the IDs of the custom fields for this document.
        """
        return [field["field"] for field in self.custom_field_dicts]

    @property
    def custom_field_values(self) -> list[Any]:
        """
        Get the values of the custom fields for this document.
        """
        return [field["value"] for field in self.custom_field_dicts]

    @property
    def tag_names(self) -> list[str]:
        """
        Get the names of the tags for this document.

        Iterates ``self.tags``; tags with an empty name are skipped.
        """
        return [tag.name for tag in self.tags if tag.name]

    @property
    def tags(self) -> TagQuerySet:
        """
        Get the tags for this document.

        Returns:
            A queryset of the tags whose IDs are in ``tag_ids``; an empty
            queryset (``none()``) when the document has no tag IDs.

        Examples:
            >>> document = client.documents().get(pk=1)
            >>> for tag in document.tags:
            ...     print(f'{tag.name} # {tag.id}')
            'Tag 1 # 1'
            'Tag 2 # 2'
            'Tag 3 # 3'

            >>> if 5 in document.tags:
            ...     print('Tag ID #5 is associated with this document')

            >>> tag = client.tags().get(pk=1)
            >>> if tag in document.tags:
            ...     print('Tag ID #1 is associated with this document')

            >>> filtered_tags = document.tags.filter(name__icontains='example')
            >>> for tag in filtered_tags:
            ...     print(f'{tag.name} # {tag.id}')

        """
        if not self.tag_ids:
            return self._client.tags().none()

        # Use the API's filtering capability to get only the tags with specific IDs
        # The paperless-ngx API supports id__in filter for retrieving multiple objects by ID
        return self._client.tags().id(self.tag_ids)

    @tags.setter
    def tags(self, value: "Iterable[Tag | int] | None") -> None:
        """
        Set the tags for this document, replacing any existing tag IDs.

        Args:
            value: The tags to set, as tag models and/or integer IDs.
                None clears the tags.

        Raises:
            TypeError: If value is not iterable, or contains an element that is
                neither an int nor a model. ``tag_ids`` is left untouched in
                that case.

        """
        if value is None:
            self.tag_ids = []
            return

        if not isinstance(value, Iterable):
            raise TypeError(f"Invalid type for tags: {type(value)}")

        # Collect into a fresh list and assign once at the end, so that:
        #  - assignment replaces (rather than extends) the current tags,
        #  - a TypeError part-way through does not leave tag_ids half-updated,
        #  - passing self.tag_ids itself does not grow the list being iterated.
        new_ids: list[int] = []
        for tag in value:
            if isinstance(tag, int):
                new_ids.append(tag)
                continue

            # Check against StandardModel to avoid circular imports
            # If it is another type of standard model, pydantic validators will complain
            if isinstance(tag, StandardModel):
                new_ids.append(tag.id)
                continue

            raise TypeError(f"Invalid type for tags: {type(tag)}")

        self.tag_ids = new_ids

    @property
    def correspondent(self) -> "Correspondent | None":
        """
        Get the correspondent for this document.

        The fetched object is cached together with the ID it was fetched for,
        and is reused while ``correspondent_id`` still matches.

        Returns:
            The correspondent or None if not set.

        Examples:
            >>> document = client.documents().get(pk=1)
            >>> document.correspondent.name
            'Example Correspondent'

        """
        # Return cache
        if self._correspondent is not None:
            pk, value = self._correspondent
            if pk == self.correspondent_id:
                return value

        # None set to retrieve
        if not self.correspondent_id:
            return None

        # Retrieve it
        correspondent = self._client.correspondents().get(self.correspondent_id)
        self._correspondent = (self.correspondent_id, correspondent)
        return correspondent

    @correspondent.setter
    def correspondent(self, value: "Correspondent | int | None") -> None:
        """
        Set the correspondent for this document.

        Args:
            value: The correspondent to set, as a model, an integer ID, or None to clear it.

        Raises:
            TypeError: If value is not None, an int, or a model.

        """
        if value is None:
            # Leave cache in place in case it changes again
            self.correspondent_id = None
            return

        if isinstance(value, int):
            # Leave cache in place in case id is the same, or id changes again
            self.correspondent_id = value
            return

        # Check against StandardModel to avoid circular imports
        # If it is another type of standard model, pydantic validators will complain
        if isinstance(value, StandardModel):
            self.correspondent_id = value.id
            # Pre-populate the cache
            self._correspondent = (value.id, value)
            return

        raise TypeError(f"Invalid type for correspondent: {type(value)}")

    @property
    def document_type(self) -> "DocumentType | None":
        """
        Get the document type for this document.

        The fetched object is cached together with the ID it was fetched for,
        and is reused while ``document_type_id`` still matches.

        Returns:
            The document type or None if not set.

        Examples:
            >>> document = client.documents().get(pk=1)
            >>> document.document_type.name
            'Example Document Type'

        """
        # Return cache
        if self._document_type is not None:
            pk, value = self._document_type
            if pk == self.document_type_id:
                return value

        # None set to retrieve
        if not self.document_type_id:
            return None

        # Retrieve it
        document_type = self._client.document_types().get(self.document_type_id)
        self._document_type = (self.document_type_id, document_type)
        return document_type

    @document_type.setter
    def document_type(self, value: "DocumentType | int | None") -> None:
        """
        Set the document type for this document.

        Args:
            value: The document type to set, as a model, an integer ID, or None to clear it.

        Raises:
            TypeError: If value is not None, an int, or a model.

        """
        if value is None:
            # Leave cache in place in case it changes again
            self.document_type_id = None
            return

        if isinstance(value, int):
            # Leave cache in place in case id is the same, or id changes again
            self.document_type_id = value
            return

        # Check against StandardModel to avoid circular imports
        # If it is another type of standard model, pydantic validators will complain
        if isinstance(value, StandardModel):
            self.document_type_id = value.id
            # Pre-populate the cache
            self._document_type = (value.id, value)
            return

        raise TypeError(f"Invalid type for document_type: {type(value)}")

    @property
    def storage_path(self) -> "StoragePath | None":
        """
        Get the storage path for this document.

        The fetched object is cached together with the ID it was fetched for,
        and is reused while ``storage_path_id`` still matches.

        Returns:
            The storage path or None if not set.

        Examples:
            >>> document = client.documents().get(pk=1)
            >>> document.storage_path.name
            'Example Storage Path'

        """
        # Return cache
        if self._storage_path is not None:
            pk, value = self._storage_path
            if pk == self.storage_path_id:
                return value

        # None set to retrieve
        if not self.storage_path_id:
            return None

        # Retrieve it
        storage_path = self._client.storage_paths().get(self.storage_path_id)
        self._storage_path = (self.storage_path_id, storage_path)
        return storage_path

    @storage_path.setter
    def storage_path(self, value: "StoragePath | int | None") -> None:
        """
        Set the storage path for this document.

        Args:
            value: The storage path to set, as a model, an integer ID, or None to clear it.

        Raises:
            TypeError: If value is not None, an int, or a model.

        """
        if value is None:
            # Leave cache in place in case it changes again
            self.storage_path_id = None
            return

        if isinstance(value, int):
            # Leave cache in place in case id is the same, or id changes again
            self.storage_path_id = value
            return

        # Check against StandardModel to avoid circular imports
        # If it is another type of standard model, pydantic validators will complain
        if isinstance(value, StandardModel):
            self.storage_path_id = value.id
            # Pre-populate the cache
            self._storage_path = (value.id, value)
            return

        raise TypeError(f"Invalid type for storage_path: {type(value)}")

    @property
    def custom_fields(self) -> "CustomFieldQuerySet":
        """
        Get the custom fields for this document.

        Returns:
            A queryset of the custom fields whose IDs appear in
            ``custom_field_dicts``; an empty queryset (``none()``) when there are none.

        """
        if not self.custom_field_dicts:
            return self._client.custom_fields().none()

        # Use the API's filtering capability to get only the custom fields with specific IDs
        # The paperless-ngx API supports id__in filter for retrieving multiple objects by ID
        return self._client.custom_fields().id(self.custom_field_ids)

    @custom_fields.setter
    def custom_fields(self, value: "Iterable[CustomField | CustomFieldDict] | None") -> None:
        """
        Set the custom fields for this document, replacing any existing ones.

        Args:
            value: The custom fields to set. Models are stored as
                ``{"field": <id>, "value": None}``; dicts are stored as given.
                None clears the custom fields.

        Raises:
            TypeError: If value is not iterable, or contains an element that is
                neither a model nor a dict.

        """
        if value is None:
            self.custom_field_dicts = []
            return

        if isinstance(value, Iterable):
            new_list: list[CustomFieldDict] = []
            for field in value:
                # Check against StandardModel to avoid circular imports
                # If it is another type of standard model, pydantic validators will complain
                if isinstance(field, StandardModel):
                    new_list.append({"field": field.id, "value": None})
                    continue

                if isinstance(field, dict):
                    new_list.append(field)
                    continue

                raise TypeError(f"Invalid type for custom fields: {type(field)}")

            self.custom_field_dicts = new_list
            return

        raise TypeError(f"Invalid type for custom fields: {type(value)}")

    def custom_field_value(self, field_id: int, default: Any = None, *, raise_errors: bool = False) -> Any:
        """
        Get the value of a custom field by ID.

        Args:
            field_id: The ID of the custom field.
            default: The value to return if the field is not found.
            raise_errors: Whether to raise an error if the field is not found.

        Returns:
            The value of the custom field or the default value if not found.

        Raises:
            ValueError: If the field is not found and raise_errors is True.

        """
        for field in self.custom_field_dicts:
            if field["field"] == field_id:
                return field["value"]

        if raise_errors:
            raise ValueError(f"Custom field {field_id} not found")
        return default

    # NOTE: attribute-style access to custom fields (via __getattr__) is not
    # implemented; use custom_field_value() instead.

    @override
    def update_locally(self, from_db: bool | None = None, **kwargs: Any):
        """
        Update the document locally with the provided data.

        Args:
            from_db: Whether to update from the database.
            **kwargs: Additional data to update the document with.

        Raises:
            NotImplementedError: If ``notes`` or ``tag_ids`` is passed with an empty
                or None value while the original data holds a non-empty value for it.

        """
        # Paperless does not support setting notes or tags to None if not already None
        if self._meta.original_data["notes"]:
            if "notes" in kwargs and not kwargs.get("notes"):
                # TODO: Gracefully delete the notes instead of raising an error.
                raise NotImplementedError(
                    f"Cannot set notes to None. Notes currently: {self._meta.original_data['notes']}"
                )

        if self._meta.original_data["tag_ids"]:
            if "tag_ids" in kwargs and not kwargs.get("tag_ids"):
                # TODO: Gracefully delete the tags instead of raising an error.
                raise NotImplementedError(
                    f"Cannot set tag_ids to None. Tags currently: {self._meta.original_data['tag_ids']}"
                )

        return super().update_locally(from_db, **kwargs)