Coverage for src/paperap/models/abstract/queryset.py: 85%
254 statements
1"""
2----------------------------------------------------------------------------
4 METADATA:
6 File: queryset.py
7 Project: paperap
8 Created: 2025-03-04
9 Version: 0.0.7
10 Author: Jess Mann
11 Email: jess@jmann.me
12 Copyright (c) 2025 Jess Mann
14----------------------------------------------------------------------------
16 LAST MODIFIED:
18 2025-03-04 By Jess Mann
20"""

from __future__ import annotations

import copy
import logging
from datetime import datetime
from string import Template
from typing import TYPE_CHECKING, Any, Generic, Iterable, Iterator, Optional, Self, Union, override

from typing_extensions import TypeVar
from yarl import URL

from paperap.exceptions import FilterDisabledError, MultipleObjectsFoundError, ObjectNotFoundError

if TYPE_CHECKING:
    from paperap.models.abstract.model import BaseModel, StandardModel
    from paperap.resources.base import BaseResource, StandardResource

_BaseModel = TypeVar("_BaseModel", bound="BaseModel", default="BaseModel", covariant=True)
_StandardModel = TypeVar("_StandardModel", bound="StandardModel", default="StandardModel", covariant=True)

logger = logging.getLogger(__name__)


class BaseQuerySet(Iterable[_BaseModel], Generic[_BaseModel]):
    """
    A lazy-loaded, chainable query interface for Paperless NGX resources.

    BaseQuerySet provides pagination, filtering, and caching functionality similar to
    Django's QuerySet. It is designed to be lazy, fetching data only when it is
    actually needed.

    Args:
        resource: The BaseResource instance.
        filters: Initial filter parameters.
        _cache: Optional internal result cache.
        _fetch_all: Whether all results have been fetched.
        _next_url: URL for the next page of results.
        _last_response: Optional last response from the API.
        _iter: Optional iterator for the results.

    Returns:
        A new instance of BaseQuerySet.

    Examples:
        # Create a QuerySet for documents
        >>> docs = client.documents()
        >>> for doc in docs:
        ...     print(doc.id)
        1
        2
        3

    """

    resource: "BaseResource[_BaseModel]"
    filters: dict[str, Any]
    _last_response: dict[str, Any] | None = None
    _result_cache: list[_BaseModel] = []
    _fetch_all: bool = False
    _next_url: str | None = None
    _urls_fetched: list[str] = []
    _iter: Iterator[_BaseModel] | None

    def __init__(
        self,
        resource: "BaseResource[_BaseModel]",
        filters: Optional[dict[str, Any]] = None,
        _cache: Optional[list[_BaseModel]] = None,
        _fetch_all: bool = False,
        _next_url: str | None = None,
        _last_response: Optional[dict[str, Any]] = None,
        _iter: Optional[Iterator[_BaseModel]] = None,
        _urls_fetched: Optional[list[str]] = None,
    ) -> None:
        self.resource = resource
        self.filters = filters or {}
        self._result_cache = _cache or []
        self._fetch_all = _fetch_all
        self._next_url = _next_url
        self._urls_fetched = _urls_fetched or []
        self._last_response = _last_response
        self._iter = _iter

        super().__init__()

    @property
    def _model(self) -> type[_BaseModel]:
        """
        Return the model class associated with the resource.

        Returns:
            The model class

        Examples:
            # Create a model instance
            >>> model = queryset._model(**params)

        """
        return self.resource.model_class

    @property
    def _meta(self) -> "BaseModel.Meta":
        """
        Return the model's metadata.

        Returns:
            The model's metadata

        Examples:
            # Get the model's metadata
            >>> queryset._meta.read_only_fields
            {'id', 'added', 'modified'}

        """
        return self._model._meta  # pyright: ignore[reportPrivateUsage] # pylint: disable=protected-access

    def _reset(self) -> None:
        """
        Reset the QuerySet to its initial state.

        This clears the result cache and resets the fetch state.
        """
        self._result_cache = []
        self._fetch_all = False
        self._next_url = None
        self._urls_fetched = []
        self._last_response = None
        self._iter = None

    def _update_filters(self, values: dict[str, Any]) -> None:
        """
        Update the current filters with new values.

        This updates the current queryset instance. It does not return a new instance.
        For that reason, do not call this directly. Call filter() or exclude() instead.

        Args:
            values: New filter values to add

        Raises:
            FilterDisabledError: If a filter is not allowed by the resource

        Examples:
            # Update filters with new values
            queryset._update_filters({"correspondent": 1})

            # Update filters with multiple values
            queryset._update_filters({"correspondent": 1, "document_type": 2})

        """
        for key, _value in values.items():
            if not self._meta.filter_allowed(key):
                raise FilterDisabledError(
                    f"Filtering by {key} for {self.resource.name} does not appear to be supported by the API."
                )

        if values:
            # Reset the cache if filters change
            self._reset()
            self.filters.update(**values)

    def filter(self, **kwargs: Any) -> Self:
        """
        Return a new QuerySet with the given filters applied.

        Args:
            **kwargs: Filters to apply, where keys are field names and values are desired values.
                Supports Django-style lookups like field__contains, field__in, etc.

        Returns:
            A new QuerySet with the additional filters applied

        Examples:
            # Get documents with specific correspondent
            docs = client.documents.filter(correspondent=1)

            # Get documents with specific correspondent and document type
            docs = client.documents.filter(correspondent=1, document_type=2)

            # Get documents with title containing "invoice"
            docs = client.documents.filter(title__contains="invoice")

            # Get documents with IDs in a list
            docs = client.documents.filter(id__in=[1, 2, 3])

        """
        processed_filters = {}

        for key, value in kwargs.items():
            # Handle list values for __in lookups
            if isinstance(value, (list, set, tuple)):
                # Convert list to comma-separated string for the API
                processed_value = ",".join(str(item) for item in value)
                processed_filters[key] = processed_value
            # Handle boolean values
            elif isinstance(value, bool):
                processed_filters[key] = str(value).lower()
            # Handle normal values
            else:
                processed_filters[key] = value

        return self._chain(filters={**self.filters, **processed_filters})

    def exclude(self, **kwargs: Any) -> Self:
        """
        Return a new QuerySet excluding objects with the given filters.

        Args:
            **kwargs: Filters to exclude, where keys are field names and values are excluded values

        Returns:
            A new QuerySet excluding objects that match the filters

        Examples:
            # Get documents with any correspondent except ID 1
            docs = client.documents.exclude(correspondent=1)

        """
        # Transform each key to its "not" equivalent
        exclude_filters = {}
        for key, value in kwargs.items():
            if "__" in key:
                field, lookup = key.split("__", 1)
                # If it already has a "not" prefix, remove it
                if lookup.startswith("not_"):
                    exclude_filters[f"{field}__{lookup[4:]}"] = value
                else:
                    exclude_filters[f"{field}__not_{lookup}"] = value
            else:
                exclude_filters[f"{key}__not"] = value

        return self._chain(filters={**self.filters, **exclude_filters})

    def get(self, pk: Any) -> _BaseModel:
        """
        Retrieve a single object from the API.

        Raises NotImplementedError; subclasses may implement this.

        Args:
            pk: The primary key (e.g. the id) of the object to retrieve

        Returns:
            A single object matching the query

        Raises:
            ObjectNotFoundError: If no object or multiple objects are found
            NotImplementedError: If the method is not implemented by the subclass

        Examples:
            # Get document with ID 123
            doc = client.documents.get(123)

        """
        raise NotImplementedError("Getting a single resource is not defined by BaseModels without an id.")

    def count(self) -> int:
        """
        Return the total number of objects in the queryset.

        Returns:
            The total count of objects matching the filters

        Raises:
            NotImplementedError: If the response does not have a count attribute
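
        Examples:
            # Illustrative usage sketch; not from the original source, filter values are hypothetical
            total = client.documents.filter(correspondent=1).count()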
287 """
288 # If we have a last response, we can use the "count" field
289 if self._last_response:
290 if (count := self._last_response.get("count")) is not None:
291 return count
292 raise NotImplementedError("Response does not have a count attribute.")
294 # Get one page of results, to populate last response
295 _iter = self._request_iter(params=self.filters)
297 # TODO Hack
298 for _ in _iter:
299 break
301 if not self._last_response:
302 # I don't think this should ever occur, but just in case.
303 raise NotImplementedError("Requested iter, but no last response")
305 if (count := self._last_response.get("count")) is not None:
306 return count
308 # I don't think this should ever occur, but just in case.
309 raise NotImplementedError(
310 f"Unexpected Error: Could not determine count of objects. Last response: {self._last_response}"
311 )

    def count_this_page(self) -> int:
        """
        Return the number of objects on the current page.

        Returns:
            The count of objects on the current page

        Raises:
            NotImplementedError: If _last_response is not set

        """
        # If we have a last response, we can count it without a new request
        if self._last_response:
            results = self._last_response.get("results", [])
            return len(results)

        # Get one page of results, to populate last response
        _iter = self._request_iter(params=self.filters)

        # TODO Hack
        for _ in _iter:
            break

        if not self._last_response:
            # I don't think this should ever occur, but just in case.
            raise NotImplementedError("Requested iter, but no last response")

        results = self._last_response.get("results", [])
        return len(results)

    def all(self) -> Self:
        """
        Return a new QuerySet that copies the current one.

        Returns:
            A copy of the current BaseQuerySet

        """
        return self._chain()

    def order_by(self, *fields: str) -> Self:
        """
        Return a new QuerySet ordered by the specified fields.

        Args:
            *fields: Field names to order by. Prefix with '-' for descending order.

        Returns:
            A new QuerySet with the ordering applied

        Examples:
            # Order documents by title ascending
            docs = client.documents.order_by('title')

            # Order documents by added date descending
            docs = client.documents.order_by('-added')

        """
        if not fields:
            return self

        # Combine with existing ordering, if any
        ordering = self.filters.get("ordering", [])
        if isinstance(ordering, str):
            ordering = [ordering]
        elif not isinstance(ordering, list):
            ordering = list(ordering)

        # Add new ordering fields
        new_ordering = ordering + list(fields)

        # Join with commas for the API
        ordering_param = ",".join(new_ordering)

        return self._chain(filters={**self.filters, "ordering": ordering_param})

    def first(self) -> Optional[_BaseModel]:
        """
        Return the first object in the QuerySet, or None if empty.

        Returns:
            The first object or None if no objects match
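
        Examples:
            # Illustrative usage sketch; not from the original source, filter value is hypothetical
            doc = client.documents.filter(title__contains="invoice").first()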
396 """
397 if self._result_cache and len(self._result_cache) > 0:
398 return self._result_cache[0]
400 # If not cached, create a copy limited to 1 result
401 results = list(self._chain(filters={**self.filters, "limit": 1}))
402 return results[0] if results else None

    def last(self) -> Optional[_BaseModel]:
        """
        Return the last object in the QuerySet, or None if empty.

        Note: This requires fetching all results to determine the last one.

        Returns:
            The last object or None if no objects match
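
        Examples:
            # Illustrative usage sketch; not from the original source, ordering field is hypothetical
            doc = client.documents.order_by('-added').last()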
413 """
414 # If we have all results, we can just return the last one
415 if self._fetch_all:
416 if self._result_cache and len(self._result_cache) > 0:
417 return self._result_cache[-1]
418 return None
420 # We need all results to get the last one
421 self._fetch_all_results()
423 if self._result_cache and len(self._result_cache) > 0:
424 return self._result_cache[-1]
425 return None

    def exists(self) -> bool:
        """
        Return True if the QuerySet contains any results.

        Returns:
            True if there are any objects matching the filters
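
        Examples:
            # Illustrative usage sketch; not from the original source, filter value is hypothetical
            if client.documents.filter(correspondent=1).exists():
                print("Matching documents found")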
434 """
435 # Check the cache before potentially making a new request
436 if self._fetch_all or self._result_cache:
437 return len(self._result_cache) > 0
439 # Check if there's at least one result
440 return self.first() is not None

    def none(self) -> Self:
        """
        Return an empty QuerySet.

        Returns:
            An empty QuerySet

        """
        return self._chain(filters={"limit": 0})

    def filter_field_by_str(self, field: str, value: str, *, exact: bool = True, case_insensitive: bool = True) -> Self:
        """
        Filter a queryset based on a given field.

        This allows subclasses to easily implement custom filter methods.

        Args:
            field: The field name to filter by.
            value: The value to filter against.
            exact: Whether to filter by an exact match.
            case_insensitive: Whether the filter should be case-insensitive.

        Returns:
            A new QuerySet instance with the filter applied.
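
        Examples:
            # Hypothetical subclass helper (illustrative only; not part of the original source)
            def title(self, value: str, *, exact: bool = True) -> Self:
                return self.filter_field_by_str("title", value, exact=exact)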
467 """
468 if exact:
469 lookup = f"{field}__iexact" if case_insensitive else field
470 else:
471 lookup = f"{field}__icontains" if case_insensitive else f"{field}__contains"
473 return self.filter(**{lookup: value})

    def _fetch_all_results(self) -> None:
        """
        Fetch all results from the API and populate the cache.

        Returns:
            None

        """
        if self._fetch_all:
            return

        # Clear existing cache if any
        self._result_cache = []

        # Initial fetch
        iterator = self._request_iter(params=self.filters)

        # Collect results from the initial page
        # TODO: Consider itertools chain for performance reasons (?)
        self._result_cache.extend(list(iterator))

        # Advance the pagination state so the loop below can follow the next page
        self._get_next()

        # Fetch additional pages if available
        while self._last_response and self._next_url:
            iterator = self._request_iter(url=self._next_url)
            self._result_cache.extend(list(iterator))
            self._get_next()

        self._fetch_all = True

    def _request_iter(
        self, url: str | URL | Template | None = None, params: Optional[dict[str, Any]] = None
    ) -> Iterator[_BaseModel]:
        """
        Get an iterator of resources.

        Args:
            url: The URL to request, if different from the resource's default.
            params: Query parameters.

        Returns:
            An iterator over the resources.

        Raises:
            NotImplementedError: If the request cannot be completed.

        Examples:
            # Iterate over documents
            for doc in queryset._request_iter():
                print(doc)

        """
        if not (response := self.resource.request_raw(url=url, params=params)):
            logger.debug("No response from request.")
            return

        self._last_response = response

        yield from self.resource.handle_response(**response)

    def _get_next(self, response: dict[str, Any] | None = None) -> str | None:
        """
        Get the next URL, and adjust our references accordingly.
        """
        # Allow passing a different response
        if response is None:
            response = self._last_response

        # Last response is not set
        if not response or not (next_url := response.get("next")):
            self._next_url = None
            return None

        # For safety, check both instance attributes. The first check isn't strictly necessary,
        # but it hopefully future-proofs any changes to the implementation.
        if next_url == self._next_url or next_url in self._urls_fetched:
            logger.debug(
                "Next URL was previously fetched. Stopping iteration. URL: %s, Already Fetched: %s",
                next_url,
                self._urls_fetched,
            )
            self._next_url = None
            return None

        # Cache it
        self._next_url = next_url
        self._urls_fetched.append(next_url)
        return self._next_url

    def _chain(self, **kwargs: Any) -> Self:
        """
        Return a copy of the current BaseQuerySet with updated attributes.

        Args:
            **kwargs: Attributes to update in the new BaseQuerySet

        Returns:
            A new QuerySet with the updated attributes

        """
        # Create a new BaseQuerySet with copied attributes
        clone = self.__class__(self.resource)

        # Copy attributes from self
        clone.filters = copy.deepcopy(self.filters)
        # Do not copy the cache, fetch_all, etc., since filters may change them

        # Update with provided kwargs
        for key, value in kwargs.items():
            if key == "filters" and value:
                clone._update_filters(value)  # pylint: disable=protected-access
            else:
                setattr(clone, key, value)

        return clone

    @override
    def __iter__(self) -> Iterator[_BaseModel]:
        """
        Iterate over the objects in the QuerySet.

        Returns:
            An iterator over the objects

        """
        # If we have a fully populated cache, use it
        if self._fetch_all:
            yield from self._result_cache
            return

        if not self._iter:
            # Start a new iteration
            self._iter = self._request_iter(params=self.filters)

        # Yield objects from the current page
        for obj in self._iter:
            self._result_cache.append(obj)
            yield obj

        self._get_next()

        # If there are more pages, keep going
        while self._next_url:
            self._iter = self._request_iter(url=self._next_url)

            # Yield objects from the current page
            for obj in self._iter:
                self._result_cache.append(obj)
                yield obj

            self._get_next()

        # We've fetched everything
        self._fetch_all = True
        self._iter = None

    def __len__(self) -> int:
        """
        Return the number of objects in the QuerySet.

        Returns:
            The count of objects

        """
        return self.count()

    def __bool__(self) -> bool:
        """
        Return True if the QuerySet has any results.

        Returns:
            True if there are any objects matching the filters

        """
        return self.exists()

    def __getitem__(self, key: Union[int, slice]) -> Union[_BaseModel, list[_BaseModel]]:
        """
        Retrieve an item or slice of items from the QuerySet.

        Args:
            key: An integer index or slice

        Returns:
            A single object or list of objects

        Raises:
            IndexError: If the index is out of range
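
        Examples:
            # Illustrative usage sketch; not from the original source, indices are hypothetical
            first_ten = client.documents.all()[:10]
            fifth = client.documents.all()[4]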
664 """
665 if isinstance(key, slice):
666 # Handle slicing
667 start = key.start if key.start is not None else 0
668 stop = key.stop
670 if start < 0 or (stop is not None and stop < 0):
671 # Negative indexing requires knowing the full size
672 self._fetch_all_results()
673 return self._result_cache[key]
675 # Optimize by using limit/offset if available
676 if start == 0 and stop is not None:
677 # Simple limit
678 clone = self._chain(filters={**self.filters, "limit": stop})
679 results = list(clone)
680 return results
682 if start > 0 and stop is not None:
683 # Limit with offset
684 clone = self._chain(
685 filters={
686 **self.filters,
687 "limit": stop - start,
688 "offset": start,
689 }
690 )
691 results = list(clone)
692 return results
694 if start > 0 and stop is None:
695 # Just offset
696 clone = self._chain(filters={**self.filters, "offset": start})
697 self._fetch_all_results() # We need all results after the offset
698 return self._result_cache
700 # Default to fetching all and slicing
701 self._fetch_all_results()
702 return self._result_cache[key]
704 # Handle integer indexing
705 if key < 0:
706 # Negative indexing requires the full result set
707 self._fetch_all_results()
708 return self._result_cache[key]
710 # Positive indexing - we can optimize with limit/offset
711 if len(self._result_cache) > key:
712 # Already have this item cached
713 return self._result_cache[key]
715 # Fetch specific item by position
716 clone = self._chain(filters={**self.filters, "limit": 1, "offset": key})
717 results = list(clone)
718 if not results:
719 raise IndexError(f"BaseQuerySet index {key} out of range")
720 return results[0]

    def __contains__(self, item: Any) -> bool:
        """
        Return True if the QuerySet contains the given object.

        Args:
            item: The object to check for

        Returns:
            True if the object is in the QuerySet

        """
        if not isinstance(item, self._model):
            return False

        return any(obj == item for obj in self)


class StandardQuerySet(BaseQuerySet[_StandardModel], Generic[_StandardModel]):
    """
    A queryset for StandardModel instances (i.e. BaseModels with standard fields, like id).

    Args:
        resource: The BaseResource instance.
        filters: Initial filter parameters.

    Returns:
        A new instance of StandardQuerySet.

    Raises:
        ObjectNotFoundError: If no object or multiple objects are found when calling get().

    Examples:
        # Create a StandardQuerySet for documents
        docs = StandardQuerySet(resource=client.documents)

    """

    @override
    def get(self, pk: int) -> _StandardModel:
        """
        Retrieve a single object from the API.

        Args:
            pk: The ID of the object to retrieve

        Returns:
            A single object matching the query

        Raises:
            ObjectNotFoundError: If no object or multiple objects are found

        Examples:
            # Get document with ID 123
            doc = client.documents.get(123)

        """
        # Attempt to find it in the result cache
        if self._result_cache:
            for obj in self._result_cache:
                if obj.id == pk:
                    return obj

        # Direct lookup by ID - use the resource's get method
        return self.resource.get(pk)

    def id(self, value: int | list[int]) -> Self:
        """
        Filter models by ID.

        Args:
            value: The ID or list of IDs to filter by

        Returns:
            Filtered QuerySet
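
        Examples:
            # Illustrative usage sketch; not from the original source, IDs are hypothetical
            doc = client.documents.id(123)
            docs = client.documents.id([1, 2, 3])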
807 """
808 if isinstance(value, list):
809 return self.filter(id__in=value)
810 return self.filter(id=value)

    @override
    def __contains__(self, item: Any) -> bool:
        """
        Return True if the QuerySet contains the given object.

        NOTE: This method only ensures a match by ID, not by full object equality.
        This is intentional, as the object may be outdated or not fully populated.

        Args:
            item: The object or ID to check for

        Returns:
            True if the object is in the QuerySet
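
        Examples:
            # Illustrative usage sketch; not from the original source, the ID is hypothetical
            if 123 in client.documents.all():
                print("Document 123 is present")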
826 """
827 # Handle integers directly
828 if isinstance(item, int):
829 return any(obj.id == item for obj in self)
831 # Handle model objects that have an id attribute
832 try:
833 if hasattr(item, "id"):
834 return any(obj.id == item.id for obj in self)
835 except (AttributeError, TypeError):
836 pass
838 # For any other type, it's not in the queryset
839 return False