Coverage for cc_modules/cc_taskcollection.py: 20%
377 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_taskcollection.py
6===============================================================================
8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
11 This file is part of CamCOPS.
13 CamCOPS is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 CamCOPS is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
26===============================================================================
28**Classes to fetch tasks from the database as efficiently as possible.**
30"""
32from collections import OrderedDict
33import datetime
34from enum import Enum
35import logging
36from threading import Thread
37from typing import (
38 Dict,
39 Generator,
40 List,
41 Optional,
42 Tuple,
43 Type,
44 TYPE_CHECKING,
45 Union,
46)
48from cardinal_pythonlib.json.serialize import (
49 register_class_for_json,
50 register_enum_for_json,
51)
52from cardinal_pythonlib.logs import BraceStyleAdapter
53from cardinal_pythonlib.reprfunc import auto_repr, auto_str
54from cardinal_pythonlib.sort import MINTYPE_SINGLETON, MinType
55from kombu.serialization import dumps, loads
56from pendulum import DateTime as Pendulum
57from sqlalchemy.orm import Query
58from sqlalchemy.orm.session import Session as SqlASession
59from sqlalchemy.sql.functions import func
60from sqlalchemy.sql.expression import and_, exists, or_
62from camcops_server.cc_modules.cc_constants import ERA_NOW
63from camcops_server.cc_modules.cc_exportrecipient import ExportRecipient
64from camcops_server.cc_modules.cc_task import (
65 tablename_to_task_class_dict,
66 Task,
67)
68from camcops_server.cc_modules.cc_taskfactory import (
69 task_query_restricted_to_permitted_users,
70)
71from camcops_server.cc_modules.cc_taskfilter import TaskFilter
72from camcops_server.cc_modules.cc_taskindex import TaskIndexEntry
74if TYPE_CHECKING:
75 from sqlalchemy.sql.elements import ClauseElement, ColumnElement
76 from camcops_server.cc_modules.cc_request import CamcopsRequest
78log = BraceStyleAdapter(logging.getLogger(__name__))
81# =============================================================================
82# Debugging options
83# =============================================================================
85DEBUG_QUERY_TIMING = False
87if DEBUG_QUERY_TIMING:
88 log.warning("Debugging options enabled!")
91# =============================================================================
92# Sorting helpers
93# =============================================================================
def task_when_created_sorter(
    task: Task,
) -> Union[Tuple[Pendulum, datetime.datetime], MinType]:
    """
    Sort key for tasks.

    Orders primarily by the task's creation date/time; the upload (batch)
    date/time is the second tuple element, acting as a tiebreak so that the
    ordering is consistent.

    Args:
        task: the task to generate a key for

    Returns:
        ``(when_created, when_added_batch_utc)``, or the
        ``MINTYPE_SINGLETON`` sentinel if the task has no creation date/time.
    """
    when_created = task.when_created
    # noinspection PyProtectedMember
    when_uploaded = task._when_added_batch_utc
    if when_created is None:
        return MINTYPE_SINGLETON
    return when_created, when_uploaded
@register_enum_for_json
class TaskSortMethod(Enum):
    """
    The orderings that may be requested when sorting a list of tasks.

    See :func:`sort_tasks_in_place`, which interprets these values.
    """

    NONE = 0  # no sorting is applied
    CREATION_DATE_ASC = 1  # by creation date/time, ascending
    CREATION_DATE_DESC = 2  # by creation date/time, descending
def sort_tasks_in_place(
    tasklist: List[Task], sortmethod: TaskSortMethod
) -> None:
    """
    Reorder ``tasklist`` (mutating it) as requested by ``sortmethod``.

    Any sort method other than the two creation-date orderings leaves the
    list untouched.

    Args:
        tasklist: the list of tasks
        sortmethod: a :class:`TaskSortMethod` enum
    """
    if sortmethod == TaskSortMethod.CREATION_DATE_ASC:
        descending = False
    elif sortmethod == TaskSortMethod.CREATION_DATE_DESC:
        descending = True
    else:
        return  # nothing to do
    tasklist.sort(key=task_when_created_sorter, reverse=descending)
138# =============================================================================
139# Parallel fetch helper
140# =============================================================================
141# - Why consider a parallel fetch?
142# Because a typical fetch might involve 27ms per query (as seen by Python;
143# less as seen by MySQL) but about 100 queries, for a not-very-large
144# database.
145# - Initially UNSUCCESSFUL: even after tweaking pool_size=0 in create_engine()
146# to get round the SQLAlchemy error "QueuePool limit of size 5 overflow 10
147# reached", in the parallel code, a great many queries are launched, but then
148# something goes wrong and others are started but then block -- for ages --
149# waiting for a spare database connection, or something.
150# - Fixed that: I was not explicitly closing the sessions.
151# - But then a major conceptual problem: anything to be lazy-loaded (e.g.
152# patient, but also patient ID, special note, BLOB...) will give this sort of
153# error: "DetachedInstanceError: Parent instance <Phq9 at 0x7fe6cce2d278> is
154# not bound to a Session; lazy load operation of attribute 'patient' cannot
155# proceed" -- for obvious reasons. And some of those operations are only
156# required on the final paginated task set, which requires aggregation across
157# all tasks.
158#
159# HOWEVER, the query time per table drops from ~27ms to 4-8ms if we disable
160# eager loading (lazy="joined") of patients from tasks.
class FetchThread(Thread):
    """
    Thread to fetch tasks in parallel.

    CURRENTLY UNUSED.

    Each thread fetches the tasks of one task class via its own "bare"
    database session and stores the result in the owning
    :class:`TaskCollection`. After joining the thread, check :attr:`error`
    to find out whether the fetch failed.
    """

    def __init__(
        self,
        req: "CamcopsRequest",
        task_class: Type[Task],
        factory: "TaskCollection",
        **kwargs
    ) -> None:
        """
        Args:
            req: the request, used to obtain a database session
            task_class: the task class whose instances this thread fetches
            factory: the :class:`TaskCollection` that builds the query and
                receives the results
            **kwargs: passed on to :class:`threading.Thread`
        """
        self.req = req
        self.task_class = task_class
        self.factory = factory
        self.error = False  # set to True if run() hits an exception
        name = task_class.__tablename__
        super().__init__(name=name, target=None, **kwargs)

    def run(self) -> None:
        """
        Build the query for our task class, run it, and store the results in
        the factory's per-class task dictionary.

        Exceptions are not propagated out of the thread; they are logged
        (with traceback) and flagged via :attr:`error`. The database session
        is always closed, whatever happens.
        """
        log.debug("Thread starting")
        dbsession = self.req.get_bare_dbsession()
        # noinspection PyBroadException
        try:
            # noinspection PyProtectedMember
            q = self.factory._make_query(dbsession, self.task_class)
            if q:
                tasks = q.all()  # type: List[Task]
                # https://stackoverflow.com/questions/6319207/are-lists-thread-safe  # noqa
                # https://stackoverflow.com/questions/6953351/thread-safety-in-pythons-dictionary  # noqa
                # http://effbot.org/pyfaq/what-kinds-of-global-value-mutation-are-thread-safe.htm  # noqa
                # noinspection PyProtectedMember
                self.factory._tasks_by_class[self.task_class] = tasks
                log.debug("Thread finishing with results")
            else:
                log.debug("Thread finishing without results")
        except Exception:
            self.error = True
            # Include the traceback; otherwise the cause of the failure is
            # lost, since the exception goes no further than this thread.
            log.exception("Thread error")
        finally:
            # Sessions that are not explicitly closed hold on to their
            # database connection (see the notes above this class), so close
            # unconditionally.
            dbsession.close()
207# =============================================================================
208# Make a set of tasks, deferring work until things are needed
209# =============================================================================
212class TaskCollection(object):
213 """
214 Represent a potential or instantiated call to fetch tasks from the
215 database.
217 The caller may want them in a giant list (e.g. task viewer, CTVs), or split
218 by task class (e.g. trackers).
219 """
def __init__(
    self,
    req: Optional["CamcopsRequest"],
    taskfilter: TaskFilter = None,
    as_dump: bool = False,
    sort_method_by_class: TaskSortMethod = TaskSortMethod.NONE,
    sort_method_global: TaskSortMethod = TaskSortMethod.NONE,
    current_only: bool = True,
    via_index: bool = True,
    export_recipient: "ExportRecipient" = None,
) -> None:
    """
    Set up the collection. Nothing is fetched from the database here.

    Args:
        req:
            The
            :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`.
            ``None`` should only be used as a parameter when serializing
            a :class:`TaskCollection` to the back-end.
        taskfilter:
            A :class:`camcops_server.cc_modules.cc_taskfilter.TaskFilter`
            describing any restrictions to apply. Exactly one of
            ``taskfilter`` and ``export_recipient`` must be supplied.
        as_dump:
            Use the "dump" permissions rather than the "view" permissions?
        sort_method_by_class:
            How should we sort tasks within each task class?
        sort_method_global:
            How should we sort tasks overall (across all task types)?
        current_only:
            Restrict to ``_current`` tasks only?
        via_index:
            Use the server's index (faster)? Ignored (with a warning) if
            ``current_only`` is false, since the index can't be used then.
        export_recipient:
            A :class:`camcops_server.cc_modules.cc_exportrecipient.ExportRecipient`;
            if given, a filter is built from its settings.
    """  # noqa
    if via_index and not current_only:
        log.warning("Can't use index for non-current tasks")
        via_index = False

    self._req = req
    self._filter = taskfilter
    self._as_dump = as_dump
    self._sort_method_by_class = sort_method_by_class
    self._sort_method_global = sort_method_global
    self._current_only = current_only
    self._via_index = via_index
    self.export_recipient = export_recipient

    if export_recipient:
        assert (
            self._filter is None
        ), "Can't supply taskfilter if you supply export_recipient"
        # Most of the export recipient's restrictions can be expressed as
        # an ordinary TaskFilter, so translate them into a fresh one.
        recipient_filter = TaskFilter()
        self._filter = recipient_filter
        if not export_recipient.all_groups:
            recipient_filter.group_ids = export_recipient.group_ids
        recipient_filter.task_types = export_recipient.tasks
        recipient_filter.start_datetime = (
            export_recipient.start_datetime_utc
        )
        recipient_filter.end_datetime = export_recipient.end_datetime_utc
        recipient_filter.finalized_only = export_recipient.finalized_only
        anonymous_ok = export_recipient.anonymous_ok()
        recipient_filter.tasks_with_patient_only = not anonymous_ok
        recipient_filter.must_have_idnum_type = (
            export_recipient.primary_idnum
        )
    else:
        assert (
            self._filter
        ), "Must supply taskfilter unless you supply export_recipient"

    # Caches, filled in lazily by the fetch methods:
    self._tasks_by_class = (
        OrderedDict()
    )  # type: Dict[Type[Task], List[Task]]  # noqa
    self._all_tasks = None  # type: Optional[List[Task]]
    self._all_indexes = (
        None
    )  # type: Optional[Union[List[TaskIndexEntry], Query]]  # noqa
def __repr__(self) -> str:
    """
    Debugging representation, generated by ``auto_repr``.
    """
    representation = auto_repr(self)
    return representation
def __str__(self) -> str:
    """
    String representation, generated by ``auto_str``.
    """
    description = auto_str(self)
    return description
307 # =========================================================================
308 # Interface to read
309 # =========================================================================
@property
def req(self) -> "CamcopsRequest":
    """
    The request associated with this collection.

    Raises:
        AssertionError: if no request was supplied at construction and
            :meth:`set_request` has not been called since.
    """
    request = self._req
    assert (
        request is not None
    ), "Must initialize with a request or call set_request() first"
    return request
def set_request(self, req: "CamcopsRequest") -> None:
    """
    Attach (or replace) the request object after construction. Used by
    Celery back-end tasks.

    Args:
        req: a :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
    """
    self._req = req
def task_classes(self) -> List[Type[Task]]:
    """
    The task classes that we want, as reported by our task filter.
    """
    task_filter = self._filter
    return task_filter.task_classes
def tasks_for_task_class(self, task_class: Type[Task]) -> List[Task]:
    """
    Returns all appropriate task instances for a specific task type,
    fetching from the database first if necessary.

    Args:
        task_class: the task class of interest

    Returns:
        the tasks of that class; an empty list if there are none
    """
    if not self._via_index:
        # Direct route: fetch just this class.
        self._fetch_task_class(task_class)
    else:
        # Index route: everything is fetched in one go.
        self._ensure_everything_fetched_via_index()
    return self._tasks_by_class.get(task_class, [])
@property
def all_tasks(self) -> List[Task]:
    """
    Returns a list of all appropriate task instances, fetching them (via
    the index or directly, as configured) the first time it is needed.
    """
    if self._all_tasks is not None:
        return self._all_tasks  # already fetched
    if self._via_index:
        self._ensure_everything_fetched_via_index()
    else:
        self._fetch_all_tasks_without_index()
    return self._all_tasks
@property
def all_tasks_or_indexes_or_query(
    self,
) -> Union[List[Task], List[TaskIndexEntry], Query]:
    """
    Returns the cheapest available representation of "all appropriate
    tasks": a list of task instances, or a list of index entries, or a
    query that will return index entries.

    Why offer the choice? The results may be paginated (e.g. in the main
    task view), so:

    - returning a list of tasks works, but may mean that e.g. 20,000 tasks
      are fetched and 20 are shown;
    - it is more efficient to fetch 20,000 indexes from the single index
      table, and then only the 20 tasks we need;
    - it is more efficient still to fetch just the 20 indexes we need, and
      then their tasks.
    """
    if not self._via_index:
        return self.all_tasks

    self._build_index_query()  # ensure self._all_indexes is set

    already_fetched = self._all_tasks
    if already_fetched is None:
        # Indexes, or a query to fetch them
        return self._all_indexes
    # The tasks themselves have been fetched.
    return already_fetched
387 # def forget_task_class(self, task_class: Type[Task]) -> None:
388 # """
389 # Ditch results for a specific task class (for memory efficiency).
390 # """
391 # self._tasks_by_class.pop(task_class, None)
392 # # The "None" option prevents it from raising KeyError if the key
393 # # doesn't exist.
394 # # https://stackoverflow.com/questions/11277432/how-to-remove-a-key-from-a-python-dictionary # noqa
def gen_all_tasks_or_indexes(
    self,
) -> Generator[Union[Task, TaskIndexEntry], None, None]:
    """
    Generates tasks or index entries, executing the underlying query first
    if :attr:`all_tasks_or_indexes_or_query` gives us a query.
    """
    items = self.all_tasks_or_indexes_or_query
    if isinstance(items, Query):
        items = items.all()
    yield from items
def gen_tasks_by_class(self) -> Generator[Task, None, None]:
    """
    Generates all tasks, one task class at a time (in the order given by
    :meth:`task_classes`).
    """
    for task_class in self.task_classes():
        yield from self.tasks_for_task_class(task_class)
def gen_tasks_in_global_order(self) -> Generator[Task, None, None]:
    """
    Generates all tasks, in the order of :attr:`all_tasks` (the global
    order).
    """
    yield from self.all_tasks
@property
def dbsession(self) -> SqlASession:
    """
    The database session belonging to our request.
    """
    request = self.req
    return request.dbsession
432 # =========================================================================
433 # Internals: fetching Task objects
434 # =========================================================================
def _fetch_all_tasks_without_index(self, parallel: bool = False) -> None:
    """
    Fetch all tasks from the database (direct route, not via the index),
    then build and sort the combined task list, :attr:`_all_tasks`.

    Args:
        parallel: use one :class:`FetchThread` per task class? AVOID; see
            the notes accompanying :class:`FetchThread`.

    Raises:
        ValueError: if a thread of a parallel fetch reports an error
    """
    if DEBUG_QUERY_TIMING:
        start_time = Pendulum.now()

    if not parallel:
        # Fetch all tasks, classwise.
        for task_class in self._filter.task_classes:
            self._fetch_task_class(task_class)
    else:
        # Deprecated parallel fetch
        workers = []  # type: List[FetchThread]
        for task_class in self._filter.task_classes:
            worker = FetchThread(self.req, task_class, self)
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
            if worker.error:
                raise ValueError("Multithreaded fetch failed")

    if DEBUG_QUERY_TIMING:
        # noinspection PyUnboundLocalVariable
        time_taken = Pendulum.now() - start_time
        log.info("_fetch_all_tasks took {}", time_taken)

    # Build our joint task list
    combined = [
        task
        for class_tasks in self._tasks_by_class.values()
        for task in class_tasks
    ]  # type: List[Task]
    self._all_tasks = combined
    sort_tasks_in_place(combined, self._sort_method_global)
def _fetch_task_class(self, task_class: Type[Task]) -> None:
    """
    Fetch tasks from the database for one task type (direct route), apply
    any Python-side filtering, sort them, and cache the result in
    :attr:`_tasks_by_class`. Does nothing if that class is already cached.
    """
    cache = self._tasks_by_class
    if task_class in cache:
        return  # already fetched
    query = self._serial_query(task_class)
    # No query means that nothing can match.
    fetched = [] if query is None else query.all()  # type: List[Task]
    # Apply Python-side filters?
    fetched = self._filter_through_python(fetched)
    sort_tasks_in_place(fetched, self._sort_method_by_class)
    cache[task_class] = fetched
def _serial_query(self, task_class: Type[Task]) -> Optional[Query]:
    """
    Make and return an SQLAlchemy ORM query for a specific task class,
    using the request's own database session.

    Returns ``None`` if no tasks would match our criteria.
    """
    return self._make_query(self.req.dbsession, task_class)
def _make_query(
    self, dbsession: SqlASession, task_class: Type[Task]
) -> Optional[Query]:
    """
    Make and return an SQLAlchemy ORM query for a specific task class, in
    the given session.

    Restrictions are applied in this order: current-only (if requested);
    what the user is permitted to see; what the filter asks for; and, if
    exporting, what the export recipient still needs.

    Returns ``None`` if no tasks would match our criteria.
    """
    query = dbsession.query(task_class)

    # Restrict to what the web front end will supply
    if self._current_only:
        # noinspection PyProtectedMember
        is_current = task_class._current == True  # noqa: E712
        query = query.filter(is_current)

    # Restrict to what is PERMITTED
    query = task_query_restricted_to_permitted_users(
        self.req, query, task_class, as_dump=self._as_dump
    )

    # Restrict to what is DESIRED
    if query:
        query = self._task_query_restricted_by_filter(query, task_class)
    if query and self.export_recipient:
        query = self._task_query_restricted_by_export_recipient(
            query, task_class
        )

    return query
def _task_query_restricted_by_filter(
    self, q: Query, cls: Type[Task]
) -> Optional[Query]:
    """
    Restricts an SQLAlchemy ORM query for a given task class to those
    tasks that our filter permits.

    THIS IS A KEY SECURITY FUNCTION, since it implements some permissions
    that relate to viewing tasks when unfiltered.

    Args:
        q: the starting SQLAlchemy ORM Query
        cls: the task class

    Returns:
        the original query, a modified query, or ``None`` if no tasks
        would pass the filter

    """
    tf = self._filter  # task filter
    user = self.req.user

    if tf.group_ids:
        # Copy, so that narrowing the list below never alters the filter.
        permitted_group_ids = tf.group_ids.copy()
    else:
        permitted_group_ids = None  # unrestricted

    if tf.dates_inconsistent():
        return None

    if cls not in tf.task_classes:
        # We don't want this task
        return None

    if not cls.is_anonymous:
        # Not anonymous.
        if not tf.any_specific_patient_filtering():
            # No patient filtering. Permissions depend on user settings.
            if user.may_view_all_patients_when_unfiltered:
                # May see everything. No restrictions.
                pass
            elif user.may_view_no_patients_when_unfiltered:
                # Can't see patient data from any group.
                # (a) User not permitted to view any patients when
                # unfiltered, and (b) not filtered to a level that would
                # reasonably restrict to one or a small number of
                # patients. Skip the task class.
                return None
            else:
                # May see patient data from some, but not all, groups.
                liberal_group_ids = (
                    user.group_ids_nonsuperuser_may_see_when_unfiltered()
                )
                if not permitted_group_ids:  # was unrestricted
                    permitted_group_ids = liberal_group_ids
                else:  # was restricted; restrict further
                    permitted_group_ids = [
                        gid
                        for gid in permitted_group_ids
                        if gid in liberal_group_ids
                    ]
                # Fail closed: an empty list here means "no group is
                # permitted". It must not fall through, because the group
                # restriction further down is applied only to a non-empty
                # list, so an empty one would mean no group restriction
                # at all.
                if not permitted_group_ids:
                    return None  # down to zero; no point continuing

        # Patient filtering
        if tf.any_patient_filtering():
            # q = q.join(Patient)  # fails
            q = q.join(
                cls.patient
            )  # use explicitly configured relationship  # noqa
            q = tf.filter_query_by_patient(q, via_index=False)

    # Patient-independent filtering

    if tf.device_ids:
        # noinspection PyProtectedMember
        q = q.filter(cls._device_id.in_(tf.device_ids))

    if tf.era:
        # noinspection PyProtectedMember
        q = q.filter(cls._era == tf.era)
    if tf.finalized_only:
        # noinspection PyProtectedMember
        q = q.filter(cls._era != ERA_NOW)

    if tf.adding_user_ids:
        # noinspection PyProtectedMember
        q = q.filter(cls._adding_user_id.in_(tf.adding_user_ids))

    if permitted_group_ids:
        # noinspection PyProtectedMember
        q = q.filter(cls._group_id.in_(permitted_group_ids))

    # Date range: start is inclusive, end is exclusive.
    if tf.start_datetime is not None:
        q = q.filter(cls.when_created >= tf.start_datetime)
    if tf.end_datetime is not None:
        q = q.filter(cls.when_created < tf.end_datetime)

    # May yield None (if text is sought in a task with no text columns).
    q = self._filter_query_for_text_contents(q, cls)

    return q
def _task_query_restricted_by_export_recipient(
    self, q: Query, cls: Type[Task]
) -> Optional[Query]:
    """
    For exports.

    Applies those restrictions of our
    :class:`camcops_server.cc_modules.cc_exportrecipient.ExportRecipient`
    that are not already expressed through our
    :class:`camcops_server.cc_modules.cc_taskfilter.TaskFilter`.

    The main job here is for incremental exports: to find tasks that have
    not yet been exported. We look for any tasks not yet exported to a
    recipient of the same name (regardless of ``ExportRecipient.id``, which
    changes when the export recipient is reconfigured).

    Compare :meth:`_index_query_restricted_by_export_recipient`.

    Args:
        q: the starting SQLAlchemy ORM Query
        cls: the task class

    Returns:
        the original query, a modified query, or ``None`` if no tasks
        would pass the filter
    """
    from camcops_server.cc_modules.cc_exportmodels import (
        ExportedTask,
    )  # delayed import

    recipient = self.export_recipient
    if not recipient.is_incremental():
        # Full database export; no restrictions
        return q

    # Otherwise, restrict to tasks not yet sent to this recipient.
    # noinspection PyUnresolvedReferences
    exports_with_recipients = ExportedTask.__table__.join(
        ExportRecipient.__table__,
        ExportedTask.recipient_id == ExportRecipient.id,
    )
    # noinspection PyProtectedMember
    successful_export_of_this_task = and_(
        ExportRecipient.recipient_name == recipient.recipient_name,
        ExportedTask.basetable == cls.__tablename__,
        ExportedTask.task_server_pk == cls._pk,
        ExportedTask.success == True,  # noqa: E712
        ExportedTask.cancelled == False,  # noqa: E712
    )
    # "There is not a successful export record for this task/recipient"
    not_yet_exported = ~exists().select_from(
        exports_with_recipients
    ).where(successful_export_of_this_task)
    return q.filter(not_yet_exported)
def _filter_through_python(self, tasks: List[Task]) -> List[Task]:
    """
    Returns those tasks in the list provided that pass any Python-only
    aspects of our filter (those parts not easily calculable via SQL).

    This applies to the "direct" (and not "via index") routes only. With
    the index, we can do everything via SQL.

    Args:
        tasks: candidate tasks

    Returns:
        the same list object if there is nothing to check in Python;
        otherwise a new list of the tasks that pass
    """
    assert not self._via_index
    if self._has_python_parts_to_filter():
        passes = self._task_matches_python_parts_of_filter
        return [task for task in tasks if passes(task)]
    return tasks
def _has_python_parts_to_filter(self) -> bool:
    """
    Does the filter include anything that must be checked in Python,
    rather than by the database query? (Currently, that means the
    "complete tasks only" setting.)

    Only applicable to the direct (not "via index") route.
    """
    assert not self._via_index
    task_filter = self._filter
    return task_filter.complete_only
def _task_matches_python_parts_of_filter(self, task: Task) -> bool:
    """
    Does the task pass the Python parts of the filter?

    Only applicable to the direct (not "via index") route.

    Args:
        task: the task to check

    Returns:
        ``False`` if only complete tasks are wanted and this one is
        incomplete; otherwise ``True``
    """
    assert not self._via_index

    # "Is task complete" filter
    if self._filter.complete_only and not task.is_complete():
        return False

    return True
726 # =========================================================================
727 # Shared between Task and TaskIndexEntry methods
728 # =========================================================================
def _filter_query_for_text_contents(
    self, q: Query, taskclass: Type[Task]
) -> Optional[Query]:
    """
    Returns the query, filtered for the "text contents" filter.

    A task passes if, for EVERY phrase sought, AT LEAST ONE of its text
    columns contains that phrase (compared case-insensitively).

    Args:
        q: the starting SQLAlchemy ORM Query
        taskclass: the task class

    Returns:
        the unmodified query if no text is sought; ``None`` if text is
        sought but the task class has no text columns; otherwise a
        filtered query
    """
    phrases = self._filter.text_contents
    if not phrases:
        return q  # unmodified

    text_columns = taskclass.get_text_filter_columns()
    if not text_columns:
        # Text filtering requested, but there are no text columns, so
        # by definition the filter must fail.
        return None

    def phrase_in_any_column(phrase: str) -> "ColumnElement":
        # e.g. "col1 LIKE '%paracetamol%' OR col2 LIKE '%paracetamol%'"
        needle = phrase.lower()
        # Case-insensitive comparison:
        # https://groups.google.com/forum/#!topic/sqlalchemy/331XoToT4lk
        # https://bitbucket.org/zzzeek/sqlalchemy/wiki/UsageRecipes/StringComparisonFilter  # noqa
        return or_(
            *[
                func.lower(column).contains(needle, autoescape=True)
                for column in text_columns
            ]
        )

    # Thus, e.g.
    # "(col1 LIKE '%paracetamol%' OR col2 LIKE '%paracetamol%') AND
    #  (col1 LIKE '%overdose%' OR col2 LIKE '%overdose%')"
    every_phrase_present = and_(
        *[phrase_in_any_column(phrase) for phrase in phrases]
    )
    return q.filter(every_phrase_present)
773 # =========================================================================
774 # Internals: fetching TaskIndexEntry objects
775 # =========================================================================
def _ensure_everything_fetched_via_index(self) -> None:
    """
    Ensure we have all our tasks loaded, using the index: first make sure
    the index query exists, then fetch the tasks it refers to.
    """
    # Order matters: the fetch relies on the index query having been built.
    for step in (self._build_index_query, self._fetch_tasks_from_indexes):
        step()
def _build_index_query(self) -> None:
    """
    Populates :attr:`_all_indexes` with a Query that will fetch task
    indexes (or with an empty list, if no task could possibly match).
    If the task filtering requires the tasks to be fetched (i.e. text
    contents), fetch the actual tasks too (and filter them).

    Does nothing if :attr:`_all_indexes` has already been populated.
    """
    if self._all_indexes is not None:
        return  # already built
    index_query = self._make_index_query()
    # A result of None means "no tasks would match our criteria". Within
    # this object, though, None in _all_indexes means "not built yet", so
    # record the no-match case as an empty list of indexes. That is a
    # valid value for everything that reads _all_indexes, where None is
    # not: it could be neither iterated over nor fetched from.
    self._all_indexes = [] if index_query is None else index_query
    if self._filter.text_contents:
        # The text filter can only be applied to the tasks themselves.
        self._fetch_tasks_from_indexes()
def _fetch_tasks_from_indexes(self) -> None:
    """
    Takes the query (or list of indexes) that has already been stored in
    :attr:`_all_indexes`, and populates the task attributes,
    :attr:`_all_tasks` and :attr:`_tasks_by_class`.

    If :attr:`_all_indexes` holds a query, it is executed and replaced by
    the resulting list of index entries. Tasks are then fetched with one
    query per task type, with the "text contents" filter applied.

    Does nothing if :attr:`_all_tasks` has already been populated.
    :attr:`_all_tasks` is only set once everything has been fetched
    successfully, so a failure part-way through does not leave a partial
    list cached as if it were the complete answer.
    """
    if self._all_tasks is not None:
        return  # already fetched
    assert self._all_indexes is not None

    class_for_tablename = tablename_to_task_class_dict()
    dbsession = self.req.dbsession

    # Fetch indexes
    if isinstance(self._all_indexes, Query):
        # Query built, but indexes not yet fetched.
        # Replace the query with actual indexes
        self._all_indexes = (
            self._all_indexes.all()
        )  # type: List[TaskIndexEntry]  # noqa
    indexes = self._all_indexes

    # Group the task PKs by table name in a single pass over the indexes
    # (rather than re-scanning the whole index list for every table).
    pks_by_tablename = {}  # type: Dict[str, list]
    for index in indexes:
        pks_by_tablename.setdefault(index.task_table_name, []).append(
            index.task_pk
        )

    # Fetch tasks
    all_tasks = []  # type: List[Task]
    for tablename, task_pks in pks_by_tablename.items():
        # We do this by task class, so we can execute a single query per
        # task type (rather than per task).
        try:
            taskclass = class_for_tablename[tablename]
        except KeyError:
            log.warning("Bad tablename in index: {!r}", tablename)
            continue
        tasklist = self._tasks_by_class.setdefault(taskclass, [])
        # noinspection PyProtectedMember
        qtask = dbsession.query(taskclass).filter(
            taskclass._pk.in_(task_pks)
        )
        qtask = self._filter_query_for_text_contents(qtask, taskclass)
        if qtask is None:
            # Text was sought, but this task type has no text columns, so
            # none of its tasks can match.
            continue
        tasks = qtask.all()  # type: List[Task]
        tasklist.extend(tasks)
        all_tasks.extend(tasks)

    # Sort tasks
    for tasklist in self._tasks_by_class.values():
        sort_tasks_in_place(tasklist, self._sort_method_by_class)
    sort_tasks_in_place(all_tasks, self._sort_method_global)
    self._all_tasks = all_tasks
def _make_index_query(self) -> Optional[Query]:
    """
    Make and return an SQLAlchemy ORM query to retrieve indexes
    (:class:`TaskIndexEntry` objects).

    Returns ``None`` if no tasks would match our criteria.

    Raises:
        AssertionError: if we are not restricted to current tasks, since
            the index cannot be used otherwise
    """
    query = self.req.dbsession.query(TaskIndexEntry)

    # Restrict to what the web front end will supply
    assert self._current_only, "_current_only must be true to use index"

    # Restrict to what is PERMITTED
    # (user permissions are not applied when exporting to a recipient)
    if not self.export_recipient:
        query = task_query_restricted_to_permitted_users(
            self.req, query, TaskIndexEntry, as_dump=self._as_dump
        )

    # Restrict to what is DESIRED
    if query:
        query = self._index_query_restricted_by_filter(query)
    if query and self.export_recipient:
        query = self._index_query_restricted_by_export_recipient(query)

    return query
def _index_query_restricted_by_filter(self, q: Query) -> Optional[Query]:
    """
    Counterpart to :func:`_task_query_restricted_by_filter`, but for
    indexes: restricts a query on :class:`TaskIndexEntry` to the entries
    that our filter permits, and applies the global sort order.

    THIS IS A KEY SECURITY FUNCTION, since it implements some permissions
    that relate to viewing tasks when unfiltered.

    Args:
        q: the starting SQLAlchemy ORM Query

    Returns:
        the original query, a modified query, or ``None`` if no tasks
        would pass the filter

    """
    task_filter = self._filter
    user = self.req.user

    # Groups requested by the filter (copied, so the filter is untouched).
    filter_group_ids = (
        task_filter.group_ids.copy() if task_filter.group_ids else None
    )  # None: unrestricted

    if task_filter.dates_inconsistent():
        return None

    # ---------------------------------------------------------------------
    # Task type filtering
    # ---------------------------------------------------------------------

    if task_filter.skip_anonymous_tasks():
        # noinspection PyPep8
        has_patient = TaskIndexEntry.patient_pk != None  # noqa: E711
        q = q.filter(has_patient)

    if not task_filter.offers_all_non_anonymous_task_types():
        wanted_tablenames = [
            task_class.__tablename__
            for task_class in task_filter.task_classes
        ]
        q = q.filter(TaskIndexEntry.task_table_name.in_(wanted_tablenames))

    # ---------------------------------------------------------------------
    # Special rules when we've not filtered for any patients
    # ---------------------------------------------------------------------

    if not task_filter.any_specific_patient_filtering():
        # No patient filtering. Permissions depend on user settings.
        # (If the user may see everything, there are no restrictions.)
        if not user.may_view_all_patients_when_unfiltered:
            # noinspection PyPep8
            is_anonymous = TaskIndexEntry.patient_pk == None  # noqa: E711
            if user.may_view_no_patients_when_unfiltered:
                # Can't see patient data from any group.
                # (a) User not permitted to view any patients when
                # unfiltered, and (b) not filtered to a level that would
                # reasonably restrict to one or a small number of
                # patients. Restrict to anonymous tasks.
                q = q.filter(is_anonymous)
            else:
                # May see patient data from some, but not all, groups.
                # This is a little more complex than the equivalent in
                # _task_query_restricted_by_filter(), because we shouldn't
                # restrict anonymous tasks.
                liberal_group_ids = (
                    user.group_ids_nonsuperuser_may_see_when_unfiltered()
                )
                acceptable = [is_anonymous]  # type: List[ClauseElement]
                acceptable.extend(
                    TaskIndexEntry.group_id == gid  # this group OK
                    for gid in liberal_group_ids
                )
                q = q.filter(or_(*acceptable))

    # ---------------------------------------------------------------------
    # Patient filtering
    # ---------------------------------------------------------------------

    if task_filter.any_patient_filtering():
        q = q.join(TaskIndexEntry.patient)  # use relationship
        q = task_filter.filter_query_by_patient(q, via_index=True)

    # ---------------------------------------------------------------------
    # Patient-independent filtering
    # ---------------------------------------------------------------------

    if task_filter.device_ids:
        q = q.filter(TaskIndexEntry.device_id.in_(task_filter.device_ids))

    if task_filter.era:
        q = q.filter(TaskIndexEntry.era == task_filter.era)
    if task_filter.finalized_only:
        q = q.filter(TaskIndexEntry.era != ERA_NOW)

    if task_filter.adding_user_ids:
        q = q.filter(
            TaskIndexEntry.adding_user_id.in_(task_filter.adding_user_ids)
        )

    if filter_group_ids:
        q = q.filter(TaskIndexEntry.group_id.in_(filter_group_ids))

    # Date range: start is inclusive, end is exclusive.
    if task_filter.start_datetime is not None:
        q = q.filter(
            TaskIndexEntry.when_created_utc
            >= task_filter.start_datetime_utc
        )
    if task_filter.end_datetime is not None:
        q = q.filter(
            TaskIndexEntry.when_created_utc < task_filter.end_datetime_utc
        )

    # text_contents is managed at the later fetch stage when using indexes

    # But is_complete can be filtered now and in SQL:
    if task_filter.complete_only:
        # noinspection PyPep8
        is_complete = TaskIndexEntry.task_is_complete == True  # noqa: E712
        q = q.filter(is_complete)

    # When we use indexes, we embed the global sort criteria in the query.
    global_sort = self._sort_method_global
    created = TaskIndexEntry.when_created_utc
    uploaded = TaskIndexEntry.when_added_batch_utc  # tiebreak
    if global_sort == TaskSortMethod.CREATION_DATE_ASC:
        q = q.order_by(created.asc(), uploaded.asc())
    elif global_sort == TaskSortMethod.CREATION_DATE_DESC:
        q = q.order_by(created.desc(), uploaded.desc())

    return q
def _index_query_restricted_by_export_recipient(
    self, q: Query
) -> Optional[Query]:
    """
    Applies export-recipient restrictions to a task index query.

    Used for exports. This handles the parts of our
    :class:`camcops_server.cc_modules.cc_exportrecipient.ExportRecipient`
    that are not already dealt with by our
    :class:`camcops_server.cc_modules.cc_taskfilter.TaskFilter`. In
    practice that means incremental exports: keeping only tasks for which
    this recipient has no successful, non-cancelled export record.

    Compare :meth:`_task_query_restricted_by_export_recipient`.

    Args:
        q: the starting SQLAlchemy ORM Query (a query on
            ``TaskIndexEntry``)

    Returns:
        the original query (for a non-incremental recipient) or a modified
        query. The return type also permits ``None``, meaning "no tasks
        would pass the filter", though this implementation always returns
        a query.

    """
    from camcops_server.cc_modules.cc_exportmodels import (
        ExportedTask,
    )  # delayed import

    recipient = self.export_recipient
    if not recipient.is_incremental():
        # Full database export: nothing to restrict.
        return q

    # Incremental export: exclude tasks already sent to this recipient.
    # Export records are matched to recipients by recipient name.
    # noinspection PyUnresolvedReferences
    exports_with_recipients = ExportedTask.__table__.join(
        ExportRecipient.__table__,
        ExportedTask.recipient_id == ExportRecipient.id,
    )
    successful_export_criteria = and_(
        ExportRecipient.recipient_name == recipient.recipient_name,
        # Correlate with the outer TaskIndexEntry row. We use
        # ".task_table_name" rather than ".tablename", because a Python
        # property doesn't play nicely with SQLAlchemy here.
        ExportedTask.basetable == TaskIndexEntry.task_table_name,
        ExportedTask.task_server_pk == TaskIndexEntry.task_pk,
        ExportedTask.success == True,  # noqa: E712
        ExportedTask.cancelled == False,  # noqa: E712
    )
    already_exported = (
        exists()
        .select_from(exports_with_recipients)
        .where(successful_export_criteria)
    )
    # "There is not a successful export record for this task/recipient"
    return q.filter(~already_exported)
# noinspection PyProtectedMember
def encode_task_collection(coll: TaskCollection) -> Dict:
    """
    Converts a :class:`TaskCollection` into a dictionary for serialization.

    The task filter and the per-class sort method are each passed through
    kombu's ``dumps`` with its JSON serializer; the ``as_dump`` flag is
    stored as it is.

    The request is deliberately not included and must be rebuilt in another
    way; see e.g.
    :func:`camcops_server.cc_modules.celery.email_basic_dump`.
    """
    encoded = {}  # type: Dict
    encoded["taskfilter"] = dumps(coll._filter, serializer="json")
    encoded["as_dump"] = coll._as_dump
    encoded["sort_method_by_class"] = dumps(
        coll._sort_method_by_class, serializer="json"
    )
    return encoded
# noinspection PyUnusedLocal
def decode_task_collection(d: Dict, cls: Type) -> TaskCollection:
    """
    Rebuilds a :class:`TaskCollection` from its serialized dictionary form.

    The ``taskfilter`` and ``sort_method_by_class`` entries each hold the
    three values that :func:`reorder_args` accepts; they are rearranged by
    that function and then passed to kombu's ``loads``. The ``cls``
    argument is not used.

    The request is not part of the serialized form, so the collection is
    created with ``req=None``; the request must be rebuilt in another way;
    see e.g. :func:`camcops_server.cc_modules.celery.email_basic_dump`.
    """
    taskfilter = loads(*reorder_args(*d["taskfilter"]))
    as_dump = d["as_dump"]
    sort_method_by_class = loads(*reorder_args(*d["sort_method_by_class"]))
    return TaskCollection(
        req=None,
        taskfilter=taskfilter,
        as_dump=as_dump,
        sort_method_by_class=sort_method_by_class,
    )
def reorder_args(
    content_type: str, content_encoding: str, data: str
) -> List[str]:
    """
    Rearranges the output of kombu's ``dumps`` into the argument order
    expected by kombu's ``loads``.

    kombu :func:`SerializerRegistry.dumps` returns the data as the last
    element of its tuple, whereas :func:`SerializerRegistry.loads` takes
    the data as its first argument.

    Args:
        content_type: first element returned by ``dumps``
        content_encoding: second element returned by ``dumps``
        data: third element returned by ``dumps`` (the payload)

    Returns:
        the list ``[data, content_type, content_encoding]``
    """
    loads_args = [data, content_type, content_encoding]
    return loads_args
# Register TaskCollection with cardinal_pythonlib's JSON serialization,
# using encode_task_collection() to convert a collection to a dictionary
# and decode_task_collection() to convert a dictionary back.
register_class_for_json(
    dict_to_obj_fn=decode_task_collection,
    obj_to_dict_fn=encode_task_collection,
    cls=TaskCollection,
)