Coverage for cc_modules/cc_taskcollection.py: 20%

377 statements  

coverage.py v6.5.0, created at 2022-11-08 23:14 +0000

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_taskcollection.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

10 

11 This file is part of CamCOPS. 

12 

13 CamCOPS is free software: you can redistribute it and/or modify 

14 it under the terms of the GNU General Public License as published by 

15 the Free Software Foundation, either version 3 of the License, or 

16 (at your option) any later version. 

17 

18 CamCOPS is distributed in the hope that it will be useful, 

19 but WITHOUT ANY WARRANTY; without even the implied warranty of 

20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21 GNU General Public License for more details. 

22 

23 You should have received a copy of the GNU General Public License 

24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

25 

26=============================================================================== 

27 

28**Classes to fetch tasks from the database as efficiently as possible.** 

29 

30""" 

31 

32from collections import OrderedDict 

33import datetime 

34from enum import Enum 

35import logging 

36from threading import Thread 

37from typing import ( 

38 Dict, 

39 Generator, 

40 List, 

41 Optional, 

42 Tuple, 

43 Type, 

44 TYPE_CHECKING, 

45 Union, 

46) 

47 

48from cardinal_pythonlib.json.serialize import ( 

49 register_class_for_json, 

50 register_enum_for_json, 

51) 

52from cardinal_pythonlib.logs import BraceStyleAdapter 

53from cardinal_pythonlib.reprfunc import auto_repr, auto_str 

54from cardinal_pythonlib.sort import MINTYPE_SINGLETON, MinType 

55from kombu.serialization import dumps, loads 

56from pendulum import DateTime as Pendulum 

57from sqlalchemy.orm import Query 

58from sqlalchemy.orm.session import Session as SqlASession 

59from sqlalchemy.sql.functions import func 

60from sqlalchemy.sql.expression import and_, exists, or_ 

61 

62from camcops_server.cc_modules.cc_constants import ERA_NOW 

63from camcops_server.cc_modules.cc_exportrecipient import ExportRecipient 

64from camcops_server.cc_modules.cc_task import ( 

65 tablename_to_task_class_dict, 

66 Task, 

67) 

68from camcops_server.cc_modules.cc_taskfactory import ( 

69 task_query_restricted_to_permitted_users, 

70) 

71from camcops_server.cc_modules.cc_taskfilter import TaskFilter 

72from camcops_server.cc_modules.cc_taskindex import TaskIndexEntry 

73 

74if TYPE_CHECKING: 

75 from sqlalchemy.sql.elements import ClauseElement, ColumnElement 

76 from camcops_server.cc_modules.cc_request import CamcopsRequest 

77 

78log = BraceStyleAdapter(logging.getLogger(__name__)) 

79 

80 

81# ============================================================================= 

82# Debugging options 

83# ============================================================================= 

84 

85DEBUG_QUERY_TIMING = False 

86 

87if DEBUG_QUERY_TIMING: 

88 log.warning("Debugging options enabled!") 

89 

90 

91# ============================================================================= 

92# Sorting helpers 

93# ============================================================================= 

94 

95 

96def task_when_created_sorter( 

97 task: Task, 

98) -> Union[Tuple[Pendulum, datetime.datetime], MinType]: 

99 """ 

100 Function to sort tasks by their creation date/time (with upload date/time 

101 as a tiebreak for consistent ordering). 

102 """ 

103 # For sorting of tasks 

104 created = task.when_created 

105 # noinspection PyProtectedMember 

106 uploaded = task._when_added_batch_utc 

107 return MINTYPE_SINGLETON if created is None else (created, uploaded) 

108 
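# -----------------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original module): how the
# sorter above orders tasks. MINTYPE_SINGLETON compares as less than any
# tuple, so tasks with a missing when_created sort to the front of an
# ascending sort. The _StubTask class is a hypothetical stand-in for Task,
# used here purely for demonstration.

def _demo_task_when_created_sorting() -> None:
    class _StubTask:
        def __init__(self, when_created, when_added_batch_utc) -> None:
            self.when_created = when_created
            self._when_added_batch_utc = when_added_batch_utc

    undated = _StubTask(None, None)
    dated = _StubTask(Pendulum(2022, 1, 1), datetime.datetime(2022, 1, 2))
    ordered = sorted([dated, undated], key=task_when_created_sorter)
    assert ordered[0] is undated  # undated tasks sort first when ascending
# -----------------------------------------------------------------------------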

109 

110@register_enum_for_json 

111class TaskSortMethod(Enum): 

112 """ 

113 Enum representing ways to sort tasks. 

114 """ 

115 

116 NONE = 0 

117 CREATION_DATE_ASC = 1 

118 CREATION_DATE_DESC = 2 

119 

120 

121def sort_tasks_in_place( 

122 tasklist: List[Task], sortmethod: TaskSortMethod 

123) -> None: 

124 """ 

125 Sort a list of tasks, in place, according to ``sortmethod``. 

126 

127 Args: 

128 tasklist: the list of tasks 

129 sortmethod: a :class:`TaskSortMethod` enum 

130 """ 

131 # Sort? 

132 if sortmethod == TaskSortMethod.CREATION_DATE_ASC: 

133 tasklist.sort(key=task_when_created_sorter) 

134 elif sortmethod == TaskSortMethod.CREATION_DATE_DESC: 

135 tasklist.sort(key=task_when_created_sorter, reverse=True) 

136 

137 

138# ============================================================================= 

139# Parallel fetch helper 

140# ============================================================================= 

141# - Why consider a parallel fetch? 

142# Because a typical fetch might involve 27ms per query (as seen by Python; 

143# less as seen by MySQL) but about 100 queries, for a not-very-large 

144# database. 

145# - Initially UNSUCCESSFUL: even after tweaking pool_size=0 in create_engine() 

146# to get round the SQLAlchemy error "QueuePool limit of size 5 overflow 10 

147# reached", in the parallel code, a great many queries are launched, but then 

148# something goes wrong and others are started but then block -- for ages -- 

149# waiting for a spare database connection, or something. 

150# - Fixed that: I was not explicitly closing the sessions. 

151# - But then a major conceptual problem: anything to be lazy-loaded (e.g. 

152# patient, but also patient ID, special note, BLOB...) will give this sort of 

153# error: "DetachedInstanceError: Parent instance <Phq9 at 0x7fe6cce2d278> is 

154# not bound to a Session; lazy load operation of attribute 'patient' cannot 

155# proceed" -- for obvious reasons. And some of those operations are only 

156# required on the final paginated task set, which requires aggregation across 

157# all tasks. 

158# 

159# HOWEVER, the query time per table drops from ~27ms to 4-8ms if we disable 

160# eager loading (lazy="joined") of patients from tasks. 

161 
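# Editor's note (illustrative, not part of the original module): the "eager
# loading (lazy='joined')" mentioned above refers to the standard SQLAlchemy
# relationship option, configured in the model code (not here) along the
# lines of:
#
#     patient = relationship("Patient", lazy="joined")  # eager: JOINs patient in
#     patient = relationship("Patient", lazy="select")  # lazy: loads on access
#
# The actual relationship definition lives with the Task/Patient models; the
# snippet is an assumption shown only to explain the timing trade-off noted
# above.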

162 

163class FetchThread(Thread): 

164 """ 

165 Thread to fetch tasks in parallel. 

166 

167 CURRENTLY UNUSED. 

168 """ 

169 

170 def __init__( 

171 self, 

172 req: "CamcopsRequest", 

173 task_class: Type[Task], 

174 factory: "TaskCollection", 

175 **kwargs 

176 ) -> None: 

177 self.req = req 

178 self.task_class = task_class 

179 self.factory = factory 

180 self.error = False 

181 name = task_class.__tablename__ 

182 super().__init__(name=name, target=None, **kwargs) 

183 

184 def run(self) -> None: 

185 log.debug("Thread starting") 

186 dbsession = self.req.get_bare_dbsession() 

187 # noinspection PyBroadException 

188 try: 

189 # noinspection PyProtectedMember 

190 q = self.factory._make_query(dbsession, self.task_class) 

191 if q: 

192 tasks = q.all() # type: List[Task] 

193 # https://stackoverflow.com/questions/6319207/are-lists-thread-safe # noqa 

194 # https://stackoverflow.com/questions/6953351/thread-safety-in-pythons-dictionary # noqa 

195 # http://effbot.org/pyfaq/what-kinds-of-global-value-mutation-are-thread-safe.htm # noqa 

196 # noinspection PyProtectedMember 

197 self.factory._tasks_by_class[self.task_class] = tasks 

198 log.debug("Thread finishing with results") 

199 else: 

200 log.debug("Thread finishing without results") 

201 except Exception: 

202 self.error = True 

203 log.error("Thread error") 

204 dbsession.close() 

205 

206 

207# ============================================================================= 

208# Make a set of tasks, deferring work until things are needed 

209# ============================================================================= 

210 

211 

212class TaskCollection(object): 

213 """ 

214 Represent a potential or instantiated call to fetch tasks from the 

215 database. 

216 

217 The caller may want them in a giant list (e.g. task viewer, CTVs), or split 

218 by task class (e.g. trackers). 

219 """ 

220 

221 def __init__( 

222 self, 

223 req: Optional["CamcopsRequest"], 

224 taskfilter: TaskFilter = None, 

225 as_dump: bool = False, 

226 sort_method_by_class: TaskSortMethod = TaskSortMethod.NONE, 

227 sort_method_global: TaskSortMethod = TaskSortMethod.NONE, 

228 current_only: bool = True, 

229 via_index: bool = True, 

230 export_recipient: "ExportRecipient" = None, 

231 ) -> None: 

232 """ 

233 Args: 

234 req: 

235 The 

236 :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`. 

237 ``None`` should only be used as a parameter when serializing 

238 a :class:`TaskCollection` to the back-end. 

239 taskfilter: 

240 A :class:`camcops_server.cc_modules.cc_taskfilter.TaskFilter` 

241 object that contains any restrictions we may want to apply. 

242 Must be supplied unless supplying ``export_recipient`` (in 

243 which case, must not be supplied). 

244 as_dump: 

245 Use the "dump" permissions rather than the "view" permissions? 

246 sort_method_by_class: 

247 How should we sort tasks within each task class? 

248 sort_method_global: 

249 How should we sort tasks overall (across all task types)? 

250 current_only: 

251 Restrict to ``_current`` tasks only? 

252 via_index: 

253 Use the server's index (faster)? (Not possible with 

254 ``current_only=False``.) 

255 export_recipient: 

256 A :class:`camcops_server.cc_modules.cc_exportrecipient.ExportRecipient` 

257 """ # noqa 

258 if via_index and not current_only: 

259 log.warning("Can't use index for non-current tasks") 

260 via_index = False 

261 

262 self._req = req 

263 self._filter = taskfilter 

264 self._as_dump = as_dump 

265 self._sort_method_by_class = sort_method_by_class 

266 self._sort_method_global = sort_method_global 

267 self._current_only = current_only 

268 self._via_index = via_index 

269 self.export_recipient = export_recipient 

270 

271 if export_recipient: 

272 # We create a new filter to reflect the export recipient. 

273 assert ( 

274 self._filter is None 

275 ), "Can't supply taskfilter if you supply export_recipient" 

276 # We can do lots of what we need with a TaskFilter(). 

277 self._filter = TaskFilter() 

278 if not export_recipient.all_groups: 

279 self._filter.group_ids = export_recipient.group_ids 

280 self._filter.task_types = export_recipient.tasks 

281 self._filter.start_datetime = export_recipient.start_datetime_utc 

282 self._filter.end_datetime = export_recipient.end_datetime_utc 

283 self._filter.finalized_only = export_recipient.finalized_only 

284 self._filter.tasks_with_patient_only = ( 

285 not export_recipient.anonymous_ok() 

286 ) 

287 self._filter.must_have_idnum_type = export_recipient.primary_idnum 

288 else: 

289 assert ( 

290 self._filter 

291 ), "Must supply taskfilter unless you supply export_recipient" 

292 

293 self._tasks_by_class = ( 

294 OrderedDict() 

295 ) # type: Dict[Type[Task], List[Task]] # noqa 

296 self._all_tasks = None # type: Optional[List[Task]] 

297 self._all_indexes = ( 

298 None 

299 ) # type: Optional[Union[List[TaskIndexEntry], Query]] # noqa 

300 

301 def __repr__(self) -> str: 

302 return auto_repr(self) 

303 

304 def __str__(self) -> str: 

305 return auto_str(self) 

306 

307 # ========================================================================= 

308 # Interface to read 

309 # ========================================================================= 

310 

311 @property 

312 def req(self) -> "CamcopsRequest": 

313 """ 

314 Returns the associated request, or raises :exc:`AssertionError` if it's 

315 not been set. 

316 """ 

317 assert ( 

318 self._req is not None 

319 ), "Must initialize with a request or call set_request() first" 

320 return self._req 

321 

322 def set_request(self, req: "CamcopsRequest") -> None: 

323 """ 

324 Sets the request object manually. Used by Celery back-end tasks. 

325 

326 Args: 

327 req: a :class:`camcops_server.cc_modules.cc_request.CamcopsRequest` 

328 """ 

329 self._req = req 

330 

331 def task_classes(self) -> List[Type[Task]]: 

332 """ 

333 Return a list of task classes that we want. 

334 """ 

335 return self._filter.task_classes 

336 

337 def tasks_for_task_class(self, task_class: Type[Task]) -> List[Task]: 

338 """ 

339 Returns all appropriate task instances for a specific task type. 

340 """ 

341 if self._via_index: 

342 self._ensure_everything_fetched_via_index() 

343 else: 

344 self._fetch_task_class(task_class) 

345 tasklist = self._tasks_by_class.get(task_class, []) 

346 return tasklist 

347 

348 @property 

349 def all_tasks(self) -> List[Task]: 

350 """ 

351 Returns a list of all appropriate task instances. 

352 """ 

353 if self._all_tasks is None: 

354 if self._via_index: 

355 self._ensure_everything_fetched_via_index() 

356 else: 

357 self._fetch_all_tasks_without_index() 

358 return self._all_tasks 

359 

360 @property 

361 def all_tasks_or_indexes_or_query( 

362 self, 

363 ) -> Union[List[Task], List[TaskIndexEntry], Query]: 

364 """ 

365 Returns a list of all appropriate task instances, or index entries, or 

366 a query returning them. 

367 

368 - Returning a list of tasks is fine, but the results of this function 

369 may be paginated (e.g. in the main task view), so the end result may 

370 be that e.g. 20,000 tasks are fetched and 20 are shown. 

371 - More efficient is to fetch 20,000 indexes from the single index 

372 table, and fetch only the 20 tasks we need. 

373 - More efficient still is to fetch the 20 indexes we need, and then 

374 their task. 

375 """ 

376 if not self._via_index: 

377 return self.all_tasks 

378 

379 self._build_index_query() # ensure self._all_indexes is set 

380 

381 if self._all_tasks is not None: 

382 # The tasks themselves have been fetched. 

383 return self._all_tasks 

384 

385 return self._all_indexes # indexes or a query to fetch them 

386 

387 # def forget_task_class(self, task_class: Type[Task]) -> None: 

388 # """ 

389 # Ditch results for a specific task class (for memory efficiency). 

390 # """ 

391 # self._tasks_by_class.pop(task_class, None) 

392 # # The "None" option prevents it from raising KeyError if the key 

393 # # doesn't exist. 

394 # # https://stackoverflow.com/questions/11277432/how-to-remove-a-key-from-a-python-dictionary # noqa 

395 

396 def gen_all_tasks_or_indexes( 

397 self, 

398 ) -> Generator[Union[Task, TaskIndexEntry], None, None]: 

399 """ 

400 Generates tasks or index entries. 

401 """ 

402 tasks_or_indexes_or_query = self.all_tasks_or_indexes_or_query 

403 if isinstance(tasks_or_indexes_or_query, Query): 

404 for item in tasks_or_indexes_or_query.all(): 

405 yield item 

406 else: 

407 for item in tasks_or_indexes_or_query: 

408 yield item 

409 
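    # Editor's note (illustrative, not part of the original module): callers
    # of gen_all_tasks_or_indexes() may receive either Task or TaskIndexEntry
    # objects and typically branch on type, e.g.:
    #
    #     for item in collection.gen_all_tasks_or_indexes():
    #         if isinstance(item, TaskIndexEntry):
    #             pk = item.task_pk
    #         else:
    #             pk = item.pk  # assumption: Task exposes its server PK as .pk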

410 def gen_tasks_by_class(self) -> Generator[Task, None, None]: 

411 """ 

412 Generates all tasks, class-wise. 

413 """ 

414 for cls in self.task_classes(): 

415 for task in self.tasks_for_task_class(cls): 

416 yield task 

417 

418 def gen_tasks_in_global_order(self) -> Generator[Task, None, None]: 

419 """ 

420 Generates all tasks, in the global order. 

421 """ 

422 for task in self.all_tasks: 

423 yield task 

424 

425 @property 

426 def dbsession(self) -> SqlASession: 

427 """ 

428 Returns the request's database session. 

429 """ 

430 return self.req.dbsession 

431 

432 # ========================================================================= 

433 # Internals: fetching Task objects 

434 # ========================================================================= 

435 

436 def _fetch_all_tasks_without_index(self, parallel: bool = False) -> None: 

437 """ 

438 Fetch all tasks from the database. 

439 """ 

440 

441 # AVOID parallel=True; see notes above. 

442 if DEBUG_QUERY_TIMING: 

443 start_time = Pendulum.now() 

444 

445 if parallel: 

446 # Deprecated parallel fetch 

447 threads = [] # type: List[FetchThread] 

448 for task_class in self._filter.task_classes: 

449 thread = FetchThread(self.req, task_class, self) 

450 thread.start() 

451 threads.append(thread) 

452 for thread in threads: 

453 thread.join() 

454 if thread.error: 

455 raise ValueError("Multithreaded fetch failed") 

456 

457 else: 

458 # Fetch all tasks, classwise. 

459 for task_class in self._filter.task_classes: 

460 self._fetch_task_class(task_class) 

461 

462 if DEBUG_QUERY_TIMING: 

463 end_time = Pendulum.now() 

464 # noinspection PyUnboundLocalVariable 

465 time_taken = end_time - start_time 

466 log.info("_fetch_all_tasks_without_index took {}", time_taken) 

467 

468 # Build our joint task list 

469 self._all_tasks = [] # type: List[Task] 

470 for single_task_list in self._tasks_by_class.values(): 

471 self._all_tasks += single_task_list 

472 sort_tasks_in_place(self._all_tasks, self._sort_method_global) 

473 

474 def _fetch_task_class(self, task_class: Type[Task]) -> None: 

475 """ 

476 Fetch tasks from the database for one task type. 

477 """ 

478 if task_class in self._tasks_by_class: 

479 return # already fetched 

480 q = self._serial_query(task_class) 

481 if q is None: 

482 newtasks = [] # type: List[Task] 

483 else: 

484 newtasks = q.all() # type: List[Task] 

485 # Apply Python-side filters? 

486 newtasks = self._filter_through_python(newtasks) 

487 sort_tasks_in_place(newtasks, self._sort_method_by_class) 

488 self._tasks_by_class[task_class] = newtasks 

489 

490 def _serial_query(self, task_class: Type[Task]) -> Optional[Query]: 

491 """ 

492 Make and return an SQLAlchemy ORM query for a specific task class. 

493 

494 Returns ``None`` if no tasks would match our criteria. 

495 """ 

496 dbsession = self.req.dbsession 

497 return self._make_query(dbsession, task_class) 

498 

499 def _make_query( 

500 self, dbsession: SqlASession, task_class: Type[Task] 

501 ) -> Optional[Query]: 

502 """ 

503 Make and return an SQLAlchemy ORM query for a specific task class. 

504 

505 Returns ``None`` if no tasks would match our criteria. 

506 """ 

507 q = dbsession.query(task_class) 

508 

509 # Restrict to what the web front end will supply 

510 # noinspection PyProtectedMember 

511 if self._current_only: 

512 # noinspection PyProtectedMember 

513 q = q.filter(task_class._current == True) # noqa: E712 

514 

515 # Restrict to what is PERMITTED 

516 q = task_query_restricted_to_permitted_users( 

517 self.req, q, task_class, as_dump=self._as_dump 

518 ) 

519 

520 # Restrict to what is DESIRED 

521 if q: 

522 q = self._task_query_restricted_by_filter(q, task_class) 

523 if q and self.export_recipient: 

524 q = self._task_query_restricted_by_export_recipient(q, task_class) 

525 

526 return q 

527 

528 def _task_query_restricted_by_filter( 

529 self, q: Query, cls: Type[Task] 

530 ) -> Optional[Query]: 

531 """ 

532 Restricts an SQLAlchemy ORM query for a given task class to those 

533 tasks that our filter permits. 

534 

535 THIS IS A KEY SECURITY FUNCTION, since it implements some permissions 

536 that relate to viewing tasks when unfiltered. 

537 

538 Args: 

539 q: the starting SQLAlchemy ORM Query 

540 cls: the task class 

541 

542 Returns: 

543 the original query, a modified query, or ``None`` if no tasks 

544 would pass the filter 

545 

546 """ 

547 tf = self._filter # task filter 

548 user = self.req.user 

549 

550 if tf.group_ids: 

551 permitted_group_ids = tf.group_ids.copy() 

552 else: 

553 permitted_group_ids = None # unrestricted 

554 

555 if tf.dates_inconsistent(): 

556 return None 

557 

558 if cls not in tf.task_classes: 

559 # We don't want this task 

560 return None 

561 

562 if not cls.is_anonymous: 

563 # Not anonymous. 

564 if not tf.any_specific_patient_filtering(): 

565 # No patient filtering. Permissions depend on user settings. 

566 if user.may_view_all_patients_when_unfiltered: 

567 # May see everything. No restrictions. 

568 pass 

569 elif user.may_view_no_patients_when_unfiltered: 

570 # Can't see patient data from any group. 

571 # (a) User not permitted to view any patients when 

572 # unfiltered, and (b) not filtered to a level that would 

573 # reasonably restrict to one or a small number of 

574 # patients. Skip the task class. 

575 return None 

576 else: 

577 # May see patient data from some, but not all, groups. 

578 liberal_group_ids = ( 

579 user.group_ids_nonsuperuser_may_see_when_unfiltered() 

580 ) 

581 if not permitted_group_ids: # was unrestricted 

582 permitted_group_ids = liberal_group_ids 

583 else: # was restricted; restrict further 

584 permitted_group_ids = [ 

585 gid 

586 for gid in permitted_group_ids 

587 if gid in liberal_group_ids 

588 ] 

589 if not permitted_group_ids: 

590 return None # down to zero; no point continuing 

591 

592 # Patient filtering 

593 if tf.any_patient_filtering(): 

594 # q = q.join(Patient) # fails 

595 q = q.join( 

596 cls.patient 

597 ) # use explicitly configured relationship # noqa 

598 q = tf.filter_query_by_patient(q, via_index=False) 

599 

600 # Patient-independent filtering 

601 

602 if tf.device_ids: 

603 # noinspection PyProtectedMember 

604 q = q.filter(cls._device_id.in_(tf.device_ids)) 

605 

606 if tf.era: 

607 # noinspection PyProtectedMember 

608 q = q.filter(cls._era == tf.era) 

609 if tf.finalized_only: 

610 q = q.filter(cls._era != ERA_NOW) 

611 

612 if tf.adding_user_ids: 

613 # noinspection PyProtectedMember 

614 q = q.filter(cls._adding_user_id.in_(tf.adding_user_ids)) 

615 

616 if permitted_group_ids: 

617 # noinspection PyProtectedMember 

618 q = q.filter(cls._group_id.in_(permitted_group_ids)) 

619 

620 if tf.start_datetime is not None: 

621 q = q.filter(cls.when_created >= tf.start_datetime) 

622 if tf.end_datetime is not None: 

623 q = q.filter(cls.when_created < tf.end_datetime) 

624 

625 q = self._filter_query_for_text_contents(q, cls) 

626 

627 return q 

628 

629 def _task_query_restricted_by_export_recipient( 

630 self, q: Query, cls: Type[Task] 

631 ) -> Optional[Query]: 

632 """ 

633 For exports. 

634 

635 Filters via our 

636 :class:`camcops_server.cc_modules.cc_exportrecipient.ExportRecipient`, 

637 except for the bits already implemented via our 

638 :class:`camcops_server.cc_modules.cc_taskfilter.TaskFilter`. 

639 

640 The main job here is for incremental exports: to find tasks that have 

641 not yet been exported. We look for any tasks not yet exported to a 

642 recipient of the same name (regardless of ``ExportRecipient.id``, which 

643 changes when the export recipient is reconfigured). 

644 

645 Compare :meth:`_index_query_restricted_by_export_recipient`. 

646 

647 Args: 

648 q: the starting SQLAlchemy ORM Query 

649 cls: the task class 

650 

651 Returns: 

652 the original query, a modified query, or ``None`` if no tasks 

653 would pass the filter 

654 """ 

655 from camcops_server.cc_modules.cc_exportmodels import ( 

656 ExportedTask, 

657 ) # delayed import 

658 

659 r = self.export_recipient 

660 if not r.is_incremental(): 

661 # Full database export; no restrictions 

662 return q 

663 # Otherwise, restrict to tasks not yet sent to this recipient. 

664 # noinspection PyUnresolvedReferences 

665 q = q.filter( 

666 # "There is not a successful export record for this task/recipient" 

667 ~exists() 

668 .select_from( 

669 ExportedTask.__table__.join( 

670 ExportRecipient.__table__, 

671 ExportedTask.recipient_id == ExportRecipient.id, 

672 ) 

673 ) 

674 .where( 

675 and_( 

676 ExportRecipient.recipient_name == r.recipient_name, 

677 ExportedTask.basetable == cls.__tablename__, 

678 ExportedTask.task_server_pk == cls._pk, 

679 ExportedTask.success == True, # noqa: E712 

680 ExportedTask.cancelled == False, # noqa: E712 

681 ) 

682 ) 

683 ) 

684 return q 

685 
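    # Editor's note (illustrative, not part of the original module): the
    # NOT-EXISTS filter above renders to SQL of roughly this shape (table and
    # column names shown schematically; the real names come from the mapped
    # __table__ objects):
    #
    #     WHERE NOT EXISTS (
    #         SELECT * FROM exported_task
    #         JOIN export_recipient
    #             ON exported_task.recipient_id = export_recipient.id
    #         WHERE export_recipient.recipient_name = :recipient_name
    #           AND exported_task.basetable = '<this task's table>'
    #           AND exported_task.task_server_pk = <this task>._pk
    #           AND exported_task.success = 1
    #           AND exported_task.cancelled = 0
    #     )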

686 def _filter_through_python(self, tasks: List[Task]) -> List[Task]: 

687 """ 

688 Returns those tasks in the list provided that pass any Python-only 

689 aspects of our filter (those parts not easily calculable via SQL). 

690 

691 This applies to the "direct" (and not "via index") routes only. With 

692 the index, we can do everything via SQL. 

693 """ 

694 assert not self._via_index 

695 if not self._has_python_parts_to_filter(): 

696 return tasks 

697 return [ 

698 t for t in tasks if self._task_matches_python_parts_of_filter(t) 

699 ] 

700 

701 def _has_python_parts_to_filter(self) -> bool: 

702 """ 

703 Does the filter have aspects to it that require some Python thought, 

704 not just a database query? 

705 

706 Only applicable to the direct (not "via index") route. 

707 """ 

708 assert not self._via_index 

709 return self._filter.complete_only 

710 

711 def _task_matches_python_parts_of_filter(self, task: Task) -> bool: 

712 """ 

713 Does the task pass the Python parts of the filter? 

714 

715 Only applicable to the direct (not "via index") route. 

716 """ 

717 assert not self._via_index 

718 

719 # "Is task complete" filter 

720 if self._filter.complete_only: 

721 if not task.is_complete(): 

722 return False 

723 

724 return True 

725 

726 # ========================================================================= 

727 # Shared between Task and TaskIndexEntry methods 

728 # ========================================================================= 

729 

730 def _filter_query_for_text_contents( 

731 self, q: Query, taskclass: Type[Task] 

732 ) -> Optional[Query]: 

733 """ 

734 Returns the query, filtered for the "text contents" filter. 

735 

736 Args: 

737 q: the starting SQLAlchemy ORM Query 

738 taskclass: the task class 

739 

740 Returns: 

741 a Query, potentially modified. 

742 """ 

743 tf = self._filter # task filter 

744 

745 if not tf.text_contents: 

746 return q # unmodified 

747 

748 # task must contain ALL the strings in AT LEAST ONE text column 

749 textcols = taskclass.get_text_filter_columns() 

750 if not textcols: 

751 # Text filtering requested, but there are no text columns, so 

752 # by definition the filter must fail. 

753 return None 

754 clauses_over_text_phrases = [] # type: List[ColumnElement] 

755 # ... each e.g. "col1 LIKE '%paracetamol%' OR col2 LIKE '%paracetamol%'" # noqa 

756 for textfilter in tf.text_contents: 

757 tf_lower = textfilter.lower() 

758 clauses_over_columns = [] # type: List[ColumnElement] 

759 # ... each e.g. "col1 LIKE '%paracetamol%'" 

760 for textcol in textcols: 

761 # Case-insensitive comparison: 

762 # https://groups.google.com/forum/#!topic/sqlalchemy/331XoToT4lk 

763 # https://bitbucket.org/zzzeek/sqlalchemy/wiki/UsageRecipes/StringComparisonFilter # noqa 

764 clauses_over_columns.append( 

765 func.lower(textcol).contains(tf_lower, autoescape=True) 

766 ) 

767 clauses_over_text_phrases.append(or_(*clauses_over_columns)) 

768 return q.filter(and_(*clauses_over_text_phrases)) 

769 # ... thus, e.g. 

770 # "(col1 LIKE '%paracetamol%' OR col2 LIKE '%paracetamol%') AND 

771 # (col1 LIKE '%overdose%' OR col2 LIKE '%overdose%')" 

772 

773 # ========================================================================= 

774 # Internals: fetching TaskIndexEntry objects 

775 # ========================================================================= 

776 

777 def _ensure_everything_fetched_via_index(self) -> None: 

778 """ 

779 Ensure we have all our tasks loaded, using the index. 

780 """ 

781 self._build_index_query() 

782 self._fetch_tasks_from_indexes() 

783 

784 def _build_index_query(self) -> None: 

785 """ 

786 Creates a Query in :attr:`_all_indexes` that will fetch task indexes. 

787 If the task filtering requires the tasks to be fetched (i.e. text 

788 contents), fetches the actual tasks too (and filters them). 

789 """ 

790 if self._all_indexes is not None: 

791 return 

792 self._all_indexes = self._make_index_query() 

793 if self._filter.text_contents: 

794 self._fetch_tasks_from_indexes() 

795 

796 def _fetch_tasks_from_indexes(self) -> None: 

797 """ 

798 Takes the query that has already been stored in :attr:`_all_indexes`, 

799 and populates the task attributes, :attr:`_all_tasks` and 

800 :attr:`_tasks_by_class`. 

801 """ 

802 if self._all_tasks is not None: 

803 return 

804 assert self._all_indexes is not None 

805 

806 d = tablename_to_task_class_dict() 

807 dbsession = self.req.dbsession 

808 self._all_tasks = [] # type: List[Task] 

809 

810 # Fetch indexes 

811 if isinstance(self._all_indexes, Query): 

812 # Query built, but indexes not yet fetched. 

813 # Replace the query with actual indexes 

814 self._all_indexes = ( 

815 self._all_indexes.all() 

816 ) # type: List[TaskIndexEntry] # noqa 

817 indexes = self._all_indexes 

818 

819 # Fetch tasks 

820 tablenames = set(index.task_table_name for index in indexes) 

821 for tablename in tablenames: 

822 # We do this by task class, so we can execute a single query per 

823 # task type (rather than per task). 

824 try: 

825 taskclass = d[tablename] 

826 except KeyError: 

827 log.warning("Bad tablename in index: {!r}", tablename) 

828 continue 

829 tasklist = self._tasks_by_class.setdefault(taskclass, []) 

830 task_pks = [i.task_pk for i in indexes if i.tablename == tablename] 

831 # noinspection PyProtectedMember 

832 qtask = dbsession.query(taskclass).filter( 

833 taskclass._pk.in_(task_pks) 

834 ) 

835 qtask = self._filter_query_for_text_contents(qtask, taskclass) 

836 tasks = qtask.all() # type: List[Task] 

837 for task in tasks: 

838 tasklist.append(task) 

839 self._all_tasks.append(task) 

840 

841 # Sort tasks 

842 for tasklist in self._tasks_by_class.values(): 

843 sort_tasks_in_place(tasklist, self._sort_method_by_class) 

844 sort_tasks_in_place(self._all_tasks, self._sort_method_global) 

845 

846 def _make_index_query(self) -> Optional[Query]: 

847 """ 

848 Make and return an SQLAlchemy ORM query to retrieve indexes. 

849 

850 Returns ``None`` if no tasks would match our criteria. 

851 """ 

852 dbsession = self.req.dbsession 

853 q = dbsession.query(TaskIndexEntry) 

854 

855 # Restrict to what the web front end will supply 

856 assert self._current_only, "_current_only must be true to use index" 

857 

858 # Restrict to what is PERMITTED 

859 if not self.export_recipient: 

860 q = task_query_restricted_to_permitted_users( 

861 self.req, q, TaskIndexEntry, as_dump=self._as_dump 

862 ) 

863 

864 # Restrict to what is DESIRED 

865 if q: 

866 q = self._index_query_restricted_by_filter(q) 

867 if q and self.export_recipient: 

868 q = self._index_query_restricted_by_export_recipient(q) 

869 

870 return q 

871 

872 def _index_query_restricted_by_filter(self, q: Query) -> Optional[Query]: 

873 """ 

874 Counterpart to :func:`_task_query_restricted_by_filter`, but for 

875 indexes. 

876 

877 THIS IS A KEY SECURITY FUNCTION, since it implements some permissions 

878 that relate to viewing tasks when unfiltered. 

879 

880 Args: 

881 q: the starting SQLAlchemy ORM Query 

882 

883 Returns: 

884 the original query, a modified query, or ``None`` if no tasks 

885 would pass the filter 

886 

887 """ 

888 tf = self._filter # task filter 

889 user = self.req.user 

890 

891 if tf.group_ids: 

892 permitted_group_ids = tf.group_ids.copy() 

893 else: 

894 permitted_group_ids = None # unrestricted 

895 

896 if tf.dates_inconsistent(): 

897 return None 

898 

899 # Task type filtering 

900 

901 if tf.skip_anonymous_tasks(): 

902 # noinspection PyPep8 

903 q = q.filter(TaskIndexEntry.patient_pk != None) # noqa: E711 

904 

905 if not tf.offers_all_non_anonymous_task_types(): 

906 permitted_task_tablenames = [ 

907 tc.__tablename__ for tc in tf.task_classes 

908 ] 

909 q = q.filter( 

910 TaskIndexEntry.task_table_name.in_(permitted_task_tablenames) 

911 ) 

912 

913 # Special rules when we've not filtered for any patients 

914 

915 if not tf.any_specific_patient_filtering(): 

916 # No patient filtering. Permissions depend on user settings. 

917 if user.may_view_all_patients_when_unfiltered: 

918 # May see everything. No restrictions. 

919 pass 

920 elif user.may_view_no_patients_when_unfiltered: 

921 # Can't see patient data from any group. 

922 # (a) User not permitted to view any patients when 

923 # unfiltered, and (b) not filtered to a level that would 

924 # reasonably restrict to one or a small number of 

925 # patients. Restrict to anonymous tasks. 

926 # noinspection PyPep8 

927 q = q.filter(TaskIndexEntry.patient_pk == None) # noqa: E711 

928 else: 

929 # May see patient data from some, but not all, groups. 

930 # This is a little more complex than the equivalent in 

931 # _task_query_restricted_by_filter(), because we shouldn't 

932 # restrict anonymous tasks. 

933 liberal_group_ids = ( 

934 user.group_ids_nonsuperuser_may_see_when_unfiltered() 

935 ) 

936 # noinspection PyPep8 

937 liberal_or_anon_criteria = [ 

938 TaskIndexEntry.patient_pk 

939 == None # noqa: E711 

940 # anonymous OK 

941 ] # type: List[ClauseElement] 

942 for gid in liberal_group_ids: 

943 liberal_or_anon_criteria.append( 

944 TaskIndexEntry.group_id == gid # this group OK 

945 ) 

946 q = q.filter(or_(*liberal_or_anon_criteria)) 

947 

948 # Patient filtering 

949 

950 if tf.any_patient_filtering(): 

951 q = q.join(TaskIndexEntry.patient) # use relationship 

952 q = tf.filter_query_by_patient(q, via_index=True) 

953 

954 # Patient-independent filtering 

955 

956 if tf.device_ids: 

957 # noinspection PyProtectedMember 

958 q = q.filter(TaskIndexEntry.device_id.in_(tf.device_ids)) 

959 

960 if tf.era: 

961 # noinspection PyProtectedMember 

962 q = q.filter(TaskIndexEntry.era == tf.era) 

963 if tf.finalized_only: 

964 q = q.filter(TaskIndexEntry.era != ERA_NOW) 

965 

966 if tf.adding_user_ids: 

967 # noinspection PyProtectedMember 

968 q = q.filter(TaskIndexEntry.adding_user_id.in_(tf.adding_user_ids)) 

969 

970 if permitted_group_ids: 

971 # noinspection PyProtectedMember 

972 q = q.filter(TaskIndexEntry.group_id.in_(permitted_group_ids)) 

973 

974 if tf.start_datetime is not None: 

975 q = q.filter( 

976 TaskIndexEntry.when_created_utc >= tf.start_datetime_utc 

977 ) 

978 if tf.end_datetime is not None: 

979 q = q.filter(TaskIndexEntry.when_created_utc < tf.end_datetime_utc) 

980 

981 # text_contents is managed at the later fetch stage when using indexes 

982 

983 # But is_complete can be filtered now and in SQL: 

984 if tf.complete_only: 

985 # noinspection PyPep8 

986 q = q.filter(TaskIndexEntry.task_is_complete == True) # noqa: E712 

987 

988 # When we use indexes, we embed the global sort criteria in the query. 

989 if self._sort_method_global == TaskSortMethod.CREATION_DATE_ASC: 

990 q = q.order_by( 

991 TaskIndexEntry.when_created_utc.asc(), 

992 TaskIndexEntry.when_added_batch_utc.asc(), 

993 ) 

994 elif self._sort_method_global == TaskSortMethod.CREATION_DATE_DESC: 

995 q = q.order_by( 

996 TaskIndexEntry.when_created_utc.desc(), 

997 TaskIndexEntry.when_added_batch_utc.desc(), 

998 ) 

999 

1000 return q 

1001 

1002 def _index_query_restricted_by_export_recipient( 

1003 self, q: Query 

1004 ) -> Optional[Query]: 

1005 """ 

1006 For exports. 

1007 

1008 Filters via our 

1009 :class:`camcops_server.cc_modules.cc_exportrecipient.ExportRecipient`, 

1010 except for the bits already implemented via our 

1011 :class:`camcops_server.cc_modules.cc_taskfilter.TaskFilter`. 

1012 

1013 The main job here is for incremental exports: to find tasks that have 

1014 not yet been exported. 

1015 

1016 Compare :meth:`_task_query_restricted_by_export_recipient`. 

1017 

1018 Args: 

1019 q: the starting SQLAlchemy ORM Query 

1020 

1021 Returns: 

1022 the original query, a modified query, or ``None`` if no tasks 

1023 would pass the filter 

1024 

1025 """ 

1026 from camcops_server.cc_modules.cc_exportmodels import ( 

1027 ExportedTask, 

1028 ) # delayed import 

1029 

1030 r = self.export_recipient 

1031 if not r.is_incremental(): 

1032 # Full database export; no restrictions 

1033 return q 

1034 # Otherwise, restrict to tasks not yet sent to this recipient. 

1035 # Remember: q is a query on TaskIndexEntry. 

1036 # noinspection PyUnresolvedReferences 

1037 q = q.filter( 

1038 # "There is not a successful export record for this task/recipient" 

1039 ~exists() 

1040 .select_from( 

1041 ExportedTask.__table__.join( 

1042 ExportRecipient.__table__, 

1043 ExportedTask.recipient_id == ExportRecipient.id, 

1044 ) 

1045 ) 

1046 .where( 

1047 and_( 

1048 ExportRecipient.recipient_name == r.recipient_name, 

1049 ExportedTask.basetable == TaskIndexEntry.task_table_name, 

1050 # ... don't use ".tablename": it's a Python property, which doesn't 

1051 # play nicely with SQLAlchemy here 

1052 ExportedTask.task_server_pk == TaskIndexEntry.task_pk, 

1053 ExportedTask.success == True, # noqa: E712 

1054 ExportedTask.cancelled == False, # noqa: E712 

1055 ) 

1056 ) 

1057 ) 

1058 return q 

1059 
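# -----------------------------------------------------------------------------
# Editor's illustrative sketch (not part of the original module): typical use
# of TaskCollection from request-handling code. A TaskFilter is built, wrapped
# in a collection, and the results iterated. The filter attributes set here
# (task_types, complete_only) mirror those used elsewhere in this module, but
# the specific value ("phq9") is an assumption for illustration only.

def _example_fetch_recent_phq9s(req: "CamcopsRequest") -> List[Task]:
    taskfilter = TaskFilter()
    taskfilter.task_types = ["phq9"]  # hypothetical: restrict to one task type
    taskfilter.complete_only = True  # only complete tasks
    return TaskCollection(
        req=req,
        taskfilter=taskfilter,
        sort_method_global=TaskSortMethod.CREATION_DATE_DESC,
        current_only=True,
        via_index=True,
    ).all_tasks
# -----------------------------------------------------------------------------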

1060 

1061# noinspection PyProtectedMember 

1062def encode_task_collection(coll: TaskCollection) -> Dict: 

1063 """ 

1064 Serializes a :class:`TaskCollection`. 

1065 

1066 The request is not serialized and must be rebuilt in another way; see e.g. 

1067 :func:`camcops_server.cc_modules.celery.email_basic_dump`. 

1068 """ 

1069 return { 

1070 "taskfilter": dumps(coll._filter, serializer="json"), 

1071 "as_dump": coll._as_dump, 

1072 "sort_method_by_class": dumps( 

1073 coll._sort_method_by_class, serializer="json" 

1074 ), 

1075 } 

1076 

1077 

1078# noinspection PyUnusedLocal 

1079def decode_task_collection(d: Dict, cls: Type) -> TaskCollection: 

1080 """ 

1081 Creates a :class:`TaskCollection` from a serialized version. 

1082 

1083 The request is not serialized and must be rebuilt in another way; see e.g. 

1084 :func:`camcops_server.cc_modules.celery.email_basic_dump`. 

1085 """ 

1086 kwargs = { 

1087 "taskfilter": loads(*reorder_args(*d["taskfilter"])), 

1088 "as_dump": d["as_dump"], 

1089 "sort_method_by_class": loads( 

1090 *reorder_args(*d["sort_method_by_class"]) 

1091 ), 

1092 } 

1093 return TaskCollection(req=None, **kwargs) 

1094 

1095 

1096def reorder_args( 

1097 content_type: str, content_encoding: str, data: str 

1098) -> List[str]: 

1099 """ 

1100 kombu :func:`SerializerRegistry.dumps` returns the data as the last element of 

1101 its result tuple, but :func:`SerializerRegistry.loads` takes it as the first argument 

1102 """ 

1103 return [data, content_type, content_encoding] 

1104 
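# Editor's note (illustrative, not part of the original module): a worked
# example of the argument-order mismatch handled above, using kombu's
# documented behaviour:
#
#     content_type, content_encoding, data = dumps(obj, serializer="json")
#     obj2 = loads(data, content_type, content_encoding)
#     # ... i.e. loads(*reorder_args(content_type, content_encoding, data)),
#     # which is how decode_task_collection() calls it.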

1105 

1106register_class_for_json( 

1107 cls=TaskCollection, 

1108 obj_to_dict_fn=encode_task_collection, 

1109 dict_to_obj_fn=decode_task_collection, 

1110)
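# Editor's note (illustrative, not part of the original module): the
# registration above lets a TaskCollection cross a Celery boundary. A back-end
# worker would do roughly the following; json_encode/json_decode are assumed
# to be the cardinal_pythonlib entry points that consult this registry (see
# cardinal_pythonlib.json.serialize for the authoritative API):
#
#     serialized = json_encode(collection)   # calls encode_task_collection()
#     rebuilt = json_decode(serialized)      # calls decode_task_collection()
#     rebuilt.set_request(req)               # the request is never serialized
#     tasks = rebuilt.all_tasks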