Coverage for cc_modules/cc_dump.py: 22%
223 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_dump.py
6===============================================================================
8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
11 This file is part of CamCOPS.
13 CamCOPS is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 CamCOPS is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
26===============================================================================
28**Methods for providing a dump of data from the server to the web user.**
30"""
32import logging
33from typing import (
34 Any,
35 Dict,
36 Generator,
37 Iterable,
38 List,
39 Optional,
40 Set,
41 Tuple,
42 Type,
43 TYPE_CHECKING,
44 Union,
45)
47from cardinal_pythonlib.logs import BraceStyleAdapter
48from cardinal_pythonlib.sqlalchemy.orm_inspect import (
49 gen_columns,
50 gen_orm_classes_from_base,
51 walk_orm_tree,
52)
53from sqlalchemy.exc import CompileError
54from sqlalchemy.engine.base import Engine
55from sqlalchemy.orm import Session as SqlASession
56from sqlalchemy.sql.schema import Column, MetaData, Table
58from camcops_server.cc_modules.cc_blob import Blob
59from camcops_server.cc_modules.cc_db import (
60 GenericTabletRecordMixin,
61 TaskDescendant,
62)
63from camcops_server.cc_modules.cc_device import Device
64from camcops_server.cc_modules.cc_email import Email
65from camcops_server.cc_modules.cc_exportmodels import (
66 ExportedTask,
67 ExportedTaskEmail,
68 ExportedTaskFileGroup,
69 ExportedTaskHL7Message,
70)
71from camcops_server.cc_modules.cc_exportrecipient import ExportRecipient
72from camcops_server.cc_modules.cc_group import Group, group_group_table
73from camcops_server.cc_modules.cc_membership import UserGroupMembership
74from camcops_server.cc_modules.cc_patient import Patient
75from camcops_server.cc_modules.cc_patientidnum import (
76 all_extra_id_columns,
77 PatientIdNum,
78)
79from camcops_server.cc_modules.cc_sqla_coltypes import CamcopsColumn
80from camcops_server.cc_modules.cc_task import Task
81from camcops_server.cc_modules.cc_user import User
83if TYPE_CHECKING:
84 from camcops_server.cc_modules.cc_request import CamcopsRequest
85 from camcops_server.cc_modules.cc_summaryelement import ExtraSummaryTable
86 from camcops_server.cc_modules.cc_simpleobjects import TaskExportOptions
88log = BraceStyleAdapter(logging.getLogger(__name__))
91# =============================================================================
92# Constants
93# =============================================================================
95# Restrict specified tables to certain columns only:
DUMP_ONLY_COLNAMES = {
    # tablename -> the only column names that may be dumped from that table
    Device.__tablename__: ["camcops_version", "friendly_name", "id", "name"],
    User.__tablename__: ["fullname", "id", "username"],
}

# tablename -> column names to drop from that table (none at present):
DUMP_DROP_COLNAMES = {}  # type: Dict[str, List[str]]

# Column names that are never dumped, whichever table they appear in.
# The dump is restricted to current records, so many of these are irrelevant.
DUMP_SKIP_COLNAMES = [
    "_addition_pending",
    "_forcibly_preserved",
    "_manually_erased",
    "_manually_erased_at",
    "_manually_erasing_user_id",
    "_move_off_tablet",
    "_removal_pending",
    "_removing_user_id",
    "_successor_pk",
    "_when_removed_batch_utc",
    "_when_removed_exact",
]

# Names of ORM *relationships* that are never followed:
DUMP_SKIP_RELNAMES = [
    "_manually_erasing_user",
    "_removing_user",
]

# Names of tables that are never dumped.
# Not every admin table needs listing: the dump starts from tasks, so only
# tables reachable via ORM relationships from a task can turn up. (The
# Email/ExportedTask* set are not reachable at present; they are listed as a
# precaution in case a relationship is added later.)
DUMP_SKIP_TABLES = [
    Email.__tablename__,
    ExportedTask.__tablename__,
    ExportedTaskEmail.__tablename__,
    ExportedTaskFileGroup.__tablename__,
    ExportedTaskHL7Message.__tablename__,
    ExportRecipient.__tablename__,
    group_group_table.name,
    UserGroupMembership.__tablename__,
]

# Tables whose relationships are never traversed:
DUMP_SKIP_ALL_RELS_FOR_TABLES = [Group.__tablename__]

# The key columns are always present in the dump; this flag controls whether
# we also try to enforce foreign key constraints on them.
FOREIGN_KEY_CONSTRAINTS_IN_DUMP = False
140# ... the keys will be present, but should we try to enforce constraints?
143# =============================================================================
144# Handy place to hold the controlling information
145# =============================================================================
class DumpController(object):
    """
    A controller class that manages the copying (dumping) of information from
    our database to another SQLAlchemy :class:`Engine`/:class:`Session`.

    Typical use: create one controller per dump, then call
    :meth:`consider_object` for every source ORM object encountered. The
    controller creates destination tables lazily (the first time a table is
    seen) and inserts one destination row per object.
    """

    def __init__(
        self,
        dst_engine: Engine,
        dst_session: SqlASession,
        export_options: "TaskExportOptions",
        req: "CamcopsRequest",
    ) -> None:
        """
        Args:
            dst_engine: destination SQLAlchemy Engine
            dst_session: destination SQLAlchemy Session
            export_options: :class:`camcops_server.cc_modules.cc_simpleobjects.TaskExportOptions`
            req: :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
        """  # noqa
        self.dst_engine = dst_engine
        self.dst_session = dst_session
        self.export_options = export_options
        self.req = req

        # We start with blank metadata.
        self.dst_metadata = MetaData()
        # Tables we are inserting into the destination database:
        self.dst_tables = {}  # type: Dict[str, Table]
        # ... note that creating a Table() for a given SQLAlchemy metadata is
        #     permitted only once, so we add to self.dst_tables as soon
        #     as we create that.
        # Tables we've created:
        self.tablenames_created = set()  # type: Set[str]
        # Tables we've processed, though we may ignore them:
        self.tablenames_seen = set()  # type: Set[str]
        # ORM objects we've visited:
        self.instances_seen = set()  # type: Set[object]

        if export_options.db_make_all_tables_even_empty:
            self._create_all_dest_tables()

    def _create_all_dest_tables(self) -> None:
        """
        Creates all tables in the destination database, even ones that may
        not be used.
        """
        log.debug("Creating all destination tables...")
        for table in self.gen_all_dest_tables():
            self._create_dest_table(table)
        log.debug("... all destination tables created.")

    def gen_all_dest_tables(self) -> Generator[Table, None, None]:
        """
        Generates all destination tables, each table name at most once.

        Works by instantiating a blank object of every ORM class derived from
        :class:`GenericTabletRecordMixin` and asking for that object's
        destination tables.
        """
        tablenames_seen = set()  # type: Set[str]
        for cls in gen_orm_classes_from_base(
            GenericTabletRecordMixin
        ):  # type: Type[GenericTabletRecordMixin]  # noqa
            instance = cls()
            for table in self.gen_all_dest_tables_for_obj(instance):
                if table.name in tablenames_seen:
                    continue
                tablenames_seen.add(table.name)
                yield table

    def gen_all_dest_tables_for_obj(
        self, src_obj: object
    ) -> Generator[Table, None, None]:
        """
        Generates all destination tables for an object: its main table, plus
        (for tasks) one table per extra summary table that the task offers.
        """
        # Main table
        yield self.get_dest_table_for_src_object(src_obj)
        # Additional tables
        if isinstance(src_obj, Task):
            add_extra_id_cols = (
                self.export_options.db_patient_id_in_each_row
                and not src_obj.is_anonymous
            )
            for est in src_obj.get_all_summary_tables(self.req):
                yield self.get_dest_table_for_est(
                    est, add_extra_id_cols=add_extra_id_cols
                )

    def gen_all_dest_columns(
        self,
    ) -> Generator[Union[Column, CamcopsColumn], None, None]:
        """
        Generates all destination columns (every column of every table
        produced by :meth:`gen_all_dest_tables`).
        """
        for table in self.gen_all_dest_tables():
            yield from table.columns

    def consider_object(self, src_obj: object) -> None:
        """
        Think about an SQLAlchemy ORM object. If it comes from a table we
        want dumped, add this object to the dump.
        """
        # noinspection PyUnresolvedReferences
        src_table = src_obj.__table__  # type: Table
        src_tablename = src_table.name
        if src_tablename not in self.tablenames_seen:
            # If we encounter a table we've not seen, offer our "table decider"
            # the opportunity to add it to the metadata and create the table.
            self._add_dump_table_for_src_object(src_obj)
        # If this table is going into the destination, copy the object
        # (and maybe remove columns from it, or add columns to it).
        # The skip check is repeated here because a table can be present in
        # self.dst_tables without having come via
        # _add_dump_table_for_src_object() (e.g. via gen_all_dest_tables()).
        if src_tablename in self.dst_tables and not self._dump_skip_table(
            src_tablename
        ):
            self._copy_object_to_dump(src_obj)

    @staticmethod
    def _merits_extra_id_num_columns(
        obj: object,
    ) -> Tuple[bool, Optional[Patient]]:
        """
        Is the source object one that would support the addition of extra
        ID number information if the export option ``DB_PATIENT_ID_PER_ROW`` is
        set? If so, return the relevant patient.

        Args:
            obj: an SQLAlchemy ORM object

        Returns:
            tuple: ``(merits, patient)``, where ``merits`` is a ``bool`` (does
            it merit this?) and ``patient`` is a relevant
            :class:`camcops_server.cc_modules.cc_patient.Patient`, if found.
            It is also guaranteed that if a patient is returned, ``merits`` is
            ``True`` (but not guaranteed that if ``merits`` is true, that
            ``patient`` is not ``None``).

        """
        if not isinstance(obj, GenericTabletRecordMixin):
            # Must be data that originated from the client.
            return False, None
        if isinstance(obj, PatientIdNum):
            # PatientIdNum already has this info.
            return False, None
        if isinstance(obj, Patient):
            return True, obj
        if isinstance(obj, Task):
            if obj.is_anonymous:
                # Anonymous tasks don't.
                return False, None
            return True, obj.patient
        if isinstance(obj, TaskDescendant):
            merits = obj.task_ancestor_might_have_patient()
            patient = obj.task_ancestor_patient()
            return merits, patient
        # Pass the object as a logging argument (as other log calls in this
        # module do) rather than pre-formatting with an f-string, so that any
        # braces in its repr are not treated as format fields.
        # NOTE(review): presumes BraceStyleAdapter applies str.format() to
        # the message -- confirm.
        log.warning(
            "_merits_extra_id_num_columns: don't know how to handle {!r}", obj
        )
        return False, None

    def get_dest_table_for_src_object(self, src_obj: object) -> Table:
        """
        Produces the destination table for the source object, building it
        (and caching it in ``self.dst_tables``) on first request.

        Args:
            src_obj:
                An SQLAlchemy ORM object. It will *not* be a
                :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`;
                those are handled instead by
                :meth:`_get_or_insert_summary_table`.

        Returns:
            an SQLAlchemy :class:`Table`
        """  # noqa
        # noinspection PyUnresolvedReferences
        src_table = src_obj.__table__  # type: Table
        tablename = src_table.name

        # Don't create it twice in the SQLAlchemy metadata.
        if tablename in self.dst_tables:
            return self.dst_tables[tablename]

        # Copy columns, dropping any we don't want, and dropping FK constraints
        dst_columns = []  # type: List[Column]
        for src_column in src_table.columns:
            if self._dump_skip_column(tablename, src_column.name):
                continue
            # You can't add the source column directly; you get
            # "sqlalchemy.exc.ArgumentError: Column object 'ccc' already
            # assigned to Table 'ttt'"
            copied_column = src_column.copy()
            copied_column.comment = src_column.comment
            # ... see SQLAlchemy trivial bug:
            # https://bitbucket.org/zzzeek/sqlalchemy/issues/4087/columncopy-doesnt-copy-comment-attribute  # noqa
            if FOREIGN_KEY_CONSTRAINTS_IN_DUMP:
                copied_column.foreign_keys = {
                    fk.copy() for fk in src_column.foreign_keys
                }
                log.warning(
                    "NOT WORKING: foreign key commands not being emitted"
                )
                # but
                # https://docs.sqlalchemy.org/en/latest/core/constraints.html
                # works fine under SQLite, even if the other table hasn't been
                # created yet. Does the table to which the FK refer have to be
                # in the metadata already?
                # That's quite possible, but I've not checked.
                # Would need to iterate through tables in dependency order,
                # like merge_db() does.
            else:
                # Probably blank already, as the copy() command only copies
                # non-constraint-bound ForeignKey objects, but to be sure:
                copied_column.foreign_keys = set()
                # ... type is: Set[ForeignKey]
            dst_columns.append(copied_column)

        # Add extra columns?
        if self.export_options.db_include_summaries:
            if isinstance(src_obj, GenericTabletRecordMixin):
                for summary_element in src_obj.get_summaries(self.req):
                    dst_columns.append(
                        CamcopsColumn(
                            summary_element.name,
                            summary_element.coltype,
                            exempt_from_anonymisation=True,
                            comment=summary_element.decorated_comment,
                        )
                    )
        if self.export_options.db_patient_id_in_each_row:
            merits, _ = self._merits_extra_id_num_columns(src_obj)
            if merits:
                dst_columns.extend(all_extra_id_columns(self.req))
            if isinstance(src_obj, TaskDescendant):
                dst_columns += src_obj.extra_task_xref_columns()

        dst_table = Table(tablename, self.dst_metadata, *dst_columns)
        # ... that modifies the metadata, so:
        self.dst_tables[tablename] = dst_table
        return dst_table

    def get_dest_table_for_est(
        self, est: "ExtraSummaryTable", add_extra_id_cols: bool = False
    ) -> Table:
        """
        Add an additional summary table to the dump, if it's not there already.
        Return the table (from the destination database).

        This only defines the table in the destination metadata; it does not
        issue CREATE TABLE (see :meth:`_get_or_insert_summary_table`).

        Args:
            est:
                a
                :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`
            add_extra_id_cols:
                Add extra ID columns, for the ``DB_PATIENT_ID_PER_ROW``
                export option?
        """  # noqa
        tablename = est.tablename
        if tablename in self.dst_tables:
            return self.dst_tables[tablename]

        columns = est.columns.copy()
        if add_extra_id_cols:
            # Same condition as the per-row additions made for summary table
            # rows in _copy_object_to_dump().
            columns.extend(all_extra_id_columns(self.req))
            columns.extend(est.extra_task_xref_columns())
        table = Table(tablename, self.dst_metadata, *columns)
        # ... that modifies the metadata, so:
        self.dst_tables[tablename] = table
        return table

    def _add_dump_table_for_src_object(self, src_obj: object) -> None:
        """
        - Mark the object's table as seen.

        - If we want it, add it to the metadata and execute a CREATE TABLE
          command.

        - We may translate the table en route.

        Args:
            src_obj:
                An SQLAlchemy ORM object. It will *not* be a
                :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`;
                those are handled instead by
                :meth:`_get_or_insert_summary_table`.
        """  # noqa
        # noinspection PyUnresolvedReferences
        src_table = src_obj.__table__  # type: Table
        tablename = src_table.name
        self.tablenames_seen.add(tablename)

        # Skip the table?
        if self._dump_skip_table(tablename):
            return

        # Get the table definition
        dst_table = self.get_dest_table_for_src_object(src_obj)
        # Create it
        self._create_dest_table(dst_table)

    def _create_dest_table(self, dst_table: Table) -> None:
        """
        Creates a table in the destination database (at most once per table
        name).
        """
        tablename = dst_table.name
        if tablename in self.tablenames_created:
            return  # don't create it twice
        # You have to use an engine, not a session, to create tables (or you
        # get "AttributeError: 'Session' object has no attribute
        # '_run_visitor'").
        # However, you have to commit the session, or you get
        #     "sqlalchemy.exc.OperationalError: (sqlite3.OperationalError)
        #     database is locked", since a session is also being used.
        self.dst_session.commit()
        dst_table.create(self.dst_engine)
        self.tablenames_created.add(tablename)

    def _insert_row(
        self, table: Table, row: Dict[str, Any], table_label: str
    ) -> None:
        """
        Inserts one row into a destination table via the destination session.

        Args:
            table: destination table to insert into
            row: mapping of column name to value
            table_label: description of ``table`` used in the error log

        Raises:
            CompileError: re-raised after logging the offending table and row
        """
        try:
            self.dst_session.execute(table.insert(row))
        except CompileError:
            log.critical("\n{}:\n{}\nrow:\n{}", table_label, table, row)
            raise

    def _copy_object_to_dump(self, src_obj: object) -> None:
        """
        Copy the SQLAlchemy ORM object to the dump.

        Inserts one row into the object's destination table and, for tasks,
        also inserts the rows of any extra summary tables the task offers
        (creating those tables if necessary).
        """
        # noinspection PyUnresolvedReferences
        src_table = src_obj.__table__  # type: Table
        adding_extra_ids = False
        patient = None  # type: Optional[Patient]
        if self.export_options.db_patient_id_in_each_row:
            adding_extra_ids, patient = self._merits_extra_id_num_columns(
                src_obj
            )

        # 1. Insert row for this object, potentially adding and removing
        #    columns.
        tablename = src_table.name
        dst_table = self.dst_tables[tablename]
        assert dst_table.name == tablename
        row = {}  # type: Dict[str, Any]
        # Copy columns, skipping any we don't want
        for attrname, column in gen_columns(src_obj):
            if self._dump_skip_column(tablename, column.name):
                continue
            row[column.name] = getattr(src_obj, attrname)
        # Any other columns to add for this table?
        if isinstance(src_obj, GenericTabletRecordMixin):
            if self.export_options.db_include_summaries:
                for summary_element in src_obj.get_summaries(self.req):
                    row[summary_element.name] = summary_element.value
            if adding_extra_ids:
                if patient:
                    patient.add_extra_idnum_info_to_row(row)
                if isinstance(src_obj, TaskDescendant):
                    src_obj.add_extra_task_xref_info_to_row(row)
        self._insert_row(dst_table, row, "dst_table")

        # 2. If required, add extra tables/rows that this task wants to
        #    offer (usually tables whose rows don't have a 1:1 correspondence
        #    to the task or its ancillary objects).
        if isinstance(src_obj, Task):
            estables = src_obj.get_all_summary_tables(self.req)
            # ... includes SNOMED
            for est in estables:
                dst_summary_table = self._get_or_insert_summary_table(
                    est, add_extra_id_cols=adding_extra_ids
                )
                # Distinct name: don't shadow the main-table "row" above.
                for est_row in est.rows:
                    if patient:
                        patient.add_extra_idnum_info_to_row(est_row)
                    if adding_extra_ids:
                        est.add_extra_task_xref_info_to_row(est_row)
                    # On failure, report the summary table itself (not the
                    # task's main table).
                    self._insert_row(
                        dst_summary_table, est_row, "dst_summary_table"
                    )

    def _get_or_insert_summary_table(
        self, est: "ExtraSummaryTable", add_extra_id_cols: bool = False
    ) -> Table:
        """
        Add an additional summary table to the dump, if it's not there already
        (creating it in the destination database if it has not yet been
        created). Return the table (from the destination database).

        Args:
            est:
                a
                :class:`camcops_server.cc_modules.cc_summaryelement.ExtraSummaryTable`
            add_extra_id_cols:
                Add extra ID columns, for the ``DB_PATIENT_ID_PER_ROW``
                export option?
        """  # noqa
        tablename = est.tablename
        if tablename not in self.tablenames_created:
            table = self.get_dest_table_for_est(
                est, add_extra_id_cols=add_extra_id_cols
            )
            self._create_dest_table(table)
        return self.dst_tables[tablename]

    def _dump_skip_table(self, tablename: str) -> bool:
        """
        Should we skip this table (omit it from the dump)?

        True for the BLOB table when the export options exclude BLOBs, and
        for any table named in ``DUMP_SKIP_TABLES``.
        """
        if (
            not self.export_options.include_blobs
            and tablename == Blob.__tablename__
        ):
            return True
        return tablename in DUMP_SKIP_TABLES

    @staticmethod
    def _dump_skip_column(tablename: str, columnname: str) -> bool:
        """
        Should we skip this column (omit it from the dump)?

        A column is skipped if it is in ``DUMP_SKIP_COLNAMES``; or its table
        has a ``DUMP_ONLY_COLNAMES`` entry that does not include it; or its
        table has a ``DUMP_DROP_COLNAMES`` entry that does include it.
        """
        if columnname in DUMP_SKIP_COLNAMES:
            return True
        if (
            tablename in DUMP_ONLY_COLNAMES
            and columnname not in DUMP_ONLY_COLNAMES[tablename]
        ):
            return True
        if (
            tablename in DUMP_DROP_COLNAMES
            and columnname in DUMP_DROP_COLNAMES[tablename]
        ):
            return True
        return False
592# =============================================================================
593# Copying stuff to a dump
594# =============================================================================
def copy_tasks_and_summaries(
    tasks: Iterable[Task],
    dst_engine: Engine,
    dst_session: SqlASession,
    export_options: "TaskExportOptions",
    req: "CamcopsRequest",
) -> None:
    """
    Dump the given tasks, plus everything reachable from each task by walking
    the SQLAlchemy ORM tree, into the destination database.

    Args:
        tasks: tasks to copy
        dst_engine: destination SQLAlchemy Engine
        dst_session: destination SQLAlchemy Session
        export_options: :class:`camcops_server.cc_modules.cc_simpleobjects.TaskExportOptions`
        req: :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
    """  # noqa
    # Design notes. See also:
    #
    # https://stackoverflow.com/questions/21770829/sqlalchemy-copy-schema-and-data-of-subquery-to-another-database  # noqa
    # https://stackoverflow.com/questions/40155340/sqlalchemy-reflect-and-copy-only-subset-of-existing-schema  # noqa
    #
    # - Copying the MetaData object would be extremely laborious (every ORM
    #   class is tied to it), MetaData.tables is an immutabledict so it
    #   couldn't be edited anyway, and a cloned MetaData would not give us
    #   ORM classes to walk.
    # - So we operate at a lower level instead.
    # - Given that, PKs don't need translating at all (unlike merge_db).
    # - FK constraints are not created explicitly; most are not achievable
    #   anyway (e.g. linking on device/era; omission of BLOBs).

    dumper = DumpController(
        dst_engine=dst_engine,
        dst_session=dst_session,
        export_options=export_options,
        req=req,
    )
    # The same traversal restrictions apply to every task:
    walk_options = dict(
        skip_relationships_always=DUMP_SKIP_RELNAMES,
        skip_all_relationships_for_tablenames=DUMP_SKIP_ALL_RELS_FOR_TABLES,
        skip_all_objects_for_tablenames=DUMP_SKIP_TABLES,
    )

    log.debug("Starting to copy tasks...")
    for task in tasks:
        log.debug("Processing task: {!r}", task)
        # The controller's "seen" set is shared across tasks, and handed to
        # the tree walker on every call.
        reachable_objects = walk_orm_tree(
            task, seen=dumper.instances_seen, **walk_options
        )
        for orm_obj in reachable_objects:
            dumper.consider_object(orm_obj)
    log.debug("... finished copying tasks.")
650 log.debug("... finished copying tasks.")