Coverage for cc_modules/merge_db.py: 19%
277 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/merge_db.py
6===============================================================================
8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
11 This file is part of CamCOPS.
13 CamCOPS is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 CamCOPS is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
26===============================================================================
28**Tool to merge data from one CamCOPS database into another.**
30Has special code to deal with old databases.
32"""
34import logging
35from pprint import pformat
36from typing import Any, cast, Dict, List, Optional, Type, TYPE_CHECKING
38from cardinal_pythonlib.logs import BraceStyleAdapter
39from cardinal_pythonlib.sqlalchemy.merge_db import merge_db, TranslationContext
40from cardinal_pythonlib.sqlalchemy.schema import get_table_names
41from cardinal_pythonlib.sqlalchemy.session import get_safe_url_from_engine
42from cardinal_pythonlib.sqlalchemy.table_identity import TableIdentity
43from sqlalchemy.engine import create_engine
44from sqlalchemy.engine.base import Engine
45from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound
46from sqlalchemy.orm.session import Session
47from sqlalchemy.sql.expression import column, func, select, table, text
49from camcops_server.cc_modules.cc_audit import AuditEntry
50from camcops_server.cc_modules.cc_constants import (
51 FP_ID_NUM,
52 NUMBER_OF_IDNUMS_DEFUNCT,
53)
54from camcops_server.cc_modules.cc_db import GenericTabletRecordMixin
55from camcops_server.cc_modules.cc_device import Device
56from camcops_server.cc_modules.cc_dirtytables import DirtyTable
57from camcops_server.cc_modules.cc_email import Email
58from camcops_server.cc_modules.cc_exportmodels import (
59 ExportedTask,
60 ExportedTaskEmail,
61 ExportedTaskFileGroup,
62 ExportedTaskHL7Message,
63)
64from camcops_server.cc_modules.cc_exportrecipient import ExportRecipient
65from camcops_server.cc_modules.cc_group import Group, group_group_table
66from camcops_server.cc_modules.cc_idnumdef import IdNumDefinition
67from camcops_server.cc_modules.cc_membership import UserGroupMembership
68from camcops_server.cc_modules.cc_patient import Patient
69from camcops_server.cc_modules.cc_patientidnum import (
70 fake_tablet_id_for_patientidnum,
71 PatientIdNum,
72)
73from camcops_server.cc_modules.cc_request import get_command_line_request
74from camcops_server.cc_modules.cc_session import CamcopsSession
75from camcops_server.cc_modules.cc_serversettings import (
76 server_stored_var_table_defunct,
77 ServerSettings,
78 ServerStoredVarNamesDefunct,
79)
80from camcops_server.cc_modules.cc_sqlalchemy import Base
81from camcops_server.cc_modules.cc_taskindex import reindex_everything
82from camcops_server.cc_modules.cc_user import (
83 SecurityAccountLockout,
84 SecurityLoginFailure,
85 User,
86)
88if TYPE_CHECKING:
89 from sqlalchemy.engine.result import ResultProxy
91log = BraceStyleAdapter(logging.getLogger(__name__))
93DEBUG_VIA_PDB = False
96# =============================================================================
97# Information relating to the source database
98# =============================================================================
def get_skip_tables(src_tables: List[str]) -> List[TableIdentity]:
    """
    From the list of source table names provided, return details of tables
    in the metadata to skip because they are not in the source database.

    Also checks that some core CamCOPS tables are present in the source, or
    raises :exc:`ValueError`.

    Args:
        src_tables: list of all table names in the source database

    Returns:
        list of
        :class:`cardinal_pythonlib.sqlalchemy.table_identity.TableIdentity`
        objects representing tables to skip

    Note that other tables to skip are defined in :func:`merge_camcops_db`.
    """
    tables_to_skip = []  # type: List[TableIdentity]

    # Without these core tables, the source cannot be any sort of CamCOPS
    # database (old or new), so refuse to proceed.
    for tname in (Patient.__tablename__, User.__tablename__):
        if tname in src_tables:
            continue
        raise ValueError(
            f"Cannot proceed; table {tname!r} missing from source; "
            f"unlikely that the source is any sort of old CamCOPS "
            f"database!"
        )

    # Missing source tables are generally tolerated. The exception would be
    # tables automatically eager-loaded by relationships (for CamCOPS:
    # Patient, User, PatientIdNum); eager loading is disabled during merges
    # instead, so no source table needs to be created here.

    # Groups are a comparatively modern addition; old databases lack them.
    if Group.__tablename__ not in src_tables:
        log.warning(
            "No Group information in source database; skipping source "
            "table {!r}; will create a default group",
            Group.__tablename__,
        )
        tables_to_skip.append(TableIdentity(tablename=Group.__tablename__))

    return tables_to_skip
def get_src_iddefs(
    src_engine: Engine, src_tables: List[str]
) -> Dict[int, IdNumDefinition]:
    """
    Get information about all the ID number definitions in the source
    database.

    Args:
        src_engine: source SQLAlchemy :class:`Engine`
        src_tables: list of all table names in the source database

    Returns:
        dictionary: ``{which_idnum: idnumdef}`` mappings, where each
        ``idnumdef`` is a
        :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition` not
        attached to any database session
    """
    iddefs = {}  # type: Dict[int, IdNumDefinition]

    if IdNumDefinition.__tablename__ in src_tables:
        # Modern source: it has an IdNumDefinition table; read it directly.
        log.info(
            "Fetching source ID number definitions from {!r} table",
            IdNumDefinition.__tablename__,
        )
        # noinspection PyUnresolvedReferences
        query = (
            select(
                [
                    IdNumDefinition.which_idnum,
                    IdNumDefinition.description,
                    IdNumDefinition.short_description,
                ]
            )
            .select_from(IdNumDefinition.__table__)
            .order_by(IdNumDefinition.which_idnum)
        )
        for which_idnum, desc, short_desc in src_engine.execute(
            query
        ).fetchall():
            iddefs[which_idnum] = IdNumDefinition(
                which_idnum=which_idnum,
                description=desc,
                short_description=short_desc,
            )
    elif server_stored_var_table_defunct.name in src_tables:
        # Old source: descriptions live in the defunct "server stored
        # variables" table, one named variable per description.
        log.info(
            "Fetching source ID number definitions from {!r} table",
            server_stored_var_table_defunct.name,
        )

        def fetch_storedvar(varname: str) -> Optional[str]:
            # Return the value of one defunct stored variable, or None if
            # there is no row with that name.
            var_query = (
                select([server_stored_var_table_defunct.columns.valueText])
                .select_from(server_stored_var_table_defunct)
                .where(
                    server_stored_var_table_defunct.columns.name == varname
                )
            )
            var_rows = src_engine.execute(var_query).fetchall()
            return var_rows[0][0] if var_rows else None

        for which_idnum in range(1, NUMBER_OF_IDNUMS_DEFUNCT + 1):
            nstr = str(which_idnum)
            # Keyword arguments are evaluated in order, preserving the
            # original description-then-short-description query order.
            iddefs[which_idnum] = IdNumDefinition(
                which_idnum=which_idnum,
                description=fetch_storedvar(
                    ServerStoredVarNamesDefunct.ID_DESCRIPTION_PREFIX + nstr
                ),
                short_description=fetch_storedvar(
                    ServerStoredVarNamesDefunct.ID_SHORT_DESCRIPTION_PREFIX
                    + nstr
                ),
            )
    else:
        log.warning(
            "No information available on source ID number descriptions"
        )
    return iddefs
241# =============================================================================
242# Information relating to the destination database
243# =============================================================================
def group_exists(group_id: int, dst_session: Session) -> bool:
    """
    Does a group exist in the destination session with the specified group
    ID?

    Args:
        group_id: integer group ID
        dst_session: destination SQLAlchemy :class:`Session`

    Returns:
        ``True`` if such a group exists, else ``False``
    """
    # Delegate to the Group class's own existence check.
    return Group.group_exists(group_id=group_id, dbsession=dst_session)
def fetch_group_id_by_name(group_name: str, dst_session: Session) -> int:
    """
    Returns the group ID of the group with the specified name, in the
    destination session.

    If there are multiple such groups, that's a bug, and
    :exc:`MultipleResultsFound` will be raised.

    If there's no such group in the destination database with that name, one
    will be created, and its ID returned.

    Args:
        group_name: group name
        dst_session: destination SQLAlchemy :class:`Session`

    Returns:
        group ID in the destination database
    """
    try:
        existing = (
            dst_session.query(Group)
            .filter(Group.name == group_name)
            .one_or_none()
        )  # type: Optional[Group]
        # ... one_or_none() raises MultipleResultsFound for >1 matches
    except MultipleResultsFound:
        log.critical(
            "Nasty bug: can't have two groups with the same name! "
            "Group name was {!r}",
            group_name,
        )
        raise
    if existing is not None:
        return existing.id
    # No such group: create it.
    log.info("Creating new group named {!r}", group_name)
    new_group = Group()
    new_group.name = group_name
    dst_session.add(new_group)
    flush_session(dst_session)  # flushing assigns the new group's PK
    # https://stackoverflow.com/questions/1316952/sqlalchemy-flush-and-get-inserted-id # noqa
    log.info("... new group has ID {!r}", new_group.id)
    return new_group.id
def get_dst_group(dest_groupnum: int, dst_session: Session) -> Group:
    """
    Ensures that the specified group number exists in the destination
    database and returns the corresponding group.

    Args:
        dest_groupnum: group number
        dst_session: SQLAlchemy session for the destination database

    Returns:
        the group

    Raises:
        :exc:`ValueError` upon failure
    """
    query = dst_session.query(Group).filter(Group.id == dest_groupnum)
    try:
        matching_group = query.one()  # type: Group
        # ... one() raises for 0 or >1 matches
    except MultipleResultsFound:
        log.critical(
            "Nasty bug: can't have two groups with the same ID! "
            "Group ID was {!r}",
            dest_groupnum,
        )
        raise
    except NoResultFound:
        raise ValueError(
            f"Group with ID {dest_groupnum} missing from "
            f"destination database"
        )
    return matching_group
def ensure_dest_iddef_exists(
    which_idnum: int, dst_session: Session
) -> IdNumDefinition:
    """
    Ensures that the specified ID number type exists in the destination
    database, returning its definition.

    Args:
        which_idnum: ID number type
        dst_session: SQLAlchemy session for the destination database

    Returns:
        the matching
        :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`

    Raises:
        :exc:`ValueError` upon failure
    """
    query = dst_session.query(IdNumDefinition).filter(
        IdNumDefinition.which_idnum == which_idnum
    )
    try:
        iddef = query.one()  # type: IdNumDefinition
        # ... one() raises for 0 or >1 matches
    except MultipleResultsFound:
        log.critical(
            "Nasty bug: can't have two ID number types with the same "
            "which_idnum! which_idnum was {!r}",
            which_idnum,
        )
        raise
    except NoResultFound:
        raise ValueError(
            f"ID number type with which_idnum={which_idnum} "
            f"missing from destination database"
        )
    return iddef
def get_dst_iddef(
    dst_session: Session, which_idnum: int
) -> Optional[IdNumDefinition]:
    """
    Fetches an ID number definition from the destination database, if one
    exists for the specified ID number type.

    Args:
        dst_session: destination SQLAlchemy :class:`Session`
        which_idnum: integer expressing which ID number type to look up

    Returns:
        an :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`, or
        ``None`` if none was found
    """
    query = dst_session.query(IdNumDefinition).filter(
        IdNumDefinition.which_idnum == which_idnum
    )
    return query.first()
393# =============================================================================
394# Extra translation to be applied to individual objects
395# =============================================================================
396# The extra logic for this database:
def flush_session(dst_session: Session) -> None:
    """
    Flushes the destination SQLAlchemy session.

    Flushing emits pending INSERT/UPDATE statements within the current
    transaction, which (among other things) assigns autogenerated primary
    keys to newly added objects -- behaviour that callers such as
    :func:`fetch_group_id_by_name` rely upon.

    Args:
        dst_session: destination SQLAlchemy :class:`Session`
    """
    log.debug("Flushing session")
    dst_session.flush()
def ensure_default_group_id(trcon: TranslationContext) -> None:
    """
    Ensure that the :class:`TranslationContext` has a ``default_group_id``
    key in its ``info`` dictionary. This is the ID, in the destination
    database, of the group to put records in where those records come from
    an older, pre-group-based CamCOPS database.

    The user may have specified that ``default_group_id`` on the command
    line. Otherwise, they may have specified a ``default_group_name``, so
    we'll use the ID of that group (creating it if necessary). If they
    specified neither, we will raise an :exc:`AssertionError`, because we
    have come to a situation where we need one or the other.

    Args:
        trcon: the :class:`TranslationContext`

    Raises:
        AssertionError: if the specified group ID doesn't exist in the
            destination database, or if neither a default group ID nor a
            default group name was specified
    """
    default_group_id = trcon.info["default_group_id"]  # type: Optional[int]
    if default_group_id is not None:
        # The user specified a group ID to use for records without one.
        # Raise explicitly rather than via "assert", so the validation
        # survives running Python with -O (which strips asserts); the
        # docstring promises an AssertionError, so we keep that type.
        if not group_exists(
            group_id=default_group_id, dst_session=trcon.dst_session
        ):
            raise AssertionError(
                "User specified default_group_id={!r}, and object {!r} "
                "needs a _group_id (directly or indirectly), but that ID "
                "doesn't exist in the {!r} table of the destination "
                "database".format(
                    default_group_id, trcon.oldobj, Group.__tablename__
                )
            )
    else:
        default_group_name = trcon.info[
            "default_group_name"
        ]  # type: Optional[str] # noqa
        if not default_group_name:
            raise AssertionError(
                "User specified neither default_group_id nor "
                "default_group_name, but object {!r} needs a "
                "_group_id, directly or indirectly".format(trcon.oldobj)
            )
        default_group_id = fetch_group_id_by_name(
            group_name=default_group_name, dst_session=trcon.dst_session
        )
    trcon.info["default_group_id"] = default_group_id  # cached for next time
452'''
453# SUPERSEDED BY MORE CONSERVATIVE MECHANISM, 2019-03-05
455def ensure_idnumdef(trcon: TranslationContext,
456 which_idnum: int) -> IdNumDefinition:
457 """
458 Ensure that the destination database contains an ID number definition with
459 the same ``which_idnum`` as in the source database, or create one.
461 If an ID number definition with that ``which_idnum`` was present in the
462 source and the destination, ensure they don't clash (i.e. ensure that they
463 represent the same sort of ID number).
465 Args:
466 trcon: the :class:`TranslationContext`
467 which_idnum: integer expressing which ID number type to look up
469 Returns:
470 the :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`,
471 attached to the destination database
473 """
474 dst_iddef = get_dst_iddef(trcon.dst_session, which_idnum=which_idnum)
475 src_iddefs = trcon.info['src_iddefs'] # type: Dict[int, IdNumDefinition] # noqa
476 if dst_iddef:
477 # Present in the destination
478 if which_idnum in src_iddefs.keys():
479 # Also present in the source
480 src_iddef = src_iddefs[which_idnum]
481 ensure_no_iddef_clash(src_iddef=src_iddef, dst_iddef=dst_iddef)
482 return dst_iddef
483 else:
484 # Not present in the destination
485 assert which_idnum in src_iddefs.keys(), (
486 "Descriptions for ID#{} are missing from the source "
487 "database!".format(which_idnum)
488 )
489 src_iddef = src_iddefs[which_idnum]
490 new_iddef = IdNumDefinition(
491 which_idnum=src_iddef.which_idnum,
492 description=src_iddef.description,
493 short_description=src_iddef.short_description
494 )
495 log.info("Adding ID number definition: {!r}", new_iddef)
496 trcon.dst_session.add(new_iddef)
497 flush_session(trcon.dst_session) # required, or database FK checks fail # noqa
498 return new_iddef
499'''
def ensure_no_iddef_clash(
    src_iddef: IdNumDefinition, dst_iddef: IdNumDefinition
) -> None:
    """
    Ensure that a given source and destination pair of ID number definitions,
    which must match on ``which_idnum``, have the same description and short
    description, or raise :exc:`ValueError`.

    Args:
        src_iddef: source
            :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`
        dst_iddef: destination
            :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`

    Raises:
        ValueError: if the descriptions or short descriptions differ
    """
    # Programming-error guard: callers must pair up definitions that share
    # a which_idnum. (Bug fix: the two message fragments previously
    # concatenated without a space, producing "objectsthat".)
    assert src_iddef.which_idnum == dst_iddef.which_idnum, (
        "Bug: ensure_no_iddef_clash() called with IdNumDefinition objects "
        "that don't share the same value for which_idnum (silly!)."
    )
    if src_iddef.description != dst_iddef.description:
        raise ValueError(
            "ID description mismatch for ID#{}: source {!r}, "
            "destination {!r}".format(
                src_iddef.which_idnum,
                src_iddef.description,
                dst_iddef.description,
            )
        )
    if src_iddef.short_description != dst_iddef.short_description:
        raise ValueError(
            "ID short_description mismatch for ID#{}: source {!r}, "
            "destination {!r}".format(
                src_iddef.which_idnum,
                src_iddef.short_description,
                dst_iddef.short_description,
            )
        )
def log_warning_srcobj(srcobj: Any) -> None:
    """
    Writes a pretty-printed dump of a source (old) object's attribute
    dictionary to the log, as a warning.

    Args:
        srcobj: the source object
    """
    formatted = pformat(srcobj.__dict__)
    log.warning("Source was:\n\n{}\n\n", formatted)
def get_dest_groupnum(
    src_groupnum: int, trcon: TranslationContext, oldobj: Any
) -> int:
    """
    For a given source group number, returns the corresponding destination
    group number (validating en route).

    Args:
        src_groupnum: the group number in the source database
        trcon: the :class:`TranslationContext`
        oldobj: the source object (used only for error reporting)

    Returns:
        the corresponding group number in the destination database

    Raises:
        :exc:`ValueError` if no mapping exists for ``src_groupnum``
    """
    # (Docstring fix: "Returns" previously said "which_idnum", a copy-paste
    # error from get_dest_which_idnum; this function maps group numbers.)
    groupnum_map = trcon.info["groupnum_map"]  # type: Dict[int, int]
    if src_groupnum not in groupnum_map:
        log_warning_srcobj(oldobj)
        log.critical(
            "Old database contains group number {} and equivalent "
            "group in destination not known",
            src_groupnum,
        )
        raise ValueError("Bad group mapping")
    return groupnum_map[src_groupnum]
def get_dest_which_idnum(
    src_which_idnum: int, trcon: TranslationContext, oldobj: Any
) -> int:
    """
    For a given source ID number type, returns the corresponding destination
    ID number type (validating en route).

    Args:
        src_which_idnum: which_idnum in the source database
        trcon: the :class:`TranslationContext`
        oldobj: the source object

    Returns:
        the corresponding which_idnum in the destination database

    Raises:
        :exc:`ValueError` if bad
    """
    mapping = trcon.info["whichidnum_map"]  # type: Dict[int, int]
    if src_which_idnum in mapping:
        return mapping[src_which_idnum]
    # Unknown ID number type: report loudly, then refuse.
    log_warning_srcobj(oldobj)
    log.critical(
        "Old database contains ID number definitions of type {} "
        "and equivalent ID number type in destination not known",
        src_which_idnum,
    )
    raise ValueError("Bad ID number type mapping")
611# noinspection PyProtectedMember
612def translate_fn(trcon: TranslationContext) -> None:
613 """
614 Function to translate source objects to their destination counterparts,
615 where special processing is required. Called as a callback from
616 :func:`cardinal_pythonlib.sqlalchemy.merge_db.merge_db`.
618 Args:
619 trcon: the :class:`TranslationContext`; all the relevant information is
620 in here, and our function modifies its members.
622 This function does the following things:
624 - For any records uploaded from tablets: set ``_group_id``, if it's blank.
626 - For :class:`camcops_server.cc_modules.cc_user.User` objects: if an
627 identical user is found in the destination database, merge on it rather
628 than creating a new one. Users with matching usernames are considered to
629 be identical.
631 - For :class:`Device` objects: if an identical device is found, merge on it
632 rather than creating a new one. Devices with matching names are
633 considered to be identical.
635 - For :class:`camcops_server.cc_modules.cc_group.Group` objects: if an
636 identical group is found, merge on it rather than creating a new one.
637 Groups with matching names are considered to be identical.
639 - For :class:`camcops_server.cc_modules.cc_patient.Patient` objects: if any
640 have ID numbers in the old format (as columns in the Patient table),
641 convert them to the :class:`PatientIdNum` system.
643 - If we're inserting a :class:`PatientIdNum`, make sure there is a
644 corresponding
645 :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition`, and that
646 it's valid.
648 - If we're merging from a more modern database with the
649 :class:`camcops_server.cc_modules.cc_idnumdef.IdNumDefinition` table,
650 check our ID number definitions don't conflict.
652 - Check we're not creating duplicates for anything uploaded.
654 """
655 log.debug("Translating object from table: {!r}", trcon.tablename)
656 oldobj = trcon.oldobj
657 newobj = trcon.newobj
658 # log.debug("Translating: {}", auto_repr(oldobj))
660 # -------------------------------------------------------------------------
661 # Set _group_id correctly for tablet records
662 # -------------------------------------------------------------------------
663 if isinstance(oldobj, GenericTabletRecordMixin):
664 if (
665 "_group_id" in trcon.missing_src_columns
666 or oldobj._group_id is None
667 ):
668 # ... order that "if" statement carefully; if the _group_id column
669 # is missing from the source, don't touch oldobj._group_id or
670 # it'll trigger a DB query that fails.
671 #
672 # Set _group_id because it's blank
673 #
674 ensure_default_group_id(trcon)
675 default_group_id = trcon.info["default_group_id"] # type: int
676 log.debug("Assiging new _group_id of {!r}", default_group_id)
677 newobj._group_id = default_group_id
678 else:
679 #
680 # Re-map _group_id
681 #
682 newobj._group_id = get_dest_groupnum(
683 oldobj._group_id, trcon, oldobj
684 )
686 # -------------------------------------------------------------------------
687 # If an identical user is found, merge on it rather than creating a new
688 # one. Users with matching usernames are considered to be identical.
689 # -------------------------------------------------------------------------
690 if trcon.tablename == User.__tablename__:
691 src_user = cast(User, oldobj)
692 src_username = src_user.username
693 matching_user = (
694 trcon.dst_session.query(User)
695 .filter(User.username == src_username)
696 .one_or_none()
697 ) # type: Optional[User]
698 if matching_user is not None:
699 log.debug(
700 "Matching User (username {!r}) found; merging",
701 matching_user.username,
702 )
703 trcon.newobj = matching_user # so that related records will work
705 # -------------------------------------------------------------------------
706 # If an identical device is found, merge on it rather than creating a
707 # new one. Devices with matching names are considered to be identical.
708 # -------------------------------------------------------------------------
709 if trcon.tablename == Device.__tablename__:
710 src_device = cast(Device, oldobj)
711 src_devicename = src_device.name
712 matching_device = (
713 trcon.dst_session.query(Device)
714 .filter(Device.name == src_devicename)
715 .one_or_none()
716 ) # type: Optional[Device]
717 if matching_device is not None:
718 log.debug(
719 "Matching Device (name {!r}) found; merging",
720 matching_device.name,
721 )
722 trcon.newobj = matching_device
724 # BUT BEWARE, BECAUSE IF YOU MERGE THE SAME DATABASE TWICE (even if
725 # that's a silly thing to do...), MERGING DEVICES WILL BREAK THE KEY
726 # RELATIONSHIPS. For example,
727 # source:
728 # pk = 1, id = 1, device = 100, era = 'NOW', current = 1
729 # dest after first merge:
730 # pk = 1, id = 1, device = 100, era = 'NOW', current = 1
731 # dest after second merge:
732 # pk = 1, id = 1, device = 100, era = 'NOW', current = 1
733 # pk = 2, id = 1, device = 100, era = 'NOW', current = 1
734 # ... so you get a clash/duplicate.
735 # Mind you, that's fair, because there is a duplicate.
736 # SO WE DO SEPARATE DUPLICATE CHECKING, below.
738 # -------------------------------------------------------------------------
739 # Don't copy Group records; the user must set these up manually and specify
740 # groupnum_map, for safety
741 # -------------------------------------------------------------------------
742 if trcon.tablename == Group.__tablename__:
743 trcon.newobj = None # don't insert this object
744 # ... don't set "newobj = None"; that wouldn't alter trcon
745 # Now make sure the map is OK:
746 src_group = cast(Group, oldobj)
747 trcon.objmap[oldobj] = get_dst_group(
748 dest_groupnum=get_dest_groupnum(src_group.id, trcon, src_group),
749 dst_session=trcon.dst_session,
750 )
752 # -------------------------------------------------------------------------
753 # If there are any patient numbers in the old format (as a set of
754 # columns in the Patient table) which were not properly converted
755 # to the new format (as individual records in the PatientIdNum
756 # table), create new entries.
757 # Only worth bothering with for _current entries.
758 # (More explicitly: do not create new PatientIdNum entries for non-current
759 # patients; it's very fiddly if there might be asynchrony between
760 # Patient and PatientIdNum objects for that patient.)
761 # -------------------------------------------------------------------------
762 if trcon.tablename == Patient.__tablename__:
763 # (a) Find old patient numbers
764 old_patient = cast(Patient, oldobj)
765 # noinspection PyUnresolvedReferences
766 src_pt_query = (
767 select([text("*")])
768 .select_from(table(trcon.tablename))
769 .where(column(Patient.id.name) == old_patient.id)
770 .where(column(Patient._current.name) == True) # noqa: E712
771 .where(column(Patient._device_id.name) == old_patient._device_id)
772 .where(column(Patient._era.name) == old_patient._era)
773 )
774 rows = trcon.src_session.execute(src_pt_query) # type: ResultProxy
775 list_of_dicts = [dict(row.items()) for row in rows]
776 assert (
777 len(list_of_dicts) == 1
778 ), "Failed to fetch old patient IDs correctly; bug?"
779 old_patient_dict = list_of_dicts[0]
781 # (b) If any don't exist in the new database, create them.
782 # -- no, that's not right; we will be processing Patient before
783 # PatientIdNum, so that should be: if any don't exist in the *source*
784 # database, create them.
785 src_tables = trcon.src_table_names
786 for src_which_idnum in range(1, NUMBER_OF_IDNUMS_DEFUNCT + 1):
787 old_fieldname = FP_ID_NUM + str(src_which_idnum)
788 idnum_value = old_patient_dict[old_fieldname]
789 if idnum_value is None:
790 # Old Patient record didn't contain this ID number
791 continue
792 # Old Patient record *did* contain the ID number...
793 if PatientIdNum.__tablename__ in src_tables:
794 # noinspection PyUnresolvedReferences
795 src_idnum_query = (
796 select([func.count()])
797 .select_from(table(PatientIdNum.__tablename__))
798 .where(
799 column(PatientIdNum.patient_id.name) == old_patient.id
800 )
801 .where(
802 column(PatientIdNum._current.name)
803 == old_patient._current
804 )
805 .where(
806 column(PatientIdNum._device_id.name)
807 == old_patient._device_id
808 )
809 .where(column(PatientIdNum._era.name) == old_patient._era)
810 .where(
811 column(PatientIdNum.which_idnum.name)
812 == src_which_idnum
813 )
814 )
815 n_present = trcon.src_session.execute(src_idnum_query).scalar()
816 # ^^^
817 # !
818 if n_present != 0:
819 # There was already a PatientIdNum for this which_idnum
820 continue
821 pidnum = PatientIdNum()
822 # PatientIdNum fields:
823 pidnum.id = fake_tablet_id_for_patientidnum(
824 patient_id=old_patient.id, which_idnum=src_which_idnum
825 )
826 # ... guarantees a pseudo client (tablet) PK
827 pidnum.patient_id = old_patient.id
828 pidnum.which_idnum = get_dest_which_idnum(
829 src_which_idnum, trcon, oldobj
830 )
831 pidnum.idnum_value = idnum_value
832 # GenericTabletRecordMixin fields:
833 # _pk: autogenerated
834 # noinspection PyUnresolvedReferences
835 pidnum._device_id = trcon.objmap[old_patient._device].id
836 pidnum._era = old_patient._era
837 pidnum._current = old_patient._current
838 pidnum._when_added_exact = old_patient._when_added_exact
839 pidnum._when_added_batch_utc = old_patient._when_added_batch_utc
840 # noinspection PyUnresolvedReferences
841 pidnum._adding_user_id = (
842 trcon.objmap[old_patient._adding_user].id
843 if old_patient._adding_user is not None
844 else None
845 )
846 pidnum._when_removed_exact = old_patient._when_removed_exact
847 pidnum._when_removed_batch_utc = (
848 old_patient._when_removed_batch_utc
849 )
850 # noinspection PyUnresolvedReferences
851 pidnum._removing_user_id = (
852 trcon.objmap[old_patient._removing_user].id
853 if old_patient._removing_user is not None
854 else None
855 )
856 # noinspection PyUnresolvedReferences
857 pidnum._preserving_user_id = (
858 trcon.objmap[old_patient._preserving_user].id
859 if old_patient._preserving_user is not None
860 else None
861 )
862 pidnum._forcibly_preserved = old_patient._forcibly_preserved
863 pidnum._predecessor_pk = None # Impossible to calculate properly
864 pidnum._successor_pk = None # Impossible to calculate properly
865 pidnum._manually_erased = old_patient._manually_erased
866 pidnum._manually_erased_at = old_patient._manually_erased_at
867 # noinspection PyUnresolvedReferences
868 pidnum._manually_erasing_user_id = (
869 trcon.objmap[old_patient._manually_erasing_user].id
870 if old_patient._manually_erasing_user is not None
871 else None
872 )
873 pidnum._camcops_version = old_patient._camcops_version
874 pidnum._addition_pending = old_patient._addition_pending
875 pidnum._removal_pending = old_patient._removal_pending
876 pidnum._group_id = newobj._group_id
877 # ... will have been set above if it was blank
879 # OK.
880 log.debug("Inserting new PatientIdNum: {}", pidnum)
881 trcon.dst_session.add(pidnum)
883 # -------------------------------------------------------------------------
884 # If we're inserting a PatientIdNum, make sure there is a corresponding
885 # IdNumDefinition, and that it's valid
886 # -------------------------------------------------------------------------
887 if trcon.tablename == PatientIdNum.__tablename__:
888 src_pidnum = cast(PatientIdNum, oldobj)
889 src_which_idnum = src_pidnum.which_idnum
890 # Is it present?
891 if src_which_idnum is None:
892 raise ValueError(f"Bad PatientIdNum: {src_pidnum!r}")
893 # Ensure the new object has an appropriate ID number FK:
894 dst_pidnum = cast(PatientIdNum, newobj)
895 dst_pidnum.which_idnum = get_dest_which_idnum(
896 src_which_idnum, trcon, oldobj
897 )
899 # -------------------------------------------------------------------------
900 # If we're merging from a more modern database with the IdNumDefinition
901 # table, skip source IdNumDefinition records; the user must set these up
902 # manually and specify whichidnum_map, for safety
903 # -------------------------------------------------------------------------
904 if trcon.tablename == IdNumDefinition.__tablename__:
905 trcon.newobj = None # don't insert this object
906 # ... don't set "newobj = None"; that wouldn't alter trcon
907 # Now make sure the map is OK:
908 src_iddef = cast(IdNumDefinition, oldobj)
909 trcon.objmap[oldobj] = get_dst_iddef(
910 which_idnum=get_dest_which_idnum(
911 src_iddef.which_idnum, trcon, src_iddef
912 ),
913 dst_session=trcon.dst_session,
914 )
916 # -------------------------------------------------------------------------
917 # Check we're not creating duplicates for anything uploaded
918 # -------------------------------------------------------------------------
919 if isinstance(oldobj, GenericTabletRecordMixin):
920 # noinspection PyTypeChecker
921 cls = newobj.__class__ # type: Type[GenericTabletRecordMixin]
922 # Records uploaded from tablets must be unique on the combination of:
923 # id = table PK
924 # _device_id = device
925 # _era = device era
926 # _when_removed_exact = removal date or NULL
927 # noinspection PyUnresolvedReferences
928 exists_query = (
929 select([func.count()])
930 .select_from(table(trcon.tablename))
931 .where(column(cls.id.name) == oldobj.id)
932 .where(
933 column(cls._device_id.name) == trcon.objmap[oldobj._device].id
934 )
935 .where(column(cls._era.name) == oldobj._era)
936 .where(
937 column(cls._when_removed_exact.name)
938 == oldobj._when_removed_exact
939 )
940 )
941 # Note re NULLs... Although it's an inconvenient truth in SQL that
942 # SELECT NULL = NULL; -- returns NULL
943 # in this code we have a comparison of a column to a Python value.
944 # SQLAlchemy is clever and renders "IS NULL" if the Python value is
945 # None, or an "=" comparison otherwise.
946 # If we were comparing a column to another column, we'd have to do
947 # more; e.g.
948 #
949 # WRONG one-to-one join to self:
950 #
951 # SELECT a._pk, b._pk, a._when_removed_exact
952 # FROM phq9 a
953 # INNER JOIN phq9 b
954 # ON a._pk = b._pk
955 # AND a._when_removed_exact = b._when_removed_exact;
956 #
957 # -- drops all rows
958 #
959 # CORRECT one-to-one join to self:
960 #
961 # SELECT a._pk, b._pk, a._when_removed_exact
962 # FROM phq9 a
963 # INNER JOIN phq9 b
964 # ON a._pk = b._pk
965 # AND (a._when_removed_exact = b._when_removed_exact
966 # OR (a._when_removed_exact IS NULL AND
967 # b._when_removed_exact IS NULL));
968 #
969 # -- returns all rows
970 n_exists = trcon.dst_session.execute(exists_query).scalar()
971 if n_exists > 0:
972 # noinspection PyUnresolvedReferences
973 existing_rec_q = (
974 select(["*"])
975 .select_from(table(trcon.tablename))
976 .where(column(cls.id.name) == oldobj.id)
977 .where(
978 column(cls._device_id.name)
979 == trcon.objmap[oldobj._device].id
980 )
981 .where(column(cls._era.name) == oldobj._era)
982 .where(
983 column(cls._when_removed_exact.name)
984 == oldobj._when_removed_exact
985 )
986 )
987 resultproxy = trcon.dst_session.execute(existing_rec_q).fetchall()
988 existing_rec = [dict(row) for row in resultproxy]
989 log.critical(
990 "Source record, inheriting from GenericTabletRecordMixin and "
991 "shown below, already exists in destination database... "
992 "in table {t!r}, clashing on: "
993 "id={i!r}, device_id={d!r}, era={e!r}, "
994 "_when_removed_exact={w!r}.\n"
995 "ARE YOU TRYING TO MERGE THE SAME DATABASE IN TWICE? "
996 "DON'T.",
997 t=trcon.tablename,
998 i=oldobj.id,
999 d=oldobj._device_id,
1000 e=oldobj._era,
1001 w=oldobj._when_removed_exact,
1002 )
1003 if trcon.tablename == PatientIdNum.__tablename__ and (
1004 oldobj.id % NUMBER_OF_IDNUMS_DEFUNCT == 0
1005 ):
1006 log.critical(
1007 "Since this error has occurred for table {t!r} "
1008 "(and for id % {n} == 0), "
1009 "this error may reflect a previous bug in the patient ID "
1010 "number fix for the database upload script, in which all "
1011 "ID numbers for patients with patient.id = n were given "
1012 "patient_idnum.id = n * {n} themselves (or possibly were "
1013 "all given patient_idnum.id = 0). "
1014 "Fix this by running, on the source database:\n\n"
1015 " UPDATE patient_idnum SET id = _pk;\n\n",
1016 t=trcon.tablename,
1017 n=NUMBER_OF_IDNUMS_DEFUNCT,
1018 )
1019 # Print the actual instance last; accessing them via pformat can
1020 # lead to crashes if there are missing source fields, as an
1021 # on-demand SELECT is executed sometimes (e.g. when a PatientIdNum
1022 # is printed, its Patient is selected, including the [user]
1023 # 'fullname' attribute that is absent in old databases).
1024 # Not a breaking point, since we're going to crash anyway, but
1025 # inelegant.
1026 # Since lazy loading (etc.) is configured at query time, the best
1027 # thing (as per Michael Bayer) is to detach the object from the
1028 # session:
1029 # https://groups.google.com/forum/#!topic/sqlalchemy/X_wA8K97smE
1030 trcon.src_session.expunge(oldobj) # prevent implicit queries
1031 # Then all should work:
1032 log_warning_srcobj(oldobj)
1033 log.critical(
1034 "Existing record(s) in destination DB was/were:\n\n" "{}\n\n",
1035 pformat(existing_rec),
1036 )
1037 raise ValueError(
1038 "Attempt to insert duplicate record; see log " "message above."
1039 )
1042# =============================================================================
1043# Postprocess
1044# =============================================================================
1046# noinspection PyUnusedLocal
def postprocess(src_engine: Engine, dst_session: Session) -> None:
    """
    Perform the final steps after :func:`merge_db` has completed.

    - Rebuilds the destination database's task index.
    - Emits warnings about mapping tables that must be copied by hand.

    Args:
        src_engine: source database SQLAlchemy engine
        dst_session: destination database SQLAlchemy session
    """
    log.info("Reindexing destination database")
    reindex_everything(dst_session)
    # These two mapping tables are skipped by the merge and must be migrated
    # manually; warn the operator about each in turn.
    manual_copy_jobs = (
        (
            "NOT IMPLEMENTED AUTOMATICALLY: copying user/group mapping "
            "from table {!r}; do this by hand.",
            UserGroupMembership.__tablename__,
        ),
        (
            "NOT IMPLEMENTED AUTOMATICALLY: copying group/group mapping "
            "from table {!r}; do this by hand.",
            group_group_table.name,
        ),
    )
    for warning_fmt, tablename in manual_copy_jobs:
        log.warning(warning_fmt, tablename)
1072# =============================================================================
1073# Main
1074# =============================================================================
def merge_camcops_db(
    src: str,
    echo: bool,
    report_every: int,
    dummy_run: bool,
    info_only: bool,
    default_group_id: Optional[int],
    default_group_name: Optional[str],
    groupnum_map: Dict[int, int],
    whichidnum_map: Dict[int, int],
    skip_export_logs: bool = True,
    skip_audit_logs: bool = True,
) -> None:
    """
    Merge an existing database (with a pre-v2 or later structure) into a
    contemporary CamCOPS database.

    Args:
        src:
            source database SQLAlchemy URL

        echo:
            echo the SQL that is produced?

        report_every:
            provide a progress report every *n* records

        dummy_run:
            don't alter the destination database

        info_only:
            show info, then stop

        default_group_id:
            integer group ID (in the destination database) to use for source
            records that have no group (because they come from a very old
            source database) but need one

        default_group_name:
            group name (in the destination database) to use for source
            records that have no group (because they come from a very old
            source database) but need one

        groupnum_map:
            dictionary mapping group ID values from the source database to
            the destination database

        whichidnum_map:
            dictionary mapping ``which_idnum`` values from the source database
            to the destination database

        skip_export_logs:
            skip export log tables

        skip_audit_logs:
            skip audit log table

    """
    req = get_command_line_request()  # requires manual COMMIT; see below
    src_engine = create_engine(src, echo=echo, pool_pre_ping=True)
    log.info("SOURCE: " + get_safe_url_from_engine(src_engine))
    log.info("DESTINATION: " + get_safe_url_from_engine(req.engine))
    log.info(
        "Destination ID number type map (source:destination) is: {!r}",
        whichidnum_map,
    )
    log.info(
        "Group number type map (source:destination) is {!r}", groupnum_map
    )

    # Delay the slow import until we've checked our syntax
    log.info("Loading all models...")
    # noinspection PyUnresolvedReferences
    import camcops_server.cc_modules.cc_all_models  # delayed import # import side effects (ensure all models registered) # noqa

    log.info("Models loaded.")

    # Now, any special dependencies?
    # From the point of view of translating any tablet-related fields, the
    # actual (server) PK values are irrelevant; all relationships will be
    # identical if you change any PK (not standard database practice, but
    # convenient here).
    # The dependencies that do matter are server-side things, like user_id
    # variables.

    # For debugging only, some junk:
    # test_dependencies = [
    #     TableDependency(parent_tablename="patient",
    #                     child_tablename="_dirty_tables")
    # ]

    # -------------------------------------------------------------------------
    # Tables to skip
    # -------------------------------------------------------------------------

    skip_tables = [
        # Transient stuff we don't want to copy across, or wouldn't want to
        # overwrite the destination with, or where the PK structure has
        # changed and we don't care about old data:
        TableIdentity(tablename=x)
        for x in (
            CamcopsSession.__tablename__,
            DirtyTable.__tablename__,
            ServerSettings.__tablename__,
            SecurityAccountLockout.__tablename__,
            SecurityLoginFailure.__tablename__,
            UserGroupMembership.__tablename__,
            group_group_table.name,
        )
    ]

    # Tedious and bulky stuff the user may want to skip:
    if skip_export_logs:
        skip_tables.extend(
            [
                TableIdentity(tablename=x)
                for x in (
                    Email.__tablename__,
                    ExportRecipient.__tablename__,
                    ExportedTask.__tablename__,
                    ExportedTaskEmail.__tablename__,
                    ExportedTaskFileGroup.__tablename__,
                    ExportedTaskHL7Message.__tablename__,
                )
            ]
        )
    if skip_audit_logs:
        skip_tables.append(TableIdentity(tablename=AuditEntry.__tablename__))

    # -------------------------------------------------------------------------
    # Initial operations on SOURCE database
    # -------------------------------------------------------------------------

    src_tables = get_table_names(src_engine)
    # Old source databases may lack some tables entirely; skip those too:
    skip_tables += get_skip_tables(src_tables=src_tables)
    src_iddefs = get_src_iddefs(src_engine, src_tables)
    log.info("Source ID number definitions: {!r}", src_iddefs)

    # -------------------------------------------------------------------------
    # Initial operations on DESTINATION database
    # -------------------------------------------------------------------------

    dst_session = req.dbsession
    # So that system users get the first ID (cosmetic!):
    _ = User.get_system_user(dbsession=dst_session)
    _ = Device.get_server_device(dbsession=dst_session)

    # -------------------------------------------------------------------------
    # Set up source-to-destination mappings
    # -------------------------------------------------------------------------

    # Map source to destination ID number types; fail early (via assertion /
    # clash check) rather than mid-merge if the user-supplied map is bad:
    for src_which_idnum, dest_which_idnum in whichidnum_map.items():
        assert isinstance(src_which_idnum, int)
        assert isinstance(dest_which_idnum, int)
        src_iddef = src_iddefs[src_which_idnum]
        dst_iddef = ensure_dest_iddef_exists(dest_which_idnum, dst_session)
        ensure_no_iddef_clash(src_iddef, dst_iddef)

    # Map source to destination group numbers; get_dst_group() is called for
    # its validation side effect (the return value is unused here):
    for src_groupnum, dest_groupnum in groupnum_map.items():
        assert isinstance(src_groupnum, int)
        assert isinstance(dest_groupnum, int)
        _ = get_dst_group(dest_groupnum, dst_session)

    # -------------------------------------------------------------------------
    # Merge
    # -------------------------------------------------------------------------

    # Merge! It's easy...
    # trcon_info is passed through to translate_fn via the TranslationContext:
    trcon_info = dict(
        default_group_id=default_group_id,
        default_group_name=default_group_name,
        src_iddefs=src_iddefs,
        whichidnum_map=whichidnum_map,
        groupnum_map=groupnum_map,
    )
    merge_db(
        base_class=Base,
        src_engine=src_engine,
        dst_session=dst_session,
        allow_missing_src_tables=True,
        allow_missing_src_columns=True,
        translate_fn=translate_fn,
        skip_tables=skip_tables,
        only_tables=None,
        tables_to_keep_pks_for=None,
        # extra_table_dependencies=test_dependencies,
        extra_table_dependencies=None,
        dummy_run=dummy_run,
        info_only=info_only,
        report_every=report_every,
        flush_per_table=True,
        flush_per_record=False,
        commit_with_flush=False,
        commit_at_end=True,
        prevent_eager_load=True,
        trcon_info=trcon_info,
    )

    # -------------------------------------------------------------------------
    # Postprocess
    # -------------------------------------------------------------------------

    postprocess(src_engine=src_engine, dst_session=dst_session)

    # -------------------------------------------------------------------------
    # Done
    # -------------------------------------------------------------------------

    # The manual COMMIT promised above: get_command_line_request() gives a
    # session that is not auto-committed, so commit the post-merge changes
    # (e.g. reindexing) here.
    dst_session.commit()