Coverage for cc_modules/cc_sqla_coltypes.py: 60%
511 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_sqla_coltypes.py
6===============================================================================
8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
11 This file is part of CamCOPS.
13 CamCOPS is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 CamCOPS is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
26===============================================================================
28**SQLAlchemy column types used by CamCOPS.**
30Note these built-in SQLAlchemy types
31(https://docs.sqlalchemy.org/en/latest/core/type_basics.html#generic-types):
33 =============== ===========================================================
34 SQLAlchemy type Comment
35 =============== ===========================================================
36 BigInteger MySQL: -9,223,372,036,854,775,808 to
37 9,223,372,036,854,775,807 (64-bit)
38 (compare NHS number: up to 9,999,999,999)
39 Boolean
40 Date
41 DateTime
42 Enum
43 Float
44 Integer MySQL: -2,147,483,648 to 2,147,483,647 (32-bit)
45 Interval For ``datetime.timedelta``
46 LargeBinary Under MySQL, maps to ``BLOB``
47 MatchType For the return type of the ``MATCH`` operator
48 Numeric For fixed-precision numbers like ``NUMERIC`` or ``DECIMAL``
49 PickleType
50 SchemaType
51 SmallInteger
52 String ``VARCHAR``
53 Text Variably sized string type.
54 (Under MySQL, renders as ``TEXT``.)
55 Time
56 Unicode Implies that the underlying column explicitly supports
57 Unicode
58 UnicodeText Variably sized version of Unicode
59 (Under MySQL, renders as ``TEXT`` too.)
60 =============== ===========================================================
62Not supported across all platforms:
64 =============== ===========================================================
65 SQL type Comment
66 =============== ===========================================================
67 BIGINT UNSIGNED MySQL: 0 to 18,446,744,073,709,551,615 (64-bit).
68 Use ``sqlalchemy.dialects.mysql.BIGINT(unsigned=True)``.
69 INT UNSIGNED MySQL: 0 to 4,294,967,295 (32-bit).
70 Use ``sqlalchemy.dialects.mysql.INTEGER(unsigned=True)``.
71 =============== ===========================================================
73Other MySQL sizes:
75 =============== ===========================================================
76 MySQL type Comment
77 =============== ===========================================================
78 TINYBLOB 2^8 bytes = 256 bytes
79 BLOB 2^16 bytes = 64 KiB
80 MEDIUMBLOB 2^24 bytes = 16 MiB
81 LONGBLOB 2^32 bytes = 4 GiB
82 TINYTEXT 255 (2^8 - 1) bytes
83 TEXT 65,535 bytes (2^16 - 1) = 64 KiB
84 MEDIUMTEXT 16,777,215 (2^24 - 1) bytes = 16 MiB
85 LONGTEXT 4,294,967,295 (2^32 - 1) bytes = 4 GiB
86 =============== ===========================================================
88See https://stackoverflow.com/questions/13932750/tinytext-text-mediumtext-and-longtext-maximum-storage-sizes.
90Also notes:
92- Columns may need their character set specified explicitly under MySQL:
93 https://stackoverflow.com/questions/2108824/mysql-incorrect-string-value-error-when-save-unicode-string-in-django
95""" # noqa
97# =============================================================================
98# Imports
99# =============================================================================
101import json
102import logging
103from typing import (
104 Any,
105 Generator,
106 List,
107 Optional,
108 Sequence,
109 Tuple,
110 Type,
111 TYPE_CHECKING,
112 Union,
113)
114import uuid
116from cardinal_pythonlib.datetimefunc import (
117 coerce_to_pendulum,
118 convert_datetime_to_utc,
119 duration_from_iso,
120 duration_to_iso,
121 PotentialDatetimeType,
122)
123from cardinal_pythonlib.lists import chunks
124from cardinal_pythonlib.logs import BraceStyleAdapter
125from cardinal_pythonlib.reprfunc import auto_repr
126from cardinal_pythonlib.sqlalchemy.dialect import SqlaDialectName
127from cardinal_pythonlib.sqlalchemy.orm_inspect import (
128 gen_columns,
129 gen_relationships,
130)
131from cardinal_pythonlib.sqlalchemy.sqlfunc import (
132 fail_unknown_dialect,
133 fetch_processed_single_clause,
134)
135from isodate.isoerror import ISO8601Error
136from pendulum import DateTime as Pendulum, Duration
137from pendulum.parsing.exceptions import ParserError
138import phonenumbers
139from semantic_version import Version
140from sqlalchemy import util
141from sqlalchemy.dialects import mysql
142from sqlalchemy.engine.interfaces import Dialect
143from sqlalchemy.ext.compiler import compiles
144from sqlalchemy.orm.relationships import RelationshipProperty
145from sqlalchemy.sql.elements import conv
146from sqlalchemy.sql.expression import text
147from sqlalchemy.sql.functions import FunctionElement
148from sqlalchemy.sql.schema import Column
149from sqlalchemy.sql.sqltypes import (
150 Boolean,
151 CHAR,
152 DateTime,
153 LargeBinary,
154 String,
155 Text,
156 Unicode,
157 UnicodeText,
158)
159from sqlalchemy.sql.type_api import TypeDecorator
161from camcops_server.cc_modules.cc_constants import PV, StringLengths
162from camcops_server.cc_modules.cc_simpleobjects import IdNumReference
163from camcops_server.cc_modules.cc_sqlalchemy import (
164 LONG_COLUMN_NAME_WARNING_LIMIT,
165)
166from camcops_server.cc_modules.cc_version import make_version
168if TYPE_CHECKING:
169 from sqlalchemy.sql.elements import ClauseElement # noqa: F401
170 from sqlalchemy.sql.compiler import SQLCompiler # noqa: F401
171 from camcops_server.cc_modules.cc_db import (
172 GenericTabletRecordMixin,
173 ) # noqa: E501,F401
175log = BraceStyleAdapter(logging.getLogger(__name__))
178# =============================================================================
179# Debugging options
180# =============================================================================
# Per-column-type debug switches. Each one enables verbose logging in the
# bind/literal/result conversion methods of the matching column type.
DEBUG_DATETIME_AS_ISO_TEXT = False
DEBUG_DURATION_AS_ISO_TEXT = False
DEBUG_IDNUMDEF_LIST = False
DEBUG_INT_LIST_COLTYPE = False
DEBUG_SEMANTIC_VERSION = False
DEBUG_STRING_LIST_COLTYPE = False

# Warn loudly at import time if any debugging switch has been left on.
if (
    DEBUG_DATETIME_AS_ISO_TEXT
    or DEBUG_DURATION_AS_ISO_TEXT
    or DEBUG_SEMANTIC_VERSION
    or DEBUG_IDNUMDEF_LIST
    or DEBUG_INT_LIST_COLTYPE
    or DEBUG_STRING_LIST_COLTYPE
):
    log.warning("Debugging options enabled!")
202# =============================================================================
203# Constants
204# =============================================================================
class RelationshipInfo:
    """
    Key names for use in the ``info`` (user-defined) dictionary parameter of
    SQLAlchemy ``relationship`` calls; see
    https://docs.sqlalchemy.org/en/latest/orm/relationship_api.html#sqlalchemy.orm.relationship.
    """  # noqa

    IS_BLOB = "is_blob"
    IS_ANCILLARY = "is_ancillary"
218# =============================================================================
219# Simple derivative column types
220# =============================================================================
221# If you insert something too long into a VARCHAR, it just gets truncated.
# Each definition below is a plain String/Unicode type whose maximum length
# comes from StringLengths (passed positionally as the "length" argument).

AuditSourceColType = String(StringLengths.AUDIT_SOURCE_MAX_LEN)

# History: there used to be a
#   BigIntUnsigned = Integer().with_variant(mysql.BIGINT(unsigned=True), 'mysql')
# type. Partly because Alembic breaks on variants (Aug 2017), and partly
# because it's nonstandard and unnecessary, every BigIntUnsigned was changed
# to BigInteger (2017-08-25).

Base32ColType = String(StringLengths.BASE32_MAX_LEN)

CharColType = String(1)
CharsetColType = String(StringLengths.CHARSET_MAX_LEN)
CurrencyColType = Unicode(StringLengths.CURRENCY_MAX_LEN)

DatabaseTitleColType = Unicode(StringLengths.DATABASE_TITLE_MAX_LEN)
DeviceNameColType = String(StringLengths.DEVICE_NAME_MAX_LEN)
DiagnosticCodeColType = String(StringLengths.DIAGNOSTIC_CODE_MAX_LEN)

EmailAddressColType = Unicode(StringLengths.EMAIL_ADDRESS_MAX_LEN)
EraColType = String(StringLengths.ISO8601_DATETIME_STRING_MAX_LEN)
ExportRecipientNameColType = String(
    StringLengths.EXPORT_RECIPIENT_NAME_MAX_LEN
)
ExportTransmissionMethodColType = String(StringLengths.SENDING_FORMAT_MAX_LEN)

FilterTextColType = Unicode(StringLengths.FILTER_TEXT_MAX_LEN)
FileSpecColType = Unicode(StringLengths.FILESPEC_MAX_LEN)
FullNameColType = Unicode(StringLengths.FULLNAME_MAX_LEN)

GroupDescriptionColType = Unicode(StringLengths.GROUP_DESCRIPTION_MAX_LEN)
GroupNameColType = Unicode(StringLengths.GROUP_NAME_MAX_LEN)

# Hashed passwords: you might think that we must ensure case-SENSITIVE
# comparison on this field. That would require the option
# collation='utf8mb4_bin' to String(), for MySQL. However, that is
# MySQL-specific, and SQLAlchemy currently (Oct 2017) doesn't support
# database-specific *per-column* collations. SQLite accepts COLLATE commands
# but chokes on 'utf8mb4_bin'. Now, the hashed password from bcrypt() is
# case-sensitive. HOWEVER, the important thing is that we always retrieve the
# string from the database and do a case-sensitive comparison in Python (see
# calls to is_password_valid()). So the database collation doesn't matter,
# and we don't set it. See further notes in cc_sqlalchemy.py
HashedPasswordColType = String(StringLengths.HASHED_PW_MAX_LEN)
HL7AssigningAuthorityType = String(StringLengths.HL7_AA_MAX_LEN)
HL7IdTypeType = String(StringLengths.HL7_ID_TYPE_MAX_LEN)
HostnameColType = String(StringLengths.HOSTNAME_MAX_LEN)

IdDescriptorColType = Unicode(StringLengths.ID_DESCRIPTOR_MAX_LEN)
IdPolicyColType = String(StringLengths.ID_POLICY_MAX_LEN)
# History: IntUnsigned = Integer().with_variant(mysql.INTEGER(unsigned=True), 'mysql')  # noqa
# IP addresses are stored as a plain string. See also e.g.
# http://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/ip_address.html  # noqa
IPAddressColType = String(StringLengths.IP_ADDRESS_MAX_LEN)

LanguageCodeColType = String(StringLengths.LANGUAGE_CODE_MAX_LEN)

# Large BLOB/text columns: generic type, with a MySQL-specific variant. See
# https://stackoverflow.com/questions/43791725/sqlalchemy-how-to-make-a-longblob-column-in-mysql  # noqa
# The alternative, e.g. LargeBinary(length=LONGBLOB_LONGTEXT_MAX_LEN) or
# UnicodeText(length=LONGBLOB_LONGTEXT_MAX_LEN), doesn't translate to
# SQL Server.
# noinspection PyTypeChecker
LongBlob = LargeBinary().with_variant(mysql.LONGBLOB, "mysql")
# noinspection PyTypeChecker
LongText = UnicodeText().with_variant(mysql.LONGTEXT, "mysql")

MfaMethodColType = String(StringLengths.MFA_METHOD_MAX_LEN)
MimeTypeColType = String(StringLengths.MIMETYPE_MAX_LEN)

PatientNameColType = Unicode(StringLengths.PATIENT_NAME_MAX_LEN)

Rfc2822DateColType = String(StringLengths.RFC_2822_DATE_MAX_LEN)

SessionTokenColType = String(StringLengths.SESSION_TOKEN_MAX_LEN)
SexColType = String(1)
# The summary category type is pretty generic:
SummaryCategoryColType = String(
    StringLengths.TASK_SUMMARY_TEXT_FIELD_DEFAULT_MAX_LEN
)

TableNameColType = String(StringLengths.TABLENAME_MAX_LEN)

UrlColType = String(StringLengths.URL_MAX_LEN)
UserNameCamcopsColType = String(StringLengths.USERNAME_CAMCOPS_MAX_LEN)
UserNameExternalColType = String(StringLengths.USERNAME_EXTERNAL_MAX_LEN)
316# =============================================================================
317# Helper operations for PendulumDateTimeAsIsoTextColType
318# =============================================================================
319# Database string format is e.g.
320# 2013-07-24T20:04:07.123456+01:00
321# 2013-07-24T20:04:07.123+01:00
322# 0 1 2 3 } position in string; 1-based
323# 12345678901234567890123456789012 }
324#
325# So: rightmost 6 characters are time zone; rest is date/time.
326# leftmost 23 characters are time up to millisecond precision.
327# overall length is typically 29 (milliseconds) or 32 (microseconds)
# Lengths are derived from example strings so each value documents itself.
_TZ_LEN = len("+01:00")  # length of the timezone part of the ISO8601 string
_UTC_TZ_LITERAL = "'+00:00'"
_SQLITE_DATETIME_FMT_FOR_PYTHON = "'%Y-%m-%d %H:%M:%f'"

_MYSQL_DATETIME_LEN = len("2013-05-30 00:00:00")  # 19
_SQLSERVER_DATETIME_LEN = len("2001-01-31 21:30:49")  # 19
_SQLSERVER_DATETIME2_LEN = len("2001-01-31 21:30:49.1234567")  # 27
338# -----------------------------------------------------------------------------
339# isotzdatetime_to_utcdatetime
340# -----------------------------------------------------------------------------
342# noinspection PyPep8Naming
class isotzdatetime_to_utcdatetime(FunctionElement):
    """
    SQL function element used by :class:`PendulumDateTimeAsIsoTextColType`.

    Wraps an expression holding our ISO-8601 text (with timezone) and yields
    a ``DATETIME`` in the UTC timezone.

    The SQL itself is generated by the dialect-specific ``@compiles``
    functions registered for this class.
    """

    name = "isotzdatetime_to_utcdatetime"
    type = DateTime()
357# noinspection PyUnusedLocal
@compiles(isotzdatetime_to_utcdatetime)
def isotzdatetime_to_utcdatetime_default(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> None:
    """
    Fallback compiler for :class:`isotzdatetime_to_utcdatetime`, used when no
    dialect-specific implementation is registered: report the unknown
    dialect via ``fail_unknown_dialect``.
    """
    task = "perform isotzdatetime_to_utcdatetime"
    fail_unknown_dialect(compiler, task)
368# noinspection PyUnusedLocal
@compiles(isotzdatetime_to_utcdatetime, SqlaDialectName.MYSQL)
def isotzdatetime_to_utcdatetime_mysql(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> str:
    """
    MySQL compiler for :class:`isotzdatetime_to_utcdatetime`.

    Generates SQL of the form

    .. code-block:: none

        CONVERT_TZ(
            STR_TO_DATE(LEFT(x, LENGTH(x) - 6), '<format>'),
            RIGHT(x, 6),
            '+00:00'
        )

    For the format string, see
    https://dev.mysql.com/doc/refman/5.5/en/date-and-time-functions.html#function_date-format
    and note the use of "%i" for minutes.
    """  # noqa
    field = fetch_processed_single_clause(element, compiler)

    # Passing the format through text() deals with the necessary escaping of
    # "%" for the DBAPI.
    str_to_date_fmt = compiler.process(text("'%Y-%m-%dT%H:%i:%S.%f'"))

    # Everything except the rightmost characters (the timezone component),
    # parsed by STR_TO_DATE(). That returns a DATETIME if the string contains
    # both date and time components.
    local_datetime = "STR_TO_DATE(LEFT({0}, LENGTH({0}) - {1}), {2})".format(
        field, _TZ_LEN, str_to_date_fmt
    )

    # The rightmost characters are the source timezone, e.g. "+01:00".
    source_timezone = "RIGHT({}, {})".format(field, _TZ_LEN)

    return "CONVERT_TZ({}, {}, {})".format(
        local_datetime, source_timezone, _UTC_TZ_LITERAL
    )
403# noinspection PyUnusedLocal
@compiles(isotzdatetime_to_utcdatetime, SqlaDialectName.SQLITE)
def isotzdatetime_to_utcdatetime_sqlite(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> str:
    """
    SQLite compiler for :class:`isotzdatetime_to_utcdatetime`.

    References:

    - https://sqlite.org/lang_corefunc.html#substr
    - https://sqlite.org/lang_datefunc.html
    - https://www.sqlite.org/lang_expr.html

    **Timezone handling**

    A time of 12:00+01:00 means e.g. midday BST, which is 11:00 UTC; the
    displayed offset is SUBTRACTED to reach UTC. No explicit arithmetic is
    needed, though: if there's a timezone at the end of the string,
    ``STRFTIME()`` converts to UTC automatically. The format given is the
    OUTPUT format, chosen to be one that a Python datetime will recognize
    (so no 'T').

    **Precision**

    The output looks like ``2018-06-01 00:00:00.000``. SQLite provides
    millisecond precision only (in general and via the ``%f`` argument to
    ``STRFTIME``).

    SQLAlchemy's own DATETIME support for SQLite
    (https://docs.sqlalchemy.org/en/13/dialects/sqlite.html?highlight=sqlite#sqlalchemy.dialects.sqlite.DATETIME)
    doesn't support timezones, so that doesn't help us.

    **Text comparison problem**

    See :class:`camcops_server.tasks.core10.Core10ReportDateRangeTests`.
    SQLite compares these values as text, so e.g.

    .. code-block:: sql

        SELECT '2018-06-01 00:00:00.000' >= '2018-06-01 00:00:00.000000';  -- 0, false
        SELECT '2018-06-01 00:00:00.000' >= '2018-06-01 00:00:00.000';  -- 1, true

    Either the SQLite side must be translated to 6dp, or the bind parameter
    to 3dp. We can't always control the bind parameter, so we append '000'
    to the SQLite side.
    """  # noqa
    field = fetch_processed_single_clause(element, compiler)
    strftime_fmt = compiler.process(text(_SQLITE_DATETIME_FMT_FOR_PYTHON))
    # Pad the millisecond output to six decimal places (see docstring).
    return "(STRFTIME({}, {}) || '000')".format(strftime_fmt, field)
457# noinspection PyUnusedLocal
@compiles(isotzdatetime_to_utcdatetime, SqlaDialectName.SQLSERVER)
def isotzdatetime_to_utcdatetime_sqlserver(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> str:
    """
    SQL Server compiler for :class:`isotzdatetime_to_utcdatetime`.

    **Converting strings to DATETIME values**

    - ``CAST()``: part of ANSI SQL.
    - ``CONVERT()``: not part of ANSI SQL; has some extra formatting options.

    Both work, with milliseconds, on SQL Server 2005:

    .. code-block:: sql

        SELECT CAST('2001-01-31T21:30:49.123' AS DATETIME) AS via_cast,
               CONVERT(DATETIME, '2001-01-31T21:30:49.123') AS via_convert;

    However, going beyond milliseconds (e.g. "...21:30.49.123456") doesn't
    fail gracefully; it causes an error for both CAST and CONVERT.

    ``DATETIME2`` accepts greater precision, but requires SQL Server 2008 or
    higher. Then this works:

    .. code-block:: sql

        SELECT CAST('2001-01-31T21:30:49.123456' AS DATETIME2) AS via_cast,
               CONVERT(DATETIME2, '2001-01-31T21:30:49.123456') AS via_convert;

    Caveat: ``CAST(x AS DATETIME2)`` silently ignores any timezone
    information in the string, and so does ``CONVERT(DATETIME2, x, {0 or
    1})``.

    **Converting between time zones**

    There is NO TIME ZONE SUPPORT in SQL Server 2005; see e.g.
    https://stackoverflow.com/questions/3200827/how-to-convert-timezones-in-sql-server-2005.

    .. code-block:: none

        TODATETIMEOFFSET(expression, time_zone):
            expression: something that evaluates to a DATETIME2 value
            time_zone: integer minutes, or string hours/minutes e.g. "+13.00"
            -> produces a DATETIMEOFFSET value

    Available from SQL Server 2008
    (https://docs.microsoft.com/en-us/sql/t-sql/functions/todatetimeoffset-transact-sql).

    .. code-block:: none

        SWITCHOFFSET
            -> converts one DATETIMEOFFSET value to another, preserving its
               UTC time, but changing the displayed (local) time zone.

    Is ``SWITCHOFFSET`` unnecessary? We want a plain ``DATETIME2`` in UTC,
    and:

    - conversion to UTC is achieved automatically by ``CONVERT(DATETIME2,
      some_datetimeoffset, 1)``; see
      https://stackoverflow.com/questions/4953903/how-can-i-convert-a-sql-server-2008-datetimeoffset-to-a-datetime
    - ... but not by ``CAST(some_datetimeoffset AS DATETIME2)``, and not by
      ``CONVERT(DATETIME2, some_datetimeoffset, 0)``;
    - styles 0 and 1 are the only ones permissible from SQL Server 2012 and
      up (empirically, and documented for the reverse direction at
      https://docs.microsoft.com/en-us/sql/t-sql/functions/cast-and-convert-transact-sql?view=sql-server-2017);
    - this is not properly documented re UTC conversion, as far as I can
      see. So we use ``SWITCHOFFSET -> CAST`` to be explicit and clear.

    ``AT TIME ZONE`` is available from SQL Server 2016 only:
    https://docs.microsoft.com/en-us/sql/t-sql/queries/at-time-zone-transact-sql?view=sql-server-2017

    **Therefore**

    - We need to require SQL Server 2008 or higher.
    - Therefore we can use the ``DATETIME2`` type.
    - Note that ``LEN()``, not ``LENGTH()``, is ANSI SQL; SQL Server only
      supports ``LEN``.

    **Example (tested on SQL Server 2014)**

    .. code-block:: sql

        DECLARE @source AS VARCHAR(100) = '2001-01-31T21:30:49.123456+07:00';

        SELECT CAST(
            SWITCHOFFSET(
                TODATETIMEOFFSET(
                    CAST(LEFT(@source, LEN(@source) - 6) AS DATETIME2),
                    RIGHT(@source, 6)
                ),
                '+00:00'
            )
            AS DATETIME2
        )  -- 2001-01-31 14:30:49.1234560

    """  # noqa
    field = fetch_processed_single_clause(element, compiler)

    # VARCHAR without its timezone suffix, cast to a (naive) DATETIME2.
    naive_datetime2 = "CAST(LEFT({0}, LEN({0}) - {1}) AS DATETIME2)".format(
        field, _TZ_LEN
    )
    # VARCHAR holding the original timezone offset.
    source_offset = "RIGHT({}, {})".format(field, _TZ_LEN)
    # DATETIMEOFFSET carrying the original offset.
    with_source_offset = "TODATETIMEOFFSET({}, {})".format(
        naive_datetime2, source_offset
    )
    # DATETIMEOFFSET for the same instant, expressed in UTC.
    switched_to_utc = "SWITCHOFFSET({}, {})".format(
        with_source_offset, _UTC_TZ_LITERAL
    )
    # Plain DATETIME2, now in UTC.
    return "CAST({} AS DATETIME2)".format(switched_to_utc)
577# -----------------------------------------------------------------------------
578# unknown_field_to_utcdatetime
579# -----------------------------------------------------------------------------
581# noinspection PyPep8Naming
class unknown_field_to_utcdatetime(FunctionElement):
    """
    SQL function element used by :class:`PendulumDateTimeAsIsoTextColType`.

    Wraps an expression of unknown kind (it might be a ``DATETIME`` or one
    of our ISO-formatted text fields) and yields a ``DATETIME`` in the UTC
    timezone.

    The SQL itself is generated by the dialect-specific ``@compiles``
    functions registered for this class.
    """

    name = "unknown_field_to_utcdatetime"
    type = DateTime()
597# noinspection PyUnusedLocal
@compiles(unknown_field_to_utcdatetime)
def unknown_field_to_utcdatetime_default(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> None:
    """
    Fallback compiler for :class:`unknown_field_to_utcdatetime`, used when no
    dialect-specific implementation is registered: report the unknown
    dialect via ``fail_unknown_dialect``.
    """
    task = "perform unknown_field_to_utcdatetime"
    fail_unknown_dialect(compiler, task)
608# noinspection PyUnusedLocal
@compiles(unknown_field_to_utcdatetime, SqlaDialectName.MYSQL)
def unknown_field_to_utcdatetime_mysql(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> str:
    """
    MySQL compiler for :class:`unknown_field_to_utcdatetime`.

    A value whose length is that of a plain ``DATETIME`` (e.g.
    ``2013-05-30 00:00:00``, 19 characters) is passed through unchanged;
    anything else is converted from ISO text to a ``DATETIME``.
    """
    field = fetch_processed_single_clause(element, compiler)
    from_iso = isotzdatetime_to_utcdatetime_mysql(element, compiler, **kw)
    return "IF(LENGTH({0}) = {1}, {0}, {2})".format(
        field, _MYSQL_DATETIME_LEN, from_iso
    )
626# noinspection PyUnusedLocal
@compiles(unknown_field_to_utcdatetime, SqlaDialectName.SQLITE)
def unknown_field_to_utcdatetime_sqlite(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> str:
    """
    Implementation of :class:`unknown_field_to_utcdatetime` for SQLite.

    ``STRFTIME()`` is applied to the field, using the same output format as
    the SQLite implementation of :class:`isotzdatetime_to_utcdatetime`.

    SQLite compares the results as text, and ``%f`` gives only three decimal
    places. The other side of a comparison (built via
    :class:`isotzdatetime_to_utcdatetime`) is padded with ``'000'`` to six
    decimal places, so we pad here too. Without that, two equal instants
    would compare as unequal, e.g.
    ``'2018-06-01 00:00:00.000000' > '2018-06-01 00:00:00.000'``.

    Args:
        element: the clause element wrapping the field
        compiler: the SQL compiler in use
        kw: further compiler arguments (unused)

    Returns:
        SQL text for the expression
    """
    x = fetch_processed_single_clause(element, compiler)
    fmt = compiler.process(text(_SQLITE_DATETIME_FMT_FOR_PYTHON))
    # Pad milliseconds to 6dp so text comparison matches the ISO-column side.
    result = f"(STRFTIME({fmt}, {x}) || '000')"
    # log.debug(result)
    return result
641# noinspection PyUnusedLocal
@compiles(unknown_field_to_utcdatetime, SqlaDialectName.SQLSERVER)
def unknown_field_to_utcdatetime_sqlserver(
    element: "ClauseElement", compiler: "SQLCompiler", **kw
) -> str:
    """
    SQL Server compiler for :class:`unknown_field_to_utcdatetime`.

    The field might be a ``DATETIME2`` as well as a ``DATETIME``. It seems
    consistent that ``LEN(DATETIME2) = 27``, with precision to a tenth of a
    microsecond, e.g. ``2001-01-31 21:30:49.1234567`` (27).

    A value whose length matches a ``DATETIME`` or a ``DATETIME2`` is left
    alone; anything else goes through our ISO-to-datetime conversion.

    Importantly, neither ``_SQLSERVER_DATETIME_LEN`` nor
    ``_SQLSERVER_DATETIME2_LEN`` is the length of any of our ISO strings.
    """
    field = fetch_processed_single_clause(element, compiler)
    from_iso = isotzdatetime_to_utcdatetime_sqlserver(element, compiler, **kw)
    # "IN (...)" is used rather than "WHEN a OR b"; see
    # https://stackoverflow.com/questions/5487892/sql-server-case-when-or-then-else-end-the-or-is-not-supported  # noqa
    native_lengths = "({}, {})".format(
        _SQLSERVER_DATETIME_LEN, _SQLSERVER_DATETIME2_LEN
    )
    return " ".join(
        [
            "CASE WHEN LEN({}) IN {}".format(field, native_lengths),
            "THEN {}".format(field),
            "ELSE {}".format(from_iso),
            "END",
        ]
    )
673# =============================================================================
674# Custom date/time field as ISO-8601 text including timezone, using
675# pendulum.DateTime on the Python side.
676# =============================================================================
class PendulumDateTimeAsIsoTextColType(TypeDecorator):
    """
    Stores date/time values as ISO-8601 text, in a specific format (e.g.
    ``2013-07-24T20:04:07.123456+01:00``).
    Uses Pendulum on the Python side.

    SQL comparisons involving a column of this type are rewritten by
    :class:`comparator_factory` so that both sides are compared as UTC
    ``DATETIME`` values.
    """

    impl = String(length=StringLengths.ISO8601_DATETIME_STRING_MAX_LEN)
    # ... underlying SQL type

    _coltype_name = "PendulumDateTimeAsIsoTextColType"

    @property
    def python_type(self) -> type:
        """
        The Python type of the object.
        """
        return Pendulum

    @staticmethod
    def pendulum_to_isostring(x: PotentialDatetimeType) -> Optional[str]:
        """
        From a Python datetime to an ISO-formatted string in our particular
        format.

        Args:
            x: something that ``coerce_to_pendulum`` can process

        Returns:
            a string like ``2013-07-24T20:04:07.123456+01:00``, or ``None``
            if the coerced value has no ``strftime`` method (presumably
            because it is ``None``)
        """
        # https://docs.python.org/3.4/library/datetime.html#strftime-strptime-behavior  # noqa
        x = coerce_to_pendulum(x)
        try:
            mainpart = x.strftime(
                "%Y-%m-%dT%H:%M:%S.%f"
            )  # microsecond accuracy
            timezone = x.strftime("%z")  # won't have the colon in
            # Insert the colon: e.g. "+0100" -> "+01:00".
            return mainpart + timezone[:-2] + ":" + timezone[-2:]
        except AttributeError:
            return None

    @staticmethod
    def isostring_to_pendulum(x: Optional[str]) -> Optional[Pendulum]:
        """
        From an ISO-formatted string to a Python Pendulum, with timezone.

        Returns ``None`` (after logging a warning) if the string cannot be
        parsed.
        """
        try:
            return coerce_to_pendulum(x)
        except (ParserError, ValueError):
            log.warning("Bad ISO date/time string: {!r}", x)
            return None

    def process_bind_param(
        self, value: Optional[Pendulum], dialect: Dialect
    ) -> Optional[str]:
        """
        Convert parameters on the way from Python to the database.
        """
        retval = self.pendulum_to_isostring(value)
        if DEBUG_DATETIME_AS_ISO_TEXT:
            log.debug(
                "{}.process_bind_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_literal_param(
        self, value: Optional[Pendulum], dialect: Dialect
    ) -> Optional[str]:
        """
        Convert literals on the way from Python to the database.
        """
        retval = self.pendulum_to_isostring(value)
        if DEBUG_DATETIME_AS_ISO_TEXT:
            log.debug(
                "{}.process_literal_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_result_value(
        self, value: Optional[str], dialect: Dialect
    ) -> Optional[Pendulum]:
        """
        Convert things on the way from the database to Python.
        """
        retval = self.isostring_to_pendulum(value)
        if DEBUG_DATETIME_AS_ISO_TEXT:
            log.debug(
                "{}.process_result_value("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    # noinspection PyPep8Naming
    class comparator_factory(TypeDecorator.Comparator):
        """
        Process SQL for when we are comparing our column, in the database,
        to something else.

        We make this dialect-independent by calling functions like

        .. code-block:: none

            unknown_field_to_utcdatetime
            isotzdatetime_to_utcdatetime

        ... which we then specialize for specific dialects.

        This function itself does not appear to be able to access any
        information about the dialect.
        """

        def operate(self, op, *other, **kwargs):
            """
            Apply ``op`` to our column (converted to a UTC ``DATETIME`` in
            SQL) and a single other operand.

            If the other operand can be coerced to a Pendulum, it is
            converted to UTC in Python; otherwise it is wrapped in
            :class:`unknown_field_to_utcdatetime`.
            """
            assert len(other) == 1
            assert not kwargs
            other = other[0]
            try:
                processed_other = convert_datetime_to_utc(
                    coerce_to_pendulum(other)
                )
                # - If you try to call a dialect-specialized FunctionElement,
                #   it processes the clause to "?" (meaning "attach bind
                #   parameter here"); it's not the value itself.
                # - For our SQLite "milliseconds only" comparator problem (see
                #   above), we can't do very much here without knowing the
                #   dialect. So we make the SQLite side look like it has
                #   microseconds by appending "000"...
            except (AttributeError, ParserError, TypeError, ValueError):
                # OK. At this point, "other" could be a plain DATETIME field,
                # or a PendulumDateTimeAsIsoTextColType field (or potentially
                # something else that we don't really care about). If it's a
                # DATETIME, then we assume it is already in UTC.
                processed_other = unknown_field_to_utcdatetime(other)
            if DEBUG_DATETIME_AS_ISO_TEXT:
                log.debug(
                    "operate(self={!r}, op={!r}, other={!r})", self, op, other
                )
                log.debug("self.expr = {!r}", self.expr)
                log.debug("processed_other = {!r}", processed_other)
                # traceback.print_stack()
            return op(isotzdatetime_to_utcdatetime(self.expr), processed_other)

        def reverse_operate(self, op, *other, **kwargs):
            """
            Not implemented, as this is not thought to be called.

            Raises:
                AssertionError: always. Raised explicitly (rather than via
                    ``assert False``) so that the failure still occurs when
                    Python runs with assertions disabled (``-O``), instead of
                    silently returning ``None``.
            """
            raise AssertionError("I don't think this is ever being called")
835# =============================================================================
836# Custom duration field as ISO-8601 text, using pendulum.Duration on the Python
837# side.
838# =============================================================================
class PendulumDurationAsIsoTextColType(TypeDecorator):
    """
    Stores time durations as ISO-8601, in a specific format.
    Uses :class:`pendulum.Duration` on the Python side.
    """

    impl = String(length=StringLengths.ISO8601_DURATION_STRING_MAX_LEN)
    # ... underlying SQL type

    _coltype_name = "PendulumDurationAsIsoTextColType"

    @property
    def python_type(self) -> type:
        """
        The Python type of the object.
        """
        return Duration

    @staticmethod
    def pendulum_duration_to_isostring(x: Optional[Duration]) -> Optional[str]:
        """
        From a :class:`pendulum.Duration` (or ``None``) to an ISO-formatted
        string in our particular format (or ``NULL``).

        Years/months are permitted in the output, and any minus sign is
        placed at the front.
        """
        if x is None:
            return None
        return duration_to_iso(
            x, permit_years_months=True, minus_sign_at_front=True
        )

    @staticmethod
    def isostring_to_pendulum_duration(x: Optional[str]) -> Optional[Duration]:
        """
        From an ISO-formatted duration string to a :class:`pendulum.Duration`.

        Returns ``None`` for ``None`` (NULL) or a blank string, and also
        (after logging a warning) for a string that cannot be parsed.
        """
        if not x:  # None (NULL) or blank string
            return None
        try:
            return duration_from_iso(x)
        except (ISO8601Error, ValueError):
            log.warning("Bad ISO duration string: {!r}", x)
            return None

    def process_bind_param(
        self, value: Optional[Duration], dialect: Dialect
    ) -> Optional[str]:
        """
        Convert parameters on the way from Python to the database.
        """
        retval = self.pendulum_duration_to_isostring(value)
        if DEBUG_DURATION_AS_ISO_TEXT:
            log.debug(
                "{}.process_bind_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_literal_param(
        self, value: Optional[Duration], dialect: Dialect
    ) -> Optional[str]:
        """
        Convert literals on the way from Python to the database.
        """
        retval = self.pendulum_duration_to_isostring(value)
        if DEBUG_DURATION_AS_ISO_TEXT:
            log.debug(
                "{}.process_literal_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_result_value(
        self, value: Optional[str], dialect: Dialect
    ) -> Optional[Duration]:
        """
        Convert things on the way from the database to Python.
        """
        retval = self.isostring_to_pendulum_duration(value)
        if DEBUG_DURATION_AS_ISO_TEXT:
            log.debug(
                "{}.process_result_value("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    # No comparator_factory; we do not use SQL to compare ISO durations.
944# =============================================================================
945# Semantic version column type
946# =============================================================================
class SemanticVersionColType(TypeDecorator):
    """
    Stores semantic versions in the database, as text.
    Uses :class:`semantic_version.Version` on the Python side.
    """

    impl = String(length=147)  # https://github.com/mojombo/semver/issues/79

    _coltype_name = "SemanticVersionColType"

    @property
    def python_type(self) -> type:
        """
        The Python type of the object.
        """
        return Version

    def process_bind_param(
        self, value: Optional[Version], dialect: Dialect
    ) -> Optional[str]:
        """
        Convert parameters on the way from Python to the database.

        ``None`` is passed through; anything else is stored as ``str(value)``.
        """
        retval = str(value) if value is not None else None
        if DEBUG_SEMANTIC_VERSION:
            log.debug(
                "{}.process_bind_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_literal_param(
        self, value: Optional[Version], dialect: Dialect
    ) -> Optional[str]:
        """
        Convert literals on the way from Python to the database.

        ``None`` is passed through; anything else is rendered as
        ``str(value)``.
        """
        retval = str(value) if value is not None else None
        if DEBUG_SEMANTIC_VERSION:
            # The final placeholder must be "{!r}" (with braces); a bare
            # "!r" is literal text and the return value would not be logged.
            log.debug(
                "{}.process_literal_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_result_value(
        self, value: Optional[str], dialect: Dialect
    ) -> Optional[Version]:
        """
        Convert things on the way from the database to Python.
        """
        if value is None:
            retval = None
        else:
            # Here we do some slightly fancier conversion to deal with all
            # sorts of potential rubbish coming in, so we get a properly
            # ordered Version out:
            retval = make_version(value)
        if DEBUG_SEMANTIC_VERSION:
            log.debug(
                "{}.process_result_value("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    # The string literal below is a deliberately disabled comparator_factory,
    # retained for reference only; it has no effect at runtime.
    # NOTE(review): if it is ever re-enabled, ``str(Version)`` in operate()
    # stringifies the Version *class* rather than ``other`` -- that looks like
    # a typo for ``str(other)``.
    '''
    # noinspection PyPep8Naming
    class comparator_factory(TypeDecorator.Comparator):
        """
        Process SQL for when we are comparing our column, in the database,
        to something else.

        See https://docs.sqlalchemy.org/en/13/core/type_api.html#sqlalchemy.types.TypeEngine.comparator_factory.

        .. warning::

            I'm not sure this is either (a) correct or (b) used; it may
            produce a string comparison of e.g. ``14.0.0`` versus ``2.0.0``,
            which will be alphabetical and therefore wrong.
            Disabled on 2019-04-28.

        """  # noqa

        def operate(self, op, *other, **kwargs):
            assert len(other) == 1
            assert not kwargs
            other = other[0]
            if isinstance(other, Version):
                processed_other = str(Version)
            else:
                processed_other = other
            return op(self.expr, processed_other)

        def reverse_operate(self, op, *other, **kwargs):
            assert False, "I don't think this is ever being called"
    '''
1062# =============================================================================
1063# IdNumReferenceListColType
1064# =============================================================================
class IdNumReferenceListColType(TypeDecorator):
    """
    Stores a list of IdNumReference objects.

    On the database side, uses a comma-separated list of integers: each
    reference contributes its ``which_idnum`` followed by its
    ``idnum_value``, so the integers come in pairs.
    """

    impl = Text()
    _coltype_name = "IdNumReferenceListColType"

    @property
    def python_type(self) -> type:
        """
        The Python type of the object.
        """
        return list

    @staticmethod
    def _idnumdef_list_to_dbstr(
        idnumdef_list: Optional[List[IdNumReference]],
    ) -> str:
        """
        Converts an optional list of
        :class:`camcops_server.cc_modules.cc_simpleobjects.IdNumReference`
        objects to a CSV string suitable for storing in the database.

        ``None`` and the empty list both give an empty string.
        """
        if not idnumdef_list:
            return ""
        elements = []  # type: List[int]
        for idnumdef in idnumdef_list:
            # Flatten each reference into two consecutive integers.
            elements.append(idnumdef.which_idnum)
            elements.append(idnumdef.idnum_value)
        return ",".join(str(x) for x in elements)

    @staticmethod
    def _dbstr_to_idnumdef_list(dbstr: Optional[str]) -> List[IdNumReference]:
        """
        Converts a CSV string (from the database) to a list of
        :class:`camcops_server.cc_modules.cc_simpleobjects.IdNumReference`
        objects.

        Returns an empty list if the input is ``None``, is not a
        comma-separated list of integers, does not contain an even number of
        integers, or contains a negative integer.
        """
        idnumdef_list = []  # type: List[IdNumReference]
        try:
            intlist = [int(numstr) for numstr in dbstr.split(",")]
        except (AttributeError, TypeError, ValueError):
            # AttributeError: dbstr is None; ValueError: non-integer content
            # (including the empty string).
            return []
        length = len(intlist)
        if length == 0 or length % 2 != 0:  # enforce pairs
            return []
        for which_idnum, idnum_value in chunks(intlist, n=2):
            if which_idnum < 0 or idnum_value < 0:  # reject negative integers
                return []
            idnumdef_list.append(
                IdNumReference(
                    which_idnum=which_idnum, idnum_value=idnum_value
                )
            )
        return idnumdef_list

    def process_bind_param(
        self, value: Optional[List[IdNumReference]], dialect: Dialect
    ) -> str:
        """
        Convert parameters on the way from Python to the database.
        """
        retval = self._idnumdef_list_to_dbstr(value)
        if DEBUG_IDNUMDEF_LIST:
            log.debug(
                "{}.process_bind_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_literal_param(
        self, value: Optional[List[IdNumReference]], dialect: Dialect
    ) -> str:
        """
        Convert literals on the way from Python to the database.
        """
        retval = self._idnumdef_list_to_dbstr(value)
        if DEBUG_IDNUMDEF_LIST:
            # The final placeholder must be "{!r}" (with braces); a bare
            # "!r" is literal text and the return value would not be logged.
            log.debug(
                "{}.process_literal_param("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval

    def process_result_value(
        self, value: Optional[str], dialect: Dialect
    ) -> List[IdNumReference]:
        """
        Convert things on the way from the database to Python.
        """
        retval = self._dbstr_to_idnumdef_list(value)
        if DEBUG_IDNUMDEF_LIST:
            log.debug(
                "{}.process_result_value("
                "self={!r}, value={!r}, dialect={!r}) -> {!r}",
                self._coltype_name,
                self,
                value,
                dialect,
                retval,
            )
        return retval
1183# =============================================================================
1184# UUID column type
1185# =============================================================================
class UuidColType(TypeDecorator):
    """
    Stores a UUID as 32 hexadecimal characters in a ``CHAR(32)`` column;
    values read back are converted to :class:`uuid.UUID`.
    """

    # Based on:
    # https://docs.sqlalchemy.org/en/13/core/custom_types.html#backend-agnostic-guid-type  # noqa: E501
    # which will use postgresql UUID if relevant, not doing that here

    impl = CHAR(32)

    @property
    def python_type(self) -> type:
        """
        The Python type reported for this column type.
        """
        return str

    def process_bind_param(
        self, value: uuid.UUID, dialect: Dialect
    ) -> Optional[str]:
        """
        Python to database: ``None`` stays ``None``; otherwise the UUID's
        integer value is rendered as zero-padded hexadecimal.
        """
        return None if value is None else "%.32x" % value.int

    def process_result_value(
        self, value: Optional[str], dialect: Dialect
    ) -> Optional[uuid.UUID]:
        """
        Database to Python: ``None`` stays ``None``; otherwise the stored
        text is parsed into a :class:`uuid.UUID`.
        """
        return None if value is None else uuid.UUID(value)
1216# =============================================================================
1217# JSON column type
1218# =============================================================================
class JsonColType(TypeDecorator):
    """
    Stores arbitrary JSON-serializable Python values as JSON text.
    """

    # Unlike
    # https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.JSON
    # does not use vendor-specific JSON type
    impl = UnicodeText

    @property
    def python_type(self) -> type:
        """
        The Python type reported for this column type.
        """
        return str

    def process_bind_param(
        self, value: Any, dialect: Dialect
    ) -> Optional[str]:
        """
        Python to database: ``None`` stays ``None`` (rather than becoming the
        JSON text ``null``); anything else is serialized with
        :func:`json.dumps`.
        """
        return None if value is None else json.dumps(value)

    def process_result_value(self, value: str, dialect: Dialect) -> Any:
        """
        Database to Python: ``None`` stays ``None``; anything else is parsed
        with :func:`json.loads`.
        """
        return None if value is None else json.loads(value)
1246# =============================================================================
1247# Phone number column type
1248# =============================================================================
class PhoneNumberColType(TypeDecorator):
    """
    Stores phone numbers as text in E164 format, converting to/from
    ``phonenumbers`` objects on the Python side.
    """

    impl = Unicode(length=StringLengths.PHONE_NUMBER_MAX_LEN)

    @property
    def python_type(self) -> type:
        """
        The Python type reported for this column type.
        """
        return str

    def process_bind_param(
        self, value: Any, dialect: Dialect
    ) -> Optional[str]:
        """
        Python to database: ``None`` stays ``None``; otherwise the number is
        formatted as E164.
        """
        if value is not None:
            e164 = phonenumbers.PhoneNumberFormat.E164
            return phonenumbers.format_number(value, e164)
        return None

    def process_result_value(self, value: str, dialect: Dialect) -> Any:
        """
        Database to Python: ``None`` or an empty string gives ``None``;
        otherwise the stored text is parsed.
        """
        # Should be stored as E164 so no need to pass a region
        return phonenumbers.parse(value, None) if value else None
1276# =============================================================================
1277# PermittedValueChecker: used by CamcopsColumn
1278# =============================================================================
class PermittedValueChecker(object):
    """
    Describes which values are acceptable for a column (in CamCOPS task
    tables) and tests candidate values against that description.
    """

    def __init__(
        self,
        not_null: bool = False,
        minimum: Union[int, float] = None,
        maximum: Union[int, float] = None,
        permitted_values: Sequence[Any] = None,
    ) -> None:
        """
        Args:
            not_null: if true, ``None`` (NULL) is not an acceptable value
            minimum: optional numeric lower bound (inclusive)
            maximum: optional numeric upper bound (inclusive)
            permitted_values: optional collection of acceptable values
        """
        self.not_null = not_null
        self.minimum = minimum
        self.maximum = maximum
        self.permitted_values = permitted_values

    def is_ok(self, value: Any) -> bool:
        """
        Is the value acceptable?
        """
        if value is None:
            # NULL fails only when NULLs are forbidden; otherwise it is
            # exempt from every other test.
            return not self.not_null
        # Tests are evaluated in this order, stopping at the first failure.
        violation = (
            (
                self.permitted_values is not None
                and value not in self.permitted_values
            )
            or (self.minimum is not None and value < self.minimum)
            or (self.maximum is not None and value > self.maximum)
        )
        return not violation

    def failure_msg(self, value: Any) -> str:
        """
        Explains why the value is unacceptable; returns an empty string if
        the value is fine.
        """
        if value is None:
            if not self.not_null:
                return ""  # value is OK
            return "value is None and NULL values are not permitted"
        allowed = self.permitted_values
        if allowed is not None and value not in allowed:
            return f"value {value!r} not in permitted values {allowed!r}"
        lower = self.minimum
        if lower is not None and value < lower:
            return f"value {value!r} less than minimum of {lower!r}"
        upper = self.maximum
        if upper is not None and value > upper:
            return f"value {value!r} more than maximum of {upper!r}"
        return ""

    def __repr__(self):
        return auto_repr(self)

    def permitted_values_inc_minmax(self) -> Tuple:
        """
        Returns the permitted values as a tuple: the explicit list if there
        is one, or else the integer range implied by the minimum/maximum.
        Returns an empty tuple if neither applies.
        """
        explicit = self.permitted_values
        if explicit:
            return tuple(explicit)
        lower, upper = self.minimum, self.maximum
        # Take a punt that integer minima/maxima mean that only integers are
        # permitted...
        if not (isinstance(lower, int) and isinstance(upper, int)):
            return ()
        return tuple(range(lower, upper + 1))

    def permitted_values_csv(self) -> str:
        """
        Returns the permitted values as comma-separated text.

        Primarily used for CRIS data dictionaries.
        """
        return ",".join(map(str, self.permitted_values_inc_minmax()))
# Specific instances, to reduce object duplication and magic numbers:
#
# Range checkers are inclusive at both ends: PermittedValueChecker.is_ok()
# rejects only values strictly below the minimum or strictly above the
# maximum.

# Lower bound of zero; no upper bound.
MIN_ZERO_CHECKER = PermittedValueChecker(minimum=0)

# Explicit set of permitted values, taken from PV.BIT (defined elsewhere;
# presumably 0 and 1 -- TODO confirm).
BIT_CHECKER = PermittedValueChecker(permitted_values=PV.BIT)
# Ranges starting at zero:
ZERO_TO_ONE_CHECKER = PermittedValueChecker(minimum=0, maximum=1)
ZERO_TO_TWO_CHECKER = PermittedValueChecker(minimum=0, maximum=2)
ZERO_TO_THREE_CHECKER = PermittedValueChecker(minimum=0, maximum=3)
ZERO_TO_FOUR_CHECKER = PermittedValueChecker(minimum=0, maximum=4)
ZERO_TO_FIVE_CHECKER = PermittedValueChecker(minimum=0, maximum=5)
ZERO_TO_SIX_CHECKER = PermittedValueChecker(minimum=0, maximum=6)
ZERO_TO_SEVEN_CHECKER = PermittedValueChecker(minimum=0, maximum=7)
ZERO_TO_EIGHT_CHECKER = PermittedValueChecker(minimum=0, maximum=8)
ZERO_TO_NINE_CHECKER = PermittedValueChecker(minimum=0, maximum=9)
ZERO_TO_10_CHECKER = PermittedValueChecker(minimum=0, maximum=10)
ZERO_TO_100_CHECKER = PermittedValueChecker(minimum=0, maximum=100)

# Ranges starting at one:
ONE_TO_TWO_CHECKER = PermittedValueChecker(minimum=1, maximum=2)
ONE_TO_THREE_CHECKER = PermittedValueChecker(minimum=1, maximum=3)
ONE_TO_FOUR_CHECKER = PermittedValueChecker(minimum=1, maximum=4)
ONE_TO_FIVE_CHECKER = PermittedValueChecker(minimum=1, maximum=5)
ONE_TO_SIX_CHECKER = PermittedValueChecker(minimum=1, maximum=6)
ONE_TO_SEVEN_CHECKER = PermittedValueChecker(minimum=1, maximum=7)
ONE_TO_EIGHT_CHECKER = PermittedValueChecker(minimum=1, maximum=8)
ONE_TO_NINE_CHECKER = PermittedValueChecker(minimum=1, maximum=9)
1400# =============================================================================
1401# CamcopsColumn: provides extra functions over Column.
1402# =============================================================================
# Column attributes:
# Name of the CamcopsColumn attribute (and matching constructor keyword
# argument) that holds the column's PermittedValueChecker.
COLATTR_PERMITTED_VALUE_CHECKER = "permitted_value_checker"
# noinspection PyAbstractClass
class CamcopsColumn(Column):
    """
    A SQLAlchemy :class:`Column` subclass carrying extra CamCOPS-specific
    information, such as:

    - whether the field is a BLOB reference;
    - how the field should be handled for anonymisation;
    - which values the field may hold (as a soft constraint: bad values
      cause errors to be reported, but they are still stored).
    """

    def __init__(
        self,
        *args,
        include_in_anon_staging_db: bool = False,
        exempt_from_anonymisation: bool = False,
        identifies_patient: bool = False,
        is_blob_id_field: bool = False,
        blob_relationship_attr_name: str = "",
        permitted_value_checker: PermittedValueChecker = None,
        **kwargs,
    ) -> None:
        """

        Args:
            *args:
                Positional arguments for the :class:`Column` constructor.
            include_in_anon_staging_db:
                Mark this column for inclusion in data dictionaries for an
                anonymisation staging database.
            exempt_from_anonymisation:
                If true: the field may be text, but is guaranteed to be
                free of identifiers (e.g. it holds only predefined disease
                severity descriptions), so needs no anonymisation.
            identifies_patient:
                If true: the field holds a patient identifier (e.g. name).
            is_blob_id_field:
                If true: the field holds a reference (client FK) to the
                BLOB table.
            blob_relationship_attr_name:
                For BLOB ID fields: the name of the relationship attribute
                in the owning class/object that yields the BLOB itself when
                accessed. Required if ``is_blob_id_field`` is true.
            permitted_value_checker:
                Optional :class:`PermittedValueChecker` giving soft
                constraints on the field's contents. (Nothing is enforced
                at the database level, but we can moan if incorrect data
                are present.)
            **kwargs:
                Keyword arguments for the :class:`Column` constructor.
        """
        if is_blob_id_field:
            assert blob_relationship_attr_name, (
                "If specifying a BLOB ID field, must give the attribute name "
                "of the relationship too"
            )
        # Store our own flags first, then let Column deal with the rest.
        self.include_in_anon_staging_db = include_in_anon_staging_db
        self.exempt_from_anonymisation = exempt_from_anonymisation
        self.identifies_patient = identifies_patient
        self.is_blob_id_field = is_blob_id_field
        self.blob_relationship_attr_name = blob_relationship_attr_name
        self.permitted_value_checker = permitted_value_checker
        super().__init__(*args, **kwargs)

    def _constructor(self, *args, **kwargs) -> "CamcopsColumn":
        """
        SQLAlchemy hook (not clearly documented) used when copying column
        objects. Builds a new object of our class, passing on our
        CamCOPS-specific settings alongside the arguments supplied.

        See
        https://bitbucket.org/zzzeek/sqlalchemy/issues/2284/please-make-column-easier-to-subclass
        """  # noqa
        kwargs.update(
            {
                "include_in_anon_staging_db": self.include_in_anon_staging_db,
                "exempt_from_anonymisation": self.exempt_from_anonymisation,
                "identifies_patient": self.identifies_patient,
                "is_blob_id_field": self.is_blob_id_field,
                "blob_relationship_attr_name": self.blob_relationship_attr_name,  # noqa
                COLATTR_PERMITTED_VALUE_CHECKER: self.permitted_value_checker,
            }
        )
        # noinspection PyTypeChecker
        return self.__class__(*args, **kwargs)

    def __repr__(self) -> str:
        shown_attrs = (
            "include_in_anon_staging_db",
            "exempt_from_anonymisation",
            "identifies_patient",
            "is_blob_id_field",
            "blob_relationship_attr_name",
            COLATTR_PERMITTED_VALUE_CHECKER,
        )
        elements = [
            f"{attrname}={getattr(self, attrname)!r}"
            for attrname in shown_attrs
        ]
        elements.append(f"super()={super().__repr__()}")
        return f"CamcopsColumn({', '.join(elements)})"

    def set_permitted_value_checker(
        self, permitted_value_checker: PermittedValueChecker
    ) -> None:
        """
        Replaces this column's :class:`PermittedValueChecker`.
        """
        self.permitted_value_checker = permitted_value_checker
1517# =============================================================================
1518# Operate on Column/CamcopsColumn properties
1519# =============================================================================
def gen_columns_matching_attrnames(
    obj, attrnames: List[str]
) -> Generator[Tuple[str, Column], None, None]:
    """
    Generate the columns of an SQLAlchemy ORM object whose attribute names
    appear in a given list.

    Args:
        obj: SQLAlchemy ORM object to inspect
        attrnames: attribute names to select

    Yields:
        ``attrname, column`` tuples

    """
    yield from (
        (attrname, column)
        for attrname, column in gen_columns(obj)
        if attrname in attrnames
    )
def gen_camcops_columns(
    obj,
) -> Generator[Tuple[str, CamcopsColumn], None, None]:
    """
    Generate those columns of an object that are instances of
    :class:`camcops_server.cc_modules.cc_sqla_coltypes.CamcopsColumn`.

    Args:
        obj: SQLAlchemy ORM object to inspect

    Yields:
        ``attrname, column`` tuples
    """
    for attrname, column in gen_columns(obj):
        if not isinstance(column, CamcopsColumn):
            continue  # plain Column (or something else): not ours
        yield attrname, column
def gen_camcops_blob_columns(
    obj,
) -> Generator[Tuple[str, CamcopsColumn], None, None]:
    """
    Generate those
    :class:`camcops_server.cc_modules.cc_sqla_coltypes.CamcopsColumn` columns
    of an object that are flagged as BLOB ID fields.

    A warning is logged for any such column whose attribute name differs
    from its SQL column name; the column is still yielded.

    Args:
        obj: SQLAlchemy ORM object to inspect

    Yields:
        ``attrname, column`` tuples
    """
    for attrname, column in gen_camcops_columns(obj):
        if not column.is_blob_id_field:
            continue
        if attrname != column.name:
            log.warning(
                "BLOB field where attribute name {!r} != SQL "
                "column name {!r}",
                attrname,
                column.name,
            )
        yield attrname, column
def get_column_attr_names(obj) -> List[str]:
    """
    Return the attribute names of all columns of an SQLAlchemy ORM object,
    in the order that ``gen_columns`` produces them.
    """
    names = [attrname for attrname, _column in gen_columns(obj)]
    return names
def get_camcops_column_attr_names(obj) -> List[str]:
    """
    Return the attribute names of all
    :class:`camcops_server.cc_modules.cc_sqla_coltypes.CamcopsColumn`
    columns of an SQLAlchemy ORM object.
    """
    names = [attrname for attrname, _column in gen_camcops_columns(obj)]
    return names
def get_camcops_blob_column_attr_names(obj) -> List[str]:
    """
    Return the attribute names of all
    :class:`camcops_server.cc_modules.cc_sqla_coltypes.CamcopsColumn` BLOB
    columns of an SQLAlchemy ORM object.
    """
    names = [attrname for attrname, _column in gen_camcops_blob_columns(obj)]
    return names
def permitted_value_failure_msgs(obj) -> List[str]:
    """
    Tests an SQLAlchemy ORM object instance against the permitted-value
    checkers attached to its
    :class:`camcops_server.cc_modules.cc_sqla_coltypes.CamcopsColumn`
    columns, if any.

    Returns a list of failure messages (an empty list means all OK).

    If you only need a yes/no answer, :func:`permitted_values_ok` is
    quicker.
    """
    failure_msgs = []
    for attrname, camcops_column in gen_camcops_columns(obj):
        checker = (
            camcops_column.permitted_value_checker
        )  # type: Optional[PermittedValueChecker]
        if checker is not None:
            problem = checker.failure_msg(getattr(obj, attrname))
            if problem:
                failure_msgs.append(
                    f"Invalid value for {attrname}: {problem}"
                )
    return failure_msgs
def permitted_values_ok(obj) -> bool:
    """
    Does an instance pass all of its permitted-value checks (if it has
    any)? Stops at the first failing column.

    To find out why it failed, see :func:`permitted_value_failure_msgs`.
    """
    return all(
        camcops_column.permitted_value_checker.is_ok(getattr(obj, attrname))
        for attrname, camcops_column in gen_camcops_columns(obj)
        # Columns without a checker are unconstrained.
        if camcops_column.permitted_value_checker is not None
    )
def gen_ancillary_relationships(
    obj,
) -> Generator[
    Tuple[str, RelationshipProperty, Type["GenericTabletRecordMixin"]],
    None,
    None,
]:
    """
    For an SQLAlchemy ORM object, generate ``attrname,
    relationship_property, related_class`` tuples for every relationship
    whose ``info`` dictionary marks it as a CamCOPS ancillary relationship.
    """
    for attrname, rel_prop, related_class in gen_relationships(obj):
        is_ancillary = rel_prop.info.get(RelationshipInfo.IS_ANCILLARY, None)
        if is_ancillary is not True:  # must be exactly True, not just truthy
            continue
        yield attrname, rel_prop, related_class
def gen_blob_relationships(
    obj,
) -> Generator[
    Tuple[str, RelationshipProperty, Type["GenericTabletRecordMixin"]],
    None,
    None,
]:
    """
    For an SQLAlchemy ORM object, generate ``attrname,
    relationship_property, related_class`` tuples for every relationship
    whose ``info`` dictionary marks it as a CamCOPS BLOB relationship.
    """
    for attrname, rel_prop, related_class in gen_relationships(obj):
        is_blob = rel_prop.info.get(RelationshipInfo.IS_BLOB, None)
        if is_blob is not True:  # must be exactly True, not just truthy
            continue
        yield attrname, rel_prop, related_class
1690# =============================================================================
1691# Specializations of CamcopsColumn to save typing
1692# =============================================================================
1695def _name_type_in_column_args(args: Tuple[Any, ...]) -> Tuple[bool, bool]:
1696 """
1697 SQLAlchemy doesn't encourage deriving from Column. If you do, you have to
1698 implement ``__init__()`` and ``_constructor()`` carefully. The
1699 ``__init__()`` function will be called by user code, and via SQLAlchemy
1700 internals, including via ``_constructor`` (e.g. from
1701 ``Column.make_proxy()``).
1703 It is likely that ``__init__`` will experience many combinations of the
1704 column name and type being passed either in ``*args`` or ``**kwargs``. It
1705 must pass them on to :class:`Column`. If you don't mess with the type,
1706 that's easy; just pass them on unmodified. But if you plan to mess with the
1707 type, as we do in :class:`BoolColumn` below, we must make sure that we
1708 don't pass either of ``name`` or ``type_`` in *both* ``args`` and
1709 ``kwargs``.
1711 This function tells you whether ``name`` and ``type_`` are present in args,
1712 using the same method as ``Column.__init__()``.
1713 """
1714 name_in_args = False
1715 type_in_args = False
1716 args = list(args) # make a copy, and make it a list not a tuple
1717 if args:
1718 if isinstance(args[0], util.string_types):
1719 name_in_args = True
1720 args.pop(0)
1721 if args:
1722 coltype = args[0]
1723 if hasattr(coltype, "_sqla_type"):
1724 type_in_args = True
1725 return name_in_args, type_in_args
# noinspection PyAbstractClass
class BoolColumn(CamcopsColumn):
    """
    A :class:`camcops_server.cc_modules.cc_sqla_coltypes.CamcopsColumn`
    that holds a boolean value.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # All arguments must be passed on, ultimately to Column; otherwise,
        # when using AbstractConcreteBase, you can get this:
        #
        # TypeError: Could not create a copy of this <class 'camcops_server.
        # cc_modules.cc_sqla_coltypes.BoolColumn'> object.  Ensure the class
        # includes a _constructor() attribute or method which accepts the
        # standard Column constructor arguments, or references the Column class
        # itself.
        #
        # During internal copying, "type_" can arrive here within kwargs, so
        # we must not send it on twice to super().__init__(). Also,
        # Column.make_proxy() calls our _constructor() with name and type in
        # args, so that case must be handled too...

        type_in_args = _name_type_in_column_args(args)[1]
        self.constraint_name = kwargs.pop(
            "constraint_name", None
        )  # type: Optional[str]
        if not type_in_args:
            # The "name" parameter to Boolean() specifies the name of the
            # (0, 1) constraint.
            constraint_name_conv = (
                conv(self.constraint_name)  # ... see help for ``conv``
                if self.constraint_name
                else None
            )
            kwargs["type_"] = Boolean(name=constraint_name_conv)
        kwargs[COLATTR_PERMITTED_VALUE_CHECKER] = BIT_CHECKER
        super().__init__(*args, **kwargs)
        if self.constraint_name:
            return
        # No explicit constraint name: warn if the column name is long.
        if len(self.name) >= LONG_COLUMN_NAME_WARNING_LIMIT:
            log.warning(
                "BoolColumn with long column name and no constraint "
                "name: {!r}",
                self.name,
            )

    def __repr__(self) -> str:
        constraint_part = f"constraint_name={self.constraint_name!r}"
        parent_part = f"super()={super().__repr__()}"
        return f"BoolColumn({', '.join([constraint_part, parent_part])})"

    def _constructor(self, *args: Any, **kwargs: Any) -> "BoolColumn":
        """
        Make a copy, carrying our constraint name across; see
        https://bitbucket.org/zzzeek/sqlalchemy/issues/2284/please-make-column-easier-to-subclass
        """
        kwargs["constraint_name"] = self.constraint_name
        return super()._constructor(*args, **kwargs)