Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/cardinal_pythonlib/snomed.py : 39%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# cardinal_pythonlib/snomed.py
4"""
5===============================================================================
7 Original code copyright (C) 2009-2021 Rudolf Cardinal (rudolf@pobox.com).
9 This file is part of cardinal_pythonlib.
11 Licensed under the Apache License, Version 2.0 (the "License");
12 you may not use this file except in compliance with the License.
13 You may obtain a copy of the License at
15 https://www.apache.org/licenses/LICENSE-2.0
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
23===============================================================================
25**Functions to assist with SNOMED-CT.**
27See http://snomed.org/.
29Note that the licensing arrangements for SNOMED-CT mean that the actual codes
30must be separate (and not part of this code).
32A full SNOMED CT download is about 1.1 Gb; see
33https://digital.nhs.uk/services/terminology-and-classifications/snomed-ct.
34Within a file such as ``uk_sct2cl_26.0.2_20181107000001.zip``, relevant files
35include:
37.. code-block:: none
39 # Files with "Amoxicillin" in include two snapshots and two full files:
41 SnomedCT_UKClinicalRF2_PRODUCTION_20181031T000001Z/Full/Terminology/sct2_Description_Full-en-GB_GB1000000_20181031.txt
42 # ... 234,755 lines
44 SnomedCT_InternationalRF2_PRODUCTION_20180731T120000Z/Full/Terminology/sct2_Description_Full-en_INT_20180731.txt
45 # ... 2,513,953 lines; this is the main file.
47Note grammar:
49- http://snomed.org/scg
50- https://confluence.ihtsdotools.org/display/DOCSCG
51- https://confluence.ihtsdotools.org/download/attachments/33494865/SnomedCtExpo_Expressions_20161028_s2_20161101.pdf # noqa
52- https://confluence.ihtsdotools.org/display/SLPG/SNOMED+CT+Expression+Constraint+Language
54Test basic expressions:
56.. code-block:: python
58 import logging
59 from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
60 from cardinal_pythonlib.snomed import *
61 main_only_quicksetup_rootlogger(level=logging.DEBUG)
63 # ---------------------------------------------------------------------
64 # From the SNOMED-CT examples (http://snomed.org/scg), with some values
65 # fixed from the term browser:
66 # ---------------------------------------------------------------------
68 diabetes = SnomedConcept(73211009, "Diabetes mellitus (disorder)")
69 diabetes_expr = SnomedExpression(diabetes)
70 print(diabetes_expr.longform)
71 print(diabetes_expr.shortform)
73 pain = SnomedConcept(22253000, "Pain (finding)")
74 finding_site = SnomedConcept(363698007, "Finding site")
75 foot = SnomedConcept(56459004, "Foot")
77 pain_in_foot = SnomedExpression(pain, {finding_site: foot})
78 print(pain_in_foot.longform)
79 print(pain_in_foot.shortform)
81 amoxicillin_medicine = SnomedConcept(27658006, "Product containing amoxicillin (medicinal product)")
82 amoxicillin_substance = SnomedConcept(372687004, "Amoxicillin (substance)")
83 has_dose_form = SnomedConcept(411116001, "Has manufactured dose form (attribute)")
84 capsule = SnomedConcept(385049006, "Capsule (basic dose form)")
85 has_active_ingredient = SnomedConcept(127489000, "Has active ingredient (attribute)")
86 has_basis_of_strength_substance = SnomedConcept(732943007, "Has basis of strength substance (attribute)")
87 mass = SnomedConcept(118538004, "Mass, a measure of quantity of matter (property) (qualifier value)")
88 unit_of_measure = SnomedConcept(767524001, "Unit of measure (qualifier value)")
89 milligrams = SnomedConcept(258684004, "milligram (qualifier value)")
91 amoxicillin_500mg_capsule = SnomedExpression(
92 amoxicillin_medicine, [
93 SnomedAttributeSet({has_dose_form: capsule}),
94 SnomedAttributeGroup({
95 has_active_ingredient: amoxicillin_substance,
96 has_basis_of_strength_substance: SnomedExpression(
97 amoxicillin_substance, {
98 mass: 500,
99 unit_of_measure: milligrams,
100 }
101 ),
102 }),
103 ]
104 )
105 print(amoxicillin_500mg_capsule.longform)
106 print(amoxicillin_500mg_capsule.shortform)
108""" # noqa
110from typing import Dict, Iterable, List, Union
112from cardinal_pythonlib.reprfunc import simple_repr
115# =============================================================================
116# Constants
117# =============================================================================
# Single characters used when rendering SNOMED-CT expressions and when quoting
# string values.
BACKSLASH = "\\"
COLON = ":"
COMMA = ","
EQUALS = "="
HASH = "#"
LBRACE = "{"
LBRACKET = "("
PIPE = "|"
PLUS = "+"
QM = '"'  # double quotation mark
RBRACE = "}"
RBRACKET = ")"
TAB = "\t"
NEWLINE = "\n"

# Permitted length, in decimal digits, of a SNOMED-CT concept identifier.
ID_MIN_DIGITS = 6
ID_MAX_DIGITS = 18

# Type aliases. The class names are forward references (strings) because the
# classes are defined further down the module.
VALUE_TYPE = Union["SnomedConcept", "SnomedExpression", int, float, str]
DICT_ATTR_TYPE = Dict["SnomedConcept", VALUE_TYPE]

# NOTE(review): not used in the visible part of this module; presumably an XML
# element name used by callers -- confirm.
SNOMED_XML_NAME = "snomed_ct_expression"
143# =============================================================================
144# Quoting strings
145# =============================================================================
def double_quoted(s: str) -> str:
    r"""
    Wraps a string in double quotes, escaping the characters that would
    otherwise be ambiguous or unprintable.

    Args:
        s: the string to quote

    Returns:
        the quoted string. Newline, tab, double quote and backslash are
        written as ``\n``, ``\t``, ``\"`` and ``\\`` respectively; any other
        character whose code point is below 32 is written as a two-digit
        upper-case hex escape (e.g. ``\x1F``); all other characters are copied
        unchanged.

    See:

    - http://code.activestate.com/lists/python-list/272714/ -- does not work
      as null values get escaped in different ways in modern Python, and in a
      slightly unpredictable way
    - https://mail.python.org/pipermail/python-list/2003-April/236940.html --
      won't deal with repr() using triple-quotes
    - https://stackoverflow.com/questions/1675181/get-str-repr-with-double-quotes-python
      -- probably the right general approach

    Test code:

    .. code-block:: python

        from cardinal_pythonlib.snomed import double_quoted

        def test(s):
            print(f"double_quoted({s!r}) -> {double_quoted(s)}")

        test("ab'cd")
        test("ab'c\"d")
        test('ab"cd')

    """  # noqa
    # Characters that have a conventional "named" escape sequence.
    named_escapes = {
        NEWLINE: r"\n",
        TAB: r"\t",
        QM: r'\"',
        BACKSLASH: r"\\",
    }
    quote = '"'
    # Collect the pieces in a list and join once at the end, rather than
    # concatenating strings repeatedly:
    # https://stackoverflow.com/questions/3055477/how-slow-is-pythons-string-concatenation-vs-str-join  # noqa
    # https://waymoot.org/home/python_string/
    pieces = [quote]  # type: List[str]
    for char in s:
        if char in named_escapes:
            pieces.append(named_escapes[char])
        elif ord(char) < 32:
            # Remaining control characters: two-digit hex format, e.g. \x1F
            # for ASCII 31.
            pieces.append(fr"\x{ord(char):02X}")
        else:
            pieces.append(char)
    pieces.append(quote)
    return "".join(pieces)
204# =============================================================================
205# SNOMED-CT concepts
206# =============================================================================
class SnomedBase(object):
    """
    Base class for the SNOMED-CT classes in this module. Subclasses implement
    :meth:`as_string`; the properties and ``__str__`` defined here are thin
    wrappers around it.
    """

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form.

        Args:
            longform: print SNOMED-CT concepts in long form?

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError("implement in subclass")

    @property
    def shortform(self) -> str:
        """
        Returns the short form, without terms: ``as_string(False)``.
        """
        return self.as_string(False)

    @property
    def longform(self) -> str:
        """
        Returns the long form: ``as_string(True)``.
        """
        return self.as_string(True)

    def __str__(self) -> str:
        # str(obj) gives the long form.
        return self.as_string(True)
class SnomedConcept(SnomedBase):
    """
    Represents a SNOMED concept with its description (associated term).
    """

    def __init__(self, identifier: int, term: str) -> None:
        """
        Args:
            identifier: SNOMED-CT identifier (code); a non-negative integer
                with between ``ID_MIN_DIGITS`` and ``ID_MAX_DIGITS`` decimal
                digits
            term: associated term (description); must not contain the pipe
                character, which delimits the term in the long form

        Raises:
            AssertionError: if either argument fails validation
        """
        # NOTE: validation deliberately stays as ``assert`` so that callers
        # catching AssertionError keep working; be aware that these checks are
        # skipped when Python runs with -O.
        assert isinstance(identifier, int), (
            f"SNOMED-CT concept identifier is not an integer: {identifier!r}"
        )
        # Without this check, the minus sign of a negative number would be
        # counted as a "digit" below (e.g. -12345 would pass as six digits).
        assert identifier >= 0, (
            f"SNOMED-CT concept identifier is negative: {identifier!r}"
        )
        ndigits = len(str(identifier))
        assert ID_MIN_DIGITS <= ndigits <= ID_MAX_DIGITS, (
            f"SNOMED-CT concept identifier has wrong number of digits: "
            f"{identifier!r}"
        )
        assert PIPE not in term, (
            f"SNOMED-CT term has invalid pipe character: {term!r}"
        )
        self.identifier = identifier
        self.term = term

    def __repr__(self) -> str:
        return simple_repr(self, ["identifier", "term"])

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form.

        Args:
            longform: if true, return ``identifier |term|``; otherwise return
                the identifier alone
        """
        if longform:
            return f"{self.identifier} {PIPE}{self.term}{PIPE}"
        else:
            return str(self.identifier)

    def concept_reference(self, longform: bool = True) -> str:
        """
        Returns one of the string representations (same result as
        :meth:`as_string`).

        Args:
            longform: in long form, with the description (associated term)?
        """
        return self.as_string(longform)
280# =============================================================================
281# SNOMED-CT expressions
282# =============================================================================
class SnomedValue(SnomedBase):
    """
    Holds an attribute value, which is either a concrete value (int, float or
    str) or a SNOMED-CT concept/expression.

    Implements the grammar elements: attributeValue, expressionValue,
    stringValue, numericValue, integerValue, decimalValue.
    """

    def __init__(self, value: VALUE_TYPE) -> None:
        """
        Args:
            value: the value to wrap

        Raises:
            AssertionError: if ``value`` is not one of the permitted types
        """
        permitted_types = (SnomedConcept, SnomedExpression, int, float, str)
        assert isinstance(value, permitted_types), (
            f"Invalid value type to SnomedValue: {value!r}"
        )
        self.value = value

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form.

        Args:
            longform: print SNOMED-CT concepts in long form?

        Raises:
            ValueError: if the stored value is of an unsupported type
        """
        value = self.value
        if isinstance(value, SnomedConcept):
            return value.concept_reference(longform)
        if isinstance(value, SnomedExpression):
            # A nested expression is wrapped in round brackets, as per p16 of
            # the formal reference cited in the module docstring.
            return f"{LBRACKET} {value.as_string(longform)} {RBRACKET}"
        if isinstance(value, (int, float)):
            # Numbers are prefixed with a hash.
            return HASH + str(value)
        if isinstance(value, str):
            # On the basis that SNOMED's "QM" (quote mark) is 0x22, the double
            # quote:
            return double_quoted(value)
        raise ValueError("Bad input value type")

    def __repr__(self) -> str:
        return simple_repr(self, ["value"])
class SnomedFocusConcept(SnomedBase):
    """
    A SNOMED-CT focus concept: one or more concepts, rendered joined by plus
    signs.
    """

    def __init__(self,
                 concept: Union[SnomedConcept, Iterable[SnomedConcept]]) \
            -> None:
        """
        Args:
            concept: the core concept(s); a single :class:`SnomedConcept` or
                an iterable of them

        Raises:
            AssertionError: if any item is not a :class:`SnomedConcept`
        """
        self.concepts = (
            [concept] if isinstance(concept, SnomedConcept) else list(concept)
        )
        assert all(isinstance(c, SnomedConcept) for c in self.concepts)

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the concept references joined by `` + ``.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        references = (c.concept_reference(longform) for c in self.concepts)
        return f" {PLUS} ".join(references)

    def __repr__(self) -> str:
        return simple_repr(self, ["concepts"])
class SnomedAttribute(SnomedBase):
    """
    A single SNOMED-CT attribute: a name/value pair, rendered as
    ``name = value``.
    """

    def __init__(self, name: SnomedConcept, value: VALUE_TYPE) -> None:
        """
        Args:
            name: a :class:`SnomedConcept` (attribute name)
            value: the attribute value; either a ready-made
                :class:`SnomedValue`, or anything that :class:`SnomedValue`
                accepts, in which case it is wrapped in one

        Raises:
            AssertionError: if ``name`` is not a :class:`SnomedConcept`
        """
        assert isinstance(name, SnomedConcept)
        wrapped = value if isinstance(value, SnomedValue) else SnomedValue(value)
        self.name = name
        self.value = wrapped

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form, ``name = value``.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        name_text = self.name.concept_reference(longform)
        value_text = self.value.as_string(longform)
        return f"{name_text} {EQUALS} {value_text}"

    def __repr__(self) -> str:
        return simple_repr(self, ["name", "value"])
class SnomedAttributeSet(SnomedBase):
    """
    An attribute set: a list of attributes, rendered separated by commas.
    """

    def __init__(self, attributes: Union[DICT_ATTR_TYPE,
                                         Iterable[SnomedAttribute]]) -> None:
        """
        Args:
            attributes: either a dictionary mapping attribute names to values
                (each item becomes a :class:`SnomedAttribute`), or an iterable
                of :class:`SnomedAttribute` objects

        Raises:
            AssertionError: if any resulting item is not a
                :class:`SnomedAttribute`
        """
        if isinstance(attributes, dict):
            # Convert each name/value pair into an attribute object.
            attributes = (
                SnomedAttribute(name, value)
                for name, value in attributes.items()
            )
        self.attributes = list(attributes)
        assert all(isinstance(a, SnomedAttribute) for a in self.attributes)

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the attributes' string forms joined by ``, ``.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        rendered = (a.as_string(longform) for a in self.attributes)
        return f"{COMMA} ".join(rendered)

    def __repr__(self) -> str:
        return simple_repr(self, ["attributes"])
class SnomedAttributeGroup(SnomedBase):
    """
    A group of attribute/value pairs: an attribute set rendered inside
    braces.
    """

    def __init__(self, attribute_set: Union[DICT_ATTR_TYPE,
                                            SnomedAttributeSet]) -> None:
        """
        Args:
            attribute_set: a :class:`SnomedAttributeSet` to group, or a
                dictionary from which one is built

        Raises:
            AssertionError: if the result is not a
                :class:`SnomedAttributeSet`
        """
        grouped = (
            SnomedAttributeSet(attribute_set)
            if isinstance(attribute_set, dict)
            else attribute_set
        )
        assert isinstance(grouped, SnomedAttributeSet)
        self.attribute_set = grouped

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the attribute set's string form wrapped in braces.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        inner = self.attribute_set.as_string(longform)
        return f"{LBRACE} {inner} {RBRACE}"

    def __repr__(self) -> str:
        return simple_repr(self, ["attribute_set"])
class SnomedRefinement(SnomedBase):
    """
    A SNOMED-CT "refinement": at most one (ungrouped) attribute set, plus any
    number of attribute groups.
    """

    def __init__(self,
                 refinements: Union[DICT_ATTR_TYPE,
                                    Iterable[Union[SnomedAttributeSet,
                                                   SnomedAttributeGroup]]]) \
            -> None:
        """
        Args:
            refinements: iterable of :class:`SnomedAttributeSet` (but only
                zero or one) and :class:`SnomedAttributeGroup` objects; or a
                dictionary, which is converted to a single
                :class:`SnomedAttributeSet`

        Raises:
            ValueError: if more than one :class:`SnomedAttributeSet` is
                supplied, or an item is of neither permitted type
        """
        if isinstance(refinements, dict):
            refinements = [SnomedAttributeSet(refinements)]
        self.attrsets = []  # type: List[SnomedBase]
        self.attrgroups = []  # type: List[SnomedBase]
        for item in refinements:
            if isinstance(item, SnomedAttributeSet):
                if self.attrsets:
                    raise ValueError("Only one SnomedAttributeSet allowed "
                                     "to SnomedRefinement")
                self.attrsets.append(item)
                continue
            if isinstance(item, SnomedAttributeGroup):
                self.attrgroups.append(item)
                continue
            raise ValueError(f"Unknown object to SnomedRefinement: {item!r}")

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form: the ungrouped attribute set (if any) followed
        by the attribute groups, joined by ``, ``.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        # Ungrouped before grouped; see 6.5 in "SNOMED CT Compositional Grammar
        # v2.3.1"
        ordered = self.attrsets + self.attrgroups
        return f"{COMMA} ".join([part.as_string(longform) for part in ordered])

    def __repr__(self) -> str:
        return simple_repr(self, ["attrsets", "attrgroups"])
class SnomedExpression(SnomedBase):
    """
    A SNOMED-CT expression: a focus concept, optionally followed by a
    refinement (rendered as ``focus : refinement``).
    """

    def __init__(self,
                 focus_concept: Union[SnomedConcept, SnomedFocusConcept],
                 refinement: Union[SnomedRefinement,
                                   DICT_ATTR_TYPE,
                                   List[Union[SnomedAttributeSet,
                                              SnomedAttributeGroup]]] = None) \
            -> None:
        """
        Args:
            focus_concept: the core concept(s); a :class:`SnomedFocusConcept`,
                or a single :class:`SnomedConcept` from which one is built
            refinement: optional additional information; a
                :class:`SnomedRefinement` or a dictionary or list that can be
                converted to one

        Raises:
            AssertionError: if, after conversion, either argument is of the
                wrong type
        """
        if isinstance(focus_concept, SnomedConcept):
            focus_concept = SnomedFocusConcept(focus_concept)
        assert isinstance(focus_concept, SnomedFocusConcept)

        if isinstance(refinement, (dict, list)):
            refinement = SnomedRefinement(refinement)
        assert refinement is None or isinstance(refinement, SnomedRefinement)

        self.focus_concept = focus_concept
        self.refinement = refinement

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form: the focus concept, followed by `` : `` and
        the refinement if there is one.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        focus_text = self.focus_concept.as_string(longform)
        if not self.refinement:
            return focus_text
        return f"{focus_text} {COLON} {self.refinement.as_string(longform)}"

    def __repr__(self) -> str:
        return simple_repr(self, ["focus_concept", "refinement"])