Coverage for cc_modules/cc_string.py: 61%
98 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_string.py
6===============================================================================
8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
11 This file is part of CamCOPS.
13 CamCOPS is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 CamCOPS is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
26===============================================================================
28**Manage the "extra strings" that the server reads from XML files. The server
29uses these for displaying tasks, and provides them to client devices.**
31"""
33import glob
34import logging
35from typing import Dict, List
36import xml.etree.cElementTree as ElementTree
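# ... cElementTree was historically the faster (C) implementation. Note that
#     since Python 3.3 xml.etree.ElementTree uses the C accelerator
#     automatically; cElementTree is deprecated and was removed in Python 3.9.
# ... http://docs.python.org/2/library/xml.etree.elementtree.html
# ... http://effbot.org/zone/celementtree.htm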
41from xml.etree.ElementTree import Element, tostring
43from cardinal_pythonlib.logs import BraceStyleAdapter
44from cardinal_pythonlib.text import unescape_newlines
46from camcops_server.cc_modules.cc_cache import cache_region_static, fkg
47from camcops_server.cc_modules.cc_config import get_config
48from camcops_server.cc_modules.cc_exception import raise_runtime_error
50log = BraceStyleAdapter(logging.getLogger(__name__))
53APPSTRING_TASKNAME = "camcops"
54MISSING_LOCALE = ""
57# =============================================================================
58# XML helper functions
59# =============================================================================
def text_contents(e: Element, plain: bool = False, strip: bool = True) -> str:
    """
    Extract the exact text contents of an XML element, including any XML/HTML
    tags within it.

    A normal string looks like

    .. code-block:: xml

        <string name="stringname">words words words</string>

    and we extract its contents ("words words words") with

    .. code-block:: python

        e.text

    However, for this:

    .. code-block:: xml

        <string name="stringname">words <b>bold words</b> words</string>

    we want to extract ``words <b>bold words</b> words`` and that's a little
    trickier. This function does that.

    Only text *inside* the element is returned. The element's own ``tail``
    (the text that follows its closing tag, which belongs to the parent's
    content) is never included.

    Args:
        e: the :class:`Element` to read
        plain: remove all HTML/XML tags?
        strip: strip leading/trailing whitespace?

    Returns:
        the text contents of the element
    """
    if len(e) == 0:
        # No child elements: the contents are just the element's text (which
        # is None for an empty element).
        result = e.text or ""
    elif plain:
        # All inner text, in document order, with the tags dropped.
        result = "".join(e.itertext())  # e.g. "words bold words words"
    else:
        # Leading text, then each child serialized. tostring() emits each
        # child's own tail, so text between/after children is preserved.
        # Do NOT append e.tail here: that is text *after* this element's
        # closing tag and is not part of its contents.
        result = (e.text or "") + "".join(
            tostring(child, encoding="unicode") for child in e
        )
    return result.strip() if strip else result
113# =============================================================================
114# Localization strings
115# =============================================================================
# A change in thinking: Pyramid emphasizes NO MUTABLE GLOBAL STATE.
# https://docs.pylonsproject.org/projects/pyramid/en/latest/narr/advanced-features.html  # noqa
# This is a good thing. But it means that:
# - because we configure our XML files in our config...
# - and in principle even two different threads coming here may have different
#   configs...
# - ... string requests need to be attached to a Pyramid Request.
class AS:
    """
    Names of the appstrings found in ``camcops.xml``.

    Each attribute's value is the string name used to look the string up.
    Keep this list in step with ``appstrings.cpp`` in the client, and with
    ``camcops.xml`` itself.
    """

    # =========================================================================
    # NHS Data Dictionary elements
    # =========================================================================

    # Person marital status codes
    NHS_PERSON_MARITAL_STATUS_CODE_S = "nhs_person_marital_status_code_S"
    NHS_PERSON_MARITAL_STATUS_CODE_M = "nhs_person_marital_status_code_M"
    NHS_PERSON_MARITAL_STATUS_CODE_D = "nhs_person_marital_status_code_D"
    NHS_PERSON_MARITAL_STATUS_CODE_W = "nhs_person_marital_status_code_W"
    NHS_PERSON_MARITAL_STATUS_CODE_P = "nhs_person_marital_status_code_P"
    NHS_PERSON_MARITAL_STATUS_CODE_N = "nhs_person_marital_status_code_N"

    # Ethnic category codes
    NHS_ETHNIC_CATEGORY_CODE_A = "nhs_ethnic_category_code_A"
    NHS_ETHNIC_CATEGORY_CODE_B = "nhs_ethnic_category_code_B"
    NHS_ETHNIC_CATEGORY_CODE_C = "nhs_ethnic_category_code_C"
    NHS_ETHNIC_CATEGORY_CODE_D = "nhs_ethnic_category_code_D"
    NHS_ETHNIC_CATEGORY_CODE_E = "nhs_ethnic_category_code_E"
    NHS_ETHNIC_CATEGORY_CODE_F = "nhs_ethnic_category_code_F"
    NHS_ETHNIC_CATEGORY_CODE_G = "nhs_ethnic_category_code_G"
    NHS_ETHNIC_CATEGORY_CODE_H = "nhs_ethnic_category_code_H"
    NHS_ETHNIC_CATEGORY_CODE_J = "nhs_ethnic_category_code_J"
    NHS_ETHNIC_CATEGORY_CODE_K = "nhs_ethnic_category_code_K"
    NHS_ETHNIC_CATEGORY_CODE_L = "nhs_ethnic_category_code_L"
    NHS_ETHNIC_CATEGORY_CODE_M = "nhs_ethnic_category_code_M"
    NHS_ETHNIC_CATEGORY_CODE_N = "nhs_ethnic_category_code_N"
    NHS_ETHNIC_CATEGORY_CODE_P = "nhs_ethnic_category_code_P"
    NHS_ETHNIC_CATEGORY_CODE_R = "nhs_ethnic_category_code_R"
    NHS_ETHNIC_CATEGORY_CODE_S = "nhs_ethnic_category_code_S"
    NHS_ETHNIC_CATEGORY_CODE_Z = "nhs_ethnic_category_code_Z"

    # =========================================================================
    # String elements for specific restricted tasks (see camcops.xml)
    # =========================================================================

    BDI_WHICH_SCALE = "bdi_which_scale"
    GAF_SCORE = "gaf_score"
    HADS_ANXIETY_SCORE = "hads_anxiety_score"
    HADS_DEPRESSION_SCORE = "hads_depression_score"
    IESR_A_PREFIX = "iesr_a"
    WSAS_A_PREFIX = "wsas_a"
    ZBI_A_PREFIX = "zbi_a"

    # =========================================================================
    # Strings shared across several tasks
    # =========================================================================

    DATA_COLLECTION_ONLY = "data_collection_only"
    DATE_PERTAINS_TO = "date_pertains_to"
    ICD10_SYMPTOMATIC_DISCLAIMER = "icd10_symptomatic_disclaimer"
    SATIS_BAD_Q = "satis_bad_q"
    SATIS_BAD_S = "satis_bad_s"
    SATIS_GOOD_Q = "satis_good_q"
    SATIS_GOOD_S = "satis_good_s"
    SATIS_PT_RATING_Q = "satis_pt_rating_q"
    SATIS_REF_GEN_RATING_Q = "satis_ref_gen_rating_q"
    SATIS_REF_SPEC_RATING_Q = "satis_ref_spec_rating_q"
    SATIS_RATING_A_PREFIX = "satis_rating_a"
    SATIS_SERVICE_BEING_RATED = "satis_service_being_rated"
@cache_region_static.cache_on_arguments(function_key_generator=fkg)
def all_extra_strings_as_dicts(
    config_filename: str,
) -> Dict[str, Dict[str, Dict[str, str]]]:
    r"""
    Returns strings from all the extra XML string files.

    The result is cached (via a proper cache).

    Args:
        config_filename: a CamCOPS config filename

    Returns: a dictionary like

    .. code-block:: none

        {
            'task1': {
                'stringname1': {
                    "en-GB": "a string in British English",
                    "da-DK": "a string in Danish",
                },
                'stringname2': {
                    ...
                },
            },
            'task2': {
                ...
            },
            ...
        }

    ... in other words a ``Dict[taskname: str, Dict[stringname: str,
    Dict[locale: str, stringvalue: str]]]``.

    For example, ``result['phq9']['q5'][locale] == "5. Poor appetite or
    overeating"``. There is also a top-level dictionary with the key
    ``APPSTRING_TASKNAME``.

    Files are processed in sorted filename order; where the same
    task/string/locale combination occurs more than once, the value read last
    wins.

    If no files match the configured file specifications, or if no file
    provides the ``APPSTRING_TASKNAME`` task, the problem is reported via
    ``raise_runtime_error``.

    **XML format**

    The extra string files should look like this:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1" locale="en_GB">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    If the ``locale`` attribute is not specified, a locale of
    ``MISSING_LOCALE`` (an empty string) is used internally and will be the
    fallback position if nothing else is found.

    """
    # -------------------------------------------------------------------------
    # Design notes
    # -------------------------------------------------------------------------
    # The extra string files looked like this prior to 2019-05-05:
    #
    #   <?xml version="1.0" encoding="UTF-8"?>
    #   <resources>
    #       <task name="TASK_1">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # Designing XML:
    #
    # - an "element" looks like <thing>blah</thing>, or <thing />; the
    #   "element name" is "thing" in this example, and "blah" is called the
    #   "content".
    # - the delimiters of an element are tags: start tags such as <thing>,
    #   end tags such as </thing>, or empty-element tags such as <thing />.
    # - an "attribute" is a name-value pair, e.g. <tagname attrname=value ...>;
    #   "attrname" in this example is called the "attribute name".
    # - So you can add information via the element structure or the attribute
    #   system.
    #
    # So, as we add language support (2019-05-05), we start with:
    #
    # - element names for types of information (task, string)
    # - attribute values for labelling the content
    # - content for the string data
    #
    # There are many ways we could add language information. Adding an
    # attribute to every string seems verbose, though. We could use one of
    # these systems:
    #
    #   (1) <?xml version="1.0" encoding="UTF-8"?>
    #       <resources>
    #           <task name="TASK_1">
    #               <locale name="en_GB">
    #                   <string name="NAME_1">VALUE</string>
    #                   <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #                   <!-- ... -->
    #               </locale>
    #           </task>
    #           <!-- ... -->
    #       </resources>
    #
    #   (2) <?xml version="1.0" encoding="UTF-8"?>
    #       <resources>
    #           <task name="TASK_1" locale="en_GB">
    #               <string name="NAME_1">VALUE</string>
    #               <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #               <!-- ... -->
    #           </task>
    #           <!-- ... -->
    #       </resources>
    #
    # The second seems a bit clearer (fewer levels). Let's do that. It also
    # makes all existing XML files automatically compatible (with minor code
    # adaptations). If the "locale" parameter is missing, strings go into a
    # "no-locale" state and serve as the default.

    cfg = get_config(config_filename)
    # Invariant check; also narrows the type for static checkers.
    assert cfg.extra_string_files is not None

    # Expand every configured filespec (glob pattern), keeping each matching
    # file once only, in a deterministic (sorted) order.
    filenames = sorted(
        {
            filename
            for filespec in cfg.extra_string_files
            for filename in glob.glob(filespec)
        }
    )  # type: List[str]
    if not filenames:
        raise_runtime_error(
            "No CamCOPS extra string files specified; "
            "config is misconfigured; aborting"
        )

    allstrings = {}  # type: Dict[str, Dict[str, Dict[str, str]]]
    for filename in filenames:
        log.info("Loading string XML file: {}", filename)
        parser = ElementTree.XMLParser(encoding="UTF-8")
        tree = ElementTree.parse(filename, parser=parser)
        root = tree.getroot()
        # We'll search via an XPath. See
        # https://docs.python.org/3.7/library/xml.etree.elementtree.html#xpath-support  # noqa
        for taskroot in root.findall("./task[@name]"):
            # ... "all elements with the tag 'task' that have an attribute
            # named 'name'"
            taskname = taskroot.attrib.get("name")
            locale = taskroot.attrib.get("locale", MISSING_LOCALE)
            taskstrings = allstrings.setdefault(
                taskname, {}
            )  # type: Dict[str, Dict[str, str]]  # noqa
            for e in taskroot.findall("./string[@name]"):
                # ... "all elements with the tag 'string' that have an
                # attribute named 'name'"
                stringname = e.attrib.get("name")
                final_string = unescape_newlines(text_contents(e))
                langversions = taskstrings.setdefault(
                    stringname, {}
                )  # type: Dict[str, str]  # noqa
                # A later definition of the same task/string/locale
                # overwrites an earlier one.
                langversions[locale] = final_string

    if APPSTRING_TASKNAME not in allstrings:
        raise_runtime_error(
            "Extra string files do not contain core CamCOPS strings; "
            "config is misconfigured; aborting"
        )

    return allstrings