Coverage for cc_modules/cc_string.py : 96%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_string.py
6===============================================================================
8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
10 This file is part of CamCOPS.
12 CamCOPS is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 CamCOPS is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
25===============================================================================
27**Manage the "extra strings" that the server reads from XML files. The server
28uses these for displaying tasks, and provides them to client devices.**
30"""
32import glob
33import logging
34from typing import Dict, List
35import xml.etree.cElementTree as ElementTree
36# ... cElementTree is a faster implementation
37# ... http://docs.python.org/2/library/xml.etree.elementtree.html
38# ... http://effbot.org/zone/celementtree.htm
39from xml.etree.ElementTree import Element, tostring
41from cardinal_pythonlib.logs import BraceStyleAdapter
42from cardinal_pythonlib.text import unescape_newlines
44from camcops_server.cc_modules.cc_cache import cache_region_static, fkg
45from camcops_server.cc_modules.cc_config import get_config
46from camcops_server.cc_modules.cc_exception import raise_runtime_error
48log = BraceStyleAdapter(logging.getLogger(__name__))
51APPSTRING_TASKNAME = "camcops"
52MISSING_LOCALE = ""
55# =============================================================================
56# XML helper functions
57# =============================================================================
def text_contents(e: Element, plain: bool = False, strip: bool = True) -> str:
    """
    Extract the exact text contents of an XML element, including any XML/HTML
    tags within it.

    A normal string looks like

    .. code-block:: xml

        <string name="stringname">words words words</string>

    and we extract its contents ("words words words") with

    .. code-block:: python

        e.text

    However, for this:

    .. code-block:: xml

        <string name="stringname">words <b>bold words</b> words</string>

    we want to extract ``words <b>bold words</b> words`` and that's a little
    trickier. This function does that.

    Only text *inside* the element is returned. The element's own ``tail``
    (the text that follows its closing tag, which belongs to the parent's
    content) is never included, whichever branch is taken.

    Args:
        e: the :class:`Element` to read
        plain: remove all HTML/XML tags?
        strip: strip leading/trailing whitespace?

    Returns:
        the text contents of the element
    """
    if len(e) == 0:
        # No child elements: the contents are just the element's own text.
        result = e.text or ""
    elif plain:
        # All text at any depth, with the tags themselves discarded.
        result = "".join(e.itertext())  # e.g. "words bold words words"
    else:
        # Leading text, then each child serialized back to markup. tostring()
        # also emits each child's tail, so text between/after children is
        # covered. We deliberately do NOT add e.tail: that text lies outside
        # this element.
        result = (e.text or "") + "".join(
            tostring(child, encoding="unicode") for child in e
        )
    return result.strip() if strip else result
110# =============================================================================
111# Localization strings
112# =============================================================================
113# In a change to thinking... Pyramid emphasizes: NO MUTABLE GLOBAL STATE.
114# https://docs.pylonsproject.org/projects/pyramid/en/latest/narr/advanced-features.html # noqa
115# This is a good thing. But it means that:
116# - because we configure our XML files in our config...
117# - and in principle even two different threads coming here may have different
118# configs...
119# - ... that string requests need to be attached to a Pyramid Request.
class AS:
    """
    Names of the appstrings found in ``camcops.xml``.

    Each attribute holds the string name used to look the string up. The list
    should be kept in step with ``appstrings.cpp`` in the client, and with
    ``camcops.xml`` itself.
    """

    # ---- NHS Data Dictionary: person marital status codes -------------------

    NHS_PERSON_MARITAL_STATUS_CODE_S = "nhs_person_marital_status_code_S"
    NHS_PERSON_MARITAL_STATUS_CODE_M = "nhs_person_marital_status_code_M"
    NHS_PERSON_MARITAL_STATUS_CODE_D = "nhs_person_marital_status_code_D"
    NHS_PERSON_MARITAL_STATUS_CODE_W = "nhs_person_marital_status_code_W"
    NHS_PERSON_MARITAL_STATUS_CODE_P = "nhs_person_marital_status_code_P"
    NHS_PERSON_MARITAL_STATUS_CODE_N = "nhs_person_marital_status_code_N"

    # ---- NHS Data Dictionary: ethnic category codes -------------------------

    NHS_ETHNIC_CATEGORY_CODE_A = "nhs_ethnic_category_code_A"
    NHS_ETHNIC_CATEGORY_CODE_B = "nhs_ethnic_category_code_B"
    NHS_ETHNIC_CATEGORY_CODE_C = "nhs_ethnic_category_code_C"
    NHS_ETHNIC_CATEGORY_CODE_D = "nhs_ethnic_category_code_D"
    NHS_ETHNIC_CATEGORY_CODE_E = "nhs_ethnic_category_code_E"
    NHS_ETHNIC_CATEGORY_CODE_F = "nhs_ethnic_category_code_F"
    NHS_ETHNIC_CATEGORY_CODE_G = "nhs_ethnic_category_code_G"
    NHS_ETHNIC_CATEGORY_CODE_H = "nhs_ethnic_category_code_H"
    NHS_ETHNIC_CATEGORY_CODE_J = "nhs_ethnic_category_code_J"
    NHS_ETHNIC_CATEGORY_CODE_K = "nhs_ethnic_category_code_K"
    NHS_ETHNIC_CATEGORY_CODE_L = "nhs_ethnic_category_code_L"
    NHS_ETHNIC_CATEGORY_CODE_M = "nhs_ethnic_category_code_M"
    NHS_ETHNIC_CATEGORY_CODE_N = "nhs_ethnic_category_code_N"
    NHS_ETHNIC_CATEGORY_CODE_P = "nhs_ethnic_category_code_P"
    NHS_ETHNIC_CATEGORY_CODE_R = "nhs_ethnic_category_code_R"
    NHS_ETHNIC_CATEGORY_CODE_S = "nhs_ethnic_category_code_S"
    NHS_ETHNIC_CATEGORY_CODE_Z = "nhs_ethnic_category_code_Z"

    # ---- Strings for specific restricted tasks (see camcops.xml) ------------

    BDI_WHICH_SCALE = "bdi_which_scale"
    GAF_SCORE = "gaf_score"
    HADS_ANXIETY_SCORE = "hads_anxiety_score"
    HADS_DEPRESSION_SCORE = "hads_depression_score"
    IESR_A_PREFIX = "iesr_a"
    WSAS_A_PREFIX = "wsas_a"
    ZBI_A_PREFIX = "zbi_a"

    # ---- Strings shared across several tasks --------------------------------

    DATA_COLLECTION_ONLY = "data_collection_only"
    DATE_PERTAINS_TO = "date_pertains_to"
    ICD10_SYMPTOMATIC_DISCLAIMER = "icd10_symptomatic_disclaimer"
    SATIS_BAD_Q = "satis_bad_q"
    SATIS_BAD_S = "satis_bad_s"
    SATIS_GOOD_Q = "satis_good_q"
    SATIS_GOOD_S = "satis_good_s"
    SATIS_PT_RATING_Q = "satis_pt_rating_q"
    SATIS_REF_GEN_RATING_Q = "satis_ref_gen_rating_q"
    SATIS_REF_SPEC_RATING_Q = "satis_ref_spec_rating_q"
    SATIS_RATING_A_PREFIX = "satis_rating_a"
    SATIS_SERVICE_BEING_RATED = "satis_service_being_rated"
@cache_region_static.cache_on_arguments(function_key_generator=fkg)
def all_extra_strings_as_dicts(
        config_filename: str) -> Dict[str, Dict[str, Dict[str, str]]]:
    r"""
    Returns strings from all the extra XML string files.

    The result is cached (via a proper cache).

    Args:
        config_filename: a CamCOPS config filename

    Returns: a dictionary like

    .. code-block:: none

        {
            'task1': {
                'stringname1': {
                    "en_GB": "a string in British English",
                    "da_DK": "a string in Danish",
                },
                'stringname2': {
                },
            },
            'task2': {
                ...
            },
            ...
        }

    ... in other words a ``Dict[taskname: str, Dict[stringname: str,
    Dict[locale: str, stringvalue: str]]]``. Locale keys are stored exactly as
    written in the ``locale`` attribute of the XML.

    For example, ``result['phq9']['q5'][locale] == "5. Poor appetite or
    overeating"``. There is also a top-level dictionary with the key
    ``APPSTRING_TASKNAME``.

    If no string files are configured/found, or if the files do not define the
    ``APPSTRING_TASKNAME`` task, the problem is reported via
    ``raise_runtime_error`` (presumably raising ``RuntimeError``; see
    ``cc_exception``).

    **XML format**

    The extra string files should look like this:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1" locale="en_GB">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    If the ``locale`` attribute is not specified, a locale of ``""``
    (``MISSING_LOCALE``) is used internally and will be the fallback position
    if nothing else is found.

    """
    # -------------------------------------------------------------------------
    # Design notes
    # -------------------------------------------------------------------------
    # The extra string files looked like this prior to 2019-05-05:
    #
    #   <?xml version="1.0" encoding="UTF-8"?>
    #   <resources>
    #       <task name="TASK_1">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # Designing XML:
    #
    # - an "element" looks like <thing>blah</thing>, or <thing />; the
    #   "element name" is "thing" in this example, and "blah" is called the
    #   "content".
    # - the delimiters of an element are tags: start tags such as <thing>,
    #   end tags such as </thing>, or empty-element tags such as <thing />.
    # - an "attribute" is a name-value pair, e.g. <tagname attrname=value ...>;
    #   "attrname" in this example is called the "attribute name".
    # - So you can add information via the element structure or the attribute
    #   system.
    #
    # So, as we add language support (2019-05-05), we start with:
    #
    # - element names for types of information (task, string)
    # - attribute values for labelling the content
    # - content for the string data
    #
    # There are many ways we could add language information. Adding an
    # attribute to every string seems verbose, though. We could use one of
    # these systems:
    #
    #   (1) <task name="TASK_1">
    #           <locale name="en_GB">
    #               <string name="NAME_1">VALUE</string>
    #               <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #               <!-- ... -->
    #           </locale>
    #       </task>
    #
    #   (2) <task name="TASK_1" locale="en_GB">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #
    # The second seems a bit clearer (fewer levels). Let's do that. It also
    # makes all existing XML files automatically compatible (with minor code
    # adaptations). If the "locale" parameter is missing, strings go into a
    # "no-locale" state and serve as the default.
    # -------------------------------------------------------------------------

    cfg = get_config(config_filename)

    # Expand every configured filespec (glob pattern) and keep each matching
    # file once, in a deterministic order. An unset (None) setting is handled
    # like an empty one, so it is reported as a misconfiguration below rather
    # than relying on an assert (which vanishes under "python -O").
    filenames = sorted({
        filename
        for filespec in (cfg.extra_string_files or [])
        for filename in glob.glob(filespec)
    })
    if not filenames:
        raise_runtime_error("No CamCOPS extra string files specified; "
                            "config is misconfigured; aborting")

    allstrings = {}  # type: Dict[str, Dict[str, Dict[str, str]]]
    for filename in filenames:
        log.info("Loading string XML file: {}", filename)
        parser = ElementTree.XMLParser(encoding="UTF-8")
        tree = ElementTree.parse(filename, parser=parser)
        root = tree.getroot()
        # We'll search via an XPath. See
        # https://docs.python.org/3.7/library/xml.etree.elementtree.html#xpath-support  # noqa
        for taskroot in root.findall("./task[@name]"):
            # ... "all elements with the tag 'task' that have an attribute
            # named 'name'"
            taskname = taskroot.attrib.get("name")
            locale = taskroot.attrib.get("locale", MISSING_LOCALE)
            # The same task may appear several times (e.g. once per locale,
            # or across files); merge into any existing entry.
            taskstrings = allstrings.setdefault(taskname, {})  # type: Dict[str, Dict[str, str]]  # noqa
            for e in taskroot.findall("./string[@name]"):
                # ... "all elements with the tag 'string' that have an
                # attribute named 'name'"
                stringname = e.attrib.get("name")
                final_string = unescape_newlines(text_contents(e))
                langversions = taskstrings.setdefault(stringname, {})  # type: Dict[str, str]  # noqa
                # A later definition for the same task/string/locale
                # overwrites an earlier one.
                langversions[locale] = final_string

    if APPSTRING_TASKNAME not in allstrings:
        raise_runtime_error(
            "Extra string files do not contain core CamCOPS strings; "
            "config is misconfigured; aborting")

    return allstrings