Coverage for cc_modules/cc_string.py: 61%

98 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-08 23:14 +0000

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_string.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

10 

11 This file is part of CamCOPS. 

12 

13 CamCOPS is free software: you can redistribute it and/or modify 

14 it under the terms of the GNU General Public License as published by 

15 the Free Software Foundation, either version 3 of the License, or 

16 (at your option) any later version. 

17 

18 CamCOPS is distributed in the hope that it will be useful, 

19 but WITHOUT ANY WARRANTY; without even the implied warranty of 

20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21 GNU General Public License for more details. 

22 

23 You should have received a copy of the GNU General Public License 

24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

25 

26=============================================================================== 

27 

28**Manage the "extra strings" that the server reads from XML files. The server 

29uses these for displaying tasks, and provides them to client devices.** 

30 

31""" 

32 

33import glob 

34import logging 

35from typing import Dict, List 

36import xml.etree.cElementTree as ElementTree 

37 

38# ... cElementTree is a faster implementation 

39# ... http://docs.python.org/2/library/xml.etree.elementtree.html 

40# ... http://effbot.org/zone/celementtree.htm 

41from xml.etree.ElementTree import Element, tostring 

42 

43from cardinal_pythonlib.logs import BraceStyleAdapter 

44from cardinal_pythonlib.text import unescape_newlines 

45 

46from camcops_server.cc_modules.cc_cache import cache_region_static, fkg 

47from camcops_server.cc_modules.cc_config import get_config 

48from camcops_server.cc_modules.cc_exception import raise_runtime_error 

49 

# Module-level logger. Wrapped in BraceStyleAdapter; log calls in this module
# use "{}" placeholders in their message templates.
log = BraceStyleAdapter(logging.getLogger(__name__))


# Task name that must be present among the loaded extra strings;
# all_extra_strings_as_dicts() reports an error if no <task> with this name
# was found in any string file.
APPSTRING_TASKNAME = "camcops"
# Locale key used for strings whose <task> element has no "locale" attribute.
MISSING_LOCALE = ""

55 

56 

57# ============================================================================= 

58# XML helper functions 

59# ============================================================================= 

60 

61 

def text_contents(e: Element, plain: bool = False, strip: bool = True) -> str:
    """
    Extract the exact text contents of an XML element, including any XML/HTML
    tags within it.

    A normal string looks like

    .. code-block:: xml

        <string name="stringname">words words words</string>

    and we extract its contents ("words words words") with

    .. code-block:: python

        e.text

    However, for this:

    .. code-block:: xml

        <string name="stringname">words <b>bold words</b> words</string>

    we want to extract ``words <b>bold words</b> words`` and that's a little
    trickier. This function does that.

    Only text *inside* the element is returned. Text that follows the
    element's closing tag (``e.tail``) belongs to the parent element and is
    never included, whichever branch is taken.

    Args:
        e: the :class:`Element` to read
        plain: remove all HTML/XML tags?
        strip: strip leading/trailing whitespace?

    Returns:
        the text contents of the element
    """
    if len(e) == 0:
        # No child elements: the contents are just the leading text, if any.
        result = e.text or ""
    elif plain:
        # All text inside the element, with tags removed.
        result = "".join(e.itertext())  # e.g. "words bold words words"
    else:
        # Leading text, then each child serialized as markup. tostring()
        # emits the child's own tail (the text between that child and the
        # next sibling or the closing tag), so nothing else is needed. In
        # particular e.tail must NOT be appended: it lies outside "e".
        result = (e.text or "") + "".join(
            tostring(child, encoding="unicode") for child in e
        )
    return result.strip() if strip else result

111 

112 

113# ============================================================================= 

114# Localization strings 

115# ============================================================================= 

116# In a change to thinking... Pyramid emphasizes: NO MUTABLE GLOBAL STATE. 

117# https://docs.pylonsproject.org/projects/pyramid/en/latest/narr/advanced-features.html # noqa 

118# This is a good thing. But it means that: 

119# - because we configure our XML files in our config... 

120# - and in principle even two different threads coming here may have different 

121# configs... 

122# - ... that string requests need to be attached to a Pyramid Request. 

123 

124 

class AS:
    """
    Names of the appstrings found in ``camcops.xml``.

    Each attribute holds the ``name`` of one string. The set should be kept
    in step with ``appstrings.cpp`` in the client and with ``camcops.xml``
    itself.
    """

    # =========================================================================
    # NHS Data Dictionary elements
    # =========================================================================

    # Person marital status codes
    NHS_PERSON_MARITAL_STATUS_CODE_S = "nhs_person_marital_status_code_S"
    NHS_PERSON_MARITAL_STATUS_CODE_M = "nhs_person_marital_status_code_M"
    NHS_PERSON_MARITAL_STATUS_CODE_D = "nhs_person_marital_status_code_D"
    NHS_PERSON_MARITAL_STATUS_CODE_W = "nhs_person_marital_status_code_W"
    NHS_PERSON_MARITAL_STATUS_CODE_P = "nhs_person_marital_status_code_P"
    NHS_PERSON_MARITAL_STATUS_CODE_N = "nhs_person_marital_status_code_N"

    # Ethnic category codes
    NHS_ETHNIC_CATEGORY_CODE_A = "nhs_ethnic_category_code_A"
    NHS_ETHNIC_CATEGORY_CODE_B = "nhs_ethnic_category_code_B"
    NHS_ETHNIC_CATEGORY_CODE_C = "nhs_ethnic_category_code_C"
    NHS_ETHNIC_CATEGORY_CODE_D = "nhs_ethnic_category_code_D"
    NHS_ETHNIC_CATEGORY_CODE_E = "nhs_ethnic_category_code_E"
    NHS_ETHNIC_CATEGORY_CODE_F = "nhs_ethnic_category_code_F"
    NHS_ETHNIC_CATEGORY_CODE_G = "nhs_ethnic_category_code_G"
    NHS_ETHNIC_CATEGORY_CODE_H = "nhs_ethnic_category_code_H"
    NHS_ETHNIC_CATEGORY_CODE_J = "nhs_ethnic_category_code_J"
    NHS_ETHNIC_CATEGORY_CODE_K = "nhs_ethnic_category_code_K"
    NHS_ETHNIC_CATEGORY_CODE_L = "nhs_ethnic_category_code_L"
    NHS_ETHNIC_CATEGORY_CODE_M = "nhs_ethnic_category_code_M"
    NHS_ETHNIC_CATEGORY_CODE_N = "nhs_ethnic_category_code_N"
    NHS_ETHNIC_CATEGORY_CODE_P = "nhs_ethnic_category_code_P"
    NHS_ETHNIC_CATEGORY_CODE_R = "nhs_ethnic_category_code_R"
    NHS_ETHNIC_CATEGORY_CODE_S = "nhs_ethnic_category_code_S"
    NHS_ETHNIC_CATEGORY_CODE_Z = "nhs_ethnic_category_code_Z"

    # =========================================================================
    # String elements for specific restricted tasks (see camcops.xml)
    # =========================================================================

    BDI_WHICH_SCALE = "bdi_which_scale"
    GAF_SCORE = "gaf_score"
    HADS_ANXIETY_SCORE = "hads_anxiety_score"
    HADS_DEPRESSION_SCORE = "hads_depression_score"
    IESR_A_PREFIX = "iesr_a"
    WSAS_A_PREFIX = "wsas_a"
    ZBI_A_PREFIX = "zbi_a"

    # =========================================================================
    # Strings shared across several tasks
    # =========================================================================

    DATA_COLLECTION_ONLY = "data_collection_only"
    DATE_PERTAINS_TO = "date_pertains_to"
    ICD10_SYMPTOMATIC_DISCLAIMER = "icd10_symptomatic_disclaimer"
    SATIS_BAD_Q = "satis_bad_q"
    SATIS_BAD_S = "satis_bad_s"
    SATIS_GOOD_Q = "satis_good_q"
    SATIS_GOOD_S = "satis_good_s"
    SATIS_PT_RATING_Q = "satis_pt_rating_q"
    SATIS_REF_GEN_RATING_Q = "satis_ref_gen_rating_q"
    SATIS_REF_SPEC_RATING_Q = "satis_ref_spec_rating_q"
    SATIS_RATING_A_PREFIX = "satis_rating_a"
    SATIS_SERVICE_BEING_RATED = "satis_service_being_rated"

190 

191 

@cache_region_static.cache_on_arguments(function_key_generator=fkg)
def all_extra_strings_as_dicts(
    config_filename: str,
) -> Dict[str, Dict[str, Dict[str, str]]]:
    r"""
    Read every configured "extra strings" XML file and merge the contents
    into a single nested dictionary.

    The function is wrapped by ``cache_region_static.cache_on_arguments``, so
    its result is cached.

    Args:
        config_filename: a CamCOPS config filename

    Returns: a dictionary like

    .. code-block:: none

        {
            'task1': {
                'stringname1': {
                    "en_GB": "a string in British English",
                    "da_DK": "a string in Danish",
                },
                'stringname2': {
                },
            },
            'task2': {
                ...
            },
            ...
        }

    ... that is, a ``Dict[taskname: str, Dict[stringname: str,
    Dict[locale: str, stringvalue: str]]]``.

    For example, ``result['phq9']['q5'][locale] == "5. Poor appetite or
    overeating"``. There is also a top-level dictionary with the key
    ``APPSTRING_TASKNAME``; ``raise_runtime_error`` is called if it is absent,
    and likewise if the configured file specifications match no files.

    Files are processed in sorted filename order; where the same
    task/string/locale combination occurs more than once, the value read last
    replaces the earlier one.

    **XML format**

    The extra string files should look like this:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1" locale="en_GB">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    If the ``locale`` attribute is not specified, a locale key of ``""``
    (``MISSING_LOCALE``) is used internally and will be the fallback position
    if nothing else is found.

    """
    _ = """
    The extra string files looked like this prior to 2019-05-05:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    Designing XML:

    - an "element" looks like ``<thing>blah</thing>``, or ``<thing />``;
      the "element name" is "thing" in this example, and "blah" is called the
      "content".
    - the delimiters of an element are tags: start tags such as ``<thing>``,
      end tags such as ``</thing>``, or empty-element tags such as
      ``<thing />``.
    - an "attribute" is a name-value pair, e.g. ``<tagname attrname=value
      ...>``; "attrname" in this example is called the "attribute name".
    - So you can add information via the element structure or the attribute
      system.

    So, as we add language support (2019-05-05), we start with:

    - element names for types of information (task, string)
    - attribute values for labelling the content
    - content for the string data

    There are many ways we could add language information. Adding an attribute
    to every string seems verbose, though. We could use one of these systems:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1">
                <locale name="en_GB">
                    <string name="NAME_1">VALUE</string>
                    <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                    <!-- ... -->
                </locale>
            </task>
            <!-- ... -->
        </resources>

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1" locale="en_GB">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    The second seems a bit clearer (fewer levels). Let's do that. It also makes
    all existing XML files automatically compatible (with minor code
    adaptations). If the ``locale`` parameter is missing, strings go into a
    "no-locale" state and serve as the default.
    """

    cfg = get_config(config_filename)
    assert cfg.extra_string_files is not None

    # Expand each configured file specification (glob pattern), collecting
    # the matches in a set so that every file is loaded only once.
    matched = set()
    for pattern in cfg.extra_string_files:
        matched.update(glob.glob(pattern))
    xml_paths = sorted(matched)  # type: List[str]
    if not xml_paths:
        raise_runtime_error(
            "No CamCOPS extra string files specified; "
            "config is misconfigured; aborting"
        )

    strings_by_task = {}  # type: Dict[str, Dict[str, Dict[str, str]]]
    for xml_path in xml_paths:
        log.info("Loading string XML file: {}", xml_path)
        utf8_parser = ElementTree.XMLParser(encoding="UTF-8")
        document_root = ElementTree.parse(
            xml_path, parser=utf8_parser
        ).getroot()
        # XPath: every <task> child of the root that has a "name" attribute.
        # https://docs.python.org/3.7/library/xml.etree.elementtree.html#xpath-support  # noqa
        for task_element in document_root.findall("./task[@name]"):
            task_attrs = task_element.attrib
            task_key = task_attrs.get("name")
            locale = task_attrs.get("locale", MISSING_LOCALE)
            strings_for_task = strings_by_task.setdefault(
                task_key, {}
            )  # type: Dict[str, Dict[str, str]]  # noqa
            # XPath: every <string> child of this task that has a "name"
            # attribute.
            for string_element in task_element.findall("./string[@name]"):
                string_key = string_element.attrib.get("name")
                value = unescape_newlines(text_contents(string_element))
                by_locale = strings_for_task.setdefault(
                    string_key, {}
                )  # type: Dict[str, str]  # noqa
                by_locale[locale] = value

    if APPSTRING_TASKNAME not in strings_by_task:
        raise_runtime_error(
            "Extra string files do not contain core CamCOPS strings; "
            "config is misconfigured; aborting"
        )

    return strings_by_task