Coverage for cc_modules/cc_dummy_database.py: 26%

220 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-08 23:14 +0000

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_dummy_database.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

10 

11 This file is part of CamCOPS. 

12 

13 CamCOPS is free software: you can redistribute it and/or modify 

14 it under the terms of the GNU General Public License as published by 

15 the Free Software Foundation, either version 3 of the License, or 

16 (at your option) any later version. 

17 

18 CamCOPS is distributed in the hope that it will be useful, 

19 but WITHOUT ANY WARRANTY; without even the implied warranty of 

20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21 GNU General Public License for more details. 

22 

23 You should have received a copy of the GNU General Public License 

24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

25 

26=============================================================================== 

27 

28**Functions for dummy database creation for manual testing.** 

29 

30""" 

31 

32import logging 

33import random 

34from typing import Optional, TYPE_CHECKING 

35 

36from cardinal_pythonlib.datetimefunc import ( 

37 convert_datetime_to_utc, 

38 format_datetime, 

39) 

40from cardinal_pythonlib.logs import BraceStyleAdapter 

41from cardinal_pythonlib.nhs import generate_random_nhs_number 

42from faker import Faker 

43import pendulum 

44from sqlalchemy.exc import IntegrityError 

45from sqlalchemy.orm.session import sessionmaker 

46from sqlalchemy.sql.expression import func 

47from sqlalchemy.sql.schema import Column 

48from sqlalchemy.sql.sqltypes import ( 

49 Boolean, 

50 Date, 

51 Float, 

52 Integer, 

53 String, 

54 UnicodeText, 

55) 

56 

57from camcops_server.cc_modules.cc_constants import DateFormat 

58from camcops_server.cc_modules.cc_db import TASK_FREQUENT_AND_FK_FIELDS 

59from camcops_server.cc_modules.cc_device import Device 

60from camcops_server.cc_modules.cc_group import Group 

61from camcops_server.cc_modules.cc_idnumdef import IdNumDefinition 

62from camcops_server.cc_modules.cc_patient import Patient 

63from camcops_server.cc_modules.cc_patientidnum import PatientIdNum 

64from camcops_server.cc_modules.cc_sqla_coltypes import ( 

65 COLATTR_PERMITTED_VALUE_CHECKER, 

66 PendulumDateTimeAsIsoTextColType, 

67) 

68 

69from camcops_server.cc_modules.cc_task import Task 

70from camcops_server.cc_modules.cc_user import User 

71from camcops_server.cc_modules.cc_version import CAMCOPS_SERVER_VERSION 

72 

73 

74if TYPE_CHECKING: 

75 from sqlalchemy.orm import Session as SqlASession 

76 from camcops_server.cc_modules.cc_config import CamcopsConfig 

77 from camcops_server.cc_modules.cc_db import GenericTabletRecordMixin 

78 

79log = BraceStyleAdapter(logging.getLogger(__name__)) 

80 

81 

82# ============================================================================= 

83# DummyDataInserter 

84# ============================================================================= 

85 

86 

class DummyDataInserter:
    """
    Class to insert random data (within constraints) to tasks and other
    objects. It does not touch an actual database, so its methods can be used
    for free-floating items.
    """

    # Fallback bounds used when a column has no permitted-value checker, or
    # the checker does not specify a minimum/maximum.
    DEFAULT_MIN_FLOAT = 0
    DEFAULT_MAX_FLOAT = 1000

    DEFAULT_MIN_INTEGER = 0
    DEFAULT_MAX_INTEGER = 1000

    def __init__(self) -> None:
        """
        Creates the Faker instance (``en_GB`` locale) used to generate all
        random values.
        """
        self.faker = Faker("en_GB")

    @staticmethod
    def column_is_q_field(column: Column) -> bool:
        """
        Should this column be filled with random data?

        Returns ``False`` for columns whose name starts with an underscore
        and for columns named in ``TASK_FREQUENT_AND_FK_FIELDS``; ``True``
        otherwise.
        """
        if column.name.startswith("_"):
            return False

        if column.name in TASK_FREQUENT_AND_FK_FIELDS:
            # It's that or TASK_FREQUENT_FIELDS.
            return False

        return True

    def fill_in_task_fields(self, task: Task) -> None:
        """
        Inserts random data into a task (within any known constraints).

        Each eligible column is handled by exactly one ``set_*_field`` method,
        chosen by the column's SQLAlchemy type. The order of the checks
        matters: more specific types must be tested before their base types.
        """
        # noinspection PyUnresolvedReferences
        for column in task.__table__.columns:
            if not self.column_is_q_field(column):
                continue

            if isinstance(column.type, Integer):
                self.set_integer_field(task, column)
                continue

            if isinstance(column.type, Float):
                self.set_float_field(task, column)
                continue

            if isinstance(column.type, Boolean):
                self.set_bool_field(task, column)
                continue

            if isinstance(column.type, Date):
                self.set_date_field(task, column)
                continue

            if isinstance(column.type, PendulumDateTimeAsIsoTextColType):
                self.set_datetime_field(task, column)
                continue

            if isinstance(column.type, UnicodeText):
                # UnicodeText is itself a String subclass. Without this
                # "continue" the column fell through to the String branch
                # below and the value written here was always overwritten.
                self.set_unicode_text_field(task, column)
                continue

            if isinstance(column.type, String):
                # covers String and Text (UnicodeText is handled above)
                self.set_string_field(task, column)

    def set_integer_field(self, task: Task, column: Column) -> None:
        """
        Sets an integer column on the task to a valid random integer.
        """
        setattr(task, column.name, self.get_valid_integer_for_field(column))

    def set_float_field(self, task: Task, column: Column) -> None:
        """
        Sets a float column on the task to a valid random float.
        """
        setattr(task, column.name, self.get_valid_float_for_field(column))

    def set_bool_field(self, task: Task, column: Column) -> None:
        """
        Sets a Boolean column on the task to a randomly chosen ``False`` or
        ``True``.
        """
        setattr(task, column.name, self.faker.random.choice([False, True]))

    def set_date_field(self, task: Task, column: Column) -> None:
        """
        Sets a date column on the task to a Faker-generated date.
        """
        setattr(task, column.name, self.faker.date_object())

    def set_datetime_field(self, task: Task, column: Column) -> None:
        """
        Sets a date/time column on the task to a Faker-generated date/time.
        """
        setattr(task, column.name, self.faker.date_time())

    def set_unicode_text_field(self, task: Task, column: Column) -> None:
        """
        Sets a ``UnicodeText`` column on the task to random text.

        Uses :meth:`get_valid_string_for_field`, so a permitted-value list
        attached to the column is honoured; otherwise the value is
        Faker-generated text (truncated only if the column type declares a
        length).
        """
        setattr(task, column.name, self.get_valid_string_for_field(column))

    def set_string_field(self, task: Task, column: Column) -> None:
        """
        Sets a string column on the task to a valid random string.
        """
        setattr(task, column.name, self.get_valid_string_for_field(column))

    def get_valid_integer_for_field(self, column: Column) -> int:
        """
        Returns a random integer that is valid for the column.

        If the column carries a permitted-value checker with an explicit list
        of permitted values, one of those is chosen. Otherwise a random
        integer is drawn between the checker's minimum/maximum, falling back
        to ``DEFAULT_MIN_INTEGER``/``DEFAULT_MAX_INTEGER`` for any bound that
        is not specified.
        """
        min_value = self.DEFAULT_MIN_INTEGER
        max_value = self.DEFAULT_MAX_INTEGER

        value_checker = getattr(column, COLATTR_PERMITTED_VALUE_CHECKER, None)

        if value_checker is not None:
            if value_checker.permitted_values is not None:
                return self.faker.random.choice(value_checker.permitted_values)

            if value_checker.minimum is not None:
                min_value = value_checker.minimum

            if value_checker.maximum is not None:
                max_value = value_checker.maximum

        return self.faker.random.randint(min_value, max_value)

    def get_valid_float_for_field(self, column: Column) -> float:
        """
        Returns a random float that is valid for the column.

        If the column carries a permitted-value checker with an explicit list
        of permitted values, one of those is chosen. Otherwise a random float
        is drawn between the checker's minimum/maximum, falling back to
        ``DEFAULT_MIN_FLOAT``/``DEFAULT_MAX_FLOAT`` for any bound that is not
        specified.
        """
        min_value = self.DEFAULT_MIN_FLOAT
        max_value = self.DEFAULT_MAX_FLOAT

        value_checker = getattr(column, COLATTR_PERMITTED_VALUE_CHECKER, None)

        if value_checker is not None:
            if value_checker.permitted_values is not None:
                return self.faker.random.choice(value_checker.permitted_values)

            if value_checker.minimum is not None:
                min_value = value_checker.minimum

            if value_checker.maximum is not None:
                max_value = value_checker.maximum

        return self.faker.random.uniform(min_value, max_value)

    def get_valid_string_for_field(self, column: Column) -> str:
        """
        Returns a random string that is valid for the column.

        If the column carries a permitted-value checker with an explicit list
        of permitted values, one of those is chosen. Otherwise Faker text is
        generated and, if the column type declares a length, truncated to
        that length.
        """
        value_checker = getattr(column, COLATTR_PERMITTED_VALUE_CHECKER, None)

        if value_checker is not None:
            if value_checker.permitted_values is not None:
                return self.faker.random.choice(value_checker.permitted_values)

        text = self.faker.text()

        if column.type.length is None:
            return text

        return text[: column.type.length]

219 

220 

221# ============================================================================= 

222# DummyDataFactory 

223# ============================================================================= 

224 

225 

class DummyDataFactory(DummyDataInserter):
    """
    Factory to insert random data (within constraints) to tasks and other
    objects in a dummy database. Unlike its parent, this concerns itself with
    an actual database.
    """

    # Patients are created with consecutive IDs starting here.
    FIRST_PATIENT_ID = 10001
    NUM_PATIENTS = 5

    def __init__(self, cfg: "CamcopsConfig") -> None:
        """
        Opens a database session on the engine supplied by the config and
        records the "era" timestamp applied to every record created.

        Args:
            cfg: configuration object providing the SQLAlchemy engine
        """
        super().__init__()
        engine = cfg.get_sqla_engine()
        self.dbsession = sessionmaker()(bind=engine)  # type: SqlASession

        # One timestamp, in three representations, shared by all records
        # this factory creates.
        self.era_time = pendulum.now()
        self.era_time_utc = convert_datetime_to_utc(self.era_time)
        self.era = format_datetime(self.era_time, DateFormat.ISO8601)

        # Populated by add_data().
        self.group = None  # type: Optional[Group]
        self.user = None  # type: Optional[User]
        self.device = None  # type: Optional[Device]
        self.nhs_iddef = None  # type: Optional[IdNumDefinition]

    def add_data(self) -> None:
        """
        Populates the database: creates a dummy group, points the system
        user's upload group at it, fetches or creates the dummy device, adds
        a test NHS number ID definition, and then adds ``NUM_PATIENTS``
        patients, each with one instance of every task.
        """
        # noinspection PyTypeChecker
        next_id = self.next_id(Group.id)

        self.group = Group()
        self.group.name = f"dummygroup{next_id}"
        self.group.description = "Dummy group"
        self.group.upload_policy = "sex AND anyidnum"
        self.group.finalize_policy = "sex AND idnum1001"
        self.dbsession.add(self.group)
        self.dbsession.commit()  # sets PK fields

        self.user = User.get_system_user(self.dbsession)
        self.user.upload_group_id = self.group.id

        self.device = self.get_device(self.dbsession)
        self.dbsession.commit()

        self.nhs_iddef = IdNumDefinition(
            which_idnum=1001,
            description="NHS number (TEST)",
            short_description="NHS#",
            hl7_assigning_authority="NHS",
            hl7_id_type="NHSN",
        )
        self.dbsession.add(self.nhs_iddef)
        try:
            self.dbsession.commit()
        except IntegrityError:
            # NOTE(review): presumably this ID definition already exists
            # (e.g. from a previous run); the insert is abandoned and the
            # in-memory object is still used for its which_idnum.
            self.dbsession.rollback()

        for patient_id in range(
            self.FIRST_PATIENT_ID, self.FIRST_PATIENT_ID + self.NUM_PATIENTS
        ):
            # Seed Faker from the patient ID before generating the patient.
            Faker.seed(patient_id)
            self.add_patient(patient_id)
            log.info(f"Adding tasks for patient {patient_id}")

            # Re-seed without a fixed value before generating task data.
            Faker.seed()
            self.add_tasks(patient_id)

    # noinspection PyMethodMayBeStatic
    def get_device(self, dbsession: "SqlASession") -> "Device":
        """
        Returns the device named ``dummy_device``, creating it (registered by
        the system user) and flushing it to the session if it does not exist.
        """
        dummy_device_name = "dummy_device"

        device = Device.get_device_by_name(dbsession, dummy_device_name)
        if device is None:
            device = Device()
            device.name = dummy_device_name
            device.friendly_name = "Dummy tablet device"
            device.registered_by_user = User.get_system_user(dbsession)
            device.when_registered_utc = pendulum.DateTime.utcnow()
            device.camcops_version = CAMCOPS_SERVER_VERSION
            dbsession.add(device)
            dbsession.flush()  # So that we can use the PK elsewhere
        return device

    def add_patient(self, patient_id: int) -> Patient:
        """
        Creates a patient with random sex, name and date of birth, plus an
        ID number record, commits, and returns the patient.

        Args:
            patient_id: value for the patient's ``id`` field
        """
        log.info(f"Adding patient {patient_id}")

        patient = Patient()

        patient.id = patient_id
        self.apply_standard_db_fields(patient)

        patient.sex = self.faker.random.choices(
            ["M", "F", "X"], weights=[49.8, 49.8, 0.4]
        )[0]

        if patient.sex == "M":
            patient.forename = self.faker.first_name_male()
        elif patient.sex == "F":
            patient.forename = self.faker.first_name_female()
        else:
            # Only the first character of a generated forename is used.
            patient.forename = self.faker.first_name()[:1]

        patient.surname = self.faker.last_name()

        # Faker date_of_birth calculates from the current time so gives
        # different results on different days. By fixing the dates we get
        # consistent results but our population ages over time.
        patient.dob = self.faker.date_between_dates(
            date_start=pendulum.date(1900, 1, 1),
            date_end=pendulum.date(2020, 1, 1),
        )
        self.dbsession.add(patient)

        self.add_patient_idnum(patient_id)
        self.dbsession.commit()

        return patient

    # noinspection PyTypeChecker
    def add_patient_idnum(self, patient_id: int) -> None:
        """
        Adds (without committing) an ID number record for the patient, using
        the ID type of ``self.nhs_iddef`` and an NHS number derived
        deterministically from ``patient_id``.
        """
        next_id = self.next_id(PatientIdNum.id)

        patient_idnum = PatientIdNum()
        patient_idnum.id = next_id
        self.apply_standard_db_fields(patient_idnum)
        patient_idnum.patient_id = patient_id
        patient_idnum.which_idnum = self.nhs_iddef.which_idnum

        # Always create the same NHS number for each patient.
        # Uses a different random object to faker.
        # Restores the master RNG state afterwards, even if number generation
        # fails, so the process-wide RNG is never left deterministically
        # seeded.
        old_random_state = random.getstate()
        try:
            random.seed(patient_id)
            patient_idnum.idnum_value = generate_random_nhs_number()
        finally:
            random.setstate(old_random_state)

        self.dbsession.add(patient_idnum)

    def add_tasks(self, patient_id: int) -> None:
        """
        Creates one instance of every task class, filled with random data,
        and commits after each. Tasks that have a patient are linked to
        ``patient_id``.
        """
        for cls in Task.all_subclasses_by_tablename():
            task = cls()
            task.id = self.next_id(cls.id)
            self.apply_standard_task_fields(task)
            if task.has_patient:
                task.patient_id = patient_id

            self.fill_in_task_fields(task)

            self.dbsession.add(task)
            self.dbsession.commit()

    def next_id(self, column: Column) -> int:
        """
        Returns one more than the current maximum value of ``column`` in the
        database, or 1 if there are no values.
        """
        max_id = self.dbsession.query(func.max(column)).scalar()
        if max_id is None:
            return 1

        return max_id + 1

    def apply_standard_task_fields(self, task: Task) -> None:
        """
        Writes some default values to an SQLAlchemy ORM object representing
        a task.
        """
        self.apply_standard_db_fields(task)
        task.when_created = self.era_time

    def apply_standard_db_fields(
        self, obj: "GenericTabletRecordMixin"
    ) -> None:
        """
        Writes some default values to an SQLAlchemy ORM object representing a
        record uploaded from a client (tablet) device.

        Relies on ``self.device``, ``self.group`` and ``self.user``, which
        are set by :meth:`add_data`.
        """
        obj._device_id = self.device.id
        obj._era = self.era
        obj._group_id = self.group.id
        obj._current = True
        obj._adding_user_id = self.user.id
        obj._when_added_batch_utc = self.era_time_utc