Coverage for cc_modules/cc_validators.py: 40%

149 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-08 23:14 +0000

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_validators.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

10 

11 This file is part of CamCOPS. 

12 

13 CamCOPS is free software: you can redistribute it and/or modify 

14 it under the terms of the GNU General Public License as published by 

15 the Free Software Foundation, either version 3 of the License, or 

16 (at your option) any later version. 

17 

18 CamCOPS is distributed in the hope that it will be useful, 

19 but WITHOUT ANY WARRANTY; without even the implied warranty of 

20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

21 GNU General Public License for more details. 

22 

23 You should have received a copy of the GNU General Public License 

24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

25 

26=============================================================================== 

27 

28**String validators and the like.** 

29 

30All functions starting ``validate_`` do nothing if the input is good, and raise 

31:exc:`ValueError` if it's bad, with a descriptive error (you can use ``str()`` 

32on the exception). 

33 

34All validators take a 

35:class:`camcops_server.cc_modules.cc_request.CamcopsRequest` parameter, for 

36internationalized error messages. 

37 

38WARNING: even the error messages shouldn't contain the error-producing strings. 

39""" 

40 

41import ipaddress 

42import logging 

43import re 

44from typing import Callable, List, Optional, TYPE_CHECKING 

45import urllib.parse 

46 

47from cardinal_pythonlib.logs import BraceStyleAdapter 

48from colander import EMAIL_RE 

49 

50from camcops_server.cc_modules.cc_constants import ( 

51 MINIMUM_PASSWORD_LENGTH, 

52 StringLengths, 

53) 

54from camcops_server.cc_modules.cc_password import password_prohibited 

55 

56if TYPE_CHECKING: 

57 from camcops_server.cc_modules.cc_request import CamcopsRequest 

58 

59log = BraceStyleAdapter(logging.getLogger(__name__)) 

60 

61 

62# ============================================================================= 

63# Typing constants 

64# ============================================================================= 

65 

66STRING_VALIDATOR_TYPE = Callable[[str, Optional["CamcopsRequest"]], None] 

67# ... string validators raise ValueError if the string is invalid 

68 

69 

70# ============================================================================= 

71# Raising exceptions: sometimes internationalized, sometimes not 

72# ============================================================================= 

73 

74 

def dummy_gettext(x: str) -> str:
    """
    Identity stand-in for a ``gettext`` function: hands back ``x`` unchanged.

    Used by the validators in this module when no request object is available
    to supply a real translation function.
    """
    untranslated = x
    return untranslated

80 

81 

82# ============================================================================= 

83# Regex manipulation 

84# ============================================================================= 

85 

86 

def anchor(
    expression: str, anchor_start: bool = True, anchor_end: bool = True
) -> str:
    """
    Wraps a regex fragment in ``^`` (start) and/or ``$`` (end) anchors.

    Args:
        expression: the regex text to wrap
        anchor_start: prepend ``^``?
        anchor_end: append ``$``?

    Returns:
        the expression with the requested anchors added
    """
    if anchor_start:
        prefix = "^"
    else:
        prefix = ""
    if anchor_end:
        suffix = "$"
    else:
        suffix = ""
    return "{}{}{}".format(prefix, expression, suffix)

96 

97 

def zero_or_more(expression: str) -> str:
    """
    Appends the ``*`` quantifier, giving a regex for zero or more copies of
    ``expression``.
    """
    return "{}*".format(expression)

103 

104 

def one_or_more(expression: str) -> str:
    """
    Appends the ``+`` quantifier, giving a regex for one or more copies of
    ``expression``.
    """
    return "{}+".format(expression)

110 

111 

def min_max_copies(expression: str, max_count: int, min_count: int = 1) -> str:
    """
    Appends a bounded repetition quantifier to a regex fragment: for a regex
    group ``x``, this yields ``x{min,max}``.

    Take care with ``min_count == 0``: unless something else constrains the
    match, the resulting regex may match an empty string.

    Args:
        expression: the regex fragment to repeat
        max_count: maximum number of copies
        min_count: minimum number of copies (must satisfy
            ``0 <= min_count <= max_count``)
    """
    assert 0 <= min_count <= max_count
    # Doubled braces are literal braces in str.format().
    return "{}{{{},{}}}".format(expression, min_count, max_count)

122 

123 

def describe_regex_permitted_char(
    expression: str,
    req: Optional["CamcopsRequest"] = None,
    invalid_prefix: bool = True,
) -> str:
    """
    Describes the characters permitted in a regular expression character
    selector -- as long as it's simple! This won't handle arbitrary regexes
    (for instance, negated sets such as ``[^abc]`` are not interpreted).

    Args:
        expression: a character set, including its surrounding ``[]``
        req: optional request, used only for its ``gettext`` method; if
            absent, messages are not translated
        invalid_prefix: start the description with "Invalid string."?

    Returns:
        a human-readable description such as
        ``Invalid string. Permitted characters: A-Z, a-z, 0-9, '_'``
    """
    assert expression.startswith("[") and expression.endswith("]")
    content = expression[1:-1]  # strip off surrounding []
    length = len(content)
    _ = req.gettext if req else dummy_gettext
    # Regex shorthand classes and their (translated) descriptions.
    shorthand_descriptions = {
        "w": _("word character"),
        "W": _("non-word character"),
        "d": _("digit"),
        "D": _("non-digit"),
        "s": _("whitespace"),
        "S": _("non-whitespace"),
    }
    permitted = []  # type: List[str]
    i = 0
    while i < length:
        char = content[i]
        if char == "\\":
            # backslash preceding another character: regex code or escaped char
            assert i + 1 < length, f"Bad escaping in {expression!r}"
            escaped = content[i + 1]
            permitted.append(
                shorthand_descriptions.get(escaped, repr(escaped))
            )
            i += 2
        elif i + 2 < length and content[i + 1] == "-":
            # range like A-Z (a hyphen with a character on both sides)
            permitted.append(content[i : i + 3])  # noqa: E203
            i += 3
        else:
            # A literal character. Inside a character set, "." has no special
            # meaning (it is a literal dot), and a hyphen at the very end of
            # the set is a literal hyphen, not an unfinished range.
            permitted.append(repr(char))
            i += 1
    description = ", ".join(permitted)
    prefix = _("Invalid string.") + " " if invalid_prefix else ""
    return prefix + _("Permitted characters:") + " " + description

174 

175 

def describe_regex_permitted_char_length(
    expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
) -> str:
    """
    Builds the "invalid string" message for a string constrained by both
    length and permitted characters.

    Args:
        expression: a character set, including its surrounding ``[]``
        max_length: maximum permitted string length
        min_length: minimum permitted string length
        req: optional request, used only for its ``gettext`` method

    Returns:
        "Invalid string.", then the length limits, then the
        permitted-character description, separated by single spaces
    """
    _ = req.gettext if req else dummy_gettext
    length_limits = _("Minimum length = {}. Maximum length = {}.").format(
        min_length, max_length
    )
    permitted_chars = describe_regex_permitted_char(
        expression, req, invalid_prefix=False
    )
    sentences = [_("Invalid string."), length_limits, permitted_chars]
    return " ".join(sentences)

195 

196 

197# ============================================================================= 

198# Generic validation functions 

199# ============================================================================= 

200 

201 

def validate_by_char_and_length(
    x: str,
    permitted_char_expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
    flags: int = 0,
) -> None:
    """
    Validate a string based on permitted characters and length.

    Args:
        x: the string to validate
        permitted_char_expression: a regex character set (including its
            surrounding ``[]``) describing each permitted character
        max_length: maximum permitted length of ``x``
        min_length: minimum permitted length of ``x``
        req: optional request, used for internationalized error messages
        flags: flags passed to :func:`re.compile`

    Raises:
        :exc:`ValueError` if ``x`` is invalid; the message describes the
        constraints but does not contain ``x`` itself
    """
    regex = re.compile(
        anchor(
            min_max_copies(
                expression=permitted_char_expression,
                min_count=min_length,
                max_count=max_length,
            )
        ),
        flags=flags,
    )
    # Use fullmatch(), not match(): "$" also matches just before a trailing
    # newline, so match() would accept e.g. "abc\n" for a set that does not
    # permit newlines.
    if not regex.fullmatch(x):
        raise ValueError(
            describe_regex_permitted_char_length(
                permitted_char_expression,
                min_length=min_length,
                max_length=max_length,
                req=req,
            )
        )

232 

233 

234# ============================================================================= 

235# Generic strings 

236# ============================================================================= 

237 

238ALPHA_CHAR = "[A-Za-z]" 

239 

240ALPHANUM_UNDERSCORE_CHAR = "[A-Za-z0-9_]" 

241ALPHANUM_UNDERSCORE_REGEX = re.compile( 

242 anchor(one_or_more(ALPHANUM_UNDERSCORE_CHAR)) 

243) 

244 

245ALPHANUM_UNDERSCORE_HYPHEN_CHAR = r"[A-Za-z0-9_\-]" 

246ALPHANUM_UNDERSCORE_HYPHEN_DOT_CHAR = r"[A-Za-z0-9_\-\.]" 

247ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR = r"[A-Za-z0-9,_\-\{\}]" 

248ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR = r"[A-Za-z0-9_\- ]" 

249 

250HUMAN_NAME_CHAR_UNICODE = r"[\w\-'’ \.]" 

251# \w is a word character; with the re.UNICODE flag, that includes accented 

252# characters. Then we allow hyphen, plain apostrophe, Unicode apostrophe, 

253# space, dot. 

254HUMAN_MANDATORY_CHAR_REGEX = re.compile(r"\w+", re.UNICODE) 

255# ... for "at least one word character somewhere" 

256 

257 

258# ----------------------------------------------------------------------------- 

259# Level 1. Computer-style simple strings with no spaces. 

260# ----------------------------------------------------------------------------- 

261 

262 

def validate_alphanum(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that ``x`` is a generic alphanumeric string, as judged by
    :meth:`str.isalnum`.

    Raises:
        :exc:`ValueError` if it is not
    """
    if x.isalnum():
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid alphanumeric string"))

270 

271 

def validate_alphanum_underscore(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that can be alphanumeric or contain an underscore.

    Raises:
        :exc:`ValueError` if the string is empty or contains any other
        character
    """
    # fullmatch(), not match(): the "$" in the compiled pattern also matches
    # just before a trailing newline, which would let "abc\n" through.
    if not ALPHANUM_UNDERSCORE_REGEX.fullmatch(x):
        raise ValueError(
            describe_regex_permitted_char(ALPHANUM_UNDERSCORE_CHAR, req)
        )

282 

283 

284# ----------------------------------------------------------------------------- 

285# Level 2. Human-style simple strings, allowing spaces but only minimal 

286# punctuation. 

287# ----------------------------------------------------------------------------- 

288 

289# ... see specific validators. 

290 

291# ----------------------------------------------------------------------------- 

292# Level 3. Human-style strings, such as people's names; may involve accented 

293# characters, spaces, some punctuation; may be used as Python or SQL search 

294# literals (with suitable precautions). 

295# ----------------------------------------------------------------------------- 

296 

297# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

298# 3(a). Human names 

299# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

300 

301 

def validate_human_name(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.PATIENT_NAME_MAX_LEN,
) -> None:
    """
    Accepts spaces, accents, etc.

    This is hard. See
    https://stackoverflow.com/questions/888838/regular-expression-for-validating-names-and-surnames

    Two checks are applied: (1) every character must be permitted by
    ``HUMAN_NAME_CHAR_UNICODE`` and the length must be within bounds; (2) the
    name must contain at least one "word" character somewhere.

    Raises:
        :exc:`ValueError` if either check fails
    """  # noqa
    validate_by_char_and_length(
        x,
        permitted_char_expression=HUMAN_NAME_CHAR_UNICODE,
        min_length=min_length,
        max_length=max_length,
        req=req,
    )
    # search(), not match(): the word character may be anywhere, so names that
    # begin with punctuation (e.g. a leading apostrophe) are still acceptable.
    if not HUMAN_MANDATORY_CHAR_REGEX.search(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Names require at least one 'word' character"))

324 

325 

326# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

327# 3(c). Search terms for simple near-alphanumeric SQL content, allowing 

328# wildcards. 

329# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

330 

331RESTRICTED_SQL_SEARCH_LITERAL_CHAR = r"[A-Za-z0-9\- _%]" 

332# ... hyphens are meaningful in regexes, so escape them

333 

334 

def validate_restricted_sql_search_literal(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.SQL_SEARCH_LITERAL_MAX_LENGTH,
) -> None:
    """
    Validates a fairly broad search string that may use the SQL wildcards
    ``%`` and ``_``, but should be syntactically safe in terms of HTML etc.
    Arbitrary strings are not permitted; this is only a subset of what SQL
    itself would allow.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if the
        string is invalid
    """
    constraints = dict(
        permitted_char_expression=RESTRICTED_SQL_SEARCH_LITERAL_CHAR,
        min_length=min_length,
        max_length=max_length,
    )
    validate_by_char_and_length(x, req=req, **constraints)

354 

355 

356# ----------------------------------------------------------------------------- 

357# Level 4. Infinitely worrying. 

358# ----------------------------------------------------------------------------- 

359 

360# noinspection PyUnusedLocal 

def validate_anything(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    A validator that never objects: every input is accepted. May be unwise.

    Both parameters are ignored; they exist so that the signature matches the
    other validators.
    """
    return None

366 

367 

368# ============================================================================= 

369# Specific well-known computer formats 

370# ============================================================================= 

371 

372# ----------------------------------------------------------------------------- 

373# Base 64 encoding 

374# ----------------------------------------------------------------------------- 

375 

376# BASE64_REGEX = re.compile( 

377# "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$" 

378# # https://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data # noqa 

379# ) 

380 

381 

382# ----------------------------------------------------------------------------- 

383# Email addresses 

384# ----------------------------------------------------------------------------- 

385 

386EMAIL_RE_COMPILED = re.compile(EMAIL_RE) 

387 

388 

def validate_email(email: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate an e-mail address.

    The check mirrors our web form: Colander's e-mail regex plus a maximum
    length constraint.

    Raises:
        :exc:`ValueError` if the address is too long or does not match the
        regex
    """
    too_long = len(email) > StringLengths.EMAIL_ADDRESS_MAX_LEN
    # Only consult the regex if the length is acceptable.
    if too_long or EMAIL_RE_COMPILED.match(email) is None:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid e-mail address"))

405 

406 

407# ----------------------------------------------------------------------------- 

408# IP addresses 

409# ----------------------------------------------------------------------------- 

410 

411 

def validate_ip_address(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates an IP address (IPv4 or IPv6), using :func:`ipaddress.ip_address`.

    Raises:
        :exc:`ValueError` if the string is not a valid IP address
    """
    # https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python  # noqa
    try:
        ipaddress.ip_address(x)
    except ValueError:
        _ = req.gettext if req else dummy_gettext
        # "from None": the ipaddress module's own error message quotes the
        # offending input, and our error messages (including any chained
        # traceback) should not contain the error-producing string.
        raise ValueError(_("Invalid IP address")) from None

424 

425 

426# ----------------------------------------------------------------------------- 

427# URLs 

428# ----------------------------------------------------------------------------- 

429 

430# Per https://mathiasbynens.be/demo/url-regex, using @stephenhay's regex but 

431# restricted further. 

432VALID_REDIRECT_URL_REGEX = re.compile(r"^https?://[^\s/$.?#].[^\s]*$") 

433 

434 

def validate_any_url(url: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a URL loosely: it is accepted if :func:`urllib.parse.urlparse`
    finds both a scheme and a network location in it.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    However, avoid this one. For example, a URL such as
    xxhttps://127.0.0.1:8088/ can trigger Chrome to launch ``xdg-open``.
    A warning to that effect is logged on every call.

    Raises:
        :exc:`ValueError` if the URL lacks a scheme or a network location
    """
    log.warning("Avoid this validator! It allows open-this-file URLs!")
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme and parsed.netloc:
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid URL"))

448 

449 

def validate_redirect_url(
    url: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a URL for use as a redirection target, by matching it against
    ``VALID_REDIRECT_URL_REGEX``.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    Raises:
        :exc:`ValueError` if the URL is not acceptable
    """
    # fullmatch(), not match(): the "$" in the pattern also matches just
    # before a trailing newline, which must not be allowed in a redirect URL.
    if not VALID_REDIRECT_URL_REGEX.fullmatch(url):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid redirection URL"))

460 

461 

462# ============================================================================= 

463# CamCOPS system-oriented names 

464# ============================================================================= 

465 

466# ----------------------------------------------------------------------------- 

467# Group names 

468# ----------------------------------------------------------------------------- 

469 

470 

def validate_group_name(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Is the string a valid group name?

    Group descriptions can be anything, but group names shouldn't contain odd
    characters (for example: no spaces, no commas) -- this greatly
    facilitates config file handling etc.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if not
    """
    shortest = StringLengths.GROUP_NAME_MIN_LEN
    longest = StringLengths.GROUP_NAME_MAX_LEN
    validate_by_char_and_length(
        name,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_CHAR,
        max_length=longest,
        min_length=shortest,
    )

488 

489 

490# ----------------------------------------------------------------------------- 

491# Usernames 

492# ----------------------------------------------------------------------------- 

493 

494 

def validate_username(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Is the string a valid user name? Checks permitted characters
    (alphanumeric, comma, underscore, hyphen, braces) and length.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if not
    """
    shortest = StringLengths.USERNAME_CAMCOPS_MIN_LEN
    longest = StringLengths.USERNAME_CAMCOPS_MAX_LEN
    validate_by_char_and_length(
        name,
        req=req,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=shortest,
    )

508 

509 

510# ----------------------------------------------------------------------------- 

511# Devices 

512# ----------------------------------------------------------------------------- 

513 

514 

def validate_device_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate a client device name -- the computer-oriented one, not the
    friendly one. It must be non-empty and within the maximum device name
    length.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if the
        name is invalid
    """
    longest = StringLengths.DEVICE_NAME_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=1,
    )

529 

530 

531# ----------------------------------------------------------------------------- 

532# Export recipients 

533# ----------------------------------------------------------------------------- 

534 

535 

def validate_export_recipient_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate an export recipient name: alphanumeric/underscore characters
    only, with length limits taken from ``StringLengths``.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if the
        name is invalid
    """
    shortest = StringLengths.EXPORT_RECIPIENT_NAME_MIN_LEN
    longest = StringLengths.EXPORT_RECIPIENT_NAME_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_CHAR,
        max_length=longest,
        min_length=shortest,
    )

546 

547 

548# ----------------------------------------------------------------------------- 

549# Passwords 

550# ----------------------------------------------------------------------------- 

551 

552 

def validate_new_password(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate a proposed new password. Enforce our password policy: it must
    not be blank, must meet the minimum length, and must not be a prohibited
    password.

    Raises:
        :exc:`ValueError` describing the first rule that is broken
    """
    _ = req.gettext if req else dummy_gettext

    is_blank = not x or not x.strip()
    if is_blank:
        raise ValueError(_("Passwords can't be blank"))

    shortest_allowed = MINIMUM_PASSWORD_LENGTH
    if len(x) < shortest_allowed:
        too_short_msg = _(
            "Passwords can't be shorter than {} characters"
        ).format(shortest_allowed)
        raise ValueError(too_short_msg)

    # No maximum length, because we store a hash.
    # No other character limitations.
    if password_prohibited(x):
        raise ValueError(_("That password is used too commonly; try again"))

572 

573 

574# ----------------------------------------------------------------------------- 

575# HL7 

576# ----------------------------------------------------------------------------- 

577 

578 

def validate_hl7_id_type(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate HL7 Identifier Type. An empty string is acceptable.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if the
        string is invalid
    """
    longest = StringLengths.HL7_ID_TYPE_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
    )

592 

593 

def validate_hl7_aa(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate HL7 Assigning Authority. An empty string is acceptable.

    Raises:
        :exc:`ValueError` (from :func:`validate_by_char_and_length`) if the
        string is invalid
    """
    longest = StringLengths.HL7_AA_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
    )

605 

606 

607# ----------------------------------------------------------------------------- 

608# Task table names 

609# ----------------------------------------------------------------------------- 

610 

# A task table name is one letter followed by up to TABLENAME_MAX_LEN - 1
# alphanumeric/underscore characters:
# ... don't start with a number
# ... and although tables can and do start with underscores, task tables
#     don't.
TASK_TABLENAME_REGEX = re.compile(
    anchor(
        ALPHA_CHAR
        + min_max_copies(
            ALPHANUM_UNDERSCORE_CHAR,
            min_count=0,
            max_count=StringLengths.TABLENAME_MAX_LEN - 1,
        )
    )
)


def validate_task_tablename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that could be a task tablename.

    Raises:
        :exc:`ValueError` if the string does not start with a letter, contains
        characters other than alphanumerics/underscores, or is too long
    """
    # fullmatch(), not match(): the "$" in the pattern also matches just
    # before a trailing newline, which would let "tablename\n" through.
    if not TASK_TABLENAME_REGEX.fullmatch(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(
            _(
                "Task table names must start with a letter, and contain only "
                "alphanumeric characters (A-Z, a-z, 0-9) or "
                "underscores (_)."
            )
        )

644 

645 

646# ----------------------------------------------------------------------------- 

647# Filenames 

648# ----------------------------------------------------------------------------- 

649 

DOWNLOAD_FILENAME_REGEX = re.compile(r"\w[\w-]*\.\w+")
# - The dot is escaped so that it matches only a literal full stop (an
#   unescaped "." would match any character, including "/").
# - \w covers [A-Za-z0-9_]; for Python 3 str patterns it also matches other
#   Unicode word characters unless re.ASCII is used.
# - Use this with fullmatch(), so that nothing may follow the extension.


def validate_download_filename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate a file for user download.

    - Permit e.g. ``CamCOPS_dump_2021-06-04T100622.zip``.
    - Prohibit silly things (like directory/drive delimiters).

    Raises:
        :exc:`ValueError` if the filename is not of the permitted form
    """
    # The whole filename must match: a prefix match would accept names such
    # as "x.zip/../../somewhere_else".
    if not DOWNLOAD_FILENAME_REGEX.fullmatch(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(
            _(
                "Download filenames must (1) begin with an "
                "alphanumeric/underscore character; (2) contain only "
                "alphanumeric characters, underscores, and hyphens; and "
                "(3) end with a full stop followed by an "
                "alphanumeric/underscore extension."
            )
        )

673 )