Coverage for cc_modules/cc_validators.py: 40%
149 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_validators.py
6===============================================================================
8 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
9 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
11 This file is part of CamCOPS.
13 CamCOPS is free software: you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
18 CamCOPS is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
26===============================================================================
28**String validators and the like.**
30All functions starting ``validate_`` do nothing if the input is good, and raise
31:exc:`ValueError` if it's bad, with a descriptive error (you can use ``str()``
32on the exception).
34All validators take a
35:class:`camcops_server.cc_modules.cc_request.CamcopsRequest` parameter, for
36internationalized error messages.
38WARNING: even the error messages shouldn't contain the error-producing strings.
39"""
41import ipaddress
42import logging
43import re
44from typing import Callable, List, Optional, TYPE_CHECKING
45import urllib.parse
47from cardinal_pythonlib.logs import BraceStyleAdapter
48from colander import EMAIL_RE
50from camcops_server.cc_modules.cc_constants import (
51 MINIMUM_PASSWORD_LENGTH,
52 StringLengths,
53)
54from camcops_server.cc_modules.cc_password import password_prohibited
56if TYPE_CHECKING:
57 from camcops_server.cc_modules.cc_request import CamcopsRequest
# Module-level logger, wrapped in cardinal_pythonlib's BraceStyleAdapter.
log = BraceStyleAdapter(logging.getLogger(__name__))
62# =============================================================================
63# Typing constants
64# =============================================================================
# Call signature shared by the string validators in this module: they take the
# candidate string plus an optional request, and return nothing. An invalid
# string is reported by raising ValueError.
STRING_VALIDATOR_TYPE = Callable[[str, Optional["CamcopsRequest"]], None]
70# =============================================================================
71# Raising exceptions: sometimes internationalized, sometimes not
72# =============================================================================
def dummy_gettext(x: str) -> str:
    """
    Stand-in for a request's ``gettext`` method: performs no translation and
    hands the message back unchanged.
    """
    untranslated = x
    return untranslated
82# =============================================================================
83# Regex manipulation
84# =============================================================================
def anchor(
    expression: str, anchor_start: bool = True, anchor_end: bool = True
) -> str:
    """
    Wraps a regex in a leading ``^`` and/or a trailing ``$``, as requested.
    """
    pieces = [expression]
    if anchor_start:
        pieces.insert(0, "^")
    if anchor_end:
        pieces.append("$")
    return "".join(pieces)
def zero_or_more(expression: str) -> str:
    """
    Appends the ``*`` quantifier (zero or more repetitions) to a regex.
    """
    return expression + "*"
def one_or_more(expression: str) -> str:
    """
    Appends the ``+`` quantifier (one or more repetitions) to a regex.
    """
    return expression + "+"
def min_max_copies(expression: str, max_count: int, min_count: int = 1) -> str:
    """
    Appends a bounded repetition quantifier to a regex: for a regex group
    ``x``, the result is ``x{min,max}``.

    Take care with ``min_count == 0``: unless something else restricts it,
    the resulting regex can match an empty string.
    """
    assert 0 <= min_count <= max_count
    quantifier = "{" + f"{min_count},{max_count}" + "}"
    return expression + quantifier
def describe_regex_permitted_char(
    expression: str,
    req: Optional["CamcopsRequest"] = None,
    invalid_prefix: bool = True,
) -> str:
    """
    Describes the characters permitted in a regular expression character
    selector -- as long as it's simple! This won't handle arbitrary regexes
    (for example, negated sets such as ``[^abc]`` are not interpreted).

    Args:
        expression:
            a regex character set, including its surrounding square brackets,
            e.g. ``[A-Za-z0-9_]``
        req:
            optional request whose ``gettext`` translates the description;
            if absent, the text is left untranslated
        invalid_prefix:
            start the description with "Invalid string."?

    Returns:
        a human-readable description of the permitted characters
    """
    assert expression.startswith("[") and expression.endswith("]")
    content = expression[1:-1]  # strip off surrounding []
    _ = req.gettext if req else dummy_gettext
    # Regex shorthand classes that get a worded description; any other
    # escaped character is reported literally.
    shorthand = {
        "w": _("word character"),
        "W": _("non-word character"),
        "d": _("digit"),
        "D": _("non-digit"),
        "s": _("whitespace"),
        "S": _("non-whitespace"),
    }
    permitted = []  # type: List[str]
    length = len(content)
    i = 0
    while i < length:
        char = content[i]
        if char == "\\":
            # backslash preceding another character: regex code or escaped char
            assert i + 1 < length, f"Bad escaping in {expression!r}"
            escaped = content[i + 1]
            permitted.append(shorthand.get(escaped, repr(escaped)))
            i += 2
        elif i + 1 < length and content[i + 1] == "-":
            # range like A-Z
            assert i + 2 < length, f"Bad range specification in {expression!r}"
            permitted.append(content[i : i + 3])  # noqa: E203
            i += 3
        else:
            # Plain literal. This includes ".", which has no special meaning
            # inside a character set: it matches only a full stop.
            permitted.append(repr(char))
            i += 1
    description = ", ".join(permitted)
    prefix = _("Invalid string.") + " " if invalid_prefix else ""
    return prefix + _("Permitted characters:") + " " + description
def describe_regex_permitted_char_length(
    expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
) -> str:
    """
    Builds the error text for a string that is constrained both by a set of
    permitted characters and by a minimum/maximum length.
    """
    _ = req.gettext if req else dummy_gettext
    heading = _("Invalid string.")
    length_limits = _("Minimum length = {}. Maximum length = {}.").format(
        min_length, max_length
    )
    # The heading is supplied here, so ask for the character list without one.
    permitted_chars = describe_regex_permitted_char(
        expression, req, invalid_prefix=False
    )
    return " ".join([heading, length_limits, permitted_chars])
197# =============================================================================
198# Generic validation functions
199# =============================================================================
def validate_by_char_and_length(
    x: str,
    permitted_char_expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
    flags: int = 0,
) -> None:
    """
    Validate a string based on permitted characters and length.

    Args:
        x:
            the string to check
        permitted_char_expression:
            regex character set describing a single permitted character,
            e.g. ``[A-Za-z0-9_]``
        max_length:
            maximum permitted length
        min_length:
            minimum permitted length
        req:
            optional request, used to translate the error message
        flags:
            flags passed to :func:`re.compile`

    Raises:
        :exc:`ValueError` if the string contains other characters or its
        length is out of range
    """
    regex = re.compile(
        anchor(
            min_max_copies(
                expression=permitted_char_expression,
                min_count=min_length,
                max_count=max_length,
            )
        ),
        flags=flags,
    )
    # Use fullmatch(), not match(): "$" also matches just before a newline at
    # the end of the string, so match() would let "abc\n" through.
    if not regex.fullmatch(x):
        raise ValueError(
            describe_regex_permitted_char_length(
                permitted_char_expression,
                min_length=min_length,
                max_length=max_length,
                req=req,
            )
        )
234# =============================================================================
235# Generic strings
236# =============================================================================
# Regex character sets, each describing ONE permitted character. They are
# combined with length limits by the validators below.
ALPHA_CHAR = "[A-Za-z]"

ALPHANUM_UNDERSCORE_CHAR = "[A-Za-z0-9_]"
# Anchored regex for one or more characters from the set above.
ALPHANUM_UNDERSCORE_REGEX = re.compile(
    anchor(one_or_more(ALPHANUM_UNDERSCORE_CHAR))
)

# In the sets below, hyphen and dot are backslash-escaped.
ALPHANUM_UNDERSCORE_HYPHEN_CHAR = r"[A-Za-z0-9_\-]"
ALPHANUM_UNDERSCORE_HYPHEN_DOT_CHAR = r"[A-Za-z0-9_\-\.]"
ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR = r"[A-Za-z0-9,_\-\{\}]"
ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR = r"[A-Za-z0-9_\- ]"

# Characters allowed in a person's name: \w (a word character, which for
# Unicode matching includes accented letters), then hyphen, plain apostrophe,
# Unicode apostrophe, space, and dot.
HUMAN_NAME_CHAR_UNICODE = r"[\w\-'’ \.]"
# A run of word characters; used to insist that a name contains at least one
# word character.
HUMAN_MANDATORY_CHAR_REGEX = re.compile(r"\w+", re.UNICODE)
258# -----------------------------------------------------------------------------
259# Level 1. Computer-style simple strings with no spaces.
260# -----------------------------------------------------------------------------
def validate_alphanum(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that a string is alphanumeric, as judged by :meth:`str.isalnum`
    (so an empty string fails).

    Raises:
        :exc:`ValueError` if it is not
    """
    if x.isalnum():
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid alphanumeric string"))
def validate_alphanum_underscore(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that can be alphanumeric or contain an underscore.

    Raises:
        :exc:`ValueError` if the string is empty or contains any other
        character
    """
    # fullmatch() rather than match(): the regex ends in "$", which also
    # matches just before a trailing newline, so match() would accept "abc\n".
    if not ALPHANUM_UNDERSCORE_REGEX.fullmatch(x):
        raise ValueError(
            describe_regex_permitted_char(ALPHANUM_UNDERSCORE_CHAR, req)
        )
284# -----------------------------------------------------------------------------
285# Level 2. Human-style simple strings, allowing spaces but only minimal
286# punctuation.
287# -----------------------------------------------------------------------------
289# ... see specific validators.
291# -----------------------------------------------------------------------------
292# Level 3. Human-style strings, such as people's names; may involve accented
293# characters, spaces, some punctuation; may be used as Python or SQL search
294# literals (with suitable precautions).
295# -----------------------------------------------------------------------------
297# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
298# 3(a). Human names
299# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def validate_human_name(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.PATIENT_NAME_MAX_LEN,
) -> None:
    """
    Validates a person's name. Accepts spaces, accents, etc.

    This is hard. See
    https://stackoverflow.com/questions/888838/regular-expression-for-validating-names-and-surnames

    Args:
        x: the name to check
        req: optional request, used to translate error messages
        min_length: minimum permitted length
        max_length: maximum permitted length

    Raises:
        :exc:`ValueError` if the name contains characters that are not
        permitted, has a length out of range, or contains no word character
        at all
    """  # noqa
    validate_by_char_and_length(
        x,
        permitted_char_expression=HUMAN_NAME_CHAR_UNICODE,
        min_length=min_length,
        max_length=max_length,
        req=req,
    )
    # search(), not match(): the word character may be anywhere in the name,
    # not just at the start (e.g. "'t Hooft" begins with an apostrophe).
    if not HUMAN_MANDATORY_CHAR_REGEX.search(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Names require at least one 'word' character"))
326# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
327# 3(c). Search terms for simple near-alphanumeric SQL content, allowing
328# wildcards.
329# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# One permitted character of a restricted SQL search term: letters, digits,
# hyphen, space, and the SQL wildcards "_" and "%". The hyphen is escaped
# because it is meaningful inside a regex character set.
RESTRICTED_SQL_SEARCH_LITERAL_CHAR = r"[A-Za-z0-9\- _%]"


def validate_restricted_sql_search_literal(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.SQL_SEARCH_LITERAL_MAX_LENGTH,
) -> None:
    """
    Validates a search term that may use the SQL wildcards ``%`` and ``_``
    but is otherwise limited to a small, syntactically safe (in terms of HTML
    etc.) character set. This is deliberately a subset of what SQL itself
    would allow; arbitrary strings are not permitted.
    """
    validate_by_char_and_length(
        x=x,
        permitted_char_expression=RESTRICTED_SQL_SEARCH_LITERAL_CHAR,
        max_length=max_length,
        min_length=min_length,
        req=req,
    )
356# -----------------------------------------------------------------------------
357# Level 4. Infinitely worrying.
358# -----------------------------------------------------------------------------
# noinspection PyUnusedLocal
def validate_anything(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Accepts every string without checking it. May be unwise.
    """
    return None
368# =============================================================================
369# Specific well-known computer formats
370# =============================================================================
372# -----------------------------------------------------------------------------
373# Base 64 encoding
374# -----------------------------------------------------------------------------
376# BASE64_REGEX = re.compile(
377# "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"
378# # https://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data # noqa
379# )
382# -----------------------------------------------------------------------------
383# Email addresses
384# -----------------------------------------------------------------------------
# Colander's e-mail regex, compiled once at import time.
EMAIL_RE_COMPILED = re.compile(EMAIL_RE)


def validate_email(email: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate an e-mail address.

    The check mirrors our web form: Colander's e-mail regex plus a maximum
    length.

    Raises:
        :exc:`ValueError` if the address is too long or does not match
    """
    too_long = len(email) > StringLengths.EMAIL_ADDRESS_MAX_LEN
    if too_long or not EMAIL_RE_COMPILED.match(email):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid e-mail address"))
407# -----------------------------------------------------------------------------
408# IP addresses
409# -----------------------------------------------------------------------------
def validate_ip_address(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates an IP address (IPv4 or IPv6), using :func:`ipaddress.ip_address`.

    Raises:
        :exc:`ValueError` if the string is not a valid IP address
    """
    # https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python  # noqa
    try:
        ipaddress.ip_address(x)
    except ValueError:
        _ = req.gettext if req else dummy_gettext
        # "from None": the ipaddress module's own error message quotes the
        # offending input, and error output from this module should not
        # contain the error-producing string, so don't chain it.
        raise ValueError(_("Invalid IP address")) from None
426# -----------------------------------------------------------------------------
427# URLs
428# -----------------------------------------------------------------------------
430# Per https://mathiasbynens.be/demo/url-regex, using @stephenhay's regex but
431# restricted further.
432VALID_REDIRECT_URL_REGEX = re.compile(r"^https?://[^\s/$.?#].[^\s]*$")
def validate_any_url(url: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a URL loosely: it must parse to something with both a scheme
    and a network location. Returns nothing; raises :exc:`ValueError` if the
    URL is invalid.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    However, avoid this one. For example, a URL such as
    xxhttps://127.0.0.1:8088/ can trigger Chrome to launch ``xdg-open``.
    """
    log.warning("Avoid this validator! It allows open-this-file URLs!")
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme and parsed.netloc:
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid URL"))
def validate_redirect_url(
    url: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a URL that we might redirect to: it must be an http(s) URL
    containing no whitespace. Returns nothing.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    Raises:
        :exc:`ValueError` if the URL is not acceptable
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches
    # just before a trailing newline, so match() would accept a URL ending
    # in "\n".
    if not VALID_REDIRECT_URL_REGEX.fullmatch(url):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid redirection URL"))
462# =============================================================================
463# CamCOPS system-oriented names
464# =============================================================================
466# -----------------------------------------------------------------------------
467# Group names
468# -----------------------------------------------------------------------------
def validate_group_name(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks that a string is acceptable as a group name.

    Unlike group descriptions, which can be anything, group names are kept
    free of odd characters (for example: no spaces, no commas), which greatly
    facilitates config file handling etc.
    """
    shortest = StringLengths.GROUP_NAME_MIN_LEN
    longest = StringLengths.GROUP_NAME_MAX_LEN
    validate_by_char_and_length(
        x=name,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_CHAR,
        max_length=longest,
        min_length=shortest,
        req=req,
    )
490# -----------------------------------------------------------------------------
491# Usernames
492# -----------------------------------------------------------------------------
def validate_username(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks that a string is acceptable as a user name: permitted characters
    only, and a length within the CamCOPS username limits.
    """
    shortest = StringLengths.USERNAME_CAMCOPS_MIN_LEN
    longest = StringLengths.USERNAME_CAMCOPS_MAX_LEN
    validate_by_char_and_length(
        x=name,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=shortest,
        req=req,
    )
510# -----------------------------------------------------------------------------
511# Devices
512# -----------------------------------------------------------------------------
def validate_device_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks a client device name. This is the computer-oriented name, not the
    friendly one. It must contain at least one character.
    """
    longest = StringLengths.DEVICE_NAME_MAX_LEN
    validate_by_char_and_length(
        x=x,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=1,
        req=req,
    )
531# -----------------------------------------------------------------------------
532# Export recipients
533# -----------------------------------------------------------------------------
def validate_export_recipient_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks an export recipient name: alphanumeric/underscore characters only,
    with a length within the export recipient name limits.
    """
    shortest = StringLengths.EXPORT_RECIPIENT_NAME_MIN_LEN
    longest = StringLengths.EXPORT_RECIPIENT_NAME_MAX_LEN
    validate_by_char_and_length(
        x=x,
        permitted_char_expression=ALPHANUM_UNDERSCORE_CHAR,
        max_length=longest,
        min_length=shortest,
        req=req,
    )
548# -----------------------------------------------------------------------------
549# Passwords
550# -----------------------------------------------------------------------------
def validate_new_password(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks a proposed new password against our password policy: it must not
    be blank, must reach the minimum length, and must not be a prohibited
    password.

    Raises:
        :exc:`ValueError` describing the first rule that is broken
    """
    _ = req.gettext if req else dummy_gettext

    is_blank = not x or not x.strip()
    if is_blank:
        raise ValueError(_("Passwords can't be blank"))

    if len(x) < MINIMUM_PASSWORD_LENGTH:
        template = _("Passwords can't be shorter than {} characters")
        raise ValueError(template.format(MINIMUM_PASSWORD_LENGTH))

    # There is no maximum length (we store a hash) and no restriction on
    # which characters may be used.
    if password_prohibited(x):
        raise ValueError(_("That password is used too commonly; try again"))
574# -----------------------------------------------------------------------------
575# HL7
576# -----------------------------------------------------------------------------
def validate_hl7_id_type(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks an HL7 Identifier Type. An empty string is acceptable.
    """
    longest = StringLengths.HL7_ID_TYPE_MAX_LEN
    validate_by_char_and_length(
        x=x,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
        req=req,
    )
def validate_hl7_aa(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks an HL7 Assigning Authority. An empty string is acceptable.
    """
    longest = StringLengths.HL7_AA_MAX_LEN
    validate_by_char_and_length(
        x=x,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
        req=req,
    )
607# -----------------------------------------------------------------------------
608# Task table names
609# -----------------------------------------------------------------------------
# A task table name is one letter followed by alphanumeric/underscore
# characters, up to the maximum table name length overall. Task tables don't
# start with a number; and although tables can and do start with underscores,
# task tables don't.
TASK_TABLENAME_REGEX = re.compile(
    anchor(
        ALPHA_CHAR
        + min_max_copies(
            ALPHANUM_UNDERSCORE_CHAR,
            max_count=StringLengths.TABLENAME_MAX_LEN - 1,
            min_count=0,
        )
    )
)
def validate_task_tablename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that could be a task tablename.

    Raises:
        :exc:`ValueError` if the string does not start with a letter, or
        contains anything other than alphanumeric characters and underscores,
        or is too long
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches
    # just before a trailing newline, so match() would accept "phq9\n".
    if not TASK_TABLENAME_REGEX.fullmatch(x):
        _ = req.gettext if req else dummy_gettext
        # NOTE(review): this message previously read "contain only contain";
        # any translation catalogue keyed on the old text needs updating.
        raise ValueError(
            _(
                "Task table names must start with a letter, and contain only "
                "alphanumeric characters (A-Z, a-z, 0-9) or "
                "underscores (_)."
            )
        )
646# -----------------------------------------------------------------------------
647# Filenames
648# -----------------------------------------------------------------------------
# A download filename: a word character, then word characters/hyphens, then a
# literal full stop and an extension made of word characters -- and nothing
# else. The dot must be escaped (a bare "." matches any character, including
# "/"), and the pattern must be anchored at the end, or path-like strings such
# as "ab/cd.zip" would be accepted.
# \w matches letters, digits, and underscore.
DOWNLOAD_FILENAME_REGEX = re.compile(r"^\w[\w-]*\.\w+\Z")


def validate_download_filename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate a file for user download.

    - Permit e.g. ``CamCOPS_dump_2021-06-04T100622.zip``.
    - Prohibit silly things (like directory/drive delimiters).

    Raises:
        :exc:`ValueError` if the filename is not acceptable
    """
    # fullmatch() so that the whole filename, not just a prefix, must conform.
    if not DOWNLOAD_FILENAME_REGEX.fullmatch(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(
            _(
                "Download filenames must (1) begin with an "
                "alphanumeric/underscore character; (2) contain only "
                "alphanumeric characters, underscores, and hyphens; and "
                "(3) end with a full stop followed by an "
                "alphanumeric/underscore extension."
            )
        )