Coverage for cc_modules/cc_proquint.py: 24%
74 statements
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2022-11-08 23:14 +0000
1"""
2camcops_server/cc_modules/cc_proquint.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Convert integers into Pronounceable Quintuplets (proquints)
27https://arxiv.org/html/0901.4016
29Based on https://github.com/dsw/proquint, which has the following licence:
31--8<---------------------------------------------------------------------------
33Copyright (c) 2009 Daniel S. Wilkerson
34All rights reserved.
36Redistribution and use in source and binary forms, with or without
37modification, are permitted provided that the following conditions are
38met:
40 Redistributions of source code must retain the above copyright
41 notice, this list of conditions and the following disclaimer.
42 Redistributions in binary form must reproduce the above copyright
43 notice, this list of conditions and the following disclaimer in
44 the documentation and/or other materials provided with the
45 distribution.
47 Neither the name of Daniel S. Wilkerson nor the names of its
48 contributors may be used to endorse or promote products derived
49 from this software without specific prior written permission.
51THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63--8<---------------------------------------------------------------------------
66"""
67import uuid
# The two proquint alphabets. A letter's position within its alphabet is the
# numeric value that the letter encodes.
CONSONANTS = "bdfghjklmnprstvz"
VOWELS = "aiou"

# Number of bits carried by a single letter of each kind.
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse mappings (letter -> value), derived from the alphabets above so
# that the two directions cannot drift apart.
LOOKUP_CONSONANTS = {letter: value for value, letter in enumerate(CONSONANTS)}
LOOKUP_VOWELS = {letter: value for value, letter in enumerate(VOWELS)}

# Combined letter -> value table covering both alphabets; the checksum code
# uses it because it treats every letter alike. The alphabets share no
# letters, so merging them loses nothing.
LOOKUP_TABLE = dict(LOOKUP_CONSONANTS)
LOOKUP_TABLE.update(LOOKUP_VOWELS)
class InvalidProquintException(Exception):
    """
    Raised when a string cannot be decoded as a proquint.
    """
def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint string.

    The UUID is treated as the 128-bit integer exposed by its ``int``
    attribute and passed to :func:`proquint_from_int`.

    Args:
        uuid_obj:
            UUID to encode

    Returns:
        proquint string identifier
    """
    uuid_bits = 128
    uuid_as_int = uuid_obj.int
    return proquint_from_int(uuid_as_int, uuid_bits)
def proquint_from_int(int_value: int, size_in_bits: int) -> str:
    """
    Encode an integer as a hyphen-separated proquint with a check character.

    The value is consumed 16 bits at a time, least significant word first.
    Each 16-bit word becomes one five-letter quintuplet, and the quintuplets
    are emitted most significant first. A single check character is then
    appended as a final word of its own.

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        'hohur-bilov-j'

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        regrouped as alternating 4-bit and 2-bit fields:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        h    o  h    u  r    - b    i  l    o  v

    Args:
        int_value:
            integer value to encode; only its lowest ``size_in_bits`` bits
            are used
        size_in_bits:
            size of integer in bits (must be a multiple of 16)

    Returns:
        proquint string identifier, including the trailing check character

    Raises:
        ValueError: if ``size_in_bits`` is not a multiple of 16
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )

    word_count = size_in_bits // 16

    # Encode the words starting from the least significant one, then flip
    # the list so that the most significant word comes first.
    words = [
        _proquint_from_int16((int_value >> (16 * position)) & 0xFFFF)
        for position in range(word_count)
    ]
    words.reverse()

    # The check character is computed over the letters only (no hyphens).
    words.append(_generate_check_character("".join(words)))

    return "-".join(words)
def _generate_check_character(proquint: str) -> str:
    """
    Return the Luhn mod 16 check character for a string of proquint letters.

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    Every letter is mapped to its code point:

    .. code-block:: none

        consonants: b=0x0 d=0x1 f=0x2 g=0x3 h=0x4 j=0x5 k=0x6 l=0x7
                    m=0x8 n=0x9 p=0xa r=0xb s=0xc t=0xd v=0xe z=0xf
        vowels:     a=0x0 i=0x1 o=0x2 u=0x3

    Working leftwards from the last letter, every other code point is
    doubled, starting with the last letter itself. Each result is reduced to
    the sum of its two hex digits, and the reduced values are added up. The
    check code point is whatever must be added to reach the next multiple
    of 0x10.

    Worked example for ``hohur-bilov`` (all values in hex):

    .. code-block:: none

        Character    h   o   h   u   r   b   i   l   o   v
        Code point   4   2   4   3   b   0   1   7   2   e
        Double           4       6       0       e       1c
        Reduce       4   4   4   6   b   0   1   e   2   1+c
        Sum          4   4   4   6   b   0   1   e   2   d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40

        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    Args:
        proquint:
            proquint letters without hyphens and without a check character

    Returns:
        the check character, always one of the consonants
    """
    # Start with factor 2: once the check character is appended, the current
    # last letter sits in a doubled position.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 2)

    # Distance to the next multiple of 16 (0 when already on a multiple).
    return CONSONANTS[-luhn_remainder % 16]
def _proquint_from_int16(int16_value: int) -> str:
    """
    Encode one 16-bit value as a five-letter quintuplet.

    The letters follow the pattern consonant-vowel-consonant-vowel-consonant.
    Consonants carry 4 bits and vowels 2 bits, 16 bits in total.

    Args:
        int16_value:
            value to encode; only its lowest 16 bits are read

    Returns:
        five-letter string
    """
    remaining_bits = int16_value
    letters_low_to_high = []

    # Peel fields off the least significant end. Even positions (0, 2, 4)
    # are consonants and odd positions (1, 3) are vowels.
    for position in range(5):
        if position % 2 == 0:
            alphabet, width = CONSONANTS, SIZE_OF_CONSONANT
        else:
            alphabet, width = VOWELS, SIZE_OF_VOWEL

        field = remaining_bits & ((1 << width) - 1)
        letters_low_to_high.append(alphabet[field])
        remaining_bits >>= width

    # Letters were gathered lowest field first; the text reads highest first.
    return "".join(reversed(letters_low_to_high))
def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The string is decoded to an integer by :func:`int_from_proquint`, and
    that integer is used as the UUID's integer value.

    Args:
        proquint:
            string to decode

    Returns:
        the corresponding UUID
    """
    return uuid.UUID(int=int_from_proquint(proquint))
def int_from_proquint(proquint: str) -> int:
    """
    Convert proquint string into integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        '0x493b05ee'

        h    o    h    u    r    - b    i    l    o    v
        0x4  0x2  0x4  0x3  0xb  - 0x0  0x1  0x7  0x2  0xe

        0100 10   0100 11   1011 - 0000 01   0111 10   1110
        0100 1001 0011 1011      - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb       - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode; hyphen-separated words, the last of which is
            the single check character

    Returns:
        converted integer value

    Raises:
        InvalidProquintException: if the string contains a character that
            is not a proquint letter, fails the checksum, does not end in a
            one-letter check word, or has a consonant/vowel in the wrong
            position
    """
    words = proquint.split("-")

    # The checksum helper indexes the letter table directly, so a character
    # outside the proquint alphabet would surface as a bare KeyError. Report
    # it with the same exception type as every other decoding failure.
    try:
        checksum_ok = _is_valid_proquint("".join(words))
    except KeyError:
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        ) from None

    if not checksum_ok:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character. It must be a word of exactly one letter;
    # otherwise (e.g. the hyphens were omitted) popping the last word would
    # silently throw away payload letters.
    check_word = words.pop()
    if len(check_word) != 1:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (missing check character)"
        )

    int_value = 0

    for word in words:
        for i, c in enumerate(word):
            # Within a word, even positions are consonants (4 bits) and odd
            # positions are vowels (2 bits).
            if i & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(c)

            if value is None:
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            int_value <<= shift
            int_value += value

    return int_value
def _is_valid_proquint(proquint: str) -> bool:
    """
    Report whether a proquint's check character matches its other letters.

    Args:
        proquint:
            proquint letters without hyphens, check character included

    Returns:
        ``True`` if the Luhn mod 16 remainder over the whole string is zero
    """
    # Start with factor 1: the last letter here is the check character
    # itself, which is not doubled.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 1)
    return luhn_remainder == 0
def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Compute the Luhn mod 16 remainder of a string of proquint letters.

    Part of the checksum calculations; see :func:`_generate_check_character`.
    For a valid sequence (check character included, ``start_factor`` of 1),
    the overall remainder should be 0.
    See https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.

    Args:
        proquint:
            proquint letters without hyphens
        start_factor:
            multiplier applied to the last letter; after that the
            multiplier alternates between 1 and 2 moving leftwards

    Returns:
        sum of the reduced code points, modulo 16
    """
    total = 0
    multiplier = start_factor

    for letter in proquint[::-1]:
        # Reduce the product to the sum of its two base-16 digits.
        high_digit, low_digit = divmod(LOOKUP_TABLE[letter] * multiplier, 16)
        total += high_digit + low_digit

        multiplier = 1 if multiplier == 2 else 2

    return total % 16