Coverage for cc_modules/cc_proquint.py: 24%

74 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-11-08 23:14 +0000

1""" 

2camcops_server/cc_modules/cc_proquint.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CamCOPS. 

10 

11 CamCOPS is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CamCOPS is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Convert integers into Pronounceable Quintuplets (proquints) 

27https://arxiv.org/html/0901.4016 

28 

29Based on https://github.com/dsw/proquint, which has the following licence: 

30 

31--8<--------------------------------------------------------------------------- 

32 

33Copyright (c) 2009 Daniel S. Wilkerson 

34All rights reserved. 

35 

36Redistribution and use in source and binary forms, with or without 

37modification, are permitted provided that the following conditions are 

38met: 

39 

40 Redistributions of source code must retain the above copyright 

41 notice, this list of conditions and the following disclaimer. 

42 Redistributions in binary form must reproduce the above copyright 

43 notice, this list of conditions and the following disclaimer in 

44 the documentation and/or other materials provided with the 

45 distribution. 

46 

47 Neither the name of Daniel S. Wilkerson nor the names of its 

48 contributors may be used to endorse or promote products derived 

49 from this software without specific prior written permission. 

50 

51THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

52"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

53LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

54A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

55OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

56SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

57LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

58DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

59THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

60(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

61OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

62 

63--8<--------------------------------------------------------------------------- 

64 

65 

66""" 

67import uuid 

68 

# Proquint alphabets. A letter's position within its alphabet is the numeric
# value that the letter encodes.
CONSONANTS = "bdfghjklmnprstvz"
VOWELS = "aiou"

# Number of bits carried by one consonant / one vowel (used as shift widths).
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse lookups (letter -> value), derived from the alphabets above.
LOOKUP_CONSONANTS = {
    letter: value for value, letter in enumerate(CONSONANTS)
}
LOOKUP_VOWELS = {letter: value for value, letter in enumerate(VOWELS)}

# Combined lookup covering every letter that may appear in a proquint.
LOOKUP_TABLE = {**LOOKUP_CONSONANTS, **LOOKUP_VOWELS}

95 

96 

class InvalidProquintException(Exception):
    """
    Raised when a proquint string cannot be decoded (for example, because
    its check character does not match or it contains invalid characters).
    """

99 

100 

def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint string.

    The UUID is treated as a 128-bit integer (``uuid_obj.int``) and passed to
    :func:`proquint_from_int`.

    Args:
        uuid_obj:
            the UUID to encode

    Returns:
        proquint string identifier
    """
    uuid_as_int = uuid_obj.int
    return proquint_from_int(uuid_as_int, 128)

106 

107 

def proquint_from_int(int_value: int, size_in_bits: int) -> str:
    """
    Encode an integer as a proquint, with a trailing check character.

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        hohur-bilov-j

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        grouped into alternating 4 and 2 bit values:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        h    o  h    u  r    - b    i  l    o  v

        The final "-j" is the check character; see
        _generate_check_character().

    Args:
        int_value:
            integer value to encode; only the lowest ``size_in_bits`` bits
            are encoded
        size_in_bits:
            size of integer in bits (must be a multiple of 16)

    Returns:
        proquint string identifier: one five-letter word per 16 bits, most
        significant word first, then the check character, all joined by
        hyphens

    Raises:
        ValueError:
            if ``size_in_bits`` is not a multiple of 16
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )

    # Walk the 16-bit chunks from the most significant to the least
    # significant, so the words come out in reading order.
    words = [
        _proquint_from_int16((int_value >> offset) & 0xFFFF)
        for offset in range(size_in_bits - 16, -1, -16)
    ]

    # The check character is computed over the letters only (no hyphens).
    words.append(_generate_check_character("".join(words)))

    return "-".join(words)

152 

153 

def _generate_check_character(proquint: str) -> str:
    """
    Return the Luhn mod 16 check character for a proquint.

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    Each letter maps to a code point:

    .. code-block:: none

        consonant_values = {
            'b': 0x0, 'd': 0x1, 'f': 0x2, 'g': 0x3,
            'h': 0x4, 'j': 0x5, 'k': 0x6, 'l': 0x7,
            'm': 0x8, 'n': 0x9, 'p': 0xa, 'r': 0xb,
            's': 0xc, 't': 0xd, 'v': 0xe, 'z': 0xf,
        }

        vowel_values = {
            'a': 0x0, 'i': 0x1, 'o': 0x2, 'u': 0x3,
        }

    Starting with the last character and moving left, double every other
    code point. Write each result in hex and add up its hex digits.

    Example (all in hex), for hohur-bilov:

    .. code-block:: none

        Character     h   o   h   u   r   b   i   l   o   v
        Code point    4   2   4   3   b   0   1   7   2   e
        Double            4       6       0       e      1c
        Reduce        4   4   4   6   b   0   1   e   2  1+c
        Sum           4   4   4   6   b   0   1   e   2   d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40

        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    Args:
        proquint:
            the proquint letters, without hyphens and without a check
            character

    Returns:
        the check character (always a consonant)
    """
    # Start with factor 2: once the check character is appended, the current
    # last letter becomes the second-from-last, which is a doubled position.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 2)

    # Whatever is needed to bring the total up to a multiple of 16.
    code_point = -luhn_remainder % 16

    return CONSONANTS[code_point]

200 

201 

def _proquint_from_int16(int16_value: int) -> str:
    """
    Convert a 16-bit integer into a single five-letter proquint word.

    The word is consonant-vowel-consonant-vowel-consonant, carrying
    4 + 2 + 4 + 2 + 4 bits, most significant bits first.

    Args:
        int16_value:
            integer to encode; only the lowest 16 bits are used

    Returns:
        five-letter proquint word
    """
    # (alphabet, bit mask, bit width) for each letter, listed from the
    # least significant end of the value (i.e. the last letter) upwards.
    layout = (
        (CONSONANTS, 0xF, SIZE_OF_CONSONANT),
        (VOWELS, 0x3, SIZE_OF_VOWEL),
        (CONSONANTS, 0xF, SIZE_OF_CONSONANT),
        (VOWELS, 0x3, SIZE_OF_VOWEL),
        (CONSONANTS, 0xF, SIZE_OF_CONSONANT),
    )

    remaining = int16_value
    letters_last_first = []
    for alphabet, mask, width in layout:
        letters_last_first.append(alphabet[remaining & mask])
        remaining >>= width

    # Letters were collected last-to-first; flip them into reading order.
    return "".join(reversed(letters_last_first))

222 

223 

def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The string is decoded to an integer by :func:`int_from_proquint`, and
    that integer is used as the UUID's integer value.

    Args:
        proquint:
            string to decode

    Returns:
        the corresponding UUID
    """
    return uuid.UUID(int=int_from_proquint(proquint))

231 

232 

def int_from_proquint(proquint: str) -> int:
    """
    Convert proquint string into integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        0x493b05ee

        h   o   h   u   r   - b   i   l   o   v
        0x4 0x2 0x4 0x3 0xb - 0x0 0x1 0x7 0x2 0xe

        0100 10 0100 11 1011 - 0000 01 0111 10 1110
        0100 1001 0011 1011  - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb   - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode: hyphen-separated words followed by a final
            check character, as produced by :func:`proquint_from_int`

    Returns:
        converted integer value

    Raises:
        InvalidProquintException:
            if the string has no data word before the check character,
            contains characters that are not proquint letters, fails the
            check character test, or has a consonant where a vowel is
            expected (or vice versa)
    """
    words = proquint.split("-")

    # At least one data word plus the check character is required. Without
    # this guard, input such as "" or "b" would pass the checksum and then
    # silently decode to 0 once the last word is discarded.
    if len(words) < 2:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (expected hyphen-separated words "
            f"followed by a check character)"
        )

    try:
        checksum_ok = _is_valid_proquint("".join(words))
    except KeyError:
        # The checksum helper looks every character up in the letter table;
        # an unknown character (digit, uppercase, ...) surfaces as KeyError.
        # Report it as an invalid proquint rather than leaking the KeyError.
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        ) from None

    if not checksum_ok:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character
    words.pop()

    int_value = 0

    for word in words:
        for position, letter in enumerate(word):
            # Odd positions hold vowels; even positions hold consonants.
            if position & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(letter)

            if value is None:
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            # Make room for this letter's bits, then add them in.
            int_value <<= shift
            int_value += value

    return int_value

288 

289 

def _is_valid_proquint(proquint: str) -> bool:
    """
    Report whether a proquint passes its Luhn mod 16 check.

    Args:
        proquint:
            the proquint letters including the trailing check character,
            without hyphens

    Returns:
        ``True`` if the Luhn mod 16 remainder over the whole string is zero
    """
    # Factor 1 for the last letter: the check character itself is not
    # doubled.
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 1)
    return luhn_remainder == 0

295 

296 

def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Compute the Luhn mod 16 remainder of a string of proquint letters.

    Part of the checksum calculations; see :func:`_generate_check_character`.
    For a valid sequence, the overall remainder should be 0.
    See https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.

    Args:
        proquint:
            proquint letters, without hyphens
        start_factor:
            multiplier applied to the last letter; after that the multiplier
            alternates between 1 and 2 while moving left

    Returns:
        sum of the hex digits of all the multiplied code points, modulo 16
    """
    total = 0
    multiplier = start_factor

    for letter in reversed(proquint):
        product = LOOKUP_TABLE[letter] * multiplier

        # Add the hex digits of the product ("reduce" step of the algorithm).
        high_digit, low_digit = divmod(product, 16)
        total += high_digit + low_digit

        multiplier = 1 if multiplier == 2 else 2

    return total % 16