Coverage for jutil/format.py: 82%

272 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-10-07 16:40 -0500

1import csv 

2import html 

3import json 

4import logging 

5import os 

6import re 

7import tempfile 

8from collections import OrderedDict 

9from datetime import timedelta 

10from decimal import Decimal 

11import subprocess 

12from io import StringIO 

13from typing import List, Any, Optional, Union, Dict, Sequence, Tuple, TypeVar 

14from django.conf import settings 

15from django.core.exceptions import ValidationError, ImproperlyConfigured 

16from django.utils.functional import lazy 

17import xml.dom.minidom # type: ignore 

18from django.utils.safestring import mark_safe 

19from django.utils.text import capfirst 

20 

21logger = logging.getLogger(__name__) 

22 

23S = TypeVar("S") 

24 

25 

def format_full_name(first_name: str, last_name: str, max_length: int = 20) -> str:
    """
    Builds a "first last" full name which fits into max_length characters.
    Too long names are shortened step by step, trying to keep the result natural:
    middle names are dropped first, then latter parts of combined first names,
    then latter parts of multi-part last names and finally the last name is reduced to its initial.
    Commas are always replaced by spaces.
    :param first_name: First name(s)
    :param last_name: Last name(s)
    :param max_length: Maximum length of the returned name
    :return: Full name, or a shortened version of it if the full name is too long
    :raises Exception: If the name does not fit even after all shortening steps
    """
    # commas are not allowed in limited names
    given = first_name.replace(",", " ")
    family = last_name.replace(",", " ")

    # names which are short enough are returned untouched
    original_full_name = " ".join((given, family))
    if len(original_full_name) <= max_length:
        return original_full_name

    # step 1: keep only the first of space-separated first names (drops middle names)
    given = given.split(" ")[0]
    candidate = " ".join((given, family))
    if len(candidate) <= max_length:
        return candidate

    # step 2: keep only the first part of combined (e.g. hyphenated) first names
    given = re.split(r"[\s\-]", given)[0]
    candidate = " ".join((given, family))
    if len(candidate) <= max_length:
        return candidate

    # step 3: keep only the first part of multi-part last names
    family = re.split(r"[\s\-]", family)[0]
    candidate = " ".join((given, family))
    if len(candidate) <= max_length:
        return candidate

    # step 4: last resort, reduce the last name to a single letter
    family = family[:1]
    candidate = " ".join((given, family))
    if len(candidate) <= max_length:
        return candidate
    raise Exception("Failed to shorten name {}".format(original_full_name))

68 

69 

def format_timedelta(dt: timedelta, days_label: str = "d", hours_label: str = "h", minutes_label: str = "min", seconds_label: str = "s") -> str:
    """
    Formats timedelta as compact human-readable text, e.g. 1h30min15s.
    Only units with a positive amount and a non-empty label are written.
    If no unit gets written, the duration is returned in seconds:
    with 3 decimals when it is at least one millisecond, otherwise as "0".
    :param dt: timedelta
    :param days_label: Label for days. Leave empty '' if value should be skipped / ignored.
    :param hours_label: Label for hours. Leave empty '' if value should be skipped / ignored.
    :param minutes_label: Label for minutes. Leave empty '' if value should be skipped / ignored.
    :param seconds_label: Label for seconds. Leave empty '' if value should be skipped / ignored.
    :return: str
    """
    total_seconds = dt.total_seconds()
    remaining = int(total_seconds)
    units = (
        (86400, days_label),
        (3600, hours_label),
        (60, minutes_label),
        (1, seconds_label),
    )

    pieces = []
    for unit_seconds, unit_label in units:
        amount, rest = divmod(remaining, unit_seconds)
        if unit_label and amount > 0:
            pieces.append(str(amount) + unit_label)
            remaining = rest

    text = "".join(pieces).strip()
    if text:
        return text

    # nothing was written above: fall back to plain seconds
    if total_seconds >= 0.001:
        # truncate (not round) to full milliseconds
        text = "{:0.3f}".format(int(total_seconds * 1000.0) * 0.001) + seconds_label
    else:
        text = "0" + seconds_label
    return text.strip()

101 

102 

def format_xml(content: str, encoding: str = "UTF-8", exceptions: bool = False) -> str:
    """
    Pretty-prints XML document given as str.
    Uses xmllint (via a temporary file) when settings.XMLLINT_PATH is set to a non-empty value,
    otherwise falls back to minidom toprettyxml.
    :param content: XML data as str
    :param encoding: Encoding used when passing the data to xmllint and when decoding its output
    :param exceptions: Re-raise formatting errors instead of returning the input unchanged
    :return: str (formatted XML, or the original content if formatting failed and exceptions is False)
    """
    assert isinstance(content, str)
    try:
        xmllint_path = getattr(settings, "XMLLINT_PATH", None)
        if not xmllint_path:
            return xml.dom.minidom.parseString(content).toprettyxml()
        # xmllint reads its input from a file, so hand the document over via a temporary file
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(content.encode(encoding=encoding))
            tmp.flush()
            formatted = subprocess.check_output([xmllint_path, "--format", tmp.name])
            return formatted.decode(encoding=encoding)
    except Exception as err:
        logger.error("format_xml failed: %s", err)
        if exceptions:
            raise
        return content

126 

127 

def format_xml_bytes(content: bytes, encoding: str = "UTF-8", exceptions: bool = False) -> bytes:
    """
    Pretty-prints XML document given as bytes and returns the result as bytes.
    Uses xmllint (via a temporary file) when settings.XMLLINT_PATH is set to a non-empty value,
    otherwise falls back to minidom toprettyxml.
    :param content: XML data as bytes
    :param encoding: Encoding used by the minidom fallback for decoding input and encoding output
    :param exceptions: Re-raise formatting errors instead of returning the input unchanged
    :return: bytes (formatted XML, or the original content if formatting failed and exceptions is False)
    """
    assert isinstance(content, bytes)
    try:
        xmllint_path = getattr(settings, "XMLLINT_PATH", None)
        if not xmllint_path:
            document = xml.dom.minidom.parseString(content.decode(encoding=encoding))
            return document.toprettyxml(encoding=encoding)
        # xmllint reads its input from a file, so hand the document over via a temporary file
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(content)
            tmp.flush()
            return subprocess.check_output([xmllint_path, "--format", tmp.name])
    except Exception as err:
        logger.error("format_xml_bytes failed: %s", err)
        if exceptions:
            raise
        return content

151 

152 

def format_xml_file(full_path: str, encoding: str = "UTF-8", exceptions: bool = False) -> bytes:
    """
    Pretty-prints XML file and returns the result as bytes.
    Uses xmllint when settings.XMLLINT_PATH is set to a non-empty value, otherwise minidom toprettyxml.
    If formatting fails (and exceptions is False) the raw file content is returned instead,
    and if the file cannot be read either, empty bytes is returned.
    :param full_path: Full path to XML file
    :param encoding: Output encoding used by the minidom fallback
    :param exceptions: Re-raise formatting errors instead of falling back to the raw file content
    :return: bytes
    """
    try:
        xmllint_path = getattr(settings, "XMLLINT_PATH", None)
        if xmllint_path:
            return subprocess.check_output([xmllint_path, "--format", full_path])
        with open(full_path, "rb") as xml_file:
            return xml.dom.minidom.parse(xml_file).toprettyxml(encoding=encoding)  # type: ignore
    except Exception as err:
        logger.error("format_xml_file failed (1): %s", err)
        if exceptions:
            raise

    # formatting failed: fall back to the unformatted file content
    try:
        with open(full_path, "rb") as raw_file:
            raw = raw_file.read()
    except Exception as err:
        logger.error("format_xml_file failed (2): %s", err)
        raw = b""
    return raw

179 

180 

def format_as_html_json(value: Any) -> str:
    """
    Renders value as indented JSON which can be embedded in HTML as is.
    The JSON text is HTML-escaped, after which line breaks are replaced by <br/> and spaces by &nbsp;.
    :param value: Any value which can be converted to JSON by json.dumps
    :return: str (marked safe for HTML output)
    """
    json_text = json.dumps(value, indent=4)
    escaped = html.escape(json_text)
    markup = escaped.replace("\n", "<br/>").replace(" ", "&nbsp;")
    return mark_safe(markup)

188 

189 

def _format_dict_as_html_key(k: str) -> str:
    """
    Converts dict key to a human-readable label.
    Leading '@' is removed, underscores become spaces, camelCase words are split,
    the first word is capitalized and the remaining words are lower-cased.
    :param k: Key
    :return: str
    """
    name = k[1:] if k.startswith("@") else k
    name = name.replace("_", " ")
    # insert a space in front of capital letters which start a new word
    name = re.sub(r"((?<=[a-z])[A-Z]|(?<!\A)[A-Z](?=[a-z]))", r" \1", name)
    head, *tail = name.split(" ")
    words: List[str] = [str(capfirst(head.strip()))]
    # empty parts (caused by consecutive spaces) are dropped
    words.extend(word for word in (part.strip().lower() for part in tail) if word)
    return " ".join(words)

202 

203 

def _format_dict_as_html_r(data: Dict[str, Any], margin: str = "", format_keys: bool = True) -> str:
    """
    Recursive worker of format_dict_as_html().
    Writes one "key: value" line per plain value, in sorted key order. Nested dicts
    (and every item of a list value) are written as a "key:" header line followed by
    the nested content using a deeper margin, and the group ends with an empty line.
    :param data: dict (any other value is written as a single line)
    :param margin: Prefix of every line written at this level
    :param format_keys: Pass keys through _format_dict_as_html_key
    :return: str
    """
    if not isinstance(data, dict):
        return "{}{}\n".format(margin, data)

    child_margin = margin + " "
    pieces: List[str] = []
    for k, v in sorted(data.items()):
        if isinstance(v, dict):
            children = [v]
        elif isinstance(v, list):
            children = v
        else:
            pieces.append("{}{}: {}\n".format(margin, _format_dict_as_html_key(k) if format_keys else k, v))
            continue
        # nested dict is handled as a group with a single child, list as a group of its items
        for child in children:
            pieces.append("{}{}:\n".format(margin, _format_dict_as_html_key(k) if format_keys else k))
            pieces.append(_format_dict_as_html_r(child, child_margin, format_keys=format_keys))
        pieces.append("\n")
    return "".join(pieces)

221 

222 

def format_dict_as_html(data: Dict[str, Any], format_keys: bool = True) -> str:
    """
    Renders dict as simple human-readable text wrapped in a <pre> tag.
    :param data: dict
    :param format_keys: Re-format 'additionalInfo' and 'additional_info' type of keys as 'Additional info'
    :return: str (html)
    """
    body = _format_dict_as_html_r(data, format_keys=format_keys)
    return "".join(("<pre>", body, "</pre>"))

231 

232 

def format_csv(rows: List[List[Any]], dialect: str = "excel") -> str:
    """
    Serializes rows as CSV text.
    :param rows: List[List[Any]]
    :param dialect: See csv.writer dialect
    :return: str (CSV content, one line per row)
    """
    with StringIO() as buffer:
        csv.writer(buffer, dialect=dialect).writerows(rows)
        return buffer.getvalue()

245 

246 

def format_table(  # noqa
    rows: List[List[Any]],
    max_col: Optional[int] = None,
    max_line: Optional[int] = 200,
    col_sep: str = "|",
    row_sep: str = "-",
    row_begin: str = "|",
    row_end: str = "|",
    has_label_row: bool = False,
    left_align: Optional[List[int]] = None,
    center_align: Optional[List[int]] = None,
) -> str:
    """
    Formats "ASCII-table" rows by padding column widths to longest column value, optionally limiting column widths.
    Optionally separates columns with '|' character and header row with '-' characters.
    Supports left, right and center alignment. Useful for console apps / debugging.

    Values longer than the column limit are cut and terminated with "..".
    Columns which do not fit into max_line are dropped, and the affected lines are terminated with "..".
    Rows shorter than the widest row are filled with empty values.

    :param rows: List[List[Any]]. Values are converted with str().
    :param max_col: Max column value width. Pass None for unlimited length. If max_line is set and max_col
        is not set (or the full-width columns exceed max_line), max_line // number-of-columns is used instead.
    :param max_line: Maximum single line length. Exceeding columns truncated. Pass None for unlimited length.
    :param col_sep: Column separator string.
    :param row_sep: Row separator character used before first row, end, after first row (if has_label_row).
        Pass empty string to leave separator lines out.
    :param row_begin: Row begin string, inserted before each row.
    :param row_end: Row end string, appended after each row.
    :param has_label_row: Set to True if table starts with column label row.
    :param left_align: Indexes of left-aligned columns. By default all are right aligned.
    :param center_align: Indexes of center-aligned columns. By default all are right aligned.
    :return: str
    :raises ValidationError: If the same column index is listed both in left_align and center_align
    """
    # validate parameters
    assert max_col is None or max_col > 2
    left_cols = set(left_align or ())
    center_cols = set(center_align or ())
    if left_cols & center_cols:
        raise ValidationError("Left align columns {} overlap with center align {}".format(left_align, center_align))

    # convert all values to str once and find out number of columns
    str_rows = [[str(v) for v in row] for row in rows]
    ncols = max((len(row) for row in str_rows), default=0)

    # find out full-width column lengths
    full_widths = [0] * ncols
    for row in str_rows:
        for ix, v in enumerate(row):
            full_widths[ix] = max(full_widths[ix], len(v))

    # adjust max_col if needed (nothing to adjust, and nothing to divide by, if there are no columns)
    if ncols and max_line and (not max_col or sum(full_widths) > max_line):
        max_col = max_line // ncols

    # length limited values and final column lengths
    col_lens = [0] * ncols
    limited: List[List[str]] = []
    for row in str_rows:
        cells = []
        for ix, v in enumerate(row):
            if max_col and len(v) > max_col:
                v = v[: max_col - 2] + ".."
            cells.append(v)
            col_lens[ix] = max(col_lens[ix], len(v))
        # fill short rows so that every row has the same number of columns
        cells.extend([""] * (ncols - len(cells)))
        limited.append(cells)

    # padded values: after this every value of a column is exactly col_lens[ix] wide
    padded: List[List[str]] = []
    for cells in limited:
        padded_cells = []
        for ix, v in enumerate(cells):
            pad = col_lens[ix] - len(v)
            if pad > 0:
                if ix in left_cols:
                    v = v + " " * pad
                elif ix in center_cols:
                    lpad = pad // 2
                    v = " " * lpad + v + " " * (pad - lpad)
                else:
                    v = " " * pad + v
            padded_cells.append(v)
        padded.append(padded_cells)

    col_sep_len = len(col_sep)

    def _line_len(n: int) -> int:
        # length of a full output line which contains the first n columns
        return len(row_begin) + sum(col_lens[:n]) + col_sep_len * n - col_sep_len + len(row_end)

    # calculate number of columns which fit in a line, and the resulting line length
    # (all lines are equally long after padding, so one calculation covers every line)
    ncols0 = ncols
    max_line_len = 0
    if padded:
        if max_line is not None:
            # never go below zero columns, otherwise this would not terminate for a very small max_line
            while ncols > 0 and _line_len(ncols) > max_line:
                ncols -= 1
        max_line_len = _line_len(ncols)

    # find out how we should terminate lines/rows
    line_term = ""
    row_sep_term = ""
    if ncols0 > ncols:
        line_term = ".."
        if row_sep:
            # extend separator lines to cover the ".." terminator as well
            row_sep_term = row_sep * (2 // len(row_sep))

    # final output with row and column separators
    separator = row_sep * max_line_len + row_sep_term
    out_lines = []
    if row_sep:
        out_lines.append(separator)
    for line_ix, cells in enumerate(padded):
        out_lines.append(row_begin + col_sep.join(cells[:ncols]) + row_end + line_term)
        if line_ix == 0 and row_sep and has_label_row:
            out_lines.append(separator)
    if row_sep:
        out_lines.append(separator)
    return "\n".join(out_lines)

369 

370 

371def _capfirst_lazy(x): 

372 """ 

373 capfirst() keeping lazy strings lazy. 

374 """ 

375 return x[0:1].upper() + x[1:] if x else "" 

376 

377 

# Lazily evaluated version of _capfirst_lazy(): built with django.utils.functional.lazy, str given as the result type.
capfirst_lazy = lazy(_capfirst_lazy, str)

379 

380 

def dec0(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 0 decimal digits.
    :param a: Number
    :return: Decimal with 0 decimal digits
    """
    exponent = Decimal("1")
    return Decimal(a).quantize(exponent)

388 

389 

def dec1(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 1 decimal digit.
    :param a: Number
    :return: Decimal with 1 decimal digit
    """
    exponent = Decimal("1.0")
    return Decimal(a).quantize(exponent)

397 

398 

def dec2(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 2 decimal digits.
    :param a: Number
    :return: Decimal with 2 decimal digits
    """
    exponent = Decimal("1.00")
    return Decimal(a).quantize(exponent)

406 

407 

def dec3(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 3 decimal digits.
    :param a: Number
    :return: Decimal with 3 decimal digits
    """
    exponent = Decimal("1.000")
    return Decimal(a).quantize(exponent)

415 

416 

def dec4(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 4 decimal digits.
    :param a: Number
    :return: Decimal with 4 decimal digits
    """
    exponent = Decimal("1.0000")
    return Decimal(a).quantize(exponent)

424 

425 

def dec5(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 5 decimal digits.
    :param a: Number
    :return: Decimal with 5 decimal digits
    """
    exponent = Decimal("1.00000")
    return Decimal(a).quantize(exponent)

433 

434 

def dec6(a: Union[float, int, Decimal, str]) -> Decimal:
    """
    Converts number to Decimal quantized to 6 decimal digits.
    :param a: Number
    :return: Decimal with 6 decimal digits
    """
    exponent = Decimal("1.000000")
    return Decimal(a).quantize(exponent)

442 

443 

def is_media_full_path(file_path: str) -> bool:
    """
    Checks if file path is under (settings) MEDIA_ROOT.
    Paths are compared by whole path components, so a sibling directory which only shares
    a name prefix with MEDIA_ROOT (e.g. /srv/media_private vs. /srv/media) is not accepted.
    :param file_path: File path. Relative paths are resolved by os.path.abspath.
    :return: True if the path is MEDIA_ROOT itself or located under it
    :raises ImproperlyConfigured: If MEDIA_ROOT is missing or empty
    """
    if not hasattr(settings, "MEDIA_ROOT") or not settings.MEDIA_ROOT:
        raise ImproperlyConfigured("MEDIA_ROOT not defined")
    media_root = os.path.abspath(str(settings.MEDIA_ROOT))
    full_path = os.path.abspath(file_path)
    # os.path.join(root, "") appends the path separator, which limits the prefix match to whole directory names
    return full_path == media_root or full_path.startswith(os.path.join(media_root, ""))

452 

453 

def strip_media_root(file_path: str) -> str:
    """
    Strips (settings) MEDIA_ROOT from the beginning of file path and returns the remaining relative path.
    This enables storing file names in more portable format for different environments / storage.
    Paths are compared by whole path components, so a sibling directory which only shares
    a name prefix with MEDIA_ROOT (e.g. /srv/media_private vs. /srv/media) is rejected.
    Reverse operation of this is get_media_full_path().
    :param file_path: File path. Relative paths are resolved by os.path.abspath.
    :return: str (path relative to MEDIA_ROOT, without leading path separator)
    :raises ImproperlyConfigured: If MEDIA_ROOT is missing or empty
    :raises ValueError: If the path is not under MEDIA_ROOT
    """
    if not hasattr(settings, "MEDIA_ROOT") or not settings.MEDIA_ROOT:
        raise ImproperlyConfigured("MEDIA_ROOT not defined")
    media_root = os.path.abspath(str(settings.MEDIA_ROOT))
    full_path = os.path.abspath(file_path)
    # os.path.join(root, "") appends the path separator, which limits the prefix match to whole directory names
    if full_path != media_root and not full_path.startswith(os.path.join(media_root, "")):
        raise ValueError("Path {} not under MEDIA_ROOT".format(file_path))
    relative_path = full_path[len(media_root) :]
    if relative_path.startswith(os.sep):
        return relative_path[1:]
    return relative_path

474 

475 

def get_media_full_path(file_path: str) -> str:
    """
    Returns the absolute path of a file located under (settings) MEDIA_ROOT.
    Relative paths are interpreted as relative to MEDIA_ROOT, absolute paths are only validated.
    This enables storing file names in more portable format for different environments / storage.
    The path is normalized before validation, so '..' components cannot be used to point outside
    MEDIA_ROOT, and a sibling directory which only shares a name prefix with MEDIA_ROOT is rejected.
    Reverse operation of this is strip_media_root().
    :param file_path: str
    :return: str (normalized absolute path)
    :raises ImproperlyConfigured: If MEDIA_ROOT is missing or empty
    :raises ValueError: If the resulting path is not under MEDIA_ROOT
    """
    if not hasattr(settings, "MEDIA_ROOT") or not settings.MEDIA_ROOT:
        raise ImproperlyConfigured("MEDIA_ROOT not defined")
    media_root = os.path.abspath(str(settings.MEDIA_ROOT))
    joined_path = file_path if os.path.isabs(file_path) else os.path.join(media_root, file_path)
    # normalize only after joining: otherwise "../x" would still start with MEDIA_ROOT as plain text
    full_path = os.path.abspath(joined_path)
    # os.path.join(root, "") appends the path separator, which limits the prefix match to whole directory names
    if full_path != media_root and not full_path.startswith(os.path.join(media_root, "")):
        raise ValueError("Path {} not under MEDIA_ROOT".format(file_path))
    return full_path

491 

492 

def camel_case_to_underscore(s: str) -> str:
    """
    Converts camelCaseWord to camel_case_word.
    Dashes are converted to underscores as well and the result is always lower case.
    :param s: str
    :return: str
    """
    if not s:
        return s.lower()
    # "HTTPResponse" -> "HTTP_Response": split a run of capitals from the following capitalized word
    separated = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", s)
    # "camelCase" -> "camel_Case": split lower case letter or digit from the following capital
    separated = re.sub(r"([a-z\d])([A-Z])", r"\1_\2", separated)
    return separated.replace("-", "_").lower()

504 

505 

def underscore_to_camel_case(s: str) -> str:
    """
    Converts under_score_word to underScoreWord.
    The first part is kept as is, every following part gets its first letter capitalized.
    Empty value is returned as is.
    :param s: str
    :return: str
    """
    if not s:
        return s
    head, *tail = s.split("_")
    return head + "".join(capfirst(word) or "" for word in tail)

516 

517 

def choices_label(choices: Sequence[Tuple[S, str]], value: S) -> str:
    """
    Finds label of a value from (value, label) choices.
    :param choices: [(choice1, label1), (choice2, label2), ...]
    :param value: Value to find
    :return: Label of the first matching choice, or empty string if there is no match
    """
    return next((label for key, label in choices if key == value), "")