Coverage for /Users/martin/prj/git/benchman_pre/src/benchman/util.py: 41%

247 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-12-24 08:16 +0100

1# (c) 2024 Martin Wendt; see https://github.com/mar10/benchman 

2# Licensed under the MIT license: https://www.opensource.org/licenses/mit-license.php 

3from __future__ import annotations 

4 

5import hashlib 

6import importlib.metadata 

7import json 

8import logging 

9import operator 

10import os 

11import re 

12import shutil 

13import subprocess 

14import sys 

15import threading 

16import uuid 

17from io import TextIOBase 

18from pathlib import Path 

19from typing import Any, Union, cast 

20 

21import build.util 

22import toml 

23from typing_extensions import Literal 

24 

# Package-wide logger used by the helpers in this module.
logger = logging.getLogger("benchman")

# Version of the running interpreter, formatted as "major.minor.micro".
PYTHON_VERSION = ".".join(str(part) for part in sys.version_info[:3])

28 

29 

def find_project_root(start_path: str | Path | None = None) -> Path | None:
    """Find the root folder of the current project.

    The top-level directory reported by Git is preferred. If Git is not
    available, or the start folder is not inside a repository, the start
    folder and its parents are searched (nearest first) for a
    `pyproject.toml`, `setup.py`, or `.git` entry.

    Args:
        start_path: Folder to start searching from. Defaults to the current
            working directory.

    Returns:
        The project root, or None if no marker was found.
    """
    start_dir = Path(start_path or os.getcwd())

    try:
        logger.debug("Looking for Git repository...")
        # Run 'git rev-parse' inside the start folder (not the process' cwd),
        # so that an explicit `start_path` is honored.
        git_root = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=start_dir,
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: not a Git repository.
        # OSError: git is not installed, or start_dir is not a usable folder.
        logger.debug(
            "No Git repository found, looking for pyproject.toml or setup.py..."
        )
    else:
        root = Path(git_root)
        logger.debug(f"Git repository found: {root}")
        return root

    # Appending a dummy file name makes `.parents` yield the start folder
    # itself first, followed by its ancestors.
    for parent in (start_dir / "dummy-file.txt").parents:
        logger.debug(f"Checking {parent}...")
        # Look for a defining file, such as 'pyproject.toml' or '.git'
        if any(
            (parent / marker).exists()
            for marker in ("pyproject.toml", "setup.py", ".git")
        ):
            # Stop at the nearest match instead of walking up to the outermost
            return parent
    return None

61 

62 

def get_project_info(path: Path | None = None) -> dict[str, Any]:
    """Collect name, version, root folder, and pyproject.toml data of a project.

    Args:
        path: Folder to start the project root search from (passed to
            `find_project_root()`).

    Returns:
        A dict with the keys `project_name`, `project_version`,
        `project_root`, and `pyproject_toml` (the parsed file content).

    Raises:
        FileNotFoundError: If no project root or no pyproject.toml was found.
    """
    project_root = find_project_root(path)
    if project_root is None:
        raise FileNotFoundError(f"Project root not found in {path} or parent folders")

    pyproject_path = project_root / "pyproject.toml"
    if not pyproject_path.is_file():
        # Projects that only have a setup.py are not supported
        raise FileNotFoundError(f"pyproject.toml not found in {project_root}")

    with open(pyproject_path) as f:
        pyproject_toml = toml.load(f)

    if "project" not in pyproject_toml:
        logger.warning(
            "pyproject.toml does not contain a [project] section "
            "(trying setup.cfg)..."
        )
        # Fall back to the wheel metadata reported by the `build` package
        wm = build.util.project_wheel_metadata(project_root)
        pn = wm.get("name")
        assert pn
        project_name: str = pn
    else:
        project_name = pyproject_toml["project"]["name"]

    # The version is read from the installed package metadata
    project_version = importlib.metadata.version(project_name)
    # Read the name again from the metadata, because it might be different
    # from the package name
    project_name = importlib.metadata.metadata(project_name)["Name"]

    return {
        "project_name": project_name,
        "project_version": project_version,
        "project_root": project_root,
        "pyproject_toml": pyproject_toml,
    }

105 

106 

107# PROJECT_ROOT: Path = find_project_root() 

108 

109 

class BenchmarkSuiteFile:
    """A benchmark result file named `<project>.<client_id>.<tag>.bench.json`.

    The three name components are parsed from the file name and exposed as
    the attributes `project`, `client_id`, and `tag`. Only the first two dots
    are used as separators, so the tag may itself contain dots.
    """

    def __init__(self, path: Path | str):
        """Parse the file name of `path`.

        Raises:
            ValueError: If the name does not end with `.bench.json` or does
                not consist of three dot-separated components before it.
        """
        self.path = Path(path)
        file_name = self.path.name
        if not file_name.endswith(".bench.json"):
            raise ValueError(f"Invalid file name: {self.path}")
        # Strip the 11 characters of '.bench.json'
        parts = file_name[:-11].split(".", 2)
        if len(parts) != 3:
            # Report the offending path instead of a bare unpacking error
            raise ValueError(f"Invalid file name: {self.path}")
        self.project, self.client_id, self.tag = parts

    def __repr__(self):
        return f"{self.__class__.__name__}<{self.path}>"

    def __str__(self):
        return str(self.path)

    @property
    def name(self) -> str:
        """The file name without the folder part."""
        return self.path.name

    def _read(self):
        """Return the parsed JSON content of the file."""
        with self.path.open("r") as f:
            return json.load(f)

    def _patch(self, json: dict, keep_date: bool = False):
        """Overwrite the file with `json`, optionally restoring its mtime.

        Note: the parameter name shadows the `json` module inside this method.
        """
        # Remember the modification time before the write changes it
        mdate = self.path.stat().st_mtime
        with self.path.open("w") as f:
            json_dump(json, f, pretty=True)
        if keep_date:
            os.utime(self.path, (mdate, mdate))

    def save_tag(
        self, new_tag: str, *, replace: bool, keep_time: bool = False
    ) -> BenchmarkSuiteFile:
        """Save the benchmark file with a new tag (filename and json data).

        If `replace` is True, this original file is renamed to the new tag and
        `self` is returned. Otherwise, a copy is created with the new tag and
        returned as a new instance.

        If `keep_time` is True, the modification time that the target file has
        before its JSON data is patched is restored afterwards.

        Raises:
            ValueError: If `new_tag` equals the current tag.
        """
        if self.tag == new_tag:
            raise ValueError(f"Tag is already '{new_tag}'")

        new_path = self.path.with_name(
            f"{self.project}.{self.client_id}.{new_tag}.bench.json"
        )
        if replace:
            result = self
            old_path = self.path
            # Rename first, so the instance is only updated on success
            old_path.replace(new_path)
            self.tag = new_tag
            self.path = new_path
            logger.info(f"Renamed {old_path} to {new_path.name}")
        else:
            shutil.copy(self.path, new_path)
            result = BenchmarkSuiteFile(new_path)
            logger.info(f"Copied {self.path} to {new_path.name}")

        # The tag is also stored inside the JSON data
        data = result._read()
        data["tag"] = new_tag
        result._patch(data, keep_date=keep_time)
        return result

    @classmethod
    def find_files(cls, folder: Path | str) -> list[BenchmarkSuiteFile]:
        """Return an instance for every `*.bench.json` file directly in `folder`."""
        folder = Path(folder)
        return [cls(p) for p in folder.glob("*.bench.json")]

175 

176 

def is_running_on_ci() -> bool:
    """Return True if the `CI` or `GITHUB_ACTIONS` environment variable is set
    to a non-empty value."""
    return any(os.environ.get(name) for name in ("CI", "GITHUB_ACTIONS"))

179 

180 

def extract_items(
    d: dict[str, Any], keys: list[str], *, remove: bool = False
) -> dict[str, Any]:
    """Create a subset of a dictionary by extracting specific keys.

    Keys that are not present in `d` are silently skipped.

    Args:
        d (dict): The source dictionary.
        keys (list): The keys to extract.
        remove (bool): If True, the keys are removed from the source dictionary.
    """
    subset: dict[str, Any] = {}
    for key in keys:
        if key not in d:
            continue
        subset[key] = d.pop(key) if remove else d[key]
    return subset

194 

195 

def json_dump(data: Any, file: TextIOBase, *, pretty: bool) -> None:
    """Write data to a file in JSON format, compact or pretty.

    Keys are always sorted. Pretty output is indented by two spaces; compact
    output uses `indent=0`, i.e. still one item per line, but without leading
    spaces or a space after the colon.
    """
    indent, separators = (2, (",", ": ")) if pretty else (0, (",", ":"))
    json.dump(data, file, indent=indent, separators=separators, sort_keys=True)

202 

203 

class FileOrStdout:
    """Context manager that yields a writable text stream.

    `out` may be None (use `sys.stdout`), an already open text stream (used
    as-is), or a path (opened for writing in text mode).

    Only a file that was opened by this context manager is closed on exit.
    `sys.stdout` and streams passed in by the caller stay open, so that the
    caller can still use them afterwards (e.g. `StringIO.getvalue()`).
    """

    def __init__(self, out: Union[Path, str, TextIOBase, None] = None):
        self.out = out
        self.file: TextIOBase | None = None
        # True only while `self.file` is a file that __enter__ opened
        self._close_on_exit = False

    def __enter__(self) -> TextIOBase:
        self._close_on_exit = False
        if self.out is None:
            self.file = cast(TextIOBase, sys.stdout)
        elif isinstance(self.out, TextIOBase):
            self.file = self.out
        else:
            self.file = open(self.out, "w")
            self._close_on_exit = True
        return self.file

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        if self._close_on_exit and self.file is not None:
            self.file.close()
        self._close_on_exit = False

221 

222 

class ExpressionFilter:
    """Filter a list of objects based on a rule string.

    A rule is a comma separated list of conditions that must all match
    (logical AND). Every condition consists of exactly three whitespace
    separated tokens: `attribute operator value`.

    Example:
        "name eq alice, age gt 20" matches objects where `name` is "alice"
        and `age` is greater than 20.

    A value that can be parsed as a float is compared as a number, all other
    values are compared as strings.

    NOTE: `;` is not treated as a separator; logical OR is not implemented.

    Supported Operators:
        eq, ==:     Equal to
        ne, !=, <>: Not equal to
        gt, >:      Greater than
        ge, >=:     Greater than or equal to
        lt, <:      Less than
        le, <=:     Less than or equal to
        *=:         Contains
        !*:         Does not contain
        ^=:         Starts with
        !^:         Does not start with

    Args:
        rule (str): The rule string, parsed once on construction.

    Raises:
        ValueError: If a condition does not consist of exactly three tokens.
        KeyError: If a condition uses an unknown operator.
    """

    # Maps an operator token to a callable `(attribute_value, rule_value) -> bool`
    ops = {
        "!=": operator.ne,
        "*=": operator.contains,
        "!*": lambda a, b: not operator.contains(a, b),
        "^=": lambda a, b: str(a).startswith(str(b)),
        "!^": lambda a, b: not str(a).startswith(str(b)),
        "<": operator.lt,
        "<=": operator.le,
        "<>": operator.ne,
        "==": operator.eq,
        ">": operator.gt,
        ">=": operator.ge,
        "eq": operator.eq,
        "ge": operator.ge,
        "gt": operator.gt,
        "le": operator.le,
        "lt": operator.lt,
        "ne": operator.ne,
    }

    def __init__(self, rule: str):
        self.rule = rule
        self.conditions = self._parse_rule()

    def __repr__(self):
        return f"{self.__class__.__name__}<{self.rule}>"

    def _parse_rule(self) -> list[tuple]:
        """Parse `self.rule` into a list of `(attribute, op_function, value)`."""
        conditions = []
        for cond in self.rule.split(","):
            attr, op, svalue = cond.strip().split()
            value: str | float = svalue
            try:
                # Compare numerically whenever the value looks like a number
                value = float(svalue)
            except ValueError:
                pass
            conditions.append((attr, self.ops[op], value))
        return conditions

    def matches(self, obj: Any) -> bool:
        """Return True if `obj` satisfies all conditions of the rule."""
        return all(op(getattr(obj, attr), value) for attr, op, value in self.conditions)

    def filter(self, object_list: list[Any]) -> list[Any]:
        """Return the objects of `object_list` that match the rule."""
        return [obj for obj in object_list if self.matches(obj)]

296 

297 

def filter_objects(objects: list[Any], rule: str) -> list[Any]:
    """Return the items of `objects` that match the `ExpressionFilter` rule."""
    return ExpressionFilter(rule).filter(objects)

302 

303 

def split_tokens(s: str) -> list[str]:
    """Split a comma separated string into tokens, removing whitespace.

    Empty tokens are kept, so an empty string yields `[""]`.
    """
    return list(map(str.strip, s.split(",")))

307 

308 

def singleton(cls):
    """
    Class decorator that turns a class into a lazily created Singleton.

    The decorated name is replaced by a factory function. Its first call
    instantiates `cls` with the given arguments; every later call returns
    that same instance and ignores its arguments. Creation is guarded by a
    lock, so concurrent first calls still produce only one instance.

    :param: cls (type): The class to be decorated as a Singleton.
    :return: function: A function that returns the single instance of the
             class.
    """
    created = []  # holds the one instance after the first call
    guard = threading.Lock()

    def get_instance(*args, **kwargs) -> object:
        """
        Return the single instance of the decorated class, creating it
        on first use.

        :param: *args: Positional arguments for the class constructor
                (only used by the call that creates the instance).
        :param: **kwargs: Keyword arguments for the class constructor
                (only used by the call that creates the instance).
        :return: object: The single instance of the class.
        """
        with guard:
            if not created:
                created.append(cls(*args, **kwargs))
            return created[0]

    return get_instance

348 

349 

# Characters that are kept unchanged by `sluggify()`.
allowed_slug = set(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~()"
)


def sluggify(text: str) -> str:
    """
    Convert a string to a slug.

    Every character that is not in `allowed_slug` is replaced with an
    underscore. Afterwards, leading and trailing `_`, `.`, `-`, and `~`
    characters are stripped.

    :param text: The input string to be converted to a slug.
    :return: str: The slug version of the input string.
    """
    mapped = []
    for char in text:
        mapped.append(char if char in allowed_slug else "_")
    slug = "".join(mapped)
    return slug.strip("_.-~")

364 

365 

def hash_string(s: str, *, length: int = 16) -> str:
    """
    Calculate a hexadecimal hash string for a given string.

    The SHA-256 hex digest of the encoded string is truncated to `length`
    characters. If `length` exceeds the digest length, the result is padded
    with zeros on the right.

    :param s: The string to be hashed.
    :param length: The number of characters of the result.
    :return: str: The hash value of the input string.
    """
    digest = hashlib.sha256(s.encode()).hexdigest()
    # ljust() only pads if the digest is shorter than the requested length
    return digest.ljust(length, "0")[:length]

379 

380 

def smart_sort_key(val) -> Any:
    """Sort by numeric parts and string parts (pass to `sort(key=...)`).

    A string is split into alternating text and digit runs, and the digit
    runs are converted to integers, so that "a2" sorts before "a10".
    Every string is converted to a tuple, also if it does not contain any
    digits. This keeps the keys of all strings comparable with each other.

    Non-string values are returned unchanged.
    """
    if not isinstance(val, str):
        return val

    parts: list[Any] = []
    # Split into digit runs (which may contain ',' as thousands separator)
    # and the text in between. Because of the capture group, the result
    # always alternates: text, digits, text, ...
    for part in re.split(r"(\d[\d,]*)", val):
        try:
            # discarding thousands separator
            parts.append(int(part.replace(",", "")))
        except ValueError:
            parts.append(part)
    return tuple(parts)

401 

402 

TimeUnitType = Literal["fsec", "psec", "nsec", "μsec", "msec", "sec"]

# Length of one unit, expressed in seconds.
time_units: dict[TimeUnitType, float] = {
    "fsec": 1e-15,  # femto
    "psec": 1e-12,  # pico
    "nsec": 1e-9,  # nano
    "μsec": 1e-6,  # micro
    "msec": 1e-3,  # milli
    "sec": 1.0,
}

# `(scale, unit)` pairs, ordered from the largest to the smallest unit.
time_scales: list[tuple[float, TimeUnitType]] = sorted(
    ((scale, unit) for unit, scale in time_units.items()), reverse=True
)


def get_time_unit(seconds: float) -> tuple[TimeUnitType, float]:
    """Return `(unit, scale)` of the largest unit that is not greater than
    `seconds`.

    Values below the smallest unit (including zero and negative values)
    yield `("sec", 1.0)`.
    """
    candidates = ((unit, scale) for scale, unit in time_scales if seconds >= scale)
    return next(candidates, ("sec", 1.0))

423 

424 

def calculate_q1_q2_q3(data: list[float]) -> tuple[float, float, float]:
    """Calculate the first, second, and third quartiles of a list of numbers.

    The quartiles are picked from a sorted copy of `data` at the indexes
    `n // 4`, `n // 2`, and `3 * n // 4` (no interpolation). The passed list
    is not modified.
    """
    ordered = sorted(data)
    count = len(ordered)
    q1, q2, q3 = (ordered[(k * count) // 4] for k in (1, 2, 3))
    return q1, q2, q3

433 

434 

def format_time(
    seconds: float,
    *,
    unit: Union[TimeUnitType, None] = None,
    precision: int = 3,
) -> str:
    """Format a duration as `"<value> <unit>"`.

    Args:
        seconds: The duration in seconds.
        unit: The unit to convert to. If None, it is chosen by
            `get_time_unit()`.
        precision: Number of digits after the decimal point.

    Returns:
        The scaled value (with thousands separators) followed by the unit.
    """
    if unit is not None:
        scale = time_units[unit]
    else:
        unit, scale = get_time_unit(seconds)

    scaled = seconds / scale
    template = "{secs:,.{prec}f} {unit}"
    return template.format(secs=scaled, prec=precision, unit=unit)

449 

450 

def byte_number_string(
    number: float,
    thousands_sep: bool = True,
    partition: bool = True,
    base1024: bool = False,
    append_bytes: Union[bool, str] = False,
    prec: int = 3,
) -> str:
    """Convert bytes into human-readable representation.

    Args:
        number: The number of bytes.
        thousands_sep: Format scaled or large numbers with `,` as thousands
            separator and `prec` significant digits.
        partition: Scale the number down and append a magnitude suffix
            (K, M, G, T, P, E, Z, Y). Numbers beyond the largest suffix
            keep that suffix.
        base1024: Scale by 1024 instead of 1000.
        append_bytes: False: no unit; True: append " Byte" / " Bytes";
            "short": append "B" to a magnitude suffix; "iec": append "iB"
            (or "B" if `base1024` is false) to a magnitude suffix. Without a
            magnitude suffix, "short" and "iec" behave like True.
        prec: Number of significant digits (general format); values <= 0
            select the default precision of the format.

    Raises:
        ValueError: If `append_bytes` is not one of the supported values.
    """
    if append_bytes not in (False, True, "short", "iec"):
        raise ValueError(f"Invalid append_bytes value: {append_bytes!r}")

    magsuffix = ""
    bytesuffix = ""

    if partition:
        suffixes = ("", "K", "M", "G", "T", "P", "E", "Z", "Y")
        base = 1024.0 if base1024 else 1000.0
        magnitude = 0
        # Stop at the largest known suffix instead of running off the table
        while number >= base and magnitude < len(suffixes) - 1:
            magnitude += 1
            number /= base
        magsuffix = suffixes[magnitude]
        if magsuffix:
            magsuffix = " " + magsuffix

    if append_bytes:
        if append_bytes == "iec" and magsuffix:
            bytesuffix = "iB" if base1024 else "B"
        elif append_bytes == "short" and magsuffix:
            bytesuffix = "B"
        elif number == 1:
            bytesuffix = " Byte"
        else:
            bytesuffix = " Bytes"

    if thousands_sep and (number >= 1000 or magsuffix):
        if prec > 0:
            snum = f"{number:,.{prec}g}"
        else:
            snum = f"{number:,g}"
    else:
        snum = str(number)

    return f"{snum}{magsuffix}{bytesuffix}"

506 

507 

def get_machine_id() -> str:
    """Return a unique identifier for this machine.

    A platform specific command is used to query the id. If the platform is
    not supported, or the command fails or returns nothing, the result of
    `uuid.getnode()` is returned as a string instead.
    """
    # See https://stackoverflow.com/a/74058166/19166

    def run(cmd: str) -> Union[str, None]:
        """Run a shell command; return its stripped stdout or None on failure."""
        try:
            return subprocess.run(
                cmd, shell=True, capture_output=True, check=True, encoding="utf-8"
            ).stdout.strip()
        except Exception:
            # Best effort: any failure falls through to the next source
            return None

    # Stays None on platforms that are not handled below
    res: Union[str, None] = None
    platform = sys.platform

    if platform == "darwin":
        res = run(
            "ioreg -d2 -c IOPlatformExpertDevice "
            "| awk -F\\\" '/IOPlatformUUID/{print $(NF-1)}'"
        )
    elif platform in ("win32", "cygwin", "msys"):
        output = run("wmic csproduct get uuid")
        if output:
            # The output is a 'UUID' header line followed by the value;
            # ignore blank lines instead of relying on a fixed line index.
            lines = [line.strip() for line in output.splitlines()]
            values = [line for line in lines if line and line.upper() != "UUID"]
            res = values[0] if values else None
    elif platform.startswith("linux"):
        res = run("cat /var/lib/dbus/machine-id") or run("cat /etc/machine-id")
    elif platform.startswith(("openbsd", "freebsd")):
        res = run("cat /etc/hostid") or run("kenv -q smbios.system.uuid")

    return res or str(uuid.getnode())