Coverage for /Users/martin/prj/git/benchman_pre/src/benchman/util.py: 41% (247 statements)
# (c) 2024 Martin Wendt; see https://github.com/mar10/benchman
# Licensed under the MIT license: https://www.opensource.org/licenses/mit-license.php
from __future__ import annotations

import hashlib
import importlib.metadata
import json
import logging
import operator
import os
import re
import shutil
import subprocess
import sys
import threading
import uuid
from io import TextIOBase
from pathlib import Path
from typing import Any, Union, cast

import build.util
import toml
from typing_extensions import Literal

logger = logging.getLogger("benchman")

PYTHON_VERSION = f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}"


def find_project_root(start_path: str | Path | None = None) -> Path | None:
    """Find the root folder of the current project."""
    root: Path | None = None
    try:
        logger.debug("Looking for Git repository...")
        # Run 'git rev-parse' to find the top-level directory
        git_root = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"], text=True
        ).strip()
        root = Path(git_root)
        logger.debug(f"Git repository found: {root}")
    except subprocess.CalledProcessError:
        # Not a Git repository
        logger.debug(
            "No Git repository found, looking for pyproject.toml or setup.py..."
        )

    if root is None:
        # Search parent folders for pyproject.toml or setup.py.
        # Start from the current working directory if not specified.
        # (Appending a dummy file name makes `parents` include the start folder itself.)
        current_path = Path(start_path or os.getcwd()) / "dummy-file.txt"
        for parent in current_path.parents:
            logger.debug(f"Checking {parent}...")
            # Look for a defining file, such as 'pyproject.toml' or '.git'
            if (
                (parent / "pyproject.toml").exists()
                or (parent / "setup.py").exists()
                or (parent / ".git").exists()
            ):
                root = parent
    return root


def get_project_info(path: Path | None = None) -> dict[str, Any]:
    project_root = find_project_root(path)

    if project_root is None:
        raise FileNotFoundError(f"Project root not found in {path} or parent folders")

    if (project_root / "pyproject.toml").is_file():
        with open(project_root / "pyproject.toml") as f:
            pyproject_toml = toml.load(f)
        if "project" in pyproject_toml:
            project_name: str = pyproject_toml["project"]["name"]
        else:
            logger.warning(
                "pyproject.toml does not contain a [project] section "
                "(trying setup.cfg)..."
            )

            wm = build.util.project_wheel_metadata(project_root)
            pn = wm.get("name")
            assert pn
            project_name = pn

            # from setuptools.config import read_configuration
            # conf_dict = read_configuration(project_root / "setup.cfg")
            # project_name = conf_dict["metadata"]["name"]
    else:  # no pyproject.toml (setup.py-only project)
        raise FileNotFoundError(f"pyproject.toml not found in {project_root}")

    # Get the project version from the installed package metadata
    project_version = importlib.metadata.version(project_name)
    # Get the project name again, because it might differ from the package name
    meta = importlib.metadata.metadata(project_name)
    project_name = meta["Name"]

    return {
        "project_name": project_name,
        "project_version": project_version,
        "project_root": project_root,
        "pyproject_toml": pyproject_toml,
    }
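
# Usage sketch (illustrative; assumes the package is installed and the working
# directory lies inside the project):
#
#     info = get_project_info()
#     print(info["project_name"], info["project_version"], info["project_root"])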


# PROJECT_ROOT: Path = find_project_root()


class BenchmarkSuiteFile:
    def __init__(self, path: Path | str):
        self.path = Path(path)
        file_name = self.path.name
        if not file_name.endswith(".bench.json"):
            raise ValueError(f"Invalid file name: {self.path}")
        file_name = file_name[:-11]
        self.project, self.client_id, self.tag = file_name.split(".", 2)

    def __repr__(self):
        return f"{self.__class__.__name__}<{self.path}>"

    def __str__(self):
        return str(self.path)

    @property
    def name(self) -> str:
        return self.path.name

    def _read(self):
        with self.path.open("r") as f:
            return json.load(f)

    def _patch(self, json: dict, keep_date: bool = False):
        mdate = self.path.stat().st_mtime
        with self.path.open("w") as f:
            json_dump(json, f, pretty=True)
        if keep_date:
            os.utime(self.path, (mdate, mdate))

    def save_tag(
        self, new_tag: str, *, replace: bool, keep_time: bool = False
    ) -> BenchmarkSuiteFile:
        """Save the benchmark file with a new tag (filename and JSON data).

        If `replace` is True, the original file is renamed to the new tag.
        Otherwise, a new file is created with the new tag and returned.

        If `keep_time` is True, the original file's modification time is preserved.
        """
        if self.tag == new_tag:
            raise ValueError(f"Tag is already '{new_tag}'")

        new_path = self.path.with_name(
            f"{self.project}.{self.client_id}.{new_tag}.bench.json"
        )
        if replace:
            result = self
            self.tag = new_tag
            self.path.replace(new_path)
            logger.info(f"Renamed {self.path} to {new_path.name}")
            self.path = new_path
        else:
            shutil.copy(self.path, new_path)
            result = BenchmarkSuiteFile(new_path)
            logger.info(f"Copied {self.path} to {new_path.name}")

        data = result._read()
        data["tag"] = new_tag
        result._patch(data, keep_date=keep_time)
        return result

    @classmethod
    def find_files(cls, folder: Path | str) -> list[BenchmarkSuiteFile]:
        folder = Path(folder)
        return [cls(p) for p in folder.glob("*.bench.json")]
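
# Usage sketch (illustrative; the folder and file names below are assumptions):
#
#     for bsf in BenchmarkSuiteFile.find_files("results"):
#         print(bsf.project, bsf.client_id, bsf.tag)
#
#     # Copy a suite file under a new tag (keeps the original):
#     bsf = BenchmarkSuiteFile("results/myproj.laptop.latest.bench.json")
#     bsf.save_tag("v1.0", replace=False)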


def is_running_on_ci() -> bool:
    return bool(os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS"))


def extract_items(
    d: dict[str, Any], keys: list[str], *, remove: bool = False
) -> dict[str, Any]:
    """Create a subset of a dictionary by extracting specific keys.

    Args:
        d (dict): The source dictionary.
        keys (list): The keys to extract.
        remove (bool): If True, the keys are removed from the source dictionary.
    """
    if remove:
        return {k: d.pop(k, None) for k in keys if k in d}
    return {k: d[k] for k in keys if k in d}
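
# Example (illustrative):
#
#     d = {"a": 1, "b": 2, "c": 3}
#     extract_items(d, ["a", "c"])               # -> {"a": 1, "c": 3}
#     extract_items(d, ["a", "x"], remove=True)  # -> {"a": 1}; "a" is popped from d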


def json_dump(data: Any, file: TextIOBase, *, pretty: bool) -> None:
    """Write data to a file in JSON format, compact or pretty."""
    if pretty:
        json.dump(data, file, indent=2, separators=(",", ": "), sort_keys=True)
    else:
        json.dump(data, file, indent=0, separators=(",", ":"), sort_keys=True)


class FileOrStdout:
    def __init__(self, out: Union[Path, str, TextIOBase, None] = None):
        self.out = out
        self.file: TextIOBase | None = None

    def __enter__(self) -> TextIOBase:
        if self.out is None:
            self.file = cast(TextIOBase, sys.stdout)
        elif isinstance(self.out, TextIOBase):
            self.file = self.out
        else:
            self.file = open(self.out, "w")
        return self.file

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        if self.file is not sys.stdout and self.file is not None:
            self.file.close()
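
# Usage sketch (illustrative): write to a file if a path is given, else to stdout.
#
#     with FileOrStdout("report.json") as f:  # or FileOrStdout(None) for stdout
#         json_dump({"hello": "world"}, f, pretty=True)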


class ExpressionFilter:
    """Filter a list of objects based on a rule string.

    Attribute comparison: `ATTRIBUTE OPERATOR VALUE`
    Logical AND: separate conditions with a comma `,`
    Logical OR: separate conditions with a semicolon `;`
        (Note: OR is part of the intended syntax; the current parser only
        splits on `,`, i.e. all conditions must match.)

    Example syntax:
        "name eq alice, age gt 20": Logical AND, matches objects where name is
            alice and age is greater than 20.
        "name eq alice; age gt 20": Logical OR, matches objects where name is
            alice or age is greater than 20.

    Supported operators:
        eq, ==: Equal to
        ne, !=, <>: Not equal to
        gt, >: Greater than
        ge, >=: Greater than or equal to
        lt, <: Less than
        le, <=: Less than or equal to
        *=: Contains (`!*`: does not contain)
        ^=: Starts with (`!^`: does not start with)

    Args:
        rule (str): The rule string, e.g. "name eq alice, age gt 20".
    """

    ops = {
        "!=": operator.ne,
        "*=": operator.contains,
        "!*": lambda a, b: not operator.contains(a, b),
        "^=": lambda a, b: str(a).startswith(str(b)),
        "!^": lambda a, b: not str(a).startswith(str(b)),
        "<": operator.lt,
        "<=": operator.le,
        "<>": operator.ne,
        "==": operator.eq,
        ">": operator.gt,
        ">=": operator.ge,
        "eq": operator.eq,
        "ge": operator.ge,
        "gt": operator.gt,
        "le": operator.le,
        "lt": operator.lt,
        "ne": operator.ne,
    }

    def __init__(self, rule: str):
        self.rule = rule
        self.conditions = self._parse_rule()

    def __repr__(self):
        return f"{self.__class__.__name__}<{self.rule}>"

    def _parse_rule(self) -> list[tuple]:
        conditions = []
        for cond in self.rule.split(","):
            attr, op, svalue = cond.strip().split()
            value: str | float = svalue
            try:
                value = float(svalue)
            except ValueError:
                pass
            conditions.append((attr, self.ops[op], value))
        return conditions

    def matches(self, obj: Any) -> bool:
        return all(op(getattr(obj, attr), value) for attr, op, value in self.conditions)

    def filter(self, object_list: list[Any]) -> list[Any]:
        return [obj for obj in object_list if self.matches(obj)]


def filter_objects(objects: list[Any], rule: str) -> list[Any]:
    """Filter a list of objects based on a rule string."""
    ef = ExpressionFilter(rule)
    return ef.filter(object_list=objects)
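
# Usage sketch (illustrative; `results` is an assumed list of objects with
# `name` and `ops` attributes):
#
#     ef = ExpressionFilter("name ^= bench_, ops gt 1000")
#     fast = ef.filter(results)
#     # ...or equivalently:
#     fast = filter_objects(results, "name ^= bench_, ops gt 1000")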


def split_tokens(s: str) -> list[str]:
    """Split a comma-separated string into tokens, stripping surrounding whitespace."""
    return [t.strip() for t in s.split(",")]


def singleton(cls):
    """
    A thread-safe decorator to ensure a class follows the Singleton
    design pattern.

    This decorator allows a class to have only one instance throughout
    the application. If the instance does not exist, it will create one;
    otherwise, it will return the existing instance. This implementation
    is thread-safe, ensuring that only one instance is created even in
    multithreaded environments.

    :param cls: The class to be decorated as a Singleton.
    :return: A function that returns the single instance of the class.
    """
    instances = {}
    lock = threading.Lock()

    def get_instance(*args, **kwargs) -> object:
        """
        Return a single instance of the decorated class, creating it
        if necessary.

        This function ensures that only one instance of the class exists.
        It uses a thread-safe approach to check if an instance of the class
        already exists in the `instances` dictionary. If it does not exist,
        it creates a new instance with the provided arguments. If it does
        exist, it returns the existing instance.

        :param args: Variable length argument list for the class constructor.
        :param kwargs: Arbitrary keyword arguments for the class constructor.
        :return: The single instance of the class.
        """
        with lock:
            if cls not in instances:
                instances[cls] = cls(*args, **kwargs)
            return instances[cls]

    return get_instance
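
# Usage sketch (illustrative; `Config` is an assumed example class):
#
#     @singleton
#     class Config:
#         def __init__(self):
#             self.debug = False
#
#     assert Config() is Config()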


# Characters that `sluggify()` keeps verbatim
allowed_slug = set(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.~()"
)


def sluggify(text: str) -> str:
    """
    Convert a string to a slug by replacing every character that is not in
    `allowed_slug` (letters, digits, and ``-_.~()``) with an underscore, then
    stripping leading and trailing ``_``, ``.``, ``-``, and ``~``.

    :param text: The input string to be converted to a slug.
    :return: The slug version of the input string.
    """
    return "".join(c if c in allowed_slug else "_" for c in text).strip("_.-~")
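
# Example (illustrative):
#
#     sluggify("My Test/Case #1")  # -> "My_Test_Case__1"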


def hash_string(s: str, *, length: int = 16) -> str:
    """
    Calculate a truncated SHA-256 hash value for a given string.

    :param s: The string to be hashed.
    :param length: Number of hex digits to return (default: 16).
    :return: The hex digest of the input string, truncated to `length` characters.
    """
    h = hashlib.sha256(s.encode()).hexdigest()
    if length <= len(h):
        return h[:length]  # Truncate to the desired length
    else:
        # If the requested length exceeds the hash length, pad with zeros
        return (h + "0" * length)[:length]
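
# Example (illustrative): derive a short, stable identifier, e.g. for a cache key.
#
#     key = hash_string("benchman|util|v1", length=8)  # 8 hex characters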


def smart_sort_key(val) -> Any:
    """Sort by numeric parts and string parts (pass to `sort(key=...)`)."""
    # org_val = val
    if isinstance(val, str):
        parts = []
        # Split the string into numeric (including ',' thousands separators)
        # and non-numeric parts
        for part in re.split(r"(\d[\d,]*)", val):
            # for part in re.split(r"(\d[\d,]*(?:\.\d+)?)", val):
            try:
                # Convert numeric parts to int, discarding thousands separators
                part = int(part.replace(",", ""))
                # print("int", repr(part))
            except ValueError:
                pass
            parts.append(part)
        if len(parts) > 1:
            val = tuple(parts)

    # print("sort", org_val, val)
    return val
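
# Example (illustrative): natural sort order for strings with embedded numbers.
#
#     sorted(["item10", "item2", "item1"], key=smart_sort_key)
#     # -> ["item1", "item2", "item10"]  (plain sorted() would yield item1, item10, item2)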


TimeUnitType = Literal["fsec", "psec", "nsec", "μsec", "msec", "sec"]

time_units: dict[TimeUnitType, float] = {
    "fsec": 1e-15,  # femto
    "psec": 1e-12,  # pico
    "nsec": 1e-9,  # nano
    "μsec": 1e-6,  # micro
    "msec": 1e-3,  # milli
    "sec": 1.0,
}

time_scales: list[tuple[float, TimeUnitType]] = [
    (scale, unit) for unit, scale in time_units.items()
]
# Sort from largest to smallest scale, i.e. [(1.0, "sec"), (1e-3, "msec"), ...]
time_scales.sort(reverse=True)


def get_time_unit(seconds: float) -> tuple[TimeUnitType, float]:
    """Return a (unit, scale) pair so that `seconds / scale` is >= 1.0 if possible
    (falls back to ("sec", 1.0))."""
    for scale, unit in time_scales:
        if seconds >= scale:
            return (unit, scale)
    return ("sec", 1.0)


def calculate_q1_q2_q3(data: list[float]) -> tuple[float, float, float]:
    """Calculate the first, second, and third quartiles of a list of numbers.

    Uses a simple index-based approximation (no interpolation).
    """
    data = sorted(data)
    n = len(data)
    q1 = data[n // 4]
    q2 = data[n // 2]
    q3 = data[(3 * n) // 4]
    return q1, q2, q3
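
# Example (illustrative):
#
#     calculate_q1_q2_q3([1, 2, 3, 4, 5, 6, 7, 8])  # -> (3, 5, 7)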


def format_time(
    seconds: float,
    *,
    unit: Union[TimeUnitType, None] = None,
    precision: int = 3,
) -> str:
    """Format a duration, auto-selecting a time unit unless one is given."""
    if unit is None:
        unit, scale = get_time_unit(seconds)
    else:
        scale = time_units[unit]

    return "{secs:,.{prec}f} {unit}".format(
        prec=precision, secs=seconds / scale, unit=unit
    )
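
# Examples (illustrative):
#
#     format_time(0.000123)                # -> "123.000 μsec"
#     format_time(0.000123, unit="msec")   # -> "0.123 msec"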


def byte_number_string(
    number: float,
    thousands_sep: bool = True,
    partition: bool = True,
    base1024: bool = False,
    append_bytes: bool = False,
    prec: int = 3,
) -> str:
    """Convert bytes into a human-readable representation."""
    magsuffix = ""
    bytesuffix = ""
    assert append_bytes in (False, True, "short", "iec")

    if partition:
        magnitude = 0
        if base1024:
            while number >= 1024:
                magnitude += 1
                # number = number >> 10
                number /= 1024.0
        else:
            while number >= 1000:
                magnitude += 1
                number /= 1000.0
        magsuffix = ["", "K", "M", "G", "T", "P"][magnitude]
        if magsuffix:
            magsuffix = " " + magsuffix

    if append_bytes:
        if append_bytes == "iec" and magsuffix:
            bytesuffix = "iB" if base1024 else "B"
        elif append_bytes == "short" and magsuffix:
            bytesuffix = "B"
        elif number == 1:
            bytesuffix = " Byte"
        else:
            bytesuffix = " Bytes"

    if thousands_sep and (number >= 1000 or magsuffix):
        # locale.setlocale(locale.LC_ALL, "")
        # TODO: make precision configurable
        if prec > 0:
            # fs = "%.{}f".format(prec)
            # snum = locale.format_string(fs, number, thousandsSep)
            snum = f"{number:,.{prec}g}"
        else:
            # snum = locale.format("%d", number, thousandsSep)
            snum = f"{number:,g}"
        # Some countries like France use a non-breaking space (hex=a0) as group
        # separator; that's not plain ASCII, so we would have to replace the
        # hex byte "a0" with "20" (space):
        # snum = hexlify(snum).replace("a0", "20").decode("hex")
    else:
        snum = str(number)

    return f"{snum}{magsuffix}{bytesuffix}"
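
# Examples (illustrative, decimal scaling with 3 significant digits):
#
#     byte_number_string(1234567, append_bytes="short")  # -> "1.23 MB"
#     byte_number_string(42, append_bytes=True)           # -> "42 Bytes"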


def get_machine_id() -> str:
    """Return a unique identifier for this machine."""
    # See https://stackoverflow.com/a/74058166/19166

    def run(cmd) -> Union[str, None]:
        try:
            return subprocess.run(
                cmd, shell=True, capture_output=True, check=True, encoding="utf-8"
            ).stdout.strip()
        except Exception:
            return None

    res: Union[str, None] = None

    if sys.platform == "darwin":
        res = run(
            "ioreg -d2 -c IOPlatformExpertDevice "
            "| awk -F\\\" '/IOPlatformUUID/{print $(NF-1)}'"
        )
    elif sys.platform == "win32" or sys.platform == "cygwin" or sys.platform == "msys":
        out = run("wmic csproduct get uuid")
        res = out.split("\n")[2].strip() if out else None
    elif sys.platform.startswith("linux"):
        res = run("cat /var/lib/dbus/machine-id") or run("cat /etc/machine-id")
    elif sys.platform.startswith("openbsd") or sys.platform.startswith("freebsd"):
        res = run("cat /etc/hostid") or run("kenv -q smbios.system.uuid")

    # Fall back to the MAC-address-based node id if no platform-specific id was found
    return res or str(uuid.getnode())