Coverage for /Users/martin/prj/git/benchman_pre/src/benchman/benchman.py: 38%
308 statements
# (c) 2024 Martin Wendt; see https://github.com/mar10/benchman
# Licensed under the MIT license: https://www.opensource.org/licenses/mit-license.php
"""Benchmark management: run, store, and compare micro-benchmark results."""

from __future__ import annotations

import json
import logging
import math
import time
import timeit
import warnings
from collections.abc import Iterator
from dataclasses import dataclass
from pathlib import Path
from typing import Any, cast

from typing_extensions import Self

from benchman.context_info import BaseContextInfo
from benchman.timings import TimingsResult, run_timings
from benchman.util import (
    TimeUnitType,
    byte_number_string,
    format_time,
    get_time_unit,
    json_dump,
    sluggify,
)

logger = logging.getLogger("benchman")


TAG_LATEST = "latest"
TAG_BASE = "base"


@dataclass
class IQRValues:
    """Quartiles and outlier bounds (interquartile range) for a timing vector."""

    q1: float
    q3: float
    iqr: float
    lower_bound: float
    upper_bound: float
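

# Illustrative sketch (not part of the original module): how the 1.5 * IQR
# outlier rule plays out for a small, made-up timing vector.
# With sorted timings [1.0, 1.1, 1.2, 1.3, 5.0] and n=5, the quartile picks
# used below (`timings[n // 4]` and `timings[3 * n // 4]`) give q1=1.1 and
# q3=1.3, so iqr is ~0.2, lower_bound is ~0.8 and upper_bound is ~1.6.
# The 5.0 sample lies above upper_bound and is therefore reported as an outlier.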


def get_benchmark_filepath(tag_or_path: Path | str, *, must_exist=False) -> Path:
    """Return the path to the benchmark file for the given tag or path."""
    if isinstance(tag_or_path, Path) or "." in tag_or_path:
        # Assume this is a path to a file
        path = Path(tag_or_path)
        if not str(path).endswith(".bench.json"):
            raise ValueError(f"Expected file extension '.bench.json': {path}")
    else:
        # Assume this is a tag
        bm = BenchmarkManager.singleton()
        file_name = bm.make_slug(tag=tag_or_path)
        path = bm.folder / f"{file_name}.bench.json"
        # path_list = list(folder.glob(f"*.{tag_or_path}.bench.json"))
        # if not path_list:
        #     raise FileNotFoundError(
        #         f"No benchmark file found for tag '{tag_or_path}' in {folder}"
        #     )
        # if len(path_list) > 1:
        #     msg = (
        #         f"Multiple benchmark files found for tag '{tag_or_path}' in {folder}:\n"
        #         f"    {path_list}"
        #     )
        #     raise ValueError(msg)
        # path = path_list[0]

    if must_exist and not path.is_file():
        raise FileNotFoundError(path)

    return path
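

# Illustrative sketch (not part of the original module): the two ways the
# helper resolves its argument. The file name shown for the tag case depends
# on the current project and client slug, so it is only an example.
#
#   get_benchmark_filepath("results/foo.bench.json")  # treated as a literal path
#   get_benchmark_filepath("base")  # resolved to "<project>.<client>.base.bench.json"
#                                   # inside the manager's .benchman folder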


class Benchmark:
    """One single micro benchmark run.

    Note: it's tempting to calculate mean and standard deviation from the result
    vector and report these. However, this is not very useful.
    In a typical case, the lowest value gives a lower bound for how fast your
    machine can run the given code snippet; higher values in the result vector
    are typically not caused by variability in Python's speed, but by other
    processes interfering with your timing accuracy.
    So the min() of the result is probably the only number you should be
    interested in.
    After that, you should look at the entire vector and apply common sense
    rather than statistics.
    """

    def __init__(
        self, benchmark_manager: BenchmarkManager, name: str, *, variant: str = ""
    ):
        assert name, "name must not be empty"

        self.benchmark_manager: BenchmarkManager = benchmark_manager
        #: A name for this benchmark.
        self.name: str = name.strip()
        #: A variant name for this benchmark run (optional, defaults to "").
        self.variant: str = variant.strip()
        #: Python version number
        self.python: str = ""
        #: Start time of this benchmark run
        self.start_time: float = 0.0
        #: Total time for the whole benchmark loop
        self.elap: float = 0.0
        #: Informational detail, e.g. the number of items processed in one run.
        #: Can be used to evaluate the impact of the sample size on the performance.
        self.sample_size: int = 1
        #: Number of iterations in one run (used for 'items per sec.')
        self.iterations: int = 0
        #: List of timings for each run divided by `iterations`, i.e. 'seconds per
        #: iteration'
        self.timings: list[float] = []
        # The interquartile range (IQR) is a measure of statistical dispersion
        # (cached for performance).
        self._iqr_values: IQRValues | None = None

    def __str__(self) -> str:
        return self.to_str()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}<{self.full_name}, {self.elap}s>"

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Benchmark):
            return NotImplemented
        return self.min < other.min

    def to_str(self, *, time_unit: TimeUnitType | None = None) -> str:
        return "{}: {:,d} loop{}, best of {:,}: {} per loop ({} per sec.)".format(
            self.full_name,
            self.iterations,
            "" if self.iterations == 1 else "s",
            self.repeat,
            format_time(self.min, unit=time_unit),
            byte_number_string(self.iterations / self.min),
        )

    @property
    def full_name(self) -> str:
        variant = self.variant
        if self.sample_size > 1:
            if variant:
                variant += ", "
            variant += f"n={self.sample_size:,}"
        return f"{self.name}({variant})" if variant else self.name

    @property
    def version(self) -> str:
        return self.benchmark_manager.project_version

    @property
    def repeat(self) -> int:
        return len(self.timings)

    @property
    def min(self) -> float:
        return min(self.timings)

    @property
    def max(self) -> float:
        return max(self.timings)

    @property
    def mean(self) -> float:
        """Return the arithmetic average time per iteration, aka 'X̄'."""
        return sum(self.timings) / len(self.timings)

    @property
    def stdev(self) -> float:
        """Return the standard deviation of the time per iteration (aka SD, σ)."""
        n = len(self.timings)

        if n <= 1:
            return 0.0
        mean: float = self.mean
        return math.sqrt(sum((x - mean) ** 2 for x in self.timings) / n)

    @property
    def median(self) -> float:
        """Return the median time per iteration (aka med(x))."""
        timings = sorted(self.timings)
        n = len(timings)
        if n % 2 == 0:
            return (timings[n // 2 - 1] + timings[n // 2]) / 2
        return timings[n // 2]

    def _calc_iqr(self) -> IQRValues:
        if not self._iqr_values:
            timings = sorted(self.timings)
            n = len(timings)
            q1 = timings[n // 4]
            q3 = timings[3 * n // 4]
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr
            self._iqr_values = IQRValues(
                q1=q1,
                q3=q3,
                iqr=iqr,
                lower_bound=lower_bound,
                upper_bound=upper_bound,
            )
        return self._iqr_values

    @property
    def q1(self) -> float:
        return self._calc_iqr().q1

    @property
    def q3(self) -> float:
        return self._calc_iqr().q3

    @property
    def iqr(self) -> float:
        return self._calc_iqr().iqr

    @property
    def ops(self) -> float:
        return self.iterations / self.min

    @property
    def ops_rel(self) -> float:
        return self.iterations / self.min / self.sample_size

    @property
    def outliers(self) -> list[float]:
        """Return a list of timings that are considered outliers."""
        iqrv = self._calc_iqr()
        # https://en.wikipedia.org/wiki/Outlier
        # https://en.wikipedia.org/wiki/Interquartile_range
        return [x for x in self.timings if x < iqrv.lower_bound or x > iqrv.upper_bound]

    def slug(self) -> str:
        ctx = self.benchmark_manager.context
        v = ctx.project.version
        py = ctx.python.implementation_version(strip_patch=True)
        return sluggify(f"v{v}_{py}_{self.full_name}")

    def __enter__(self) -> Self:
        self.start_time = time.monotonic()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.elap = time.monotonic() - self.start_time

    def loaded_state(self) -> dict[str, Any]:
        lctx = self.benchmark_manager.loaded_context

        res = {
            "name": self.name,
            "variant": self.variant,
            "python": self.python,
            "sample_size": self.sample_size,
        }
        res.update(lctx)
        return res

    def to_dict(self, add_meta: bool = True) -> dict[str, Any]:
        res = {
            "name": self.name,
            "variant": self.variant,
            "start_time": self.start_time,
            "elap": self.elap,
            "iterations": self.iterations,
            "sample_size": self.sample_size,
            "timings": self.timings,
        }
        if add_meta:
            ctx = self.benchmark_manager.context
            res.update(
                {
                    "python": ctx.python.version,
                    "project": ctx.project.version,
                    "debug_mode": ctx.python.debug_mode,
                    "hardware": ctx.hw.slug(),
                    "system": ctx.os.slug(),
                    "client": ctx.client_slug(),
                }
            )
        return res

    def save(self):
        folder = self.benchmark_manager.folder
        path = folder / f"{self.slug()}.bmr.json"
        with path.open("w") as f:
            json_dump(self.to_dict(), f, pretty=True)

    @classmethod
    def from_dict(cls, bm: BenchmarkManager, item: dict[str, Any]) -> Self:
        self = cls(bm, item["name"], variant=item.get("variant", ""))
        self.start_time = item["start_time"]
        self.timings = item["timings"]
        self.sample_size = item["sample_size"]
        self.iterations = item["iterations"]
        self.python = item["python"]
        self.variant = item["variant"]
        return self
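

# Illustrative sketch (not part of the original module): round-tripping a single
# result through the dict form used for JSON storage. `bm` is assumed to be a
# configured BenchmarkManager and `benchmark` a finished Benchmark instance.
#
#   data = benchmark.to_dict()             # plain, JSON-serializable dict
#   clone = Benchmark.from_dict(bm, data)  # restores timings, iterations, ...
#   assert clone.min == benchmark.min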


class BenchmarkRunner:
    """Define default arguments for subsequent calls to `.run()`."""

    def __init__(
        self,
        *,
        bm: BenchmarkManager | None = None,
        #: A name for this benchmark run.
        name: str,
        #: A variant name for this benchmark run.
        variant: str = "",
        #: A setup statement to execute before the main statement (not timed).
        setup: str = "pass",
        #: Verbosity level (0: quiet, 1: normal, 2: verbose)
        verbose: int = 0,
        #: Number of times to repeat the test.
        repeat: int = 5,
        #: Number of loops to run. If 0, `timeit` will determine the iterations
        #: automatically.
        iterations: int = 0,
        #: Informational detail, e.g. the number of items processed in one run.
        sample_size: int = 1,
        #: A dict containing the global variables.
        globals: dict[str, Any] | None = None,
        #: Use `time.process_time` instead of `time.monotonic` for measuring CPU time.
        process_time: bool = False,
        #: A group name for this benchmark run.
        group: str = "",
        #: Save results to disk.
        save_results: bool = True,
    ):
        self.run_list: list[Benchmark] = []
        self.benchmark_manager = bm or BenchmarkManager.singleton()
        self.name = name
        self.variant = variant
        self.setup = setup
        self.verbose = verbose
        self.repeat = repeat
        self.iterations = iterations
        self.sample_size = sample_size
        self.globals = globals
        self.process_time = process_time
        self.group = group
        self.save_results = save_results

    def run(
        self,
        stmt: str,
        *,
        variant: str,
        setup: str | None = None,
        verbose: int | None = None,
        repeat: int | None = None,
        iterations: int | None = None,
        sample_size: int | None = None,
        globals: dict[str, Any] | None = None,
        process_time: bool | None = None,
        group: str | None = None,
        save_results: bool | None = None,
    ) -> Benchmark:
        bm = self.benchmark_manager
        res = bm.run_timings(
            name=self.name,
            stmt=stmt,
            variant=variant,
            setup=setup if setup is not None else self.setup,
            verbose=verbose if verbose is not None else self.verbose,
            repeat=repeat if repeat is not None else self.repeat,
            iterations=iterations if iterations is not None else self.iterations,
            sample_size=sample_size if sample_size is not None else self.sample_size,
            globals=globals if globals is not None else self.globals,
            process_time=process_time
            if process_time is not None
            else self.process_time,
            group=group if group is not None else self.group,
            save_results=save_results
            if save_results is not None
            else self.save_results,
        )
        self.run_list.append(res)
        return res

    def print(self):
        print(f"BenchmarkRunner: {self.name}")  # noqa: T201
        for i, benchmark in enumerate(sorted(self.run_list), 1):
            print(f"  {i}: {benchmark}")  # noqa: T201
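

# Illustrative sketch (not part of the original module): a runner bundles the
# shared arguments so that several variants can be timed with short `.run()`
# calls. The statement, globals, and names below are made up for the example.
#
#   bm = BenchmarkManager.singleton()
#   runner = bm.make_runner(name="sort", repeat=5, globals={"data": list(range(1000))})
#   runner.run("sorted(data)", variant="sorted")
#   runner.run("data.sort()", variant="list.sort", setup="data = list(data)")
#   runner.print()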


class BenchmarkManager:
    """Manage a suite of multiple benchmarks."""

    # DEFAULT_OPTIONS = {
    #     "results_file": "benchman-results.json",
    # }

    def __init__(self, *, path: Path | str | None = None, create_folder=True) -> None:
        #: The context for this benchmark run.
        self.context = BaseContextInfo(path=path)
        #: A tag for this benchmark run (optional, defaults to "latest").
        self.tag: str = "latest"
        #: Combine date of a loaded benchmark file (or None).
        self.combine_date: str | None = None
        #: Context information restored from a loaded benchmark file.
        self.loaded_context: dict[str, Any] = {}
        #: A list of all benchmarks, grouped by group name.
        self.benchmarks: dict[str, list[Benchmark]] = {"": []}

        self.folder: Path = self.context.project.root_folder / ".benchman"
        if create_folder:
            self.folder.mkdir(parents=False, exist_ok=True)

        # Load options from pyproject.toml `[tool.benchman]`
        self.options: dict[str, Any] = {}
        pyproject_toml = self.context.project.pyproject_toml
        if pyproject_toml:
            self.options.update(pyproject_toml.get("tool", {}).get("benchman", {}))

        #: self.timer = timing.
        self.timer = timeit.default_timer
        # if process_time:
        #     self.timer = time.process_time

        # pprint.pprint(self.context.to_dict())

    def __repr__(self):
        return (
            f"{self.__class__.__name__}<{self.context}, "
            f"n={len(list(self.iter_benchmarks()))}>"
        )

    def count(self):
        return len(list(self.iter_benchmarks()))

    _global_benchman: Self | None = None

    @classmethod
    def singleton(cls) -> Self:
        """Return the global `BenchmarkManager` instance."""
        if cls._global_benchman is None:
            cls._global_benchman = cls()
        assert cls._global_benchman
        return cast(Self, cls._global_benchman)

    @property
    def project_name(self) -> str:
        return self.context.project.name

    @property
    def project_version(self) -> str:
        return self.context.project.version

    def make_slug(self, *, tag: str | None = None) -> str:
        sl = [
            self.project_name,
            self.context.client_slug(),
        ]
        if not tag:
            tag = "latest"

        if tag in ("base", "latest"):
            # pv = self.project_version.replace(".", "_")
            # sl.append(f"v{pv}_{tag}")
            sl.append(tag)
        elif tag:
            sl.append(tag)

        return ".".join(sl)
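
    # Illustrative sketch (not part of the original module): the slug joins
    # project name, client slug, and tag. The client slug shown here is made up;
    # it depends on the local machine and environment.
    #
    #   bm.make_slug(tag="base")  ->  "benchman.some-client.base"
    #   bm.make_slug()            ->  "benchman.some-client.latest"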

    def iter_benchmarks(
        self, *, group: str | None = None, name: str | None = None
    ) -> Iterator[Benchmark]:
        if group is None:
            assert name is None
            for _group, benchmarks in self.benchmarks.items():
                yield from benchmarks
        elif name:
            for bench in self.benchmarks.get(group, []):
                if bench.name == name:
                    yield bench
        else:
            yield from self.benchmarks.get(group, [])

    def get_best(
        self, *, group: str | None = None, name: str | None = None
    ) -> Benchmark | None:
        """Return the benchmark with the best runtime."""
        assert self.benchmarks
        best: Benchmark | None = None
        for b in self.iter_benchmarks(group=group, name=name):
            if not best or b.min < best.min:
                best = b
        return best

    def get_best_time_unit(
        self, *, group: str | None = None, name: str | None = None
    ) -> TimeUnitType:
        """Return the time unit of the benchmark with the best runtime."""
        best = self.get_best(group=group, name=name)
        if best is None:
            return "sec"
        unit, _scale = get_time_unit(best.min)
        return unit

    def _path_and_prefix(self, *, group: str) -> tuple[Path, str]:
        path = self.folder / ".benchman" / self.context.slug()
        prefix = "$".join([group])
        return path, prefix

    def add_benchmark(self, benchmark: Benchmark, *, group: str = "") -> None:
        if group not in self.benchmarks:
            self.benchmarks[group] = []
        self.benchmarks[group].append(benchmark)

    def save(self):
        pass

    @classmethod
    def load(cls, path: Path | str) -> Self:
        path = Path(path)
        if not path.is_file():
            raise FileNotFoundError(path)

        self = cls(path=path.parent, create_folder=False)

        with path.open("r") as f:
            content = json.load(f)
            self.tag = content.get("tag", "latest")
            self.combine_date = content.get("combine_date", None)
            self.loaded_context = content["context"]

        for item in content["data"]:
            bmr = Benchmark.from_dict(self, item)
            self.add_benchmark(bmr)
        return self

    def compare_results(self, other):
        pass
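
    # Illustrative sketch (not part of the original module): loading a saved
    # suite by tag and printing it. Relies on get_benchmark_filepath() defined
    # above; the "base" tag must have been saved before.
    #
    #   path = get_benchmark_filepath("base", must_exist=True)
    #   bm = BenchmarkManager.load(path)
    #   bm.print_results()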

    def format_results(self) -> list[str]:
        results = []
        # Iterate groups (in insertion order)
        for group, benchmarks in self.benchmarks.items():
            results.append(f"Group: {group or 'default'}")
            # TODO: use get_best_time_unit() to unify unit for the group?
            # Sort by best time
            for i, benchmark in enumerate(sorted(benchmarks), 1):
                results.append(f"  {i}: {benchmark}")
                # ol = benchmark.outliers
                # results.append(f"  {i}: {benchmark}, {len(ol)} outliers")

        return results

    def print_results(self):
        for line in self.format_results():
            print(line)  # noqa: T201

    def run_timings(
        self,
        #: A name for this benchmark run.
        name: str,
        *,
        #: The statement to be timed.
        stmt: str,
        #: A variant name for this benchmark run.
        variant: str = "",
        #: A setup statement to execute before the main statement (not timed).
        setup: str = "pass",
        #: Verbosity level (0: quiet, 1: normal, 2: verbose)
        verbose: int = 0,
        #: Number of times to repeat the test.
        repeat: int = 5,
        #: Number of loops to run. If 0, `timeit` will determine the iterations
        #: automatically.
        iterations: int = 0,
        #: Informational detail, e.g. the number of items processed in one run.
        sample_size: int = 1,
        #: A dict containing the global variables.
        globals: dict[str, Any] | None = None,
        #: Use `time.process_time` instead of `time.monotonic` for measuring CPU time.
        process_time: bool = False,
        #: A group name for this benchmark run.
        group: str = "",
        #: Save results to disk.
        save_results: bool = True,
    ) -> Benchmark:
        """Run `stmt` in a loop and return a `Benchmark` object."""
        if self.context.python.debug_mode:
            warnings.warn(
                "Application is running in debug mode. "
                "This may be due to coverage, a debugger, or other instrumentation. "
                "Performance timings may be affected!",
                stacklevel=2,
            )

        start: float = time.monotonic()
        res: TimingsResult = run_timings(
            name=name,
            stmt=stmt,
            setup=setup,
            verbose=verbose,
            repeat=repeat,
            iterations=iterations,
            globals=globals,
            process_time=process_time,
        )
        elap = time.monotonic() - start

        benchmark = Benchmark(self, name, variant=variant)
        benchmark.start_time = start
        benchmark.elap = elap
        benchmark.iterations = res.iterations
        benchmark.sample_size = sample_size
        benchmark.timings = res.timings.copy()

        self.add_benchmark(benchmark, group=group)
        if save_results:
            benchmark.save()
        return benchmark

    def report(self, format: str = "terminal") -> None:
        self.print_results()

    def make_runner(
        self,
        *,
        name: str,
        variant: str = "",
        setup: str = "pass",
        verbose: int = 0,
        repeat: int = 5,
        iterations: int = 0,
        sample_size: int = 1,
        globals: dict[str, Any] | None = None,
        process_time: bool = False,
        group: str = "",
        save_results: bool = True,
    ) -> BenchmarkRunner:
        bmr = BenchmarkRunner(
            bm=self,
            name=name,
            variant=variant,
            setup=setup,
            verbose=verbose,
            repeat=repeat,
            iterations=iterations,
            sample_size=sample_size,
            globals=globals,
            process_time=process_time,
            group=group,
            save_results=save_results,
        )
        return bmr
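

if __name__ == "__main__":
    # Illustrative sketch (not part of the original module): time a made-up
    # statement and print the collected results. The statement, variant, and
    # group name are examples only; `save_results=False` avoids writing files.
    # Assumes benchman can detect the project context from the current
    # working directory.
    _bm = BenchmarkManager.singleton()
    _bm.run_timings(
        "join",
        stmt="';'.join(str(i) for i in range(1000))",
        variant="genexp",
        repeat=3,
        group="demo",
        save_results=False,
    )
    _bm.print_results()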