Coverage for /Users/martin/prj/git/benchman_pre/src/benchman/benchman.py: 38%

308 statements  

coverage.py v7.6.4, created at 2024-12-24 08:16 +0100

# (c) 2024 Martin Wendt; see https://github.com/mar10/benchman
# Licensed under the MIT license: https://www.opensource.org/licenses/mit-license.php
""" """

from __future__ import annotations

import json
import logging
import math
import time
import timeit
import warnings
from collections.abc import Iterator
from pathlib import Path
from typing import Any, cast

from typing_extensions import Self, dataclass_transform

from benchman.context_info import BaseContextInfo
from benchman.timings import TimingsResult, run_timings
from benchman.util import (
    TimeUnitType,
    byte_number_string,
    format_time,
    get_time_unit,
    json_dump,
    sluggify,
)

logger = logging.getLogger("benchman")


TAG_LATEST = "latest"
TAG_BASE = "base"


@dataclass_transform()
class IQRValues:
    q1: float
    q3: float
    iqr: float
    lower_bound: float
    upper_bound: float


def get_benchmark_filepath(tag_or_path: Path | str, *, must_exist=False) -> Path:
    """Return the path to the benchmark file for the given tag or path."""

    if isinstance(tag_or_path, Path) or "." in tag_or_path:
        # Assume this is a path to a file
        path = Path(tag_or_path)
        if not str(path).endswith(".bench.json"):
            raise ValueError(f"Expected file extension '.bench.json': {path}")
    else:
        # Assume this is a tag
        bm = BenchmarkManager.singleton()
        file_name = bm.make_slug(tag=tag_or_path)
        path = bm.folder / f"{file_name}.bench.json"

    # path_list = list(folder.glob(f"*.{tag_or_path}.bench.json"))
    # if not path_list:
    #     raise FileNotFoundError(
    #         f"No benchmark file found for tag '{tag_or_path}' in {folder}"
    #     )
    # if len(path_list) > 1:
    #     msg = (
    #         f"Multiple benchmark files found for tag '{tag_or_path}' in {folder}:\n"
    #         f"    {path_list}"
    #     )
    #     raise ValueError(msg)
    # path = path_list[0]

    if must_exist and not path.is_file():
        raise FileNotFoundError(path)

    return path

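# Illustrative sketch (not part of the module): how `get_benchmark_filepath()`
# dispatches between an explicit path and a tag. The folder and slug shown
# below are hypothetical; real values depend on the project context.
#
#   get_benchmark_filepath("results/myproj.host.latest.bench.json")
#       -> Path("results/myproj.host.latest.bench.json")  # contains "." => treated as a path
#   get_benchmark_filepath("results/foo.json")
#       -> ValueError (extension must be ".bench.json")
#   get_benchmark_filepath("latest")
#       -> <BenchmarkManager.folder> / f"{make_slug(tag='latest')}.bench.json"
#   get_benchmark_filepath("latest", must_exist=True)
#       -> FileNotFoundError if that file does not exist yet
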

class Benchmark:
    """One single micro benchmark run.

    Note: it's tempting to calculate mean and standard deviation from the result
    vector and report these. However, this is not very useful.
    In a typical case, the lowest value gives a lower bound for how fast your
    machine can run the given code snippet; higher values in the result vector
    are typically not caused by variability in Python's speed, but by other
    processes interfering with your timing accuracy.
    So the min() of the result is probably the only number you should be
    interested in.
    After that, you should look at the entire vector and apply common sense
    rather than statistics.
    """

    def __init__(
        self, benchmark_manager: BenchmarkManager, name: str, *, variant: str = ""
    ):
        assert name, "name must not be empty"

        self.benchmark_manager: BenchmarkManager = benchmark_manager
        #: A name for this benchmark.
        self.name: str = name.strip()
        #: A variant name for this benchmark run (optional, defaults to "").
        self.variant: str = variant.strip()
        #: Python version number
        self.python: str = ""
        #: Start time of this benchmark run
        self.start_time: float = 0.0
        #: Total time for the whole benchmark loop
        self.elap: float = 0.0
        #: Informational detail, e.g. the number of items processed in one run.
        #: Can be used to evaluate the impact of the sample size on the performance.
        self.sample_size: int = 1
        #: Number of iterations in one run (used for 'items per sec.')
        self.iterations: int = 0
        #: List of timings for each run divided by `iterations`, i.e. 'seconds per
        #: iteration'
        self.timings: list[float] = []
        # The interquartile range (IQR) is a measure of statistical dispersion
        # (cached for performance).
        self._iqr_values: IQRValues | None = None

    def __str__(self) -> str:
        return self.to_str()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}<{self.full_name}, {self.elap}s>"

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Benchmark):
            return NotImplemented
        return self.min < other.min

    def to_str(self, *, time_unit: TimeUnitType | None = None) -> str:
        return "{}: {:,d} loop{}, best of {:,}: {} per loop ({} per sec.)".format(
            self.full_name,
            self.iterations,
            "" if self.iterations == 1 else "s",
            self.repeat,
            format_time(self.min, unit=time_unit),
            byte_number_string(self.iterations / self.min),
        )

    @property
    def full_name(self) -> str:
        variant = self.variant
        if self.sample_size > 1:
            if variant:
                variant += ", "
            variant += f"n={self.sample_size:,}"
        return f"{self.name}({variant})" if variant else self.name

    @property
    def version(self) -> str:
        return self.benchmark_manager.project_version

    @property
    def repeat(self) -> int:
        return len(self.timings)

    @property
    def min(self) -> float:
        return min(self.timings)

    @property
    def max(self) -> float:
        return max(self.timings)

    @property
    def mean(self) -> float:
        """Return the arithmetic average time per iteration, aka 'X̄'."""
        return sum(self.timings) / len(self.timings)

    @property
    def stdev(self) -> float:
        """Return the standard deviation of the time per iteration (aka SD, σ)."""
        n = len(self.timings)

        if n <= 1:
            return 0.0
        mean: float = self.mean
        return math.sqrt(sum((x - mean) ** 2 for x in self.timings) / n)

    @property
    def median(self) -> float:
        """Return the median time per iteration (aka med(x))."""
        timings = sorted(self.timings)
        n = len(timings)
        if n % 2 == 0:
            return (timings[n // 2 - 1] + timings[n // 2]) / 2
        return timings[n // 2]

    def _calc_iqr(self) -> IQRValues:
        if not self._iqr_values:
            timings = sorted(self.timings)
            n = len(timings)
            q1 = timings[n // 4]
            q3 = timings[3 * n // 4]
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr
            self._iqr_values = IQRValues(
                q1=q1,  # type: ignore
                q3=q3,  # type: ignore
                iqr=iqr,  # type: ignore
                lower_bound=lower_bound,  # type: ignore
                upper_bound=upper_bound,  # type: ignore
            )
        return self._iqr_values

    @property
    def q1(self) -> float:
        return self._calc_iqr().q1

    @property
    def q3(self) -> float:
        return self._calc_iqr().q3

    @property
    def iqr(self) -> float:
        return self._calc_iqr().iqr

    @property
    def ops(self) -> float:
        return self.iterations / self.min

    @property
    def ops_rel(self) -> float:
        return self.iterations / self.min / self.sample_size

    @property
    def outliers(self) -> list[float]:
        """Return a list of timings that are considered outliers."""
        iqrv = self._calc_iqr()
        # https://en.wikipedia.org/wiki/Outlier
        # https://en.wikipedia.org/wiki/Interquartile_range
        return [x for x in self.timings if x < iqrv.lower_bound or x > iqrv.upper_bound]

    def slug(self) -> str:
        ctx = self.benchmark_manager.context
        v = ctx.project.version
        py = ctx.python.implementation_version(strip_patch=True)
        return sluggify(f"v{v}_{py}_{self.full_name}")

    def __enter__(self) -> Self:
        self.start_time = time.monotonic()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.elap = time.monotonic() - self.start_time

    def loaded_state(self) -> dict[str, Any]:
        lctx = self.benchmark_manager.loaded_context

        res = {
            "name": self.name,
            "variant": self.variant,
            "python": self.python,
            "sample_size": self.sample_size,
        }
        res.update(lctx)
        return res

    def to_dict(self, add_meta: bool = True) -> dict[str, Any]:
        res = {
            "name": self.name,
            "variant": self.variant,
            "start_time": self.start_time,
            "elap": self.elap,
            "iterations": self.iterations,
            "sample_size": self.sample_size,
            "timings": self.timings,
        }
        if add_meta:
            ctx = self.benchmark_manager.context
            res.update(
                {
                    "python": ctx.python.version,
                    "project": ctx.project.version,
                    "debug_mode": ctx.python.debug_mode,
                    "hardware": ctx.hw.slug(),
                    "system": ctx.os.slug(),
                    "client": ctx.client_slug(),
                }
            )
        return res

    def save(self):
        folder = self.benchmark_manager.folder
        path = folder / f"{self.slug()}.bmr.json"
        _ = path.exists()
        with path.open("w") as f:
            json_dump(self.to_dict(), f, pretty=True)

    @classmethod
    def from_dict(cls, bm: BenchmarkManager, item: dict[str, Any]) -> Self:
        self = cls(bm, item["name"], variant=item.get("variant", ""))
        self.start_time = item["start_time"]
        self.timings = item["timings"]
        self.sample_size = item["sample_size"]
        self.iterations = item["iterations"]
        self.python = item["python"]
        self.variant = item["variant"]
        return self

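# Illustrative sketch (not part of the module): the statistics that `Benchmark`
# derives from its `timings` vector, applied to made-up numbers. It mirrors the
# formulas in `median`, `_calc_iqr()` and `outliers` above; per the class
# docstring, `min()` is usually the number that matters for comparisons.
#
#   timings = sorted([0.101, 0.102, 0.103, 0.104, 0.180])  # seconds per iteration
#   n = len(timings)                                 # 5
#   median = timings[n // 2]                         # 0.103 (odd n)
#   q1, q3 = timings[n // 4], timings[3 * n // 4]    # 0.102, 0.104
#   iqr = q3 - q1                                    # ~0.002
#   lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr    # ~0.099, ~0.107
#   outliers = [t for t in timings if t < lower or t > upper]  # [0.180]
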

class BenchmarkRunner:
    """Define default arguments for subsequent calls to `.run()`."""

    def __init__(
        self,
        *,
        bm: BenchmarkManager | None = None,
        #: A name for this benchmark run.
        name: str,
        #: A variant name for this benchmark run.
        variant: str = "",
        #: A setup statement to execute before the main statement (not timed).
        setup: str = "pass",
        #: Verbosity level (0: quiet, 1: normal, 2: verbose)
        verbose: int = 0,
        #: Number of times to repeat the test.
        repeat: int = 5,
        #: Number of loops to run. If 0, `timeit` will determine the iterations
        #: automatically.
        iterations: int = 0,
        #:
        sample_size: int = 1,
        #: A dict containing the global variables.
        globals: dict[str, Any] | None = None,
        #: Use `time.process_time` instead of `time.monotonic` for measuring CPU time.
        process_time: bool = False,
        #: A group name for this benchmark run.
        group: str = "",
        #: Save results to disk.
        save_results: bool = True,
    ):
        self.run_list: list[Benchmark] = []
        self.benchmark_manager = bm or BenchmarkManager.singleton()
        self.name = name
        self.variant = variant
        self.setup = setup
        self.verbose = verbose
        self.repeat = repeat
        self.iterations = iterations
        self.sample_size = sample_size
        self.globals = globals
        self.process_time = process_time
        self.group = group
        self.save_results = save_results

    def run(
        self,
        stmt: str,
        *,
        variant: str,
        setup: str | None = None,
        verbose: int | None = None,
        repeat: int | None = None,
        iterations: int | None = None,
        sample_size: int | None = None,
        globals: dict[str, Any] | None = None,
        process_time: bool | None = None,
        group: str | None = None,
        save_results: bool | None = None,
    ):
        bm = self.benchmark_manager
        res = bm.run_timings(
            name=self.name,
            stmt=stmt,
            variant=variant,
            setup=setup if setup is not None else self.setup,
            verbose=verbose if verbose is not None else self.verbose,
            repeat=repeat if repeat is not None else self.repeat,
            iterations=iterations if iterations is not None else self.iterations,
            sample_size=sample_size if sample_size is not None else self.sample_size,
            globals=globals if globals is not None else self.globals,
            process_time=process_time
            if process_time is not None
            else self.process_time,
            group=group if group is not None else self.group,
            save_results=save_results
            if save_results is not None
            else self.save_results,
        )
        self.run_list.append(res)

    def print(self):
        print(f"BenchmarkRunner: {self.name}")  # noqa: T201
        for i, benchmark in enumerate(sorted(self.run_list), 1):
            print(f"  {i}: {benchmark}")  # noqa: T201

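# Illustrative sketch (not part of the module): a runner bundles shared defaults,
# and each `.run()` call times one statement variant via `run_timings()`. The
# statements and the `data` global below are hypothetical; without `bm=...` the
# runner falls back to `BenchmarkManager.singleton()`.
#
#   runner = BenchmarkRunner(
#       name="sort",
#       repeat=5,
#       globals={"data": list(range(1_000))},
#       save_results=False,
#   )
#   runner.run("sorted(data)", variant="ascending")
#   runner.run("sorted(data, reverse=True)", variant="descending")
#   runner.print()  # lists the collected runs, fastest (smallest min) first
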

class BenchmarkManager:
    """Manage a suite of multiple benchmarks."""

    # DEFAULT_OPTIONS = {
    #     "results_file": "benchman-results.json",
    # }

    def __init__(self, *, path: Path | str | None = None, create_folder=True) -> None:
        #: The context for this benchmark run.
        self.context = BaseContextInfo(path=path)
        #: A tag for this benchmark run (optional, defaults to "latest").
        self.tag: str = "latest"
        #:
        self.combine_date: str | None = None
        #:
        self.loaded_context: dict[str, Any] = {}
        #: A list of all benchmarks, grouped by group name.
        self.benchmarks: dict[str, list[Benchmark]] = {"": []}

        self.folder: Path = self.context.project.root_folder / ".benchman"
        if create_folder:
            self.folder.mkdir(parents=False, exist_ok=True)

        # Load options from pyproject.toml `[tool.benchman]`
        self.options: dict[str, Any] = {}
        pyproject_toml = self.context.project.pyproject_toml
        if pyproject_toml:
            self.options.update(pyproject_toml.get("tool", {}).get("benchman", {}))

        #: self.timer = timing.
        self.timer = timeit.default_timer
        # if process_time:
        #     self.timer = time.process_time

        # pprint.pprint(self.context.to_dict())
        return

    def __repr__(self):
        return (
            f"{self.__class__.__name__}<{self.context}, "
            f"n={len(list(self.iter_benchmarks()))}>"
        )

    def count(self):
        return len(list(self.iter_benchmarks()))

    _global_benchman: Self | None = None

    @classmethod
    def singleton(cls) -> Self:
        """Return the global `BenchmarkManager` instance."""
        if cls._global_benchman is None:
            cls._global_benchman = cls()
        assert cls._global_benchman
        return cast(Self, cls._global_benchman)

    @property
    def project_name(self) -> str:
        return self.context.project.name

    @property
    def project_version(self) -> str:
        return self.context.project.version

    def make_slug(self, *, tag: str | None = None) -> str:
        sl = [
            self.project_name,
            self.context.client_slug(),
        ]
        if not tag:
            tag = "latest"

        if tag in ("base", "latest"):
            # pv = self.project_version.replace(".", "_")
            # sl.append(f"v{pv}_{tag}")
            sl.append(tag)
        elif tag:
            sl.append(tag)

        return ".".join(sl)

    def iter_benchmarks(
        self, *, group: str | None = None, name: str | None = None
    ) -> Iterator[Benchmark]:
        if group is None:
            assert name is None
            for _group, benchmarks in self.benchmarks.items():
                yield from benchmarks
        elif name:
            for bench in self.benchmarks.get(group, []):
                if bench.name == name:
                    yield bench
        else:
            yield from self.benchmarks.get(group, [])

    def get_best(
        self, *, group: str | None = None, name: str | None = None
    ) -> Benchmark | None:
        """Return the benchmark with the best runtime."""
        assert self.benchmarks
        best: Benchmark | None = None
        for b in self.iter_benchmarks(group=group, name=name):
            if not best or b.min < best.min:
                best = b
        return best

    def get_best_time_unit(
        self, *, group: str | None = None, name: str | None = None
    ) -> TimeUnitType:
        """Return the time unit of the benchmark with the best runtime."""
        best = self.get_best(group=group, name=name)
        if best is None:
            return "sec"
        unit, _scale = get_time_unit(best.min)
        return unit

    def _path_and_prefix(self, *, group: str) -> tuple[Path, str]:
        path = self.folder / ".benchman" / self.context.slug()
        prefix = "$".join([group])
        return path, prefix

    def add_benchmark(self, benchmark: Benchmark, *, group: str = "") -> None:
        if group not in self.benchmarks:
            self.benchmarks[group] = []
        self.benchmarks[group].append(benchmark)

    def save(self):
        pass

    @classmethod
    def load(cls, path: Path | str) -> Self:
        path = Path(path)
        if not path.is_file():
            raise FileNotFoundError(path)

        self = cls(path=path.parent, create_folder=False)

        with path.open("r") as f:
            content = json.load(f)
            self.tag = content.get("tag", "latest")
            self.combine_date = content.get("combine_date", None)
            self.loaded_context = content["context"]

        for item in content["data"]:
            bmr = Benchmark.from_dict(self, item)
            self.add_benchmark(bmr)
        return self

    def compare_results(self, other):
        pass

    def format_results(self) -> list[str]:
        results = []
        # Sort by group name
        for group, benchmarks in self.benchmarks.items():
            results.append(f"Group: {group or 'default'}")
            # TODO: use get_best_time_unit() to unify the unit for the group?
            # Sort by best time
            for i, benchmark in enumerate(sorted(benchmarks), 1):
                results.append(f"  {i}: {benchmark}")
                # ol = benchmark.outliers
                # results.append(f"  {i}: {benchmark}, {len(ol)} outliers")

        return results

    def print_results(self):
        for line in self.format_results():
            print(line)  # noqa: T201

    def run_timings(
        self,
        #: A name for this benchmark run.
        name: str,
        *,
        #: The statement to be timed.
        stmt: str,
        #: A variant name for this benchmark run.
        variant: str = "",
        #: A setup statement to execute before the main statement (not timed).
        setup: str = "pass",
        #: Verbosity level (0: quiet, 1: normal, 2: verbose)
        verbose: int = 0,
        #: Number of times to repeat the test.
        repeat: int = 5,
        #: Number of loops to run. If 0, `timeit` will determine the iterations
        #: automatically.
        iterations: int = 0,
        #:
        sample_size: int = 1,
        #: A dict containing the global variables.
        globals: dict[str, Any] | None = None,
        #: Use `time.process_time` instead of `time.monotonic` for measuring CPU time.
        process_time: bool = False,
        #: A group name for this benchmark run.
        group: str = "",
        #: Save results to disk.
        save_results: bool = True,
    ) -> Benchmark:
        """Run `stmt` in a loop and return a `Benchmark` object."""
        if self.context.python.debug_mode:
            warnings.warn(
                "Application is running in debug mode. "
                "This may be due to coverage, a debugger, or other instrumentation. "
                "Performance timings may be affected!",
                stacklevel=2,
            )

        start: float = time.monotonic()
        res: TimingsResult = run_timings(
            name=name,
            stmt=stmt,
            setup=setup,
            verbose=verbose,
            repeat=repeat,
            iterations=iterations,
            globals=globals,
            process_time=process_time,
        )
        elap = time.monotonic() - start

        benchmark = Benchmark(self, name, variant=variant)
        benchmark.start_time = start
        benchmark.elap = elap
        benchmark.iterations = res.iterations
        benchmark.sample_size = sample_size
        benchmark.timings = res.timings.copy()

        self.add_benchmark(benchmark, group=group)
        if save_results:
            benchmark.save()
        return benchmark

    def report(self, format: str = "terminal") -> None:
        self.print_results()

    def make_runner(
        self,
        *,
        name: str,
        variant: str = "",
        setup: str = "pass",
        verbose: int = 0,
        repeat: int = 5,
        iterations: int = 0,
        sample_size: int = 1,
        globals: dict[str, Any] | None = None,
        process_time: bool = False,
        group: str = "",
        save_results: bool = True,
    ):
        bmr = BenchmarkRunner(
            bm=self,
            name=name,
            variant=variant,
            setup=setup,
            verbose=verbose,
            repeat=repeat,
            iterations=iterations,
            sample_size=sample_size,
            globals=globals,
            process_time=process_time,
            group=group,
            save_results=save_results,
        )
        return bmr
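# Illustrative sketch (not part of the module): the typical flow through
# `BenchmarkManager`. The statement is hypothetical; results, when saved, are
# written below `<project_root>/.benchman/` as `<slug>.bmr.json`.
#
#   bm = BenchmarkManager.singleton()
#   bm.run_timings(
#       "join",
#       stmt="'-'.join(map(str, range(100)))",
#       repeat=5,
#       save_results=False,   # skip writing the result file for this run
#   )
#   bm.print_results()        # one line per benchmark, grouped and sorted by best time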