Coverage for /Users/martin/prj/git/benchman_pre/src/benchman/dataset.py: 0%

333 statements  

coverage.py v7.6.4, created at 2024-12-24 08:16 +0100

# (c) 2024 Martin Wendt; see https://github.com/mar10/benchman
# Licensed under the MIT license: https://www.opensource.org/licenses/mit-license.php
from __future__ import annotations

import pprint
from collections import defaultdict
from collections.abc import Iterator
from dataclasses import dataclass, field
from typing import Any, Callable

from typing_extensions import Literal

from benchman import Benchmark, BenchmarkManager
from benchman.util import (
    ExpressionFilter,
    calculate_q1_q2_q3,
    logger,
    smart_sort_key,
    split_tokens,
)

@dataclass(frozen=True)
class ColumnInfo:
    id: str
    short: str
    title: str
    description: str
    unit: Literal["", "n", "s", "1/s"]
    #: If True, this column is a metric (e.g. min, max, mean)
    metric: bool
    #: If True, larger is better
    inverse: bool

    def __str__(self):
        return f"{self.id} ({self.short}): {self.title} ({self.description})"

    def __repr__(self):
        return f"col_info<{self.id}>"

COL_INFO_LIST = [
    # --- Dynamic columns ---
    # Properties that are known for every Benchmark instance.
    # However, the values can occur more than once in a benchmark suite.
    # These can be used as row headers or dynamic columns in a dataset.
    #
    ColumnInfo(
        "full_name",
        "Benchmark",
        "Full Name",
        "Full benchmark name (name, variant, sample size)",
        "s",
        False,
        False,
    ),
    ColumnInfo("name", "Name", "Name", "Short benchmark name", "", False, False),
    ColumnInfo("project", "Project", "Project", "Project name", "", False, False),
    ColumnInfo("python", "Python", "Python", "Python version", "", False, False),
    ColumnInfo(
        "sample_size", "Samples", "Samples", "Number of samples", "n", False, False
    ),
    ColumnInfo("tag", "Tag", "Tag", "Benchmark tag", "s", False, False),
    ColumnInfo("variant", "Variant", "Variant", "Benchmark variant", "", False, False),
    ColumnInfo("version", "ver", "Proj. Ver", "Project version", "", False, False),
    #
    # --- Metric columns ---
    # Properties that are unique for one single Benchmark instance.
    # The order of these columns determines the default order in reports.
    #
    ColumnInfo("min", "min", "Minimum time", "Minimum time (best)", "s", True, False),
    ColumnInfo(
        "mean", "x̄", "Mean (x̄)", "Arithmetic mean ('average')", "s", True, False
    ),
    ColumnInfo("median", "Median", "Median", "Middle value", "s", True, False),
    ColumnInfo("q1", "Q1", "Q1", "First quartile", "s", True, False),
    ColumnInfo("iqr", "IQR", "IQR", "Interquartile Range, Q3 - Q1", "s", True, False),
    ColumnInfo("q3", "Q3", "Q3", "Third quartile", "s", True, False),
    ColumnInfo("max", "max", "Max. time", "Maximum time (worst)", "s", True, False),
    ColumnInfo("stdev", "σ", "Std Dev (σ)", "Standard deviation", "s", True, False),
    ColumnInfo("outliers", "Outliers", "", "", "n", True, False),
    ColumnInfo(
        "ops", "maxOPS", "maxOPS", "Maximum operations per second", "1/s", True, True
    ),
    ColumnInfo(
        "ops_rel",
        "OPSrel",
        "OPSrel",
        "Maximum OPS relative to sample_size",
        "1/s",
        True,
        True,
    ),
]

#: Map column id -> ColumnInfo
COL_INFO_MAP = {col.id: col for col in COL_INFO_LIST}

#: All known column ids
COL_ID_SET = set(COL_INFO_MAP.keys())

#: Properties that are known for every Benchmark instance.
#: However, the values can occur more than once in a benchmark suite.
#: These can be used as row headers or dynamic columns in a dataset.
DYNAMIC_COL_ID_SET = set(col.id for col in COL_INFO_LIST if col.metric is False)

#: Properties that are unique for one single Benchmark instance.
#: These can be used as static columns in a dataset.
METRIC_COL_ID_LIST = [col.id for col in COL_INFO_LIST if col.metric is True]

METRIC_COL_ID_SET = set(METRIC_COL_ID_LIST)


def col_info(col: str) -> ColumnInfo:
    """Lookup the information for a column id."""
    return COL_INFO_MAP[col]

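# Example (sketch, for illustration only; not part of the module API):
# the lookup helpers above can be used to drive report formatting, e.g.:
#
#     ci = col_info("ops")            # ColumnInfo for the 'ops' column
#     ci.title, ci.unit               # -> ("maxOPS", "1/s")
#     "ops" in METRIC_COL_ID_SET      # -> True  (metric column)
#     "python" in DYNAMIC_COL_ID_SET  # -> True  (dimension column)
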

class DataCell:
    """A single cell in a dataset."""

    def __init__(
        self,
        attr: str,
        value: float | str | None,
        *,
        row: DataRow,
        col: str | None = None,
    ):
        self.attr = attr
        self.col = attr if col is None else col
        self.value = value
        self.row = row
        self.classes: set[str] = set()

    def __str__(self):
        return f"{self.attr}={self.value!r} ({','.join(self.classes)})"

    def __repr__(self):
        return f"{self.__class__.__name__}<{self}>"

    def __eq__(self, other):
        return self.value == other

    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    @property
    def benchmark(self) -> Benchmark:
        return self.row.benchmark

    @property
    def col_info(self) -> ColumnInfo:
        return COL_INFO_MAP[self.attr]

    @property
    def dataset(self) -> Dataset:
        return self.row.dataset

    @property
    def is_dynamic(self) -> bool:
        return self.attr != self.col

    @property
    def is_metric(self) -> bool:
        return self.attr in METRIC_COL_ID_SET

    @property
    def is_fixed(self) -> bool:
        return not (self.is_dynamic or self.is_metric)

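# Example (sketch, for illustration only): DataCell delegates comparisons to its
# `value`, so cells can be compared against plain numbers and sorted directly.
# Cells are normally created via `DataRow.append_value()`; the standalone cell
# below is only schematic (`some_row` stands for an existing DataRow):
#
#     cell = DataCell("min", 0.042, row=some_row)
#     cell == 0.042   # -> True
#     cell < 0.1      # -> True
#     sorted(cells)   # orders a list of cells by their values
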

@dataclass(frozen=True)
class DataRow:
    """Single row of data in a dataset.

    Each row is backed by a Benchmark instance, so we can easily access its
    dimension values (e.g. `br.python`) and metrics (e.g. `br.min`).
    """

    dataset: Dataset
    benchmark: Benchmark
    cells: list[DataCell] = field(default_factory=list)
    classes: set[str] = field(default_factory=set)

    def __str__(self):
        return f"{self.cells}"

    def __repr__(self):
        return f"DataRow<{self.cells}>"

    def __iter__(self):
        yield from self.cells

    def __getitem__(self, key: int) -> DataCell:
        return self.cells[key]

    def __lt__(self, other):
        return self.cells < other.cells

    def append_value(self, attr_name: str, value: float | str | None) -> DataCell:
        cell = DataCell(attr_name, value, row=self)
        self.cells.append(cell)
        return cell

    def float_values(self, *, param: str) -> list[float]:
        return [getattr(v, param) for v in self.cells]

class Dataset:
    """A class to represent a dataset of benchmarks structured by two parameters."""

    def __init__(
        self,
        *,
        name: str,
        bm: BenchmarkManager,
        cols: str | list[str],  # = ["full_name", "python", "best", "stdev"],
        dyn_col_name_attr: str | None = None,
        dyn_col_value_attr: str | None = None,
        filter: Callable[[Benchmark], bool] | str | None = None,
        sort_cols: Callable[[DataRow], Any] | str | None = None,
    ):
        self._initialized = False

        self.name: str = name
        self.bm = bm

        self.hardware = bm.loaded_context.get("hardware", bm.context.hw.slug())
        self.system = bm.loaded_context.get("system", bm.context.os.slug())

        self.dyn_col_name_attr = dyn_col_name_attr
        if dyn_col_name_attr and dyn_col_name_attr not in DYNAMIC_COL_ID_SET:
            raise ValueError(
                f"Invalid dyn_col_name_attr {dyn_col_name_attr!r}. "
                f"Expected one of {DYNAMIC_COL_ID_SET}"
            )
        self.dyn_col_value_attr = dyn_col_value_attr
        if dyn_col_value_attr and dyn_col_value_attr not in METRIC_COL_ID_SET:
            raise ValueError(
                f"Invalid dyn_col_value_attr {dyn_col_value_attr!r}. "
                f"Expected one of {METRIC_COL_ID_SET}"
            )

        if isinstance(cols, str):
            self.cols = [cols]
        else:
            self.cols = cols
        col_id_set = set(self.cols)
        if not col_id_set.issubset(COL_ID_SET):
            raise ValueError(
                f"Invalid column(s) {col_id_set - COL_ID_SET}. "
                f"Expected one of {COL_ID_SET}"
            )

        # Headers are a combination of fixed columns and dynamic columns
        # (if any). We also expand the 'full_metrics' keyword to all metrics.
        header = []
        for col in self.cols.copy():
            if col == "full_metrics":
                header.extend(METRIC_COL_ID_LIST)
                self.cols.remove(col)
                self.cols.extend(METRIC_COL_ID_LIST)
            else:
                header.append(col)

        self.header_titles: list[str] = header

        self.rows: list[DataRow] = []
        self.col_to_index: dict[str, int] = {}

        self.original_count = bm.count()

        self.invisible_constant_dimensions: dict[str, Any] = {}
        self.ambiguous_dimensions: set[str] = set()

        if bool(dyn_col_name_attr) != bool(dyn_col_value_attr):
            raise ValueError(
                "dyn_col_name_attr and dyn_col_value_attr "
                "must both be set or both be None"
            )
        self.is_dynamic = bool(dyn_col_name_attr)

        # The filter can be a callable or a string that is parsed as an expression
        # (e.g. "python == '3.9' and best > 1000")
        self._filter_rule: str | None = None
        self.filter: Callable[[Benchmark], bool] | None = None
        if isinstance(filter, str):
            self._filter_rule = filter
            ef = ExpressionFilter(filter)
            self.filter = ef.matches
        elif callable(filter):
            self.filter = filter

        self._sort_col_attrs: list[str] | None = None

        self.sort_cols: Callable[[DataRow], Any] | None = None

        if sort_cols is None:
            self._sort_col_attrs = [self.cols[0]]
            self.sort_cols = self._sort_col_key
        elif isinstance(sort_cols, str):
            self._sort_col_attrs = split_tokens(sort_cols)
            self.sort_cols = self._sort_col_key
        elif callable(sort_cols):
            self.sort_cols = sort_cols

        self._aggregate()

        self._classify()

    def __str__(self):
        return f"Dataset<{self.name!r}, {self.header_titles}, n={len(self.rows)}>"

    def __repr__(self):
        return str(self)

    def _get_row_val(self, row: DataRow, col: str) -> str | float | None:
        """Return the value of a column for a given row."""
        return row[self.col_to_index[col]].value

    def _iter_row_values(
        self, row: DataRow
    ) -> Iterator[tuple[str, str | float | None]]:
        """Return all column values as (attr_name, value) pairs."""
        for col in self.cols:
            yield col, self._get_row_val(row, col)

    def _sort_col_key(self, row: DataRow) -> tuple[str | float | None, ...]:
        """Return a tuple of values that can be used to sort a row.

        A leading '-' in a column name (e.g. "-min") reverses the sort order
        for that column.
        """
        res = []
        assert self._sort_col_attrs
        for col in self._sort_col_attrs:
            reverse = False
            if col.startswith("-"):
                col = col[1:]
                reverse = True
            val = row[self.col_to_index[col]].value
            val = smart_sort_key(val)
            if reverse and isinstance(val, float):
                val = -val
            res.append(val)
        return tuple(res)

    def print(self):
        logger.info(self)
        # logger.info(self.header)
        for row in self.rows:
            logger.info(row)

    @classmethod
    def _classify_cell_list(cls, cells: list[DataCell], class_prefix: str) -> None:
        """Classify a list of DataCell objects into good/bad."""
        cell_values: list[float] = [
            float(cell.value) for cell in cells if cell.value is not None
        ]
        if not cell_values:
            return  # need at least one float value to classify

        q1, _q2, q3 = calculate_q1_q2_q3(cell_values)
        min_val = min(cell_values)
        max_val = max(cell_values)

        inverse = cells[0].col_info.inverse
        if inverse:
            best, good, bad, worst = "worst", "bad", "good", "best"
        else:
            best, good, bad, worst = "best", "good", "bad", "worst"

        for cell in cells:
            if cell.value is None:
                # Unfilled (e.g. missing dynamic) cells cannot be classified
                continue
            value = float(cell.value)
            if value == min_val:
                cell.classes.add(f"{class_prefix}-{best}")
            elif value < q1:
                cell.classes.add(f"{class_prefix}-{good}")

            if value == max_val:
                cell.classes.add(f"{class_prefix}-{worst}")
            elif value > q3:
                cell.classes.add(f"{class_prefix}-{bad}")
        # print("cells", inverse, cells)
        return

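    # Worked example (illustrative values): for a regular column (inverse=False,
    # smaller is better) and cell values [0.9, 1.0, 1.2, 5.0] with
    # class_prefix="row":
    #   - the minimum (0.9) is tagged "row-best"; other values below Q1 get "row-good"
    #   - the maximum (5.0) is tagged "row-worst"; other values above Q3 get "row-bad"
    # For columns where larger is better (inverse=True, e.g. 'ops'), the labels
    # are swapped, so the maximum is tagged "row-best".
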

    def _classify(self) -> None:
        """Classify all cells in the dataset."""

        cells_by_name: dict[tuple[str, str], list[DataCell]] = defaultdict(list)
        cells_by_variant: dict[tuple[str, str, str], list[DataCell]] = defaultdict(list)

        for row in self.rows:
            dyn_row_cells: list[DataCell] = []
            for cell in row.cells:
                cbm = cell.benchmark
                if cell.is_metric:
                    cell.classes.add("metric")
                    cells_by_name[(cbm.name, cell.col)].append(cell)
                    cells_by_variant[(cbm.name, cbm.variant, cell.col)].append(cell)

                if cell.is_dynamic:
                    cell.classes.add("dynamic")
                    dyn_row_cells.append(cell)

                if cell.is_fixed:
                    cell.classes.add("fixed")

            self._classify_cell_list(dyn_row_cells, "row")

        for cells in cells_by_name.values():
            self._classify_cell_list(cells, "name")
        for cells in cells_by_variant.values():
            self._classify_cell_list(cells, "variant")
        return

    def _aggregate(self) -> None:
        if self._initialized:
            raise ValueError("Dataset is already initialized")
        self._initialized = True

        bm = self.bm
        filter = self.filter
        is_dynamic = self.is_dynamic

        # --- Pass 1: Collect all possible values for dynamic column headers

        dyn_col_names: list[str] = []
        if is_dynamic:
            assert self.dyn_col_name_attr and self.dyn_col_value_attr
            for br in bm.iter_benchmarks():
                if filter and not filter(br):
                    continue
                col_name = getattr(br, self.dyn_col_name_attr)
                if col_name not in dyn_col_names:
                    dyn_col_names.append(col_name)
            dyn_col_names.sort(key=smart_sort_key)

        # --- Pass 2: Append fixed cells for all rows and collect dynamic cells

        row_dict: dict[tuple[str, ...], DataRow] = {}
        dyn_col_dict: dict[tuple[str, ...], dict[str, DataCell]] = defaultdict(dict)
        ambiguous_benchmarks: list[Benchmark] = []

        # Fixed columns may be dimensions and/or metrics. Only dimensions are
        # used to detect duplicate rows.
        unique_fixed_cols = {c for c in self.cols if c not in METRIC_COL_ID_LIST}

        filtered = total = dropped = 0

        for br in bm.iter_benchmarks():
            total += 1

            if filter and not filter(br):
                filtered += 1
                logger.debug(f"Skipping unmatched row: {br}")
                continue

            row_key = tuple([getattr(br, p) for p in unique_fixed_cols])
            # print(f"{row_key=}, {br=}")

            if row_key in row_dict and not is_dynamic:
                dropped += 1
                ambiguous_benchmarks.append(br)
                self.ambiguous_dimensions.add(str(row_key))
                logger.warning(f"Skipping ambiguous row: {row_key}: {br}")
                continue

            data_row = DataRow(self, br)

            if is_dynamic:
                # Collect dynamic column cells in a dict, so we can append them
                # to the row later
                assert self.dyn_col_name_attr and self.dyn_col_value_attr

                dyn_col_name = getattr(br, self.dyn_col_name_attr)
                if dyn_col_name in dyn_col_dict[row_key]:
                    ambiguous_benchmarks.append(br)
                    self.ambiguous_dimensions.add(f"{dyn_col_name}")
                    logger.warning(
                        f"Skipping ambiguous row: {row_key} + {dyn_col_name}: {br}"
                    )
                    continue
                dyn_col_value = getattr(br, self.dyn_col_value_attr)
                dyn_col_cell = DataCell(
                    self.dyn_col_value_attr,
                    dyn_col_value,
                    row=data_row,
                    col=dyn_col_name,
                )
                dyn_col_dict[row_key][dyn_col_name] = dyn_col_cell

            # Add fixed column cells to the row
            for col in self.cols:
                data_row.append_value(col, getattr(br, col))

            row_dict[row_key] = data_row

        # --- Pass 3: Append dynamic column cells to the rows

        if is_dynamic:
            assert self.dyn_col_name_attr
            # Append dynamic column cells to the rows
            for row_key, dyn_cols in dyn_col_dict.items():
                data_row = row_dict[row_key]

                for dcn in dyn_col_names:
                    dc = dyn_cols.get(dcn)
                    if dc is None:
                        # No benchmark for this dynamic column: add an empty cell
                        dc = DataCell(
                            self.dyn_col_name_attr, None, row=data_row, col=dcn
                        )
                    data_row.cells.append(dc)

        # --- Pass 4: Resolve the final column order and header titles

        self.all_cols = self.cols + dyn_col_names
        self.col_to_index = {c: i for i, c in enumerate(self.all_cols)}

        # Use the title from COL_INFO_LIST for known columns
        self.header_titles = [
            col_info(col).title if col in COL_INFO_MAP else col for col in self.all_cols
        ]

        # --- Pass 5: Collect the DataRow instances and sort them

        self.rows = list(row_dict.values())

        if self._sort_col_attrs and any(
            col.lstrip("-") not in self.col_to_index for col in self._sort_col_attrs
        ):
            raise ValueError(
                f"Invalid sort column(s): {self._sort_col_attrs}. "
                f"Expected columns from {self.cols}"
            )
        self.rows.sort(key=self.sort_cols)

        # Check some statistics

        if filtered:
            logger.info(
                f"Skipped {filtered}/{total} benchmarks "
                f"(did not match filter {self._filter_rule!r})"
            )

        if ambiguous_benchmarks:
            logger.warning(f"Warning: Skipped {len(ambiguous_benchmarks)} benchmarks.")
            logger.warning(
                "This happens most likely when multiple benchmarks have different "
                "values for one dimension that is not displayed as a column."
            )
            logger.warning(
                "To resolve this, either add the dimension as a fixed column or "
                "use a filter to restrict benchmarks to a single dimension value."
            )
            logger.warning(
                f"Skipped benchmarks: {pprint.pformat(ambiguous_benchmarks, indent=4)}"
            )

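    # Layout sketch (illustrative values, not generated output): with cols=["name"],
    # dyn_col_name_attr="python", and dyn_col_value_attr="ops", _aggregate() yields
    # one row per benchmark name and one extra column per Python version, holding
    # that benchmark's 'ops' value:
    #
    #     Name   | 3.11   | 3.12   | 3.13
    #     -------+--------+--------+-------
    #     sort   | 1234.5 | 1301.2 | 1355.0
    #     parse  |  201.7 |  219.3 |  230.8
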

    def get_description_info(self) -> dict[str, str | list[str]]:
        """Return a description of the dataset."""
        # Find all dimension columns that are not displayed but have a constant
        # value
        attrs_with_multi_values = set()

        const_dim_vals = {}
        for row in self.rows:
            bmr = row.benchmark
            for col_name, value in bmr.loaded_state().items():
                if col_name not in const_dim_vals:
                    const_dim_vals[col_name] = value
                elif const_dim_vals[col_name] != value:
                    attrs_with_multi_values.add(col_name)

        for attr in attrs_with_multi_values:
            const_dim_vals.pop(attr)

        self.invisible_constant_dimensions = const_dim_vals

        #
        title = self.name
        if title is None:
            if self.is_dynamic:
                title = f"{self.dyn_col_value_attr} by {self.dyn_col_name_attr}"
                title = title.capitalize()
            else:
                title = "Benchmark Data"

        legend: list[str] = []
        warnings: list[str] = []
        subtitle: list[str] = []
        res: dict[str, str | list[str]] = {
            "title": f"{title}",
            "subtitle": subtitle,
            "legend": legend,
            "warnings": warnings,
        }

        subtitle.append(f"Client: {self.hardware}, {self.system}")

        legend.append(f"Benchmark date: {self.bm.combine_date}")

        if const_dim_vals:
            vals = ", ".join(f"{k}={v!r}" for k, v in sorted(const_dim_vals.items()))
            legend.append(f"Fixed dataset values: {vals}.")

        if attrs_with_multi_values:
            legend.append(
                f"Variant dataset values: {', '.join(sorted(attrs_with_multi_values))}."
            )

        if self._filter_rule and self.original_count != len(self.rows):
            legend.append(
                f"Showing {len(self.rows)} of {self.original_count} rows, "
                f"applied filter: {self._filter_rule!r}."
            )
        elif self.original_count != len(self.rows):
            legend.append(f"Showing {len(self.rows)} of {self.original_count} rows.")
        else:
            legend.append(f"Showing {len(self.rows)} rows.")

        if self._sort_col_attrs:
            legend.append(f"Sort order: {', '.join(self._sort_col_attrs)}.")

        if self.ambiguous_dimensions:
            warnings.append(
                "WARNING: Skipped one or more rows with ambiguous column values: "
                "Results are probably inaccurate!\n"
                "    This happens most likely when multiple benchmarks have "
                "different values for one dimension that is not displayed as a "
                "column.\n"
                "    To resolve this, either add the dimension as a column or "
                "use a filter to restrict benchmarks to a single dimension value."
            )

        return res
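

# Usage sketch (illustration only; assumes a populated BenchmarkManager `bm`
# obtained elsewhere, e.g. loaded from previously stored benchmark results):
#
#     ds = Dataset(
#         name="ops by python",
#         bm=bm,
#         cols=["name", "variant"],
#         dyn_col_name_attr="python",
#         dyn_col_value_attr="ops",
#         sort_cols="name",
#     )
#     ds.print()                        # log all rows
#     info = ds.get_description_info()  # title, subtitle, legend, warnings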