Coverage for /Users/martin/prj/git/benchman_pre/src/benchman/dataset.py: 0%
333 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-12-24 08:16 +0100
« prev ^ index » next coverage.py v7.6.4, created at 2024-12-24 08:16 +0100
1# (c) 2024 Martin Wendt; see https://github.com/mar10/benchman
2# Licensed under the MIT license: https://www.opensource.org/licenses/mit-license.php
3from __future__ import annotations
5import pprint
6from collections import defaultdict
7from collections.abc import Iterator
8from dataclasses import dataclass, field
9from typing import Any, Callable
11from typing_extensions import Literal
13from benchman import Benchmark, BenchmarkManager
14from benchman.util import (
15 ExpressionFilter,
16 calculate_q1_q2_q3,
17 logger,
18 smart_sort_key,
19 split_tokens,
20)
@dataclass(frozen=True)
class ColumnInfo:
    """Immutable metadata record describing one dataset column."""

    #: Unique column id (e.g. "mean")
    id: str
    #: Short label for narrow table headers
    short: str
    #: Human readable column title
    title: str
    #: Longer explanation (e.g. for tooltips)
    description: str
    #: Unit of the values ("n": count, "s": seconds, "1/s": operations/second)
    unit: Literal["", "n", "s", "1/s"]
    #: If True, this column is a metric (e.g. min, max, mean)
    metric: bool
    #: If True, larger is better
    inverse: bool

    def __str__(self):
        return "{} ({}): {} ({})".format(
            self.id, self.short, self.title, self.description
        )

    def __repr__(self):
        return "col_info<" + self.id + ">"
#: Registry of all known columns (dimension columns first, then metrics).
COL_INFO_LIST = [
    # --- Dynamic columns ---
    # Properties that are known for every Benchmark instance.
    # However, the values can occur more than once in a benchmark suite.
    # These can be used as row headers or dynamic columns in a dataset.
    #
    # NOTE(review): unit "s" on 'full_name' and 'tag' looks like a copy-paste
    # from the metric columns (these hold strings, not seconds) — confirm.
    ColumnInfo(
        "full_name",
        "Benchmark",
        "Full Name",
        "Full benchmark name (name, variant, sample size)",
        "s",
        False,
        False,
    ),
    ColumnInfo("name", "Name", "Name", "Short benchmark name", "", False, False),
    ColumnInfo("project", "Project", "Project", "Project name", "", False, False),
    ColumnInfo("python", "Python", "Python", "Python version", "", False, False),
    ColumnInfo(
        "sample_size", "Samples", "Samples", "Number of samples", "n", False, False
    ),
    ColumnInfo("tag", "Tag", "Tag", "Benchmark tag", "s", False, False),
    ColumnInfo("variant", "Variant", "Variant", "Benchmark variant", "", False, False),
    ColumnInfo("version", "ver", "Proj. Ver", "Project version", "", False, False),
    #
    # --- Metric columns ---
    # Properties that are unique for one single Benchmark instance.
    # The order of these columns determines the default order in reports.
    #
    ColumnInfo("min", "min", "Minimum time", "Minimum time (best)", "s", True, False),
    ColumnInfo(
        "mean", "x̄", "Mean (x̄)", "Arithmetic mean ('average')", "s", True, False
    ),
    ColumnInfo("median", "Median", "Median", "Middle value", "s", True, False),
    ColumnInfo("q1", "Q1", "Q1", "First quartile", "s", True, False),
    ColumnInfo("iqr", "IQR", "IQR", "Interquartile Range, Q3 - Q1", "s", True, False),
    ColumnInfo("q3", "Q3", "Q3", "Third quartile", "s", True, False),
    ColumnInfo("max", "max", "Max. time", "Maximum time (worst)", "s", True, False),
    ColumnInfo("stdev", "σ", "Std Dev (σ)", "Standard deviation", "s", True, False),
    ColumnInfo("outliers", "Outliers", "", "", "n", True, False),
    # 'inverse=True' below: larger OPS values are better.
    ColumnInfo(
        "ops", "maxOPS", "maxOPS", "Maximum operations per second", "1/s", True, True
    ),
    ColumnInfo(
        "ops_rel",
        "OPSrel",
        "OPSrel",
        "Maximum OPS relative to sample_size",
        "1/s",
        True,
        True,
    ),
]
#: Map column id -> ColumnInfo
COL_INFO_MAP = {info.id: info for info in COL_INFO_LIST}

#: All known column ids
COL_ID_SET = set(COL_INFO_MAP)

#: Properties that are known for every Benchmark instance.
#: However, the values can occur more than once in a benchmark suite.
#: These can be used as row headers or dynamic columns in a dataset.
DYNAMIC_COL_ID_SET = {info.id for info in COL_INFO_LIST if not info.metric}

#: Properties that are unique for one single Benchmark instance.
#: These can be used as static columns in a dataset.
METRIC_COL_ID_LIST = [info.id for info in COL_INFO_LIST if info.metric]

METRIC_COL_ID_SET = set(METRIC_COL_ID_LIST)
def col_info(col: str) -> ColumnInfo:
    """Return the :class:`ColumnInfo` registered for the column id `col`.

    Raises `KeyError` for unknown column ids.
    """
    info = COL_INFO_MAP[col]
    return info
class DataCell:
    """One value inside a :class:`DataRow`.

    Comparison operators delegate to `self.value`, so cells compare naturally
    against raw values (and, via reflected operators, against other cells).
    """

    def __init__(
        self,
        attr: str,
        value: float | str | None,
        *,
        row: DataRow,
        col: str | None = None,
    ):
        #: Benchmark attribute this cell was read from (e.g. "mean")
        self.attr = attr
        #: Column name; differs from `attr` for dynamic (pivoted) columns
        if col is None:
            self.col = attr
        else:
            self.col = col
        #: The raw cell value (None for missing dynamic cells)
        self.value = value
        #: Owning row
        self.row = row
        #: Classification tags (e.g. "metric", "row-best"), set by Dataset
        self.classes: set[str] = set()

    def __str__(self):
        class_list = ",".join(self.classes)
        return f"{self.attr}={self.value!r} ({class_list})"

    def __repr__(self):
        return f"{self.__class__.__name__}<{self}>"

    def __eq__(self, other):
        return self.value == other

    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    @property
    def benchmark(self) -> Benchmark:
        """The benchmark instance this cell's row was built from."""
        return self.row.benchmark

    @property
    def col_info(self) -> ColumnInfo:
        """Column metadata for this cell's source attribute."""
        return COL_INFO_MAP[self.attr]

    @property
    def dataset(self) -> Dataset:
        """The dataset that owns this cell's row."""
        return self.row.dataset

    @property
    def is_dynamic(self) -> bool:
        """True if this cell belongs to a pivoted (dynamic) column."""
        return self.attr != self.col

    @property
    def is_metric(self) -> bool:
        """True if this cell holds a metric value (min, mean, ...)."""
        return self.attr in METRIC_COL_ID_SET

    @property
    def is_fixed(self) -> bool:
        """True if this cell is a plain dimension column (not dynamic, not metric)."""
        return not (self.is_dynamic or self.is_metric)
@dataclass(frozen=True)
class DataRow:
    """Single row of data in a dataset.

    Columns are represented by Benchmark instances, so we can easily
    access the values (e.g. `br.python`) and metrics (e.g. `br.min`).
    """

    #: Owning dataset
    dataset: Dataset
    #: The benchmark this row was built from
    benchmark: Benchmark
    #: The row's cells, in column order
    cells: list[DataCell] = field(default_factory=list)
    #: Classification tags for the whole row
    classes: set[str] = field(default_factory=set)

    def __str__(self):
        return f"{self.cells}"

    def __repr__(self):
        return f"DataRow<{self.cells}>"

    def __iter__(self):
        yield from self.cells

    def __getitem__(self, key: int) -> DataCell:
        return self.cells[key]

    def __lt__(self, other):
        # BUGFIX: was `self.cells < other.values`; DataRow has no `values`
        # attribute, so comparing two rows (e.g. `rows.sort()` without a key)
        # raised AttributeError.
        return self.cells < other.cells

    def append_value(self, attr_name: str, value: float | str | None) -> DataCell:
        """Create a DataCell for `value`, append it to this row, and return it."""
        cell = DataCell(attr_name, value, row=self)
        self.cells.append(cell)
        return cell

    def float_values(self, *, param: str) -> list[float]:
        """Return attribute `param` of every cell (e.g. param="value")."""
        return [getattr(v, param) for v in self.cells]
class Dataset:
    """A dataset of benchmarks structured by fixed and (optional) dynamic columns.

    The dataset is built from the benchmarks of a :class:`BenchmarkManager`:

    - Fixed columns (`cols`) are read directly from every benchmark; the
      pseudo-column 'full_metrics' expands to all metric columns.
    - Optionally, one dimension (`dyn_col_name_attr`) is pivoted into dynamic
      columns whose cell values come from the metric `dyn_col_value_attr`.
    - `filter` may be a callable or an expression string
      (e.g. "python == '3.9' and best > 1000").
    - `sort_cols` may be a callable key function or a string of column names
      (prefix a name with "-" to reverse numeric order).

    Raises ValueError for unknown column names or inconsistent dynamic-column
    arguments.
    """

    def __init__(
        self,
        *,
        name: str,
        bm: BenchmarkManager,
        cols: str | list[str],  # = ["full_name", "python", "best", "stdev"],
        dyn_col_name_attr: str | None = None,
        dyn_col_value_attr: str | None = None,
        filter: Callable[[Benchmark], bool] | str | None = None,
        sort_cols: Callable[[DataRow], Any] | str | None = None,
    ):
        self._initialized = False

        self.name: str = name
        self.bm = bm

        self.hardware = bm.loaded_context.get("hardware", bm.context.hw.slug())
        # NOTE(review): 'sysstem' is misspelled, but the attribute and the
        # context key are used consistently (see get_description_info), so
        # renaming would break callers and stored data — confirm before fixing.
        self.sysstem = bm.loaded_context.get("sysstem", bm.context.os.slug())

        self.dyn_col_name_attr = dyn_col_name_attr
        if dyn_col_name_attr and dyn_col_name_attr not in DYNAMIC_COL_ID_SET:
            raise ValueError(
                f"Invalid dyn_col_name_attr {dyn_col_name_attr!r}. "
                f"Expected one of {DYNAMIC_COL_ID_SET}"
            )
        self.dyn_col_value_attr = dyn_col_value_attr
        if dyn_col_value_attr and dyn_col_value_attr not in METRIC_COL_ID_SET:
            raise ValueError(
                f"Invalid dyn_col_value_attr {dyn_col_value_attr!r}. "
                f"Expected one of {METRIC_COL_ID_SET}"
            )

        # Copy, so expanding 'full_metrics' below does not mutate the
        # caller's list (previously the list was aliased and modified).
        if isinstance(cols, str):
            self.cols = [cols]
        else:
            self.cols = list(cols)
        col_id_set = set(self.cols)
        if not col_id_set.issubset(COL_ID_SET):
            raise ValueError(
                f"Invalid column(s) {col_id_set - COL_ID_SET}. "
                f"Expected one of {COL_ID_SET}"
            )

        # Headers are a combination of fixed columns and dynamic columns
        # (if any). We also expand the 'full_metrics' keyword to all metrics.
        header = []
        for col in self.cols.copy():
            if col == "full_metrics":
                header.extend(METRIC_COL_ID_LIST)
                self.cols.remove(col)
                self.cols.extend(METRIC_COL_ID_LIST)
            else:
                header.append(col)

        #: Display titles (recomputed in _aggregate once dynamic cols are known)
        self.header_titles: list[str] = header

        self.rows: list[DataRow] = []
        self.col_to_index: dict[str, int] = {}

        self.original_count = bm.count()

        #: Dimensions that are not displayed but constant over all rows
        self.invisible_constant_dimensions: dict[str, Any] = {}
        #: String keys of rows/columns that were skipped as ambiguous
        self.ambigous_dimensions: set[str] = set()

        if bool(dyn_col_name_attr) != bool(dyn_col_value_attr):
            raise ValueError(
                "col_name_attr and col_value_attr must be both set or None"
            )
        self.is_dynamic = bool(dyn_col_name_attr)

        # The filter can be a callable or a string that is parsed as an expression
        # (e.g. "python == '3.9' and best > 1000")
        self._filter_rule: str | None = None
        self.filter: Callable[[Benchmark], bool] | None = None
        if isinstance(filter, str):
            self._filter_rule = filter
            ef = ExpressionFilter(filter)
            self.filter = ef.matches
        elif callable(filter):
            self.filter = filter

        self._sort_col_attrs: list[str] | None = None
        self.sort_cols: Callable[[DataRow], Any] | None = None
        if sort_cols is None:
            # Default: sort by the first fixed column
            self._sort_col_attrs = [self.cols[0]]
            self.sort_cols = self._sort_col_key
        elif isinstance(sort_cols, str):
            self._sort_col_attrs = split_tokens(sort_cols)
            self.sort_cols = self._sort_col_key
        elif callable(sort_cols):
            # BUGFIX: was `self.sort = sort_cols`, which left `self.sort_cols`
            # as None, so `rows.sort(key=None)` compared DataRow objects
            # directly and crashed.
            self.sort_cols = sort_cols

        self._aggregate()

        self._classify()

    def __str__(self):
        return f"Dataset<{self.name!r}, {self.header_titles}, n={len(self.rows)}>"

    def __repr__(self):
        return str(self)

    def _get_row_val(self, row: DataRow, col: str) -> str | float | None:
        """Return the value of a column for a given row."""
        return row[self.col_to_index[col]].value

    def _iter_row_values(
        self, row: DataRow
    ) -> Iterator[tuple[str, str | float | None]]:
        """Yield all fixed column values as (attr_name, value) pairs."""
        for col in self.cols:
            yield col, self._get_row_val(row, col)

    def _sort_col_key(self, row: DataRow) -> tuple[str | float | None, ...]:
        """Return a tuple of values that can be used to sort a row.

        A leading "-" on a column name reverses the order (numeric values
        only; string values cannot be reversed this way).
        """
        res = []
        assert self._sort_col_attrs
        for col in self._sort_col_attrs:
            reverse = False
            if col.startswith("-"):
                col = col[1:]
                reverse = True
            val = row[self.col_to_index[col]].value
            val = smart_sort_key(val)
            if reverse and isinstance(val, float):
                val = -val
            res.append(val)
        return tuple(res)

    def print(self):
        """Log the dataset summary and all rows (debug helper)."""
        logger.info(self)
        for row in self.rows:
            logger.info(row)

    @classmethod
    def _classify_cell_list(cls, cells: list[DataCell], class_prefix: str) -> None:
        """Tag each cell in `cells` with `<prefix>-best/good/bad/worst` classes.

        Quartiles of the non-None values define 'good'/'bad'; min/max define
        'best'/'worst'. For inverse metrics (larger is better, e.g. OPS) the
        labels are swapped.
        """
        cell_values: list[float] = [
            float(cell.value) for cell in cells if cell.value is not None
        ]
        if not cell_values:
            return  # need at least one float value to classify

        q1, _q2, q3 = calculate_q1_q2_q3(cell_values)
        min_val = min(cell_values)
        max_val = max(cell_values)

        inverse = cells[0].col_info.inverse
        if inverse:
            best, good, bad, worst = "worst", "bad", "good", "best"
        else:
            best, good, bad, worst = "best", "good", "bad", "worst"

        for cell in cells:
            if cell.value is None:
                # BUGFIX: missing cells (e.g. empty dynamic columns) previously
                # crashed here on float(None).
                continue
            value = float(cell.value)
            if value == min_val:
                cell.classes.add(f"{class_prefix}-{best}")
            elif value < q1:
                cell.classes.add(f"{class_prefix}-{good}")

            if value == max_val:
                cell.classes.add(f"{class_prefix}-{worst}")
            elif value > q3:
                cell.classes.add(f"{class_prefix}-{bad}")
        return

    def _classify(self) -> None:
        """Classify all cells in the dataset (per row, per name, per variant)."""
        cells_by_name: dict[tuple[str, str], list[DataCell]] = defaultdict(list)
        cells_by_variant: dict[tuple[str, str, str], list[DataCell]] = defaultdict(list)

        for row in self.rows:
            dyn_row_cells: list[DataCell] = []
            for cell in row.cells:
                cbm = cell.benchmark
                if cell.is_metric:
                    cell.classes.add("metric")
                    cells_by_name[(cbm.name, cell.col)].append(cell)
                    cells_by_variant[(cbm.name, cbm.variant, cell.col)].append(cell)

                if cell.is_dynamic:
                    cell.classes.add("dynamic")
                    dyn_row_cells.append(cell)

                if cell.is_fixed:
                    cell.classes.add("fixed")

            # BUGFIX: this was accidentally called twice per row (harmless but
            # redundant, since `classes` is a set).
            self._classify_cell_list(dyn_row_cells, "row")

        for cells in cells_by_name.values():
            self._classify_cell_list(cells, "name")
        for cells in cells_by_variant.values():
            self._classify_cell_list(cells, "variant")
        return

    def _aggregate(self) -> None:
        """Build rows (and dynamic columns, if configured) from the benchmarks.

        Must be called exactly once (by __init__); raises ValueError otherwise.
        """
        if self._initialized:
            raise ValueError("Dataset is already initialized")
        self._initialized = True

        bm = self.bm
        filter = self.filter
        is_dynamic = self.is_dynamic

        # --- Pass 1: Collect all possible values for dynamic column headers

        dyn_col_names: list[str] = []
        if is_dynamic:
            assert self.dyn_col_name_attr and self.dyn_col_value_attr
            for br in bm.iter_benchmarks():
                if filter and not filter(br):
                    continue
                col_name = getattr(br, self.dyn_col_name_attr)
                if col_name not in dyn_col_names:
                    dyn_col_names.append(col_name)
            dyn_col_names.sort(key=smart_sort_key)

        # --- Pass 2: Append fixed cells for all rows and collect dynamic cells

        row_dict: dict[tuple, DataRow] = {}
        dyn_col_dict: dict[tuple, dict[str, DataCell]] = defaultdict(dict)
        ambiguous_benchmarks: list[Benchmark] = []

        # Fixed columns may be dimensions and/or metrics. Only dimensions are
        # used to detect duplicate rows.
        unique_fixed_cols = {c for c in self.cols if c not in METRIC_COL_ID_LIST}

        filtered = total = dropped = 0

        for br in bm.iter_benchmarks():
            total += 1

            if filter and not filter(br):
                filtered += 1
                logger.debug(f"Skipping unmatched row: {br}")
                continue

            row_key = tuple([getattr(br, p) for p in unique_fixed_cols])

            if row_key in row_dict and not is_dynamic:
                dropped += 1
                ambiguous_benchmarks.append(br)
                self.ambigous_dimensions.add(str(row_key))
                logger.warning(f"Skipping ambiguous row: {row_key}: {br}")
                continue

            data_row = DataRow(self, br)

            if is_dynamic:
                # Collect dynamic column cells in a dict, so we can append them
                # to the row later
                assert self.dyn_col_name_attr and self.dyn_col_value_attr

                dyn_col_name = getattr(br, self.dyn_col_name_attr)
                if dyn_col_name in dyn_col_dict[row_key]:
                    ambiguous_benchmarks.append(br)
                    self.ambigous_dimensions.add(f"{dyn_col_name}")
                    logger.warning(
                        f"Skipping ambiguous row: {row_key} + {dyn_col_name}: {br}"
                    )
                    continue
                dyn_col_value = getattr(br, self.dyn_col_value_attr)
                dyn_col_cell = DataCell(
                    self.dyn_col_value_attr,
                    dyn_col_value,
                    row=data_row,
                    col=dyn_col_name,
                )
                dyn_col_dict[row_key][dyn_col_name] = dyn_col_cell

            # Add fixed column cells to the row
            for col in self.cols:
                data_row.append_value(col, getattr(br, col))

            row_dict[row_key] = data_row

        # --- Pass 3: Append dynamic column cells to the rows

        if is_dynamic:
            assert self.dyn_col_name_attr
            # Append dynamic column cells to the rows
            for row_key, dyn_cols in dyn_col_dict.items():
                data_row = row_dict[row_key]

                for dcn in dyn_col_names:
                    dc = dyn_cols.get(dcn)
                    if dc is None:
                        # BUGFIX: a stray bare `raise` here aborted with
                        # RuntimeError whenever a dynamic column had no value
                        # for this row; the placeholder cell is now used.
                        dc = DataCell(
                            self.dyn_col_name_attr, None, row=data_row, col=dcn
                        )
                    data_row.cells.append(dc)

        # --- Pass 4: Create Dataset instance

        self.all_cols = self.cols + dyn_col_names
        self.col_to_index = {c: i for i, c in enumerate(self.all_cols)}

        # Use 'Header title' from `col_info_list`
        self.header_titles = [
            col_info(col).title if col in COL_INFO_MAP else col for col in self.all_cols
        ]

        # --- Pass 5: Create DataRow instances and sort them

        self.rows = list(row_dict.values())

        if self._sort_col_attrs and any(
            col.lstrip("-") not in self.col_to_index for col in self._sort_col_attrs
        ):
            # BUGFIX: the two f-strings were concatenated without a separator
            # ("...]Expected...").
            raise ValueError(
                f"Invalid sort column(s): {self._sort_col_attrs}. "
                f"Expected columns from {self.cols}"
            )
        self.rows.sort(key=self.sort_cols)

        # Check some statistics

        if filtered:
            logger.info(
                f"Skipped {filtered}/{total} benchmarks "
                f"(did not match filter {self._filter_rule!r})"
            )

        if ambiguous_benchmarks:
            logger.warning(f"Warning: Skipped {len(ambiguous_benchmarks)} benchmarks.")
            logger.warning(
                "This happens most likely when multiple benchmarks have different "
                "values for one dimension that is not displayed as a column."
            )
            logger.warning(
                "To resolve this, either add the dimension as fixed column or "
                "use a filter to restrict benchmarks to a single dimension value."
            )
            logger.warning(
                f"Skipped benchmarks: {pprint.pformat(ambiguous_benchmarks, indent=4)}"
            )

    def get_description_info(self) -> dict[str, str | list[str]]:
        """Return title, subtitle, legend, and warnings describing the dataset."""
        # Find all dimension columns that are not displayed but have a constant
        # value
        attrs_with_multi_values = set()

        const_dim_vals = {}
        for row in self.rows:
            bmr = row.benchmark
            for col_name, value in bmr.loaded_state().items():
                if col_name not in const_dim_vals:
                    const_dim_vals[col_name] = value
                elif const_dim_vals[col_name] != value:
                    attrs_with_multi_values.add(col_name)

        for attr in attrs_with_multi_values:
            const_dim_vals.pop(attr)

        self.invisible_constant_dimensions = const_dim_vals

        # Derive a title when none was passed to the constructor
        title = self.name
        if title is None:
            if self.is_dynamic:
                title = f"{self.dyn_col_value_attr} by {self.dyn_col_name_attr}"
                title = title.capitalize()
            else:
                title = "Benchmark Data"

        legend: list[str] = []
        warnings: list[str] = []
        subtitle: list[str] = []
        res: dict[str, str | list[str]] = {
            "title": f"{title}",
            "subtitle": subtitle,
            "legend": legend,
            "warnings": warnings,
        }

        subtitle.append(f"Client: {self.hardware}, {self.sysstem}")

        legend.append(f"Benchmark date: {self.bm.combine_date}")

        if const_dim_vals:
            vals = ", ".join(f"{k}={v!r}" for k, v in sorted(const_dim_vals.items()))
            legend.append(f"Fixed dataset values : {vals}.")

        if attrs_with_multi_values:
            legend.append(
                f"Variant dataset values: {', '.join(sorted(attrs_with_multi_values))}."
            )

        if self._filter_rule and self.original_count != len(self.rows):
            legend.append(
                f"Showing {len(self.rows)} of {self.original_count} rows, "
                f"applied filter: {self._filter_rule!r}."
            )
        elif self.original_count != len(self.rows):
            legend.append(f"Showing {len(self.rows)} of {self.original_count} rows.")
        else:
            legend.append(f"Showing {len(self.rows)} rows.")

        if self._sort_col_attrs:
            legend.append(f"Sort order: {', '.join(self._sort_col_attrs)}.")

        if self.ambigous_dimensions:
            warnings.append(
                "WARNING: Skipped one or more rows with ambiguous column values: "
                "Results are probably inaccurate!\n"
                "    This happens most likely when multiple benchmarks have "
                "different values for one dimension that is not displayed as a "
                "column.\n"
                "    To resolve this, either add the dimension as column or "
                "use a filter to restrict benchmarks to a single dimension value."
            )

        return res