wallaroo.assay
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import gql  # type: ignore
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from .inference_decode import dict_list_to_dataframe
from .object import *
from .wallaroo_ml_ops_api_client.api.assay import assays_set_active
from .wallaroo_ml_ops_api_client.models.assays_set_active_json_body import (
    AssaysSetActiveJsonBody,
)

if TYPE_CHECKING:
    from .client import Client
class Assay(Object):
    """An Assay represents a record in the database. An assay contains
    some high-level attributes such as name, status, and active, as well as
    the sub-objects Baseline, Window, and Summarizer, which specify how the
    Baseline is derived, how the Windows should be created, and how the
    analysis should be conducted."""

    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
        self.client = client
        assert client is not None
        super().__init__(gql_client=client._gql_client, data=data)

    def _fill(self, data: Dict[str, Any]) -> None:
        for required_attribute in ["id"]:
            if required_attribute not in data:
                raise RequiredAttributeMissing(
                    self.__class__.__name__, required_attribute
                )
        self._id = data["id"]

        for k in [
            "active",
            "status",
            "name",
            "warning_threshold",
            "alert_threshold",
            "pipeline_name",
        ]:
            if k in data:
                setattr(self, f"_{k}", data[k])

    def _fetch_attributes(self) -> Dict[str, Any]:
        return self._gql_client.execute(
            gql.gql(
                """
                query GetAssay($id: bigint) {
                  assay(where: {id: {_eq: $id}}) {
                    id
                    name
                    active
                    status
                    warning_threshold
                    alert_threshold
                    pipeline_name
                  }
                }
                """
            ),
            variable_values={
                "id": self._id,
            },
        )["assay"]

    def turn_on(self):
        """Sets the Assay to active, causing it to run and backfill any
        missing analysis."""

        ret = assays_set_active.sync(
            client=self.client.mlops(),
            json_body=AssaysSetActiveJsonBody(self._id, True),
        )
        self._active = True
        return ret

    def turn_off(self):
        """Disables the Assay. No further analysis will be conducted until the assay
        is enabled."""
        ret = assays_set_active.sync(
            client=self.client.mlops(),
            json_body=AssaysSetActiveJsonBody(self._id, False),
        )
        self._active = False
        return ret

    def set_alert_threshold(self, threshold: float):
        """Sets the alert threshold at the specified level. The status in the AssayAnalysis
        will show whether this level is exceeded; however, alerting/notifications are not
        currently implemented."""
        res = self._gql_client.execute(
            gql.gql(
                """
                mutation SetActive($id: bigint!, $alert_threshold: Float!) {
                  update_assay_by_pk(pk_columns: {id: $id}, _set: {alert_threshold: $alert_threshold}) {
                    id
                    active
                  }
                }
                """
            ),
            variable_values={"id": self._id, "alert_threshold": threshold},
        )["update_assay_by_pk"]
        self._alert_threshold = threshold
        return res

    def set_warning_threshold(self, threshold: float):
        """Sets the warning threshold at the specified level. The status in the AssayAnalysis
        will show whether this level is exceeded; however, alerting/notifications are not
        currently implemented."""

        res = self._gql_client.execute(
            gql.gql(
                """
                mutation SetActive($id: bigint!, $warning_threshold: Float!) {
                  update_assay_by_pk(pk_columns: {id: $id}, _set: {warning_threshold: $warning_threshold}) {
                    id
                    active
                  }
                }
                """
            ),
            variable_values={"id": self._id, "warning_threshold": threshold},
        )["update_assay_by_pk"]
        self._warning_threshold = threshold
        return res
An Assay represents a record in the database. An assay contains some high-level attributes such as name, status, and active, as well as the sub-objects Baseline, Window, and Summarizer, which specify how the Baseline is derived, how the Windows should be created, and how the analysis should be conducted.
def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
Base constructor.
Each object requires:
- a GraphQL client - in order to fill its missing members dynamically
- an initial data blob - typically from deserialized JSON, containing at least the data for required members (typically the object's primary key) and optionally other data members
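For illustration only, a minimal sketch of constructing an Assay by hand from a client and a data blob carrying the required primary key; in practice assays are normally obtained through the client's assay listing/creation helpers, and the wallaroo.Client() call below is assumed to be an already-configured SDK client.

import wallaroo
from wallaroo.assay import Assay

wl = wallaroo.Client()  # assumed: an authenticated Wallaroo client

# The data blob must include the required "id" key; other fields such as
# "name" or "active" are optional and are fetched lazily when missing.
assay = Assay(wl, {"id": 1})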
def turn_on(self):
Sets the Assay to active, causing it to run and backfill any missing analysis.
def turn_off(self):
Disables the Assay. No further analysis will be conducted until the assay is enabled.
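A short usage sketch, assuming assay is an Assay instance such as the one constructed above:

# Enable the assay; it runs and backfills analyses for any missed windows.
assay.turn_on()

# Pause it again; no further analysis runs until it is re-enabled.
assay.turn_off()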
def set_alert_threshold(self, threshold: float):
Sets the alert threshold at the specified level. The status in the AssayAnalysis will show whether this level is exceeded; however, alerting/notifications are not currently implemented.
def set_warning_threshold(self, threshold: float):
Sets the warning threshold at the specified level. The status in the AssayAnalysis will show whether this level is exceeded; however, alerting/notifications are not currently implemented.
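For example, to flag analyses whose scores drift past chosen levels (the threshold values here are arbitrary):

# Analyses scoring above 0.1 are flagged as warnings, above 0.25 as alerts,
# via the status field of each AssayAnalysis.
assay.set_warning_threshold(0.1)
assay.set_alert_threshold(0.25)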
def meta_df(assay_result: Dict, index_name) -> pd.DataFrame:
    """Creates a dataframe for the metadata in the baseline or window, excluding the
    edge information.
    :param assay_result: The dict of the raw assay result"""
    return pd.DataFrame(
        {
            k: [assay_result[k]]
            for k in assay_result.keys()
            if k not in ["edges", "edge_names", "aggregated_values", "aggregation"]
        },
        index=[index_name],
    )
Creates a dataframe for the metadata in the baseline or window, excluding the edge information.
Parameters
- assay_result: The dict of the raw assay result
def edge_df(window_or_baseline: Dict) -> pd.DataFrame:
    """Creates a dataframe specifically for the edge information in the baseline or window.
    :param window_or_baseline: The dict from the assay result of either the window or baseline"""

    data = {
        k: window_or_baseline[k]
        for k in ["edges", "edge_names", "aggregated_values", "aggregation"]
    }
    return pd.DataFrame(data)
Creates a dataframe specifically for the edge information in the baseline or window.
Parameters
- window_or_baseline: The dict from the assay result of either the window or baseline
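As a small illustration of how these two helpers split a summary dict, here is a hypothetical baseline_summary with invented values; the keys mirror the ones the functions expect:

baseline_summary = {
    "count": 1000, "min": 11.9, "max": 15.1, "mean": 12.9, "median": 12.8,
    "std": 0.4, "start": "2023-01-01T00:00:00Z", "end": "2023-01-02T00:00:00Z",
    "aggregation": "Density",
    "edges": [11.9, 12.6, 13.3, 14.0, None],
    "edge_names": ["left_outlier", "q_25", "q_50", "q_75", "right_outlier"],
    "aggregated_values": [0.0, 0.25, 0.5, 0.25, 0.0],
}

meta_df(baseline_summary, "Baseline")  # one row of scalar stats (count, mean, ...)
edge_df(baseline_summary)              # one row per bin, with edges and aggregated values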
class AssayAnalysis(object):
    """The AssayAnalysis class helps handle the assay analysis logs from the Plateau
    logs. These logs are JSON documents with meta information on the assay and analysis,
    as well as summary information on the baseline and window, and information on the
    comparison between them."""

    def __init__(self, raw: Dict[str, Any]):
        self.assay_id = 0
        self.name = ""
        self.raw = raw
        self.iopath = ""
        self.score = 0.0
        self.status = ""
        self.alert_threshold = None
        self.warning_threshold = None
        self.window_summary: Dict[str, Any] = {}
        for k, v in raw.items():
            setattr(self, k, v)

    def compare_basic_stats(self) -> pd.DataFrame:
        """Creates a simple dataframe making it easy to compare a baseline and window."""
        r = self.raw
        baseline = r["baseline_summary"]
        window = r["window_summary"]

        bs_df = meta_df(baseline, "Baseline")
        ws_df = meta_df(window, "Window")
        df = pd.concat([bs_df, ws_df])

        text_cols = ["start", "end"]
        tdf = df[text_cols]
        df = df.drop(text_cols, axis=1)

        df.loc["diff"] = df.loc["Window"] - df.loc["Baseline"]
        df.loc["pct_diff"] = df.loc["diff"] / df.loc["Baseline"] * 100.0
        return pd.concat([df.T, tdf.T])

    def baseline_stats(self) -> pd.DataFrame:
        """Creates a simple dataframe with the basic stats data for a baseline."""
        r = self.raw
        baseline = r["baseline_summary"]
        bs_df = meta_df(baseline, "Baseline")
        return bs_df.T

    def compare_bins(self) -> pd.DataFrame:
        """Creates a simple dataframe to compare the bin/edge information of baseline and window."""
        r = self.raw
        is_baseline_run = r["status"] == "BaselineRun"

        baseline = r["baseline_summary"]
        window = r["window_summary"]
        bs_df = edge_df(baseline)
        ws_df = edge_df(window)
        bs_df.columns = [f"b_{c}" for c in bs_df.columns]  # type: ignore
        ws_df.columns = [f"w_{c}" for c in ws_df.columns]  # type: ignore
        if is_baseline_run:
            df = bs_df
        else:
            df = pd.concat([bs_df, ws_df], axis=1)
            df["diff_in_pcts"] = df["w_aggregated_values"] - df["b_aggregated_values"]
        return df

    def baseline_bins(self) -> pd.DataFrame:
        """Creates a simple dataframe with the edge/bin data for a baseline."""
        r = self.raw

        baseline = r["baseline_summary"]
        bs_df = edge_df(baseline)
        bs_df.columns = [f"b_{c}" for c in bs_df.columns]  # type: ignore
        return bs_df.fillna(np.inf)

    def chart(self, show_scores=True):
        """Quickly creates a chart showing the bins, values, and scores of an assay analysis.
        show_scores will also label each bin with its final weighted (if specified) score.
        """
        r = self.raw
        is_baseline_run = r["status"] == "BaselineRun"
        baseline = r["baseline_summary"]
        window = r["window_summary"]

        summarizer = r["summarizer"]
        es = summarizer["bin_mode"]
        vk = baseline["aggregation"]
        metric = summarizer["metric"]
        num_bins = summarizer["num_bins"]
        weighted = True if summarizer["bin_weights"] is not None else False
        score = r["score"]
        scores = r["scores"]
        index = r["bin_index"]

        print(f"baseline mean = {baseline['mean']}")
        if not is_baseline_run:
            print(f"window mean = {window['mean']}")
        print(f"baseline median = {baseline['median']}")
        if not is_baseline_run:
            print(f"window median = {window['median']}")
        print(f"bin_mode = {es}")
        print(f"aggregation = {vk}")
        print(f"metric = {metric}")
        print(f"weighted = {weighted}")
        if not is_baseline_run:
            print(f"score = {score}")
            print(f"scores = {scores}")
            print(f"index = {index}")

        title = f"{num_bins} {es} {vk} {metric}={score:5.3f} bin#={index} Weighted={weighted} {window['start']}"

        if (
            len(baseline["aggregated_values"])
            == len(window["aggregated_values"])
            == len(baseline["edge_names"])
        ):
            if vk == "Edges":
                fig, ax = plt.subplots()
                for n, v in enumerate(baseline["aggregated_values"]):
                    plt.axvline(x=v, color="blue", alpha=0.5)
                    plt.text(v, 0, f"e{n}", color="blue")
                for n, v in enumerate(window["aggregated_values"]):
                    plt.axvline(x=v, color="orange", alpha=0.5)
                    plt.text(v, 0.1, f"e{n}", color="orange")
            else:
                fig, ax = plt.subplots()

                last = "Min"
                bin_begin = "["
                bin_end = ")"
                edge_names = []
                for idx, (n, e) in enumerate(
                    zip(baseline["edge_names"], baseline["edges"])
                ):
                    if e is not None:
                        next = f"{e:.1E}"
                        name = f"{n}\n{bin_begin}{last}, {next}{bin_end}"
                        last = next
                    else:
                        name = f"{n}\n({last}, Max]"
                    edge_names.append(name)
                    if idx >= 1:
                        bin_begin = "("
                        bin_end = "]"

                bar1 = plt.bar(
                    edge_names,
                    baseline["aggregated_values"],
                    alpha=0.50,
                    label=f"Baseline ({baseline['count']})",
                )
                if not is_baseline_run:
                    bar2 = plt.bar(
                        edge_names,
                        window["aggregated_values"],
                        alpha=0.50,
                        label=f"Window ({window['count']})",
                    )
                if len(edge_names) > 7:
                    ax.set_xticklabels(labels=edge_names, rotation=45)

                if show_scores and not is_baseline_run:
                    for i, bar in enumerate(bar1.patches):
                        ax.annotate(
                            f"{scores[i]:.4f}",
                            (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                            ha="center",
                            va="center",
                            size=9,
                            xytext=(0, 8),
                            textcoords="offset points",
                        )
                plt.legend()
            ax.set_title(title)
            plt.xticks(rotation=45)
            plt.show()
        else:
            print(title)
            print(
                len(baseline["aggregated_values"]),
                len(window["aggregated_values"]),
                len(baseline["edge_names"]),
                len(window["edge_names"]),
            )
            print(baseline["aggregated_values"])
            print(window["aggregated_values"])
            print(baseline["edge_names"])
            print(window["edge_names"])
        return r
The AssayAnalysis class helps handle the assay analysis logs from the Plateau logs. These logs are JSON documents with meta information on the assay and analysis, as well as summary information on the baseline and window, and information on the comparison between them.
def __init__(self, raw: Dict[str, Any]):
def compare_basic_stats(self) -> pd.DataFrame:
Creates a simple dataframe making it easy to compare a baseline and window.
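For instance, given a single analysis taken from an assay's results (results is assumed here to be an AssayAnalysisList, e.g. from an interactive assay run):

analysis = results[0]

# Baseline vs. window count/min/max/mean/median/std, plus diff and pct_diff rows.
analysis.compare_basic_stats()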
def baseline_stats(self) -> pd.DataFrame:
Creates a simple dataframe with the basic stats data for a baseline.
def compare_bins(self) -> pd.DataFrame:
Creates a simple dataframe to compare the bin/edge information of baseline and window.
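Continuing the example above, and assuming the analysis is not a baseline run, the per-bin comparison can be inspected directly:

bins = analysis.compare_bins()

# b_* columns come from the baseline, w_* from the window;
# diff_in_pcts is the per-bin difference of the aggregated values.
bins[["b_edge_names", "b_aggregated_values", "w_aggregated_values", "diff_in_pcts"]]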
def baseline_bins(self) -> pd.DataFrame:
Creates a simple dataframe with the edge/bin data for a baseline.
def chart(self, show_scores=True):
Quickly creates a chart showing the bins, values, and scores of an assay analysis. show_scores will also label each bin with its final weighted (if specified) score.
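A typical call, again assuming analysis is a single AssayAnalysis from a results list:

# Prints the summary statistics and renders the baseline/window bin chart;
# pass show_scores=False to omit the per-bin score labels.
analysis.chart()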
class AssayAnalysisList(object):
    """Helper class, primarily used to easily create a dataframe from a list
    of AssayAnalysis objects."""

    def __init__(self, raw: List[AssayAnalysis]):
        self.raw = raw

    def __getitem__(self, index):
        return self.raw[index]

    def __len__(self):
        return len(self.raw)

    def to_dataframe(self) -> pd.DataFrame:
        """Creates and returns a summary dataframe from the assay results."""
        return pd.DataFrame(
            [
                {
                    "assay_id": a.assay_id,
                    "name": a.name,
                    "iopath": a.iopath,
                    "score": a.score,
                    "start": a.window_summary["start"],
                    "min": a.window_summary["min"],
                    "max": a.window_summary["max"],
                    "mean": a.window_summary["mean"],
                    "median": a.window_summary["median"],
                    "std": a.window_summary["std"],
                    "warning_threshold": a.warning_threshold,
                    "alert_threshold": a.alert_threshold,
                    "status": a.status,
                }
                for a in self.raw
            ]
        )

    def to_full_dataframe(self) -> pd.DataFrame:
        """Creates and returns a dataframe with all values including inputs
        and outputs from the assay results."""

        return dict_list_to_dataframe([a.raw for a in self.raw])

    def chart_df(self, df: Union[pd.DataFrame, pd.Series], title: str, nth_x_tick=None):
        """Creates a basic chart of the scores from a dataframe created from an assay analysis list"""

        if nth_x_tick is None:
            if len(df) > 10:
                nth_x_tick = len(df) / 10
            else:
                nth_x_tick = 1

        plt.scatter(df.start, df.score, color=self.__pick_colors(df.status))
        plt.title(title)

        old_ticks = plt.xticks()[0]
        new_ticks = [t for i, t in enumerate(old_ticks) if i % nth_x_tick == 0]  # type: ignore
        plt.xticks(ticks=new_ticks, rotation=90)

        plt.grid()
        plt.show()

    def chart_scores(self, title: Optional[str] = None, nth_x_tick=4):
        """Creates a basic chart of the scores from an AssayAnalysisList"""
        if title is None:
            title = "Model Insights Score"
        ardf = self.to_dataframe()
        if ardf.shape == (0, 0):
            raise ValueError("No data in this AssayAnalysisList.")

        self.chart_df(ardf, title, nth_x_tick=nth_x_tick)

    def chart_iopaths(
        self,
        labels: Optional[List[str]] = None,
        selected_labels: Optional[List[str]] = None,
        nth_x_tick=None,
    ):
        """Creates a basic chart of the scores for each unique iopath of an AssayAnalysisList"""

        iadf = self.to_dataframe()
        if iadf.shape == (0, 0):
            raise ValueError("No io paths in this AssayAnalysisList.")

        for i, iopath in enumerate(iadf["iopath"].unique()):
            if selected_labels is None or (
                labels is not None and labels[i] in selected_labels
            ):
                tempdf = iadf[iadf["iopath"] == iopath]
                if labels:
                    label = (
                        f"Model Insights Score on '{labels[i]}' ({iopath}) vs Baseline"
                    )
                else:
                    label = f"Model Insights Score on '{iopath}' vs Baseline"

                self.chart_df(tempdf, label, nth_x_tick=nth_x_tick)

    def __status_color(self, status: str):
        if status == "Ok":
            return "green"
        elif status == "Warning":
            return "orange"
        else:
            return "red"

    def __pick_colors(self, s):
        return [self.__status_color(status) for status in s]
Helper class, primarily used to easily create a dataframe from a list of AssayAnalysis objects.
def to_dataframe(self) -> pd.DataFrame:
Creates and returns a summary dataframe from the assay results.
def to_full_dataframe(self) -> pd.DataFrame:
Creates and returns a dataframe with all values including inputs and outputs from the assay results.
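Assuming results is an AssayAnalysisList, the two dataframe helpers are typically used like this:

summary = results.to_dataframe()    # one row per analysis: score, window stats, status, ...
full = results.to_full_dataframe()  # every field of the raw analysis documents, flattened

# e.g. keep only the windows that did not come back "Ok"
summary[summary["status"] != "Ok"]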
def chart_df(self, df: Union[pd.DataFrame, pd.Series], title: str, nth_x_tick=None):
Creates a basic chart of the scores from a dataframe created from an assay analysis list
def chart_scores(self, title: Optional[str] = None, nth_x_tick=4):
Creates a basic chart of the scores from an AssayAnalysisList
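For example, to plot the score of every analysis over time, colored green/orange/red by status:

# One point per analysis window; raises ValueError if the list is empty.
results.chart_scores(title="Assay scores over time", nth_x_tick=2)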
def chart_iopaths(self, labels: Optional[List[str]] = None, selected_labels: Optional[List[str]] = None, nth_x_tick=None):
Creates a basic chart of the scores for each unique iopath of an AssayAnalysisList
class Assays(List[Assay]):
    """Wraps a list of assays for display in an HTML display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        def row(assay) -> str:
            return (
                "<tr>"
                + f"<td>{assay._name}</td>"
                + f"<td>{assay._active}</td>"
                + f"<td>{assay._status}</td>"
                + f"<td>{assay._warning_threshold}</td>"
                + f"<td>{assay._alert_threshold}</td>"
                + f"<td>{assay._pipeline_name}</td>"
                + "</tr>"
            )

        fields = [
            "name",
            "active",
            "status",
            "warning_threshold",
            "alert_threshold",
            "pipeline_name",
        ]

        if self == []:
            return "(no assays)"
        else:
            return (
                "<table>"
                + "<tr><th>"
                + "</th><th>".join(fields)
                + "</th></tr>"
                + ("".join([row(assay) for assay in self]))
                + "</table>"
            )
Wraps a list of assays for display in an HTML display-aware environment like Jupyter.
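In a notebook, evaluating such a collection in a cell renders the HTML table. A minimal sketch, assuming the client exposes a list_assays() helper that returns an Assays collection:

wl = wallaroo.Client()     # assumed: an authenticated Wallaroo client
assays = wl.list_assays()  # assumed helper returning an Assays collection

# In Jupyter, the last expression in a cell uses _repr_html_ and shows
# name, active, status, thresholds, and pipeline name as a table.
assays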
Inherited Members
- builtins.list
- list
- clear
- copy
- append
- insert
- extend
- pop
- remove
- index
- count
- reverse
- sort