wallaroo.assay

  1from typing import TYPE_CHECKING, Any, Dict, List, Optional
  2
  3import gql  # type: ignore
  4import matplotlib.pyplot as plt
  5import numpy as np
  6import pandas as pd
  7
  8from .inference_decode import dict_list_to_dataframe
  9from .object import *
 10from .wallaroo_ml_ops_api_client.api.assay import assays_set_active
 11from .wallaroo_ml_ops_api_client.models.assays_set_active_json_body import (
 12    AssaysSetActiveJsonBody,
 13)
 14
 15if TYPE_CHECKING:
 16    from .client import Client
 17
 18
 19class Assay(Object):
 20    """An Assay represents a record in the database. An assay contains
 21    some high level attributes such as name, status, active, etc. as well
 22    as the sub objects Baseline, Window and Summarizer which specify how
 23    the Baseline is derived, how the Windows should be created and how the
 24    analysis should be conducted."""
 25
 26    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
 27        self.client = client
 28        assert client is not None
 29        super().__init__(gql_client=client._gql_client, data=data)
 30
 31    def _fill(self, data: Dict[str, Any]) -> None:
 32        for required_attribute in ["id"]:
 33            if required_attribute not in data:
 34                raise RequiredAttributeMissing(
 35                    self.__class__.__name__, required_attribute
 36                )
 37        self._id = data["id"]
 38
 39        for k in [
 40            "active",
 41            "status",
 42            "name",
 43            "warning_threshold",
 44            "alert_threshold",
 45            "pipeline_name",
 46        ]:
 47            if k in data:
 48                setattr(self, f"_{k}", data[k])
 49
 50    def _fetch_attributes(self) -> Dict[str, Any]:
 51        return self._gql_client.execute(
 52            gql.gql(
 53                """
 54            query GetAssay($id: bigint) {
 55              assay(where: {id: {_eq: $id}}) {
 56                id
 57                name
 58                active
 59                status
 60                warning_threshold
 61                alert_threshold
 62                pipeline_name
 63              }
 64            }
 65            """
 66            ),
 67            variable_values={
 68                "id": self._id,
 69            },
 70        )["assay"]
 71
 72    def turn_on(self):
 73        """Sets the Assay to active causing it to run and backfill any
 74        missing analysis."""
 75
 76        ret = assays_set_active.sync(
 77            client=self.client.mlops(),
 78            json_body=AssaysSetActiveJsonBody(self._id, True),
 79        )
 80        self._active = True
 81        return ret
 82
 83    def turn_off(self):
 84        """Disables the Assay. No further analysis will be conducted until the assay
 85        is enabled."""
 86        ret = assays_set_active.sync(
 87            client=self.client.mlops(),
 88            json_body=AssaysSetActiveJsonBody(self._id, False),
 89        )
 90        self._active = False
 91        return ret
 92
 93    def set_alert_threshold(self, threshold: float):
 94        """Sets the alert threshold at the specified level. The status in the AssayAnalysis
 95        will show if this level is exceeded however currently alerting/notifications are
 96        not implemented."""
 97        res = self._gql_client.execute(
 98            gql.gql(
 99                """
100            mutation SetActive($id: bigint!, $alert_threshold: Float!) {
101                update_assay_by_pk(pk_columns: {id: $id}, _set: {alert_threshold: $alert_threshold}) {
102                    id
103                    active
104                }
105            }
106            """
107            ),
108            variable_values={"id": self._id, "alert_threshold": threshold},
109        )["update_assay_by_pk"]
110        self._alert_threshold = threshold
111        return res
112
113    def set_warning_threshold(self, threshold: float):
114        """Sets the warning threshold at the specified level. The status in the AssayAnalysis
115        will show if this level is exceeded however currently alerting/notifications are
116        not implemented."""
117
118        res = self._gql_client.execute(
119            gql.gql(
120                """
121            mutation SetActive($id: bigint!, $warning_threshold: Float!) {
122                update_assay_by_pk(pk_columns: {id: $id}, _set: {warning_threshold: $warning_threshold}) {
123                    id
124                    active
125                }
126            }
127            """
128            ),
129            variable_values={"id": self._id, "warning_threshold": threshold},
130        )["update_assay_by_pk"]
131        self._warning_threshold = threshold
132        return res
133
134
def meta_df(assay_result: Dict, index_name) -> pd.DataFrame:
    """Builds a one-row dataframe from the meta data of a baseline or window,
    leaving out the edge information.

    :param assay_result: The dict of the raw assay result
    :param index_name: The label used for the single row of the dataframe"""
    edge_keys = ("edges", "edge_names", "aggregated_values", "aggregation")
    columns = {}
    for key, value in assay_result.items():
        if key in edge_keys:
            continue
        # Each value is wrapped in a list so it becomes a single-row column.
        columns[key] = [value]
    return pd.DataFrame(columns, index=[index_name])
147
148
def edge_df(window_or_baseline: Dict) -> pd.DataFrame:
    """Builds a dataframe holding only the edge information of a baseline or window.

    :param window_or_baseline: The dict from the assay result of either the window or baseline"""
    # All four keys are required; a missing one raises KeyError.
    return pd.DataFrame(
        {
            "edges": window_or_baseline["edges"],
            "edge_names": window_or_baseline["edge_names"],
            "aggregated_values": window_or_baseline["aggregated_values"],
            "aggregation": window_or_baseline["aggregation"],
        }
    )
158
159
class AssayAnalysis(object):
    """Wrapper around a single assay analysis log from the Plateau logs.

    Each log is a json document with meta information on the assay and the
    analysis, summary information on the baseline and the window, and
    information on the comparison between them.
    """

    def __init__(self, raw: Dict[str, Any]):
        # Defaults come first; every key found in ``raw`` is then set as an
        # attribute and overrides the default of the same name.
        self.assay_id = 0
        self.name = ""
        self.raw = raw
        self.iopath = ""
        self.score = 0.0
        self.status = ""
        self.alert_threshold = None
        self.warning_threshold = None
        self.window_summary: Dict[str, Any] = {}
        for key, value in raw.items():
            setattr(self, key, value)

    def compare_basic_stats(self) -> pd.DataFrame:
        """Returns a dataframe that puts the baseline and window stats side by
        side, together with their absolute and percentage differences."""
        record = self.raw
        baseline_summary = record["baseline_summary"]
        window_summary = record["window_summary"]

        stats = pd.concat(
            [meta_df(baseline_summary, "Baseline"), meta_df(window_summary, "Window")]
        )

        # "start" and "end" are set aside so they stay out of the arithmetic.
        timestamp_cols = ["start", "end"]
        timestamps = stats[timestamp_cols]
        numeric = stats.drop(columns=timestamp_cols)

        numeric.loc["diff"] = numeric.loc["Window"] - numeric.loc["Baseline"]
        numeric.loc["pct_diff"] = numeric.loc["diff"] / numeric.loc["Baseline"] * 100.0
        return pd.concat([numeric.T, timestamps.T])

    def baseline_stats(self) -> pd.DataFrame:
        """Returns a dataframe with the basic stats of the baseline, one stat per row."""
        return meta_df(self.raw["baseline_summary"], "Baseline").T

    def compare_bins(self) -> pd.DataFrame:
        """Returns a dataframe comparing the bin/edge information of baseline and
        window. For a baseline run only the baseline columns are returned."""
        record = self.raw
        baseline_only = record["status"] == "BaselineRun"

        baseline_summary = record["baseline_summary"]
        window_summary = record["window_summary"]
        baseline_bins = edge_df(baseline_summary).add_prefix("b_")
        window_bins = edge_df(window_summary).add_prefix("w_")

        if baseline_only:
            return baseline_bins

        combined = pd.concat([baseline_bins, window_bins], axis=1)
        combined["diff_in_pcts"] = (
            combined["w_aggregated_values"] - combined["b_aggregated_values"]
        )
        return combined

    def baseline_bins(self) -> pd.DataFrame:
        """Returns a dataframe with the edge/bin data of the baseline; missing
        values are replaced with infinity."""
        bins = edge_df(self.raw["baseline_summary"]).add_prefix("b_")
        return bins.fillna(np.inf)

    @staticmethod
    def _bin_labels(edge_names, edges) -> List[str]:
        """Builds one tick label per bin: the bin name plus its value range."""
        labels = []
        lower = "Min"
        opener, closer = "[", ")"
        for position, (bin_name, edge) in enumerate(zip(edge_names, edges)):
            if edge is None:
                # A bin without an upper edge runs up to the maximum.
                labels.append(f"{bin_name}\n({lower}, Max]")
            else:
                upper = f"{edge:.1E}"
                labels.append(f"{bin_name}\n{opener}{lower}, {upper}{closer}")
                lower = upper
            # The brackets switch only after the label has been built, so the
            # opening bracket changes from the third bin on and the closing
            # bracket from the second bin on.
            if position >= 1:
                opener = "("
            closer = "]"
        return labels

    def chart(self, show_scores=True):
        """Quickly create a chart showing the bins, values and scores of an assay analysis.
        show_scores will also label each bin with its final weighted (if specified) score.

        When the baseline and window bin counts do not line up, nothing is
        plotted; the mismatching data is printed and the raw record returned.
        """
        record = self.raw
        baseline_only = record["status"] == "BaselineRun"
        baseline = record["baseline_summary"]
        window = record["window_summary"]

        summarizer = record["summarizer"]
        bin_mode = summarizer["bin_mode"]
        aggregation = baseline["aggregation"]
        metric = summarizer["metric"]
        num_bins = summarizer["num_bins"]
        weighted = summarizer["bin_weights"] is not None
        score = record["score"]
        scores = record["scores"]
        bin_index = record["bin_index"]

        print(f"baseline mean = {baseline['mean']}")
        if not baseline_only:
            print(f"window mean = {window['mean']}")
        print(f"baseline median = {baseline['median']}")
        if not baseline_only:
            print(f"window median = {window['median']}")
        print(f"bin_mode = {bin_mode}")
        print(f"aggregation = {aggregation}")
        print(f"metric = {metric}")
        print(f"weighted = {weighted}")
        if not baseline_only:
            print(f"score = {score}")
            print(f"scores = {scores}")
            print(f"index = {bin_index}")

        title = f"{num_bins} {bin_mode} {aggregation} {metric}={score:5.3f} bin#={bin_index} Weighted={weighted} {window['start']}"

        bins_line_up = (
            len(baseline["aggregated_values"])
            == len(window["aggregated_values"])
            == len(baseline["edge_names"])
        )
        if not bins_line_up:
            # Nothing sensible to plot: dump the mismatching data instead.
            print(title)
            print(
                len(baseline["aggregated_values"]),
                len(window["aggregated_values"]),
                len(baseline["edge_names"]),
                len(window["edge_names"]),
            )
            print(baseline["aggregated_values"])
            print(window["aggregated_values"])
            print(baseline["edge_names"])
            print(window["edge_names"])
            return record

        fig, ax = plt.subplots()
        if aggregation == "Edges":
            # One vertical line per edge: baseline in blue, window in orange.
            for summary, colour, text_y in (
                (baseline, "blue", 0),
                (window, "orange", 0.1),
            ):
                for n, v in enumerate(summary["aggregated_values"]):
                    plt.axvline(x=v, color=colour, alpha=0.5)
                    plt.text(v, text_y, f"e{n}", color=colour)
        else:
            labels = self._bin_labels(baseline["edge_names"], baseline["edges"])

            baseline_bars = plt.bar(
                labels,
                baseline["aggregated_values"],
                alpha=0.50,
                label=f"Baseline ({baseline['count']})",
            )
            if not baseline_only:
                plt.bar(
                    labels,
                    window["aggregated_values"],
                    alpha=0.50,
                    label=f"Window ({window['count']})",
                )
            if len(labels) > 7:
                ax.set_xticklabels(labels=labels, rotation=45)

            if show_scores and not baseline_only:
                # Each baseline bar is annotated with the score of its bin.
                for i, bar in enumerate(baseline_bars.patches):
                    ax.annotate(
                        f"{scores[i]:.4f}",
                        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                        ha="center",
                        va="center",
                        size=9,
                        xytext=(0, 8),
                        textcoords="offset points",
                    )
            plt.legend()
        ax.set_title(title)
        plt.xticks(rotation=45)
        plt.show()
345
346
class AssayAnalysisList(object):
    """Helper class primarily to easily create a dataframe from a list
    of AssayAnalysis objects."""

    def __init__(self, raw: List["AssayAnalysis"]):
        self.raw = raw

    def __getitem__(self, index):
        return self.raw[index]

    def __len__(self):
        return len(self.raw)

    def to_dataframe(self) -> pd.DataFrame:
        """Creates and returns a summary dataframe from the assay results.

        There is one row per analysis, holding its identifying fields, score,
        thresholds, status and the summary statistics of its window.
        """
        return pd.DataFrame(
            [
                {
                    "assay_id": a.assay_id,
                    "name": a.name,
                    "iopath": a.iopath,
                    "score": a.score,
                    "start": a.window_summary["start"],
                    "min": a.window_summary["min"],
                    "max": a.window_summary["max"],
                    "mean": a.window_summary["mean"],
                    "median": a.window_summary["median"],
                    "std": a.window_summary["std"],
                    "warning_threshold": a.warning_threshold,
                    "alert_threshold": a.alert_threshold,
                    "status": a.status,
                }
                for a in self.raw
            ]
        )

    def to_full_dataframe(self) -> pd.DataFrame:
        """Creates and returns a dataframe with all values including inputs
        and outputs from the assay results."""
        return dict_list_to_dataframe([a.raw for a in self.raw])

    @staticmethod
    def _default_tick_stride(point_count: int) -> int:
        """Returns the default spacing between kept x ticks for a chart of
        ``point_count`` points: every tick up to 19 points, roughly ten ticks beyond that."""
        # Floor division keeps the stride an integer; a fractional stride makes
        # the ``i % stride == 0`` filter in chart_df reject almost every tick.
        return max(1, point_count // 10)

    def chart_df(self, df: Union[pd.DataFrame, pd.Series], title: str, nth_x_tick=None):
        """Creates a basic chart of the scores from dataframe created from assay analysis list

        :param df: Data with ``start``, ``score`` and ``status`` entries to plot
        :param title: The chart title
        :param nth_x_tick: Keep only every n-th x tick; derived from the number
            of rows when omitted
        """
        if nth_x_tick is None:
            nth_x_tick = self._default_tick_stride(len(df))

        plt.scatter(df.start, df.score, color=self.__pick_colors(df.status))
        plt.title(title)

        # Thin out the ticks matplotlib picked so the labels stay readable.
        old_ticks = plt.xticks()[0]
        new_ticks = [t for i, t in enumerate(old_ticks) if i % nth_x_tick == 0]  # type: ignore
        plt.xticks(ticks=new_ticks, rotation=90)

        plt.grid()
        plt.show()

    def chart_scores(self, title: Optional[str] = None, nth_x_tick=4):
        """Creates a basic chart of the scores from an AssayAnalysisList

        :raises ValueError: when the list holds no analyses
        """
        if title is None:
            title = "Model Insights Score"
        ardf = self.to_dataframe()
        if ardf.empty:
            raise ValueError("No data in this AssayAnalysisList.")

        self.chart_df(ardf, title, nth_x_tick=nth_x_tick)

    def chart_iopaths(
        self,
        labels: Optional[List[str]] = None,
        selected_labels: Optional[List[str]] = None,
        nth_x_tick=None,
    ):
        """Creates a basic charts of the scores for each unique iopath of an AssayAnalysisList

        :param labels: Optional display names, matched by position to the unique iopaths
        :param selected_labels: When given, only iopaths whose label is listed
            here are charted
        :param nth_x_tick: Passed through to chart_df
        :raises ValueError: when the list holds no analyses
        """
        iadf = self.to_dataframe()
        if iadf.empty:
            raise ValueError("No io paths in this AssayAnalysisList.")

        for i, iopath in enumerate(iadf["iopath"].unique()):
            is_selected = selected_labels is None or (
                labels is not None and labels[i] in selected_labels
            )
            if not is_selected:
                continue

            if labels:
                label = f"Model Insights Score on '{labels[i]}' ({iopath}) vs Baseline"
            else:
                label = f"Model Insights Score on '{iopath}' vs Baseline"

            self.chart_df(iadf[iadf["iopath"] == iopath], label, nth_x_tick=nth_x_tick)

    def __status_color(self, status: str):
        """Maps an analysis status to the color of its point in the chart."""
        if status == "Ok":
            return "green"
        if status == "Warning":
            return "orange"
        # Every other status is drawn in red.
        return "red"

    def __pick_colors(self, s):
        return [self.__status_color(status) for status in s]
455
456
class Assays(List[Assay]):
    """Wraps a list of assays for display in an HTML display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        """Renders the assays as an HTML table with one row per assay.

        Returns the plain text ``(no assays)`` when the list is empty.
        """
        # Local import so this block brings its own dependency into scope.
        from html import escape

        if not self:
            return "(no assays)"

        fields = [
            "name",
            "active",
            "status",
            "warning_threshold",
            "alert_threshold",
            "pipeline_name",
        ]

        def cell(value) -> str:
            # Values such as the assay name are free text; escaping keeps them
            # from being interpreted as markup by the notebook.
            return f"<td>{escape(str(value))}</td>"

        def row(assay) -> str:
            # Each displayed field is read from the matching private attribute.
            cells = "".join(cell(getattr(assay, f"_{field}")) for field in fields)
            return "<tr>" + cells + "</tr>"

        header = "<tr><th>" + "</th><th>".join(fields) + "</th></tr>"
        body = "".join(row(assay) for assay in self)
        return "<table>" + header + body + "</table>"
class Assay(wallaroo.object.Object):
 20class Assay(Object):
 21    """An Assay represents a record in the database. An assay contains
 22    some high level attributes such as name, status, active, etc. as well
 23    as the sub objects Baseline, Window and Summarizer which specify how
 24    the Baseline is derived, how the Windows should be created and how the
 25    analysis should be conducted."""
 26
 27    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
 28        self.client = client
 29        assert client is not None
 30        super().__init__(gql_client=client._gql_client, data=data)
 31
 32    def _fill(self, data: Dict[str, Any]) -> None:
 33        for required_attribute in ["id"]:
 34            if required_attribute not in data:
 35                raise RequiredAttributeMissing(
 36                    self.__class__.__name__, required_attribute
 37                )
 38        self._id = data["id"]
 39
 40        for k in [
 41            "active",
 42            "status",
 43            "name",
 44            "warning_threshold",
 45            "alert_threshold",
 46            "pipeline_name",
 47        ]:
 48            if k in data:
 49                setattr(self, f"_{k}", data[k])
 50
 51    def _fetch_attributes(self) -> Dict[str, Any]:
 52        return self._gql_client.execute(
 53            gql.gql(
 54                """
 55            query GetAssay($id: bigint) {
 56              assay(where: {id: {_eq: $id}}) {
 57                id
 58                name
 59                active
 60                status
 61                warning_threshold
 62                alert_threshold
 63                pipeline_name
 64              }
 65            }
 66            """
 67            ),
 68            variable_values={
 69                "id": self._id,
 70            },
 71        )["assay"]
 72
 73    def turn_on(self):
 74        """Sets the Assay to active causing it to run and backfill any
 75        missing analysis."""
 76
 77        ret = assays_set_active.sync(
 78            client=self.client.mlops(),
 79            json_body=AssaysSetActiveJsonBody(self._id, True),
 80        )
 81        self._active = True
 82        return ret
 83
 84    def turn_off(self):
 85        """Disables the Assay. No further analysis will be conducted until the assay
 86        is enabled."""
 87        ret = assays_set_active.sync(
 88            client=self.client.mlops(),
 89            json_body=AssaysSetActiveJsonBody(self._id, False),
 90        )
 91        self._active = False
 92        return ret
 93
 94    def set_alert_threshold(self, threshold: float):
 95        """Sets the alert threshold at the specified level. The status in the AssayAnalysis
 96        will show if this level is exceeded however currently alerting/notifications are
 97        not implemented."""
 98        res = self._gql_client.execute(
 99            gql.gql(
100                """
101            mutation SetActive($id: bigint!, $alert_threshold: Float!) {
102                update_assay_by_pk(pk_columns: {id: $id}, _set: {alert_threshold: $alert_threshold}) {
103                    id
104                    active
105                }
106            }
107            """
108            ),
109            variable_values={"id": self._id, "alert_threshold": threshold},
110        )["update_assay_by_pk"]
111        self._alert_threshold = threshold
112        return res
113
114    def set_warning_threshold(self, threshold: float):
115        """Sets the warning threshold at the specified level. The status in the AssayAnalysis
116        will show if this level is exceeded however currently alerting/notifications are
117        not implemented."""
118
119        res = self._gql_client.execute(
120            gql.gql(
121                """
122            mutation SetActive($id: bigint!, $warning_threshold: Float!) {
123                update_assay_by_pk(pk_columns: {id: $id}, _set: {warning_threshold: $warning_threshold}) {
124                    id
125                    active
126                }
127            }
128            """
129            ),
130            variable_values={"id": self._id, "warning_threshold": threshold},
131        )["update_assay_by_pk"]
132        self._warning_threshold = threshold
133        return res

An Assay represents a record in the database. An assay contains some high level attributes such as name, status, active, etc. as well as the sub objects Baseline, Window and Summarizer which specify how the Baseline is derived, how the Windows should be created and how the analysis should be conducted.

Assay(client: Optional[wallaroo.client.Client], data: Dict[str, Any])
27    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
28        self.client = client
29        assert client is not None
30        super().__init__(gql_client=client._gql_client, data=data)

Base constructor.

Each object requires:

  • a GraphQL client - in order to fill its missing members dynamically
  • an initial data blob - typically from unserialized JSON, contains at least the data for required members (typically the object's primary key) and optionally other data members.
def turn_on(self):
73    def turn_on(self):
74        """Sets the Assay to active causing it to run and backfill any
75        missing analysis."""
76
77        ret = assays_set_active.sync(
78            client=self.client.mlops(),
79            json_body=AssaysSetActiveJsonBody(self._id, True),
80        )
81        self._active = True
82        return ret

Sets the Assay to active causing it to run and backfill any missing analysis.

def turn_off(self):
84    def turn_off(self):
85        """Disables the Assay. No further analysis will be conducted until the assay
86        is enabled."""
87        ret = assays_set_active.sync(
88            client=self.client.mlops(),
89            json_body=AssaysSetActiveJsonBody(self._id, False),
90        )
91        self._active = False
92        return ret

Disables the Assay. No further analysis will be conducted until the assay is enabled.

def set_alert_threshold(self, threshold: float):
 94    def set_alert_threshold(self, threshold: float):
 95        """Sets the alert threshold at the specified level. The status in the AssayAnalysis
 96        will show if this level is exceeded however currently alerting/notifications are
 97        not implemented."""
 98        res = self._gql_client.execute(
 99            gql.gql(
100                """
101            mutation SetActive($id: bigint!, $alert_threshold: Float!) {
102                update_assay_by_pk(pk_columns: {id: $id}, _set: {alert_threshold: $alert_threshold}) {
103                    id
104                    active
105                }
106            }
107            """
108            ),
109            variable_values={"id": self._id, "alert_threshold": threshold},
110        )["update_assay_by_pk"]
111        self._alert_threshold = threshold
112        return res

Sets the alert threshold at the specified level. The status in the AssayAnalysis will show if this level is exceeded; however, alerting/notifications are not currently implemented.

def set_warning_threshold(self, threshold: float):
114    def set_warning_threshold(self, threshold: float):
115        """Sets the warning threshold at the specified level. The status in the AssayAnalysis
116        will show if this level is exceeded however currently alerting/notifications are
117        not implemented."""
118
119        res = self._gql_client.execute(
120            gql.gql(
121                """
122            mutation SetActive($id: bigint!, $warning_threshold: Float!) {
123                update_assay_by_pk(pk_columns: {id: $id}, _set: {warning_threshold: $warning_threshold}) {
124                    id
125                    active
126                }
127            }
128            """
129            ),
130            variable_values={"id": self._id, "warning_threshold": threshold},
131        )["update_assay_by_pk"]
132        self._warning_threshold = threshold
133        return res

Sets the warning threshold at the specified level. The status in the AssayAnalysis will show if this level is exceeded; however, alerting/notifications are not currently implemented.

def meta_df(assay_result: Dict, index_name) -> pandas.core.frame.DataFrame:
def meta_df(assay_result: Dict, index_name) -> pd.DataFrame:
    """Builds a one-row dataframe from the meta data of a baseline or window,
    leaving out the edge information.

    :param assay_result: The dict of the raw assay result
    :param index_name: The label used for the single row of the dataframe"""
    edge_keys = ("edges", "edge_names", "aggregated_values", "aggregation")
    columns = {}
    for key, value in assay_result.items():
        if key in edge_keys:
            continue
        # Each value is wrapped in a list so it becomes a single-row column.
        columns[key] = [value]
    return pd.DataFrame(columns, index=[index_name])

Creates a dataframe for the meta data in the baseline or window excluding the edge information.

Parameters
  • assay_result: The dict of the raw assay result
def edge_df(window_or_baseline: Dict) -> pandas.core.frame.DataFrame:
def edge_df(window_or_baseline: Dict) -> pd.DataFrame:
    """Builds a dataframe holding only the edge information of a baseline or window.

    :param window_or_baseline: The dict from the assay result of either the window or baseline"""
    # All four keys are required; a missing one raises KeyError.
    return pd.DataFrame(
        {
            "edges": window_or_baseline["edges"],
            "edge_names": window_or_baseline["edge_names"],
            "aggregated_values": window_or_baseline["aggregated_values"],
            "aggregation": window_or_baseline["aggregation"],
        }
    )

Creates a dataframe specifically for the edge information in the baseline or window.

Parameters
  • window_or_baseline: The dict from the assay result of either the window or baseline
class AssayAnalysis:
class AssayAnalysis(object):
    """Wraps a single assay analysis record taken from the Plateau logs.

    Each record is a json document with meta information on the assay and
    analysis, summary information on the baseline and window, and information
    on the comparison between them. Every top-level key of the record is also
    set as an attribute on the instance."""

    def __init__(self, raw: Dict[str, Any]):
        """Store the raw record and expose its top-level keys as attributes.

        :param raw: The analysis record as a dict"""
        self.raw = raw
        # Defaults for attributes read elsewhere; overwritten below by any
        # key of the same name in the record.
        self.assay_id = 0
        self.name = ""
        self.iopath = ""
        self.score = 0.0
        self.status = ""
        self.alert_threshold = None
        self.warning_threshold = None
        self.window_summary: Dict[str, Any] = {}
        for key in raw:
            setattr(self, key, raw[key])

    def compare_basic_stats(self) -> pd.DataFrame:
        """Creates a simple dataframe making it easy to compare a baseline and window.

        The result has the columns Baseline, Window, diff and pct_diff; the
        "start" and "end" entries are excluded from the arithmetic and
        appended as the last rows."""
        record = self.raw
        baseline_summary = record["baseline_summary"]
        window_summary = record["window_summary"]

        stats = pd.concat(
            [meta_df(baseline_summary, "Baseline"), meta_df(window_summary, "Window")]
        )

        # "start"/"end" are kept aside so they do not take part in the subtraction.
        time_cols = ["start", "end"]
        times = stats[time_cols]
        stats = stats.drop(columns=time_cols)

        stats.loc["diff"] = stats.loc["Window"] - stats.loc["Baseline"]
        stats.loc["pct_diff"] = stats.loc["diff"] / stats.loc["Baseline"] * 100.0
        return pd.concat([stats.T, times.T])

    def baseline_stats(self) -> pd.DataFrame:
        """Creates a simple dataframe with the basic stats data for a baseline."""
        return meta_df(self.raw["baseline_summary"], "Baseline").T

    def compare_bins(self) -> pd.DataFrame:
        """Creates a simple dataframe to compare the bin/edge information of baseline and window.

        Baseline columns are prefixed with "b_" and window columns with "w_".
        When the record status is "BaselineRun" only the baseline columns are
        returned; otherwise a "diff_in_pcts" column is added."""
        record = self.raw
        baseline_only = record["status"] == "BaselineRun"

        baseline_summary = record["baseline_summary"]
        window_summary = record["window_summary"]
        baseline_bins = edge_df(baseline_summary).add_prefix("b_")
        window_bins = edge_df(window_summary).add_prefix("w_")
        if baseline_only:
            return baseline_bins

        combined = pd.concat([baseline_bins, window_bins], axis=1)
        combined["diff_in_pcts"] = (
            combined["w_aggregated_values"] - combined["b_aggregated_values"]
        )
        return combined

    def baseline_bins(self) -> pd.DataFrame:
        """Creates a simple dataframe with the edge/bin data for a baseline.

        Columns are prefixed with "b_" and missing values are replaced by
        ``np.inf``."""
        bins = edge_df(self.raw["baseline_summary"]).add_prefix("b_")
        return bins.fillna(np.inf)

    def chart(self, show_scores=True):
        """Quickly create a chart showing the bins, values and scores of an assay analysis.

        Summary values are printed first. If the baseline and window bin
        counts do not line up, the mismatching data is printed instead of
        charted and the raw record is returned; otherwise the chart is shown
        and nothing is returned.

        :param show_scores: Also label each bin with its final weighted (if
            specified) score"""
        record = self.raw
        baseline_only = record["status"] == "BaselineRun"
        baseline = record["baseline_summary"]
        window = record["window_summary"]

        summarizer = record["summarizer"]
        bin_mode = summarizer["bin_mode"]
        aggregation = baseline["aggregation"]
        metric = summarizer["metric"]
        num_bins = summarizer["num_bins"]
        weighted = summarizer["bin_weights"] is not None
        score = record["score"]
        scores = record["scores"]
        bin_index = record["bin_index"]

        print(f"baseline mean = {baseline['mean']}")
        if not baseline_only:
            print(f"window mean = {window['mean']}")
        print(f"baseline median = {baseline['median']}")
        if not baseline_only:
            print(f"window median = {window['median']}")
        print(f"bin_mode = {bin_mode}")
        print(f"aggregation = {aggregation}")
        print(f"metric = {metric}")
        print(f"weighted = {weighted}")
        if not baseline_only:
            print(f"score = {score}")
            print(f"scores = {scores}")
            print(f"index = {bin_index}")

        title = f"{num_bins} {bin_mode} {aggregation} {metric}={score:5.3f} bin#={bin_index} Weighted={weighted} {window['start']}"

        if not (
            len(baseline["aggregated_values"])
            == len(window["aggregated_values"])
            == len(baseline["edge_names"])
        ):
            # Bin counts disagree: dump the data for inspection instead of charting.
            print(title)
            print(
                len(baseline["aggregated_values"]),
                len(window["aggregated_values"]),
                len(baseline["edge_names"]),
                len(window["edge_names"]),
            )
            print(baseline["aggregated_values"])
            print(window["aggregated_values"])
            print(baseline["edge_names"])
            print(window["edge_names"])
            return record

        _, ax = plt.subplots()
        if aggregation == "Edges":
            # One vertical line per value: baseline in blue, window in orange.
            for n, v in enumerate(baseline["aggregated_values"]):
                plt.axvline(x=v, color="blue", alpha=0.5)
                plt.text(v, 0, f"e{n}", color="blue")
            for n, v in enumerate(window["aggregated_values"]):
                plt.axvline(x=v, color="orange", alpha=0.5)
                plt.text(v, 0.1, f"e{n}", color="orange")
        else:
            # Label every bin with its name and interval. The brackets change
            # as the loop advances: the first label is "[Min, e)", the second
            # "[e, e]", later ones "(e, e]"; a None edge yields "(e, Max]".
            lower = "Min"
            opening = "["
            closing = ")"
            bin_labels = []
            for position, (edge_name, edge) in enumerate(
                zip(baseline["edge_names"], baseline["edges"])
            ):
                if edge is None:
                    bin_labels.append(f"{edge_name}\n({lower}, Max]")
                else:
                    upper = f"{edge:.1E}"
                    bin_labels.append(
                        f"{edge_name}\n{opening}{lower}, {upper}{closing}"
                    )
                    lower = upper
                if position >= 1:
                    opening = "("
                closing = "]"

            baseline_bars = plt.bar(
                bin_labels,
                baseline["aggregated_values"],
                alpha=0.50,
                label=f"Baseline ({baseline['count']})",
            )
            if not baseline_only:
                plt.bar(
                    bin_labels,
                    window["aggregated_values"],
                    alpha=0.50,
                    label=f"Window ({window['count']})",
                )
            if len(bin_labels) > 7:
                ax.set_xticklabels(labels=bin_labels, rotation=45)

            if show_scores and not baseline_only:
                # Write each bin's score just above its baseline bar.
                for i, bar in enumerate(baseline_bars.patches):
                    ax.annotate(
                        f"{scores[i]:.4f}",
                        (bar.get_x() + bar.get_width() / 2, bar.get_height()),
                        ha="center",
                        va="center",
                        size=9,
                        xytext=(0, 8),
                        textcoords="offset points",
                    )
            plt.legend()
        ax.set_title(title)
        plt.xticks(rotation=45)
        plt.show()

The AssayAnalysis class helps handle the assay analysis logs from the Plateau logs. These logs are a json document with meta information on the assay and analysis as well as summary information on the baseline and window and information on the comparison between them.

AssayAnalysis(raw: Dict[str, Any])
167    def __init__(self, raw: Dict[str, Any]):
168        self.assay_id = 0
169        self.name = ""
170        self.raw = raw
171        self.iopath = ""
172        self.score = 0.0
173        self.status = ""
174        self.alert_threshold = None
175        self.warning_threshold = None
176        self.window_summary: Dict[str, Any] = {}
177        for k, v in raw.items():
178            setattr(self, k, v)
def compare_basic_stats(self) -> pandas.core.frame.DataFrame:
180    def compare_basic_stats(self) -> pd.DataFrame:
181        """Creates a simple dataframe making it easy to compare a baseline and window."""
182        r = self.raw
183        baseline = r["baseline_summary"]
184        window = r["window_summary"]
185
186        bs_df = meta_df(baseline, "Baseline")
187        ws_df = meta_df(window, "Window")
188        df = pd.concat([bs_df, ws_df])
189
190        text_cols = ["start", "end"]
191        tdf = df[text_cols]
192        df = df.drop(text_cols, axis=1)
193
194        df.loc["diff"] = df.loc["Window"] - df.loc["Baseline"]
195        df.loc["pct_diff"] = df.loc["diff"] / df.loc["Baseline"] * 100.0
196        return pd.concat([df.T, tdf.T])

Creates a simple dataframe making it easy to compare a baseline and window.

def baseline_stats(self) -> pandas.core.frame.DataFrame:
198    def baseline_stats(self) -> pd.DataFrame:
199        """Creates a simple dataframe with the basic stats data for a baseline."""
200        r = self.raw
201        baseline = r["baseline_summary"]
202        bs_df = meta_df(baseline, "Baseline")
203        return bs_df.T

Creates a simple dataframe with the basic stats data for a baseline.

def compare_bins(self) -> pandas.core.frame.DataFrame:
205    def compare_bins(self) -> pd.DataFrame:
206        """Creates a simple dataframe to compare the bin/edge information of baseline and window."""
207        r = self.raw
208        is_baseline_run = r["status"] == "BaselineRun"
209
210        baseline = r["baseline_summary"]
211        window = r["window_summary"]
212        bs_df = edge_df(baseline)
213        ws_df = edge_df(window)
214        bs_df.columns = [f"b_{c}" for c in bs_df.columns]  # type: ignore
215        ws_df.columns = [f"w_{c}" for c in ws_df.columns]  # type: ignore
216        if is_baseline_run:
217            df = bs_df
218        else:
219            df = pd.concat([bs_df, ws_df], axis=1)
220            df["diff_in_pcts"] = df["w_aggregated_values"] - df["b_aggregated_values"]
221        return df

Creates a simple dataframe to compare the bin/edge information of baseline and window.

def baseline_bins(self) -> pandas.core.frame.DataFrame:
223    def baseline_bins(self) -> pd.DataFrame:
224        """Creates a simple dataframe to with the edge/bin data for a baseline."""
225        r = self.raw
226
227        baseline = r["baseline_summary"]
228        bs_df = edge_df(baseline)
229        bs_df.columns = [f"b_{c}" for c in bs_df.columns]  # type: ignore
230        return bs_df.fillna(np.inf)

Creates a simple dataframe with the edge/bin data for a baseline.

def chart(self, show_scores=True):
232    def chart(self, show_scores=True):
233        """Quickly create a chart showing the bins, values and scores of an assay analysis.
234        show_scores will also label each bin with its final weighted (if specified) score.
235        """
236        r = self.raw
237        is_baseline_run = r["status"] == "BaselineRun"
238        baseline = r["baseline_summary"]
239        window = r["window_summary"]
240
241        summarizer = r["summarizer"]
242        es = summarizer["bin_mode"]
243        vk = baseline["aggregation"]
244        metric = summarizer["metric"]
245        num_bins = summarizer["num_bins"]
246        weighted = True if summarizer["bin_weights"] is not None else False
247        score = r["score"]
248        scores = r["scores"]
249        index = r["bin_index"]
250
251        print(f"baseline mean = {baseline['mean']}")
252        if not is_baseline_run:
253            print(f"window mean = {window['mean']}")
254        print(f"baseline median = {baseline['median']}")
255        if not is_baseline_run:
256            print(f"window median = {window['median']}")
257        print(f"bin_mode = {es}")
258        print(f"aggregation = {vk}")
259        print(f"metric = {metric}")
260        print(f"weighted = {weighted}")
261        if not is_baseline_run:
262            print(f"score = {score}")
263            print(f"scores = {scores}")
264            print(f"index = {index}")
265
266        title = f"{num_bins} {es} {vk} {metric}={score:5.3f} bin#={index} Weighted={weighted} {window['start']}"
267
268        if (
269            len(baseline["aggregated_values"])
270            == len(window["aggregated_values"])
271            == len(baseline["edge_names"])
272        ):
273            if vk == "Edges":
274                fig, ax = plt.subplots()
275                for n, v in enumerate(baseline["aggregated_values"]):
276                    plt.axvline(x=v, color="blue", alpha=0.5)
277                    plt.text(v, 0, f"e{n}", color="blue")
278                for n, v in enumerate(window["aggregated_values"]):
279                    plt.axvline(x=v, color="orange", alpha=0.5)
280                    plt.text(v, 0.1, f"e{n}", color="orange")
281            else:
282                fig, ax = plt.subplots()
283
284                last = "Min"
285                bin_begin = "["
286                bin_end = ")"
287                edge_names = []
288                for idx, (n, e) in enumerate(
289                    zip(baseline["edge_names"], baseline["edges"])
290                ):
291                    if e is not None:
292                        next = f"{e:.1E}"
293                        name = f"{n}\n{bin_begin}{last}, {next}{bin_end}"
294                        last = next
295                    else:
296                        name = f"{n}\n({last}, Max]"
297                    edge_names.append(name)
298                    if idx >= 1:
299                        bin_begin = "("
300                    bin_end = "]"
301
302                bar1 = plt.bar(
303                    edge_names,
304                    baseline["aggregated_values"],
305                    alpha=0.50,
306                    label=f"Baseline ({baseline['count']})",
307                )
308                if not is_baseline_run:
309                    bar2 = plt.bar(
310                        edge_names,
311                        window["aggregated_values"],
312                        alpha=0.50,
313                        label=f"Window ({window['count']})",
314                    )
315                if len(edge_names) > 7:
316                    ax.set_xticklabels(labels=edge_names, rotation=45)
317
318                if show_scores and not is_baseline_run:
319                    for i, bar in enumerate(bar1.patches):
320                        ax.annotate(
321                            f"{scores[i]:.4f}",
322                            (bar.get_x() + bar.get_width() / 2, bar.get_height()),
323                            ha="center",
324                            va="center",
325                            size=9,
326                            xytext=(0, 8),
327                            textcoords="offset points",
328                        )
329                plt.legend()
330            ax.set_title(title)
331            plt.xticks(rotation=45)
332            plt.show()
333        else:
334            print(title)
335            print(
336                len(baseline["aggregated_values"]),
337                len(window["aggregated_values"]),
338                len(baseline["edge_names"]),
339                len(window["edge_names"]),
340            )
341            print(baseline["aggregated_values"])
342            print(window["aggregated_values"])
343            print(baseline["edge_names"])
344            print(window["edge_names"])
345            return r

Quickly create a chart showing the bins, values and scores of an assay analysis. show_scores will also label each bin with its final weighted (if specified) score.

class AssayAnalysisList:
class AssayAnalysisList(object):
    """Helper class primarily to easily create a dataframe from a list
    of AssayAnalysis objects."""

    def __init__(self, raw: List["AssayAnalysis"]):
        """:param raw: The AssayAnalysis objects to wrap"""
        self.raw = raw

    def __getitem__(self, index):
        return self.raw[index]

    def __len__(self):
        return len(self.raw)

    def to_dataframe(self) -> pd.DataFrame:
        """Creates and returns a summary dataframe from the assay results.

        There is one row per analysis; the start/min/max/mean/median/std
        columns are read from each analysis' ``window_summary``. An empty
        list yields an empty dataframe of shape (0, 0)."""
        return pd.DataFrame(
            [
                {
                    "assay_id": a.assay_id,
                    "name": a.name,
                    "iopath": a.iopath,
                    "score": a.score,
                    "start": a.window_summary["start"],
                    "min": a.window_summary["min"],
                    "max": a.window_summary["max"],
                    "mean": a.window_summary["mean"],
                    "median": a.window_summary["median"],
                    "std": a.window_summary["std"],
                    "warning_threshold": a.warning_threshold,
                    "alert_threshold": a.alert_threshold,
                    "status": a.status,
                }
                for a in self.raw
            ]
        )

    def to_full_dataframe(self) -> pd.DataFrame:
        """Creates and returns a dataframe with all values including inputs
        and outputs from the assay results."""

        return dict_list_to_dataframe([a.raw for a in self.raw])

    @staticmethod
    def _default_tick_stride(num_points: int) -> int:
        """Return the default x tick stride: keep roughly ten ticks in total.

        The stride must be a whole number: it is used as ``i % stride == 0``
        on integer tick positions, and a fractional stride (e.g. 3.7) would
        match almost no position."""
        if num_points > 10:
            return num_points // 10
        return 1

    def chart_df(self, df: Union[pd.DataFrame, pd.Series], title: str, nth_x_tick=None):
        """Creates a basic chart of the scores from a dataframe created from an assay analysis list.

        :param df: Data with ``start``, ``score`` and ``status`` columns
        :param title: Title of the chart
        :param nth_x_tick: Keep only every nth x tick. When None, a stride
            giving roughly ten ticks is chosen from the length of ``df``"""

        if nth_x_tick is None:
            nth_x_tick = self._default_tick_stride(len(df))

        plt.scatter(df.start, df.score, color=self.__pick_colors(df.status))
        plt.title(title)

        # Thin out the x ticks so the labels stay readable.
        old_ticks = plt.xticks()[0]
        new_ticks = [t for i, t in enumerate(old_ticks) if i % nth_x_tick == 0]  # type: ignore
        plt.xticks(ticks=new_ticks, rotation=90)

        plt.grid()
        plt.show()

    def chart_scores(self, title: Optional[str] = None, nth_x_tick=4):
        """Creates a basic chart of the scores from an AssayAnalysisList

        :param title: Title of the chart; defaults to "Model Insights Score"
        :param nth_x_tick: Keep only every nth x tick
        :raises ValueError: If the list holds no analyses"""
        if title is None:
            title = "Model Insights Score"
        ardf = self.to_dataframe()
        if ardf.shape == (0, 0):
            raise ValueError("No data in this AssayAnalysisList.")

        self.chart_df(ardf, title, nth_x_tick=nth_x_tick)

    def chart_iopaths(
        self,
        labels: Optional[List[str]] = None,
        selected_labels: Optional[List[str]] = None,
        nth_x_tick=None,
    ):
        """Creates a basic chart of the scores for each unique iopath of an AssayAnalysisList

        :param labels: Optional display names, matched by position to the
            unique iopaths in order of first appearance
        :param selected_labels: When given, only iopaths whose label is in
            this list are charted (requires ``labels``)
        :param nth_x_tick: Keep only every nth x tick; passed to ``chart_df``
        :raises ValueError: If the list holds no analyses"""

        iadf = self.to_dataframe()
        if iadf.shape == (0, 0):
            raise ValueError("No io paths in this AssayAnalysisList.")

        for i, iopath in enumerate(iadf["iopath"].unique()):
            if selected_labels is None or (
                labels is not None and labels[i] in selected_labels
            ):
                tempdf = iadf[iadf["iopath"] == iopath]
                if labels:
                    label = (
                        f"Model Insights Score on '{labels[i]}' ({iopath}) vs Baseline"
                    )
                else:
                    label = f"Model Insights Score on '{iopath}' vs Baseline"

                self.chart_df(tempdf, label, nth_x_tick=nth_x_tick)

    def __status_color(self, status: str):
        # "Ok" -> green, "Warning" -> orange, anything else -> red.
        if status == "Ok":
            return "green"
        elif status == "Warning":
            return "orange"
        else:
            return "red"

    def __pick_colors(self, s):
        # One color per status, in the same order as the input.
        return [self.__status_color(status) for status in s]

Helper class primarily to easily create a dataframe from a list of AssayAnalysis objects.

AssayAnalysisList(raw: List[wallaroo.assay.AssayAnalysis])
    def __init__(self, raw: List[AssayAnalysis]):
        """:param raw: The AssayAnalysis objects to wrap"""
        # Kept as-is; indexing, len() and the dataframe helpers all read it.
        self.raw = raw
def to_dataframe(self) -> pandas.core.frame.DataFrame:
361    def to_dataframe(self) -> pd.DataFrame:
362        """Creates and returns a summary dataframe from the assay results."""
363        return pd.DataFrame(
364            [
365                {
366                    "assay_id": a.assay_id,
367                    "name": a.name,
368                    "iopath": a.iopath,
369                    "score": a.score,
370                    "start": a.window_summary["start"],
371                    "min": a.window_summary["min"],
372                    "max": a.window_summary["max"],
373                    "mean": a.window_summary["mean"],
374                    "median": a.window_summary["median"],
375                    "std": a.window_summary["std"],
376                    "std": a.window_summary["std"],
377                    "warning_threshold": a.warning_threshold,
378                    "alert_threshold": a.alert_threshold,
379                    "status": a.status,
380                }
381                for a in self.raw
382            ]
383        )

Creates and returns a summary dataframe from the assay results.

def to_full_dataframe(self) -> pandas.core.frame.DataFrame:
385    def to_full_dataframe(self) -> pd.DataFrame:
386        """Creates and returns a dataframe with all values including inputs
387        and outputs from the assay results."""
388
389        return dict_list_to_dataframe([a.raw for a in self.raw])

Creates and returns a dataframe with all values including inputs and outputs from the assay results.

def chart_df( self, df: Union[pandas.core.frame.DataFrame, pandas.core.series.Series], title: str, nth_x_tick=None):
391    def chart_df(self, df: Union[pd.DataFrame, pd.Series], title: str, nth_x_tick=None):
392        """Creates a basic chart of the scores from dataframe created from assay analysis list"""
393
394        if nth_x_tick is None:
395            if len(df) > 10:
396                nth_x_tick = len(df) / 10
397            else:
398                nth_x_tick = 1
399
400        plt.scatter(df.start, df.score, color=self.__pick_colors(df.status))
401        plt.title(title)
402
403        old_ticks = plt.xticks()[0]
404        new_ticks = [t for i, t in enumerate(old_ticks) if i % nth_x_tick == 0]  # type: ignore
405        plt.xticks(ticks=new_ticks, rotation=90)
406
407        plt.grid()
408        plt.show()

Creates a basic chart of the scores from a dataframe created from an assay analysis list

def chart_scores(self, title: Optional[str] = None, nth_x_tick=4):
410    def chart_scores(self, title: Optional[str] = None, nth_x_tick=4):
411        """Creates a basic chart of the scores from an AssayAnalysisList"""
412        if title is None:
413            title = f"Model Insights Score"
414        ardf = self.to_dataframe()
415        if ardf.shape == (0, 0):
416            raise ValueError("No data in this AssayAnalysisList.")
417
418        self.chart_df(ardf, title, nth_x_tick=nth_x_tick)

Creates a basic chart of the scores from an AssayAnalysisList

def chart_iopaths( self, labels: Optional[List[str]] = None, selected_labels: Optional[List[str]] = None, nth_x_tick=None):
420    def chart_iopaths(
421        self,
422        labels: Optional[List[str]] = None,
423        selected_labels: Optional[List[str]] = None,
424        nth_x_tick=None,
425    ):
426        """Creates a basic charts of the scores for each unique iopath of an AssayAnalysisList"""
427
428        iadf = self.to_dataframe()
429        if iadf.shape == (0, 0):
430            raise ValueError("No io paths in this AssayAnalysisList.")
431
432        for i, iopath in enumerate(iadf["iopath"].unique()):
433            if selected_labels is None or (
434                labels is not None and labels[i] in selected_labels
435            ):
436                tempdf = iadf[iadf["iopath"] == iopath]
437                if labels:
438                    label = (
439                        f"Model Insights Score on '{labels[i]}' ({iopath}) vs Baseline"
440                    )
441                else:
442                    label = f"Model Insights Score on '{iopath}' vs Baseline"
443
444                self.chart_df(tempdf, label, nth_x_tick=nth_x_tick)

Creates a basic chart of the scores for each unique iopath of an AssayAnalysisList

class Assays(typing.List[wallaroo.assay.Assay]):
class Assays(List[Assay]):
    """Wraps a list of assays for display in an HTML display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        """Render the assays as an HTML table with one row per assay.

        Returns the plain text "(no assays)" when the list is empty."""
        if not self:
            return "(no assays)"

        fields = [
            "name",
            "active",
            "status",
            "warning_threshold",
            "alert_threshold",
            "pipeline_name",
        ]
        header_row = "<tr><th>" + "</th><th>".join(fields) + "</th></tr>"

        body_rows = []
        for assay in self:
            # Each column shows the assay's private attribute of the same name.
            cells = "".join(
                f"<td>{getattr(assay, '_' + field)}</td>" for field in fields
            )
            body_rows.append("<tr>" + cells + "</tr>")

        return "<table>" + header_row + "".join(body_rows) + "</table>"

Wraps a list of assays for display in an HTML display-aware environment like Jupyter.

Inherited Members
builtins.list
list
clear
copy
append
insert
extend
pop
remove
index
count
reverse
sort