wallaroo.assay_config

  1import json
  2import math
  3from abc import ABC, abstractmethod
  4from collections import Counter
  5from datetime import datetime, timezone
  6from enum import Enum
  7from typing import TYPE_CHECKING, Dict, List, Optional, TypeVar, Union
  8
  9import matplotlib.pyplot as plt
 10import pandas as pd
 11import seaborn as sns
 12
 13from wallaroo.assay import AssayAnalysis, AssayAnalysisList
 14
 15from .wallaroo_ml_ops_api_client.api.assay import (
 16    assays_run_interactive,
 17    assays_run_interactive_baseline,
 18)
 19from .wallaroo_ml_ops_api_client.models.assays_run_interactive_baseline_json_body import (
 20    AssaysRunInteractiveBaselineJsonBody,
 21)
 22from .wallaroo_ml_ops_api_client.models.assays_run_interactive_baseline_response_200 import (
 23    AssaysRunInteractiveBaselineResponse200,
 24)
 25from .wallaroo_ml_ops_api_client.models.assays_run_interactive_json_body import (
 26    AssaysRunInteractiveJsonBody,
 27)
 28
 29if TYPE_CHECKING:
 30    # Imports that happen below in methods to fix circular import dependency
 31    # issues need to also be specified here to satisfy mypy type checking.
 32    from wallaroo.client import Client
 33
 34T = TypeVar("T")
 35
 36
 37def unwrap(v: Optional[T]) -> T:
 38    """Simple function to placate pylance"""
 39    if v:
 40        return v
 41    raise Exception("Expected a value in forced unwrap")
 42
 43
 44class BaselineConfig(object):
 45    """Abstract base class for Baseline config objects. Currently
 46    only FixedBaseline is implemented though SlidingBaseline and
 47    others are planned."""
 48
 49    def __init__(self):
 50        pass
 51
 52    def to_json(self) -> str:
 53        return json.dumps(self, indent=4, default=ConfigEncoder)
 54
 55
 56class FixedBaseline(BaselineConfig):
 57    """The FixedBaseline is calculated from the inferences from a
 58    specific time window."""
 59
 60    def __init__(
 61        self, pipeline_name: str, model_name: str, start: datetime, end: datetime
 62    ):
 63        self.Fixed = {
 64            "pipeline": pipeline_name,
 65            "model": model_name,
 66            "start_at": start.isoformat(),
 67            "end_at": end.isoformat(),
 68        }
 69
 70
 71class BaselineBuilder(ABC):
 72    @abstractmethod
 73    def build(self) -> BaselineConfig:
 74        pass
 75
 76    def to_json(self) -> str:
 77        return json.dumps(self, indent=4, default=ConfigEncoder)
 78
 79
 80def ensure_tz(d: datetime) -> datetime:
 81    """Ensure the date is tz aware. If naive, assume it is in UTC."""
 82    if d.tzinfo:
 83        return d
 84    else:
 85        return d.astimezone(tz=timezone.utc)
 86
 87
 88class FixedBaselineBuilder(BaselineBuilder):
 89    """Helps to easily create the config object for a FixedBaseline."""
 90
 91    def __init__(self, pipeline_name: str):
 92        self.pipeline_name = pipeline_name
 93        self.model_name: Optional[str] = None
 94        self.start: Optional[datetime] = None
 95        self.end: Optional[datetime] = None
 96
 97    def add_model_name(self, model_name: str):
 98        """Specify the model to use in the baseline"""
 99        self.model_name = model_name
100        return self
101
102    def add_start(self, start: datetime):
103        """Specify the start of the window for the baseline"""
104        self.start = start
105        return self
106
107    def add_end(self, end: datetime):
108        """Specify the end of the window for the baseline"""
109        self.end = end
110        return self
111
112    def build(self) -> FixedBaseline:
113        """Create the FixedBaseline object."""
114        start = ensure_tz(unwrap(self.start))
115        end = ensure_tz(unwrap(self.end))
116
117        return FixedBaseline(self.pipeline_name, unwrap(self.model_name), start, end)
118
119
120class SummarizerConfig(object):
121    """The summarizer specifies how the bins of the baseline and
122    window should be compared."""
123
124    def __init__(self):
125        pass
126
127    def to_json(self) -> str:
128        return json.dumps(self, indent=4, default=ConfigEncoder)
129
130
131class BinMode(str, Enum):
132    """How should we calculate the bins.
133    NONE - no bins. Only useful if we only care about the mean, median, etc.
134    EQUAL - evenly spaced bins of width (max - min) / num_bins
135    QUANTILE - based on percentages. If num_bins is 5 then quintiles
136    so bins are created at the 20%, 40%, 60%, 80% and 100% points.
137    PROVIDED - user provides the edge points for the bins.
138    """
139
140    NONE = "None"
141    EQUAL = "Equal"
142    QUANTILE = "Quantile"
143    PROVIDED = "Provided"
144
145
146class Aggregation(str, Enum):
147    """What we use to calculate the score.
148    EDGES - distances between the edges.
149    DENSITY - percentage of values that fall in each bin.
150    CUMULATIVE - cumulative percentage that fall in the bins."""
151
152    EDGES = "Edges"
153    DENSITY = "Density"
154    CUMULATIVE = "Cumulative"
155
156
157class Metric(str, Enum):
158    """How we calculate the score.
159    MAXDIFF - maximum difference between corresponding bins.
160    SUMDIFF - sum of differences between corresponding bins.
161    PSI - Population Stability Index"""
162
163    MAXDIFF = "MaxDiff"
164    SUMDIFF = "SumDiff"
165    PSI = "PSI"
166
167
168class UnivariateContinousSummarizerConfig(SummarizerConfig):
169    """The UnivariateContinousSummarizer analyzes one input or output feature
170    (Univariate) at a time. Expects the values to be continuous or at least numerous
171    enough to fall in various/all the bins."""
172
173    def __init__(
174        self,
175        bin_mode: BinMode,
176        aggregation: Aggregation,
177        metric: Metric,
178        num_bins: int,
179        bin_weights: Optional[List[float]] = None,
180        bin_width: Optional[float] = None,
181        provided_edges: Optional[List[float]] = None,
182        add_outlier_edges: bool = True,
183    ):
184        self.type = "UnivariateContinuous"
185        self.bin_mode = bin_mode
186        self.aggregation = aggregation
187        self.metric = metric
188        self.num_bins = num_bins
189        self.bin_weights = bin_weights
190        self.bin_width = bin_width
191        self.provided_edges = provided_edges
192        self.add_outlier_edges = add_outlier_edges
193
194
195class SummarizerBuilder(ABC):
196    @abstractmethod
197    def build(self) -> SummarizerConfig:
198        pass
199
200
201class UnivariateContinousSummarizerBuilder(SummarizerBuilder):
202    """Builds the UnivariateContinousSummarizerConfig."""
203
204    def __init__(self):
205        self.bin_mode = BinMode.QUANTILE
206        self.aggregation = Aggregation.DENSITY
207        self.metric = Metric.PSI
208        self.num_bins = 5
209        self.bin_weights: Optional[List[float]] = None
210        self.bin_width: Optional[float] = None
211        self.provided_edges: Optional[List[float]] = None
212        self.add_outlier_edges = True
213
214    def build(self) -> UnivariateContinousSummarizerConfig:
215        if self.bin_mode == BinMode.PROVIDED:
216            if self.provided_edges is None:
217                raise ValueError("Edges must be provided with BinMode.PROVIDED")
218        else:
219            if self.provided_edges is not None:
220                raise ValueError(
221                    f"Edges may not be provided with bin mode {self.bin_mode}"
222                )
223
224        sum = UnivariateContinousSummarizerConfig(
225            self.bin_mode,
226            self.aggregation,
227            self.metric,
228            self.num_bins,
229            self.bin_weights,
230            self.bin_width,
231            self.provided_edges,
232            self.add_outlier_edges,
233        )
234        return sum
235
236    def add_bin_mode(self, bin_mode: BinMode, edges: Optional[List[float]] = None):
237        """Sets the binning mode. If BinMode.PROVIDED is specified a list of edges
238        is also required."""
239        if bin_mode == BinMode.PROVIDED:
240            if edges is None:
241                raise ValueError("Edges must be provided with BinMode.PROVIDED")
242
243        self.bin_mode = bin_mode
244        self.add_bin_edges(edges)
245        return self
246
247    def add_num_bins(self, num_bins: int):
248        """Sets the number of bins. If weights have been previously set they
249        must be set to none to allow changing the number of bins."""
250
251        if num_bins != self.num_bins and self.bin_weights is not None:
252            if num_bins + 2 != len(self.bin_weights):
253                msg = (
254                    f"({len(self.bin_weights)}) bin weights have already been set. "
255                    + f"Please set them to None before changing the number of bins."
256                )
257                raise ValueError(msg)
258
259        if num_bins != self.num_bins and self.provided_edges is not None:
260            if not (
261                len(self.provided_edges) == num_bins
262                or len(self.provided_edges) == num_bins + 1
263            ):
264                msg = (
265                    f"({len(self.provided_edges)}) bin edges have already been set. "
266                    + f"Please set them to None before changing the number of bins."
267                )
268                raise ValueError(msg)
269
270        self.num_bins = num_bins
271        return self
272
273    def add_bin_weights(self, weights: Union[List[float], None]):
274        """Specifies the weighting to be given to the bins. The number of weights
275    must be 2 larger than the number of bins to accommodate outliers smaller
276        and outliers larger than values seen in the baseline.
277        The passed in values can be whole or real numbers and do not need to add
278        up to 1 or any other specific value as they will be normalized during the
279        score calculation phase.
280        The weights passed in can be none to remove previously specified weights
281        and to allow changing of the number of bins."""
282
283        if weights is not None:
284            if self.num_bins + 2 != len(weights):
285                msg = (
286                    f"The number of weights ({len(weights)}) "
287                    + f"must be 2 more ({self.num_bins + 2}) than the "
288                    + f"number of bins ({self.num_bins}) to allow for the "
289                    + f"left and right outlier bins."
290                )
291                raise ValueError(msg)
292        self.bin_weights = weights
293        return self
294
295    def add_metric(self, metric: Metric):
296        """Sets the metric mode."""
297        self.metric = metric
298        return self
299
300    def add_aggregation(self, aggregation: Aggregation):
301        """Sets the aggregation style."""
302        self.aggregation = aggregation
303        return self
304
305    def add_bin_edges(self, edges: Union[List[float], None]):
306        """Specifies the right hand side (max value) of the bins. The number
307        of edges must be equal to or one more than the number of bins. When
308        equal to the number of bins the edge for the left outlier bin is
309    calculated from the baseline. When an additional edge is provided (one more
310    than the number of bins), that first (lowest) value is used as the max value
311    for the left outlier bin. The max value for the right hand outlier bin is
312    always Float MAX.
313        """
314
315        if edges is not None:
316            if not (len(edges) == self.num_bins or len(edges) == self.num_bins + 1):
317                msg = (
318                    f"The number of edges ({len(edges)}) "
319                    + f"must be equal to ({self.num_bins}) or one more "
320                    + f"({self.num_bins + 1 }) than the number of bins to account "
321                    + f"for the left outlier bin."
322                )
323                raise ValueError(msg)
324            edges = sorted(edges)
325
326        self.provided_edges = edges
327        return self
328
329
330class WindowConfig(object):
331    """Configures a window to be compared against the baseline."""
332
333    def __init__(
334        self,
335        pipeline_name: str,
336        model_name: str,
337        width: str,
338        start: Optional[datetime] = None,
339        interval: Optional[str] = None,
340    ):
341        self.pipeline = pipeline_name
342        self.model = model_name
343        self.width = width
344        self.start = start
345        self.interval = interval
346
347    def to_json(self) -> str:
348        return json.dumps(self, indent=4, default=ConfigEncoder)
349
350
351class WindowBuilder(object):
352    """Helps build a WindowConfig. model and width are required but there are no
353    good default values for them because they depend on the baseline. We leave it
354    up to the assay builder to configure the window correctly after it is created.
355    """
356
357    def __init__(self, pipeline_name: str):
358        self.pipeline = pipeline_name
359        self.model: Optional[str] = None
360        self.width: Optional[str] = "24 hours"
361        self.start: Optional[datetime] = None
362        self.interval: Optional[str] = None
363
364    def add_model_name(self, model_name: str):
365        """The model name (model_id) that the window should analyze."""
366        self.model = model_name
367        return self
368
369    def _duration_kw_to_str(self, **kwargs) -> str:
370        interval_names = ["minute", "hour", "day", "week"]
371        duration_str = None
372        kw_count = 0
373
374        for interval_name in interval_names:
375            plural = interval_name + "s"
376
377            for kw in [interval_name, plural]:
378                if kw in kwargs:
379                    duration_str = f"{kwargs[kw]} {plural}"
380                    kw_count += 1
381
382        if kw_count == 0:
383            raise Exception(
384                "Please specify one of 'minutes', 'hours', 'days' or 'weeks' keyword args"
385            )
386
387        elif kw_count > 1:
388            raise Exception(
389                "Please specify only one of 'minutes', 'hours', 'days' or 'weeks' keyword args"
390            )
391        else:
392            return unwrap(duration_str)
393
394    def add_width(self, **kwargs: int):
395        """The width of the window to use when collecting data for analysis."""
396        self.width = self._duration_kw_to_str(**kwargs)
397        return self
398
399    def add_interval(self, **kwargs: int):
400        """The interval between analyses, i.e. how often to run an analysis with the specified width."""
401        self.interval = self._duration_kw_to_str(**kwargs)
402        return self
403
404    def add_start(self, start: datetime):
405        self.start = start
406        return self
407
408    def build(self) -> WindowConfig:
409        start = ensure_tz(self.start) if self.start else None
410
411        return WindowConfig(
412            self.pipeline,
413            unwrap(self.model),
414            unwrap(self.width),
415            start,
416            self.interval,
417        )
418
419
420def ConfigEncoder(o):
421    """Used to format datetimes as we need when encoding to JSON"""
422    if isinstance(o, datetime):
423        return o.isoformat()
424    else:
425        return o.__dict__
426
427
428class AssayConfig(object):
429    """Configuration for an Assay record."""
430
431    def __init__(
432        self,
433        client: Optional["Client"],
434        name: str,
435        pipeline_id: int,
436        pipeline_name: str,
437        active: bool,
438        status: str,
439        iopath: str,
440        baseline: BaselineConfig,
441        window: WindowConfig,
442        summarizer: SummarizerConfig,
443        warning_threshold: Optional[float],
444        alert_threshold: float,
445        run_until: Optional[datetime],
446        workspace_id: Optional[int],
447    ):
448        self.client = client
449        self.name = name
450        self.pipeline_id = pipeline_id
451        self.pipeline_name = pipeline_name
452        self.active = active
453        self.status = status
454        self.iopath = iopath
455        self.baseline = baseline
456        self.window = window
457        self.summarizer = summarizer
458        self.warning_threshold = warning_threshold
459        self.alert_threshold = alert_threshold
460        self.run_until = run_until
461        self.workspace_id = workspace_id
462
463    def to_json(self) -> str:
464        payload = self.__dict__.copy()
465        payload.pop("client", None)
466        payload.pop("model_insights_url", None)
467        return json.dumps(payload, indent=4, default=ConfigEncoder)
468
469    def interactive_run(self) -> AssayAnalysisList:
470        """Runs this assay interactively. The assay is not saved to the database
471    nor are analysis records saved to a Plateau topic. Useful for exploring
472        pipeline inference data and experimenting with thresholds."""
473
474        client = unwrap(self.client)
475        payload = {
476            **json.loads(self.to_json()),
477            "created_at": datetime.now(timezone.utc).isoformat(),
478        }
479        mlops_client = client.mlops()
480        mlops_client.timeout = 5 * 60
481        ret = assays_run_interactive.sync(
482            client=mlops_client,
483            json_body=AssaysRunInteractiveJsonBody.from_dict(payload),
484        )
485
486        analysis_list = []
487        if ret is not None:
488            if not isinstance(ret, List):
489                raise Exception(ret.msg)
490
491            analysis_list = [AssayAnalysis(ar.to_dict()) for ar in ret]
492
493        return AssayAnalysisList(analysis_list)
494
495    def interactive_baseline_run(self) -> Optional[AssayAnalysis]:
496
497        client = unwrap(self.client)
498        payload = {
499            **json.loads(self.to_json()),
500            "created_at": datetime.now(timezone.utc).isoformat(),
501        }
502        ret = assays_run_interactive_baseline.sync(
503            client=client.mlops(),
504            json_body=AssaysRunInteractiveBaselineJsonBody.from_dict(payload),
505        )
506
507        if ret is not None:
508            if not isinstance(ret, AssaysRunInteractiveBaselineResponse200):
509                raise Exception(ret.msg)
510
511            aa = ret.to_dict()
512            return AssayAnalysis(aa)
513
514        return None
515
516    def interactive_input_run(
517        self, inferences: List[Dict], labels: Optional[List[str]]
518    ) -> AssayAnalysisList:
519        """Analyzes the inputs given to create an interactive run for each feature
520    column. The assay is not saved to the database nor are analysis records saved
521    to a Plateau topic. Useful for exploring inputs for possible causes when a
522        difference is detected in the output."""
523
524        all_assays = []
525        inference = inferences[0]
526
527        print(f"input column distinct_vals label           largest_pct")
528        # TODO extend this to work for any input shape
529        inputs = inference["original_data"]["tensor"]
530        for idx0, _ in enumerate(inputs):
531            if labels and len(inputs[idx0]) != len(labels):
532                print(
533                    f"Labels are not the same len {len(labels)} as inputs {len(inference['inputs'][idx0])}"
534                )
535            for idx1, _ in enumerate(inputs[idx0]):
536                values = []
537                for inf in inferences:
538                    values.append(inf["original_data"]["tensor"][idx0][idx1])
539                counter = Counter(values)
540                value_pct = [c / len(values) for c in counter.values()]
541                value_pct.sort()
542                largest_pct = value_pct[-1]
543                distinct_values = len(counter.keys())
544                label = labels[idx1] if labels else ""
545                # TODO: Rule of thumb may need better way to distinguish
546                msg = (
547                    "*** May not be continuous feature"
548                    if distinct_values < 5 or largest_pct > 0.90
549                    else ""
550                )
551                print(
552                    f"{idx0:5} {idx1:5} {distinct_values:14} {label:15} {largest_pct:0.4f} {msg}"
553                )
554
555                iopath = f"inputs {idx0} {idx1}"
556                self.iopath = iopath
557
558                assays = self.interactive_run()
559                all_assays.extend(assays.raw)
560
561        return AssayAnalysisList(all_assays)
562
563
564class AssayBuilder(object):
565    """Helps build an AssayConfig"""
566
567    def __init__(
568        self,
569        client: Optional["Client"],
570        name: str,
571        pipeline_id: int,
572        pipeline_name: str,
573        model_name: str,
574        baseline_start: datetime,
575        baseline_end: datetime,
576    ):
577        self.client = client
578        self.name = name
579        self.pipeline_id = pipeline_id
580        self.pipeline_name: str = pipeline_name
581        self.active = True
582        self.status = "created"
583        self.iopath: str = "output 0 0"
584        self.baseline: Optional[BaselineConfig] = None
585        self.window: Optional[WindowConfig] = None
586        self.summarizer: Optional[SummarizerConfig] = None
587        self.warning_threshold: Optional[float] = None
588        self.alert_threshold: float = 0.25
589        self.run_until: Optional[datetime] = None
590        self.workspace_id = (
591            None if self.client is None else self.client.get_current_workspace().id()
592        )
593
594        self.baseline_builder = (
595            FixedBaselineBuilder(self.pipeline_name)
596            .add_model_name(model_name)
597            .add_start(baseline_start)
598            .add_end(baseline_end)
599        )
600        self.window_builder_ = WindowBuilder(self.pipeline_name).add_model_name(
601            model_name
602        )
603
604        self.summarizer_builder = UnivariateContinousSummarizerBuilder()
605
606        self._baseline_df: Optional[pd.DataFrame] = None
607
608    def baseline_dataframe(self):
609        if self._baseline_df is None:
610            client = unwrap(self.client)
611            self._baseline_df = client.get_pipeline_inference_dataframe(
612                client.get_topic_name(self.pipeline_id),
613                unwrap(self.baseline_builder.start),
614                unwrap(self.baseline_builder.end),
615                self.baseline_builder.model_name,
616            )
617        return self._baseline_df
618
619    def baseline_histogram(
620        self, bins: Optional[Union[str, int]] = None, log_scale: bool = False
621    ):
622
623        df = self.baseline_dataframe()
624
625        n_bins = calc_bins(df.shape[0], bins)
626
627        col_name = self.iopath.replace(" ", "_")
628
629        # type inference for the bins param to histplot is incorrect: str vs str|int.
630        sns.histplot(data=df, x=col_name, bins=n_bins, log_scale=log_scale).set(  # type: ignore
631            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
632        )
633        plt.show()
634
635    def baseline_kde(self, log_scale: bool = False):
636        df = self.baseline_dataframe()
637
638        col_name = self.iopath.replace(" ", "_")
639
640        sns.kdeplot(data=df, x=col_name, log_scale=log_scale).set(
641            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
642        )
643        plt.grid()
644        plt.show()
645
646    def baseline_ecdf(self, log_scale: bool = False):
647        df = self.baseline_dataframe()
648
649        col_name = self.iopath.replace(" ", "_")
650
651        sns.ecdfplot(data=df, x=col_name, log_scale=log_scale).set(
652            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
653        )
654        plt.grid()
655        plt.show()
656
657    def build(self) -> AssayConfig:
658        self.baseline = self.baseline_builder.build()
659        self.window = self.window_builder_.build()
660        self.summarizer = self.summarizer_builder.build()
661
662        run_until = ensure_tz(self.run_until) if self.run_until else None
663
664        return AssayConfig(
665            self.client,
666            self.name,
667            self.pipeline_id,
668            self.pipeline_name,
669            self.active,
670            self.status,
671            self.iopath,
672            unwrap(self.baseline),
673            unwrap(self.window),
674            unwrap(self.summarizer),
675            self.warning_threshold,
676            self.alert_threshold,
677            run_until,
678            self.workspace_id,
679        )
680
681    def upload(self) -> int:
682        config = self.build()
683
684        if self.client:
685            res = self.client.upload_assay(config)  # type: ignore
686            return res
687        raise RuntimeError(
688            "Assay config was created for standalone and may only be used to generate configuration"
689        )
690
691    def add_name(self, name: str):
692        """Specify the assay name"""
693        self.name = name
694        return self
695
696    def add_active(self, active: bool):
697        """Specify if the assay is active or not"""
698        self.active = active
699        return self
700
701    def add_iopath(self, iopath: str):
702        """Specify what the assay should analyze. Should start with input or output and have
703        indexes (zero based) into row and column: For example 'input 0 1' specifies the second
704        column of the first input."""
705
706        iopath = iopath.strip()
707        assert iopath.lower().startswith("input") or iopath.lower().startswith("output")
708        self.iopath = iopath
709        self._baseline_df = None
710        return self
711
712    def fixed_baseline_builder(self):
713        """Creates a fixed baseline builder for this assay builder."""
714
715        bb = FixedBaselineBuilder(unwrap(self.pipeline_name))
716        self.baseline_builder = bb
717        return bb
718
719    def add_baseline(self, baseline: BaselineConfig):
720        """Adds a specific baseline created elsewhere."""
721        self.baseline = baseline
722        self._baseline_df = None
723        return self
724
725    def window_builder(self):
726        """Returns this assay builders window builder."""
727        return self.window_builder_
728
729    def add_window(self, window: WindowConfig):
730        """Adds a window created elsewhere."""
731        self.window = window
732        return self
733
734    def univariate_continuous_summarizer(self) -> UnivariateContinousSummarizerBuilder:
735        """Creates and adds a UCS to this assay builder."""
736        ucsb = UnivariateContinousSummarizerBuilder()
737        self.summarizer_builder = ucsb
738        return ucsb
739
740    def add_summarizer(self, summarizer: SummarizerConfig):
741        """Adds the summarizer created elsewhere to this builder."""
742        self.summarizer = summarizer
743        return self
744
745    def add_warning_threshold(self, warning_threshold: float):
746        """Specify the warning threshold for this assay."""
747        self.warning_threshold = warning_threshold
748        return self
749
750    def add_alert_threshold(self, alert_threshold: float):
751        """Specify the alert threshold for this assay."""
752        self.alert_threshold = alert_threshold
753        return self
754
755    def add_run_until(self, run_until: datetime):
756        """How long should this assay run. Primarily useful for
757        interactive runs to limit the number of analyses."""
758        self.run_until = run_until
759        return self
760
761
762def calc_bins(num_samples: int, bins: Optional[Union[str, int]]) -> Union[str, int]:
763    """If the user specifies a number of bins or a strategy for calculating
764    them, use that. Else use the smaller of the square root of the number of samples and 50."""
765
766    if bins is None:
767        return min(int(math.sqrt(num_samples)), 50)
768    else:
769        return bins
def unwrap(v: Optional[~T]) -> ~T:
38def unwrap(v: Optional[T]) -> T:
39    """Simple function to placate pylance"""
40    if v:
41        return v
42    raise Exception("Expected a value in forced unwrap")

Simple function to placate pylance

class BaselineConfig:
45class BaselineConfig(object):
46    """Abstract base class for Baseline config objects. Currently
47    only FixedBaseline is implemented though SlidingBaseline and
48    others are planned."""
49
50    def __init__(self):
51        pass
52
53    def to_json(self) -> str:
54        return json.dumps(self, indent=4, default=ConfigEncoder)

Abstract base class for Baseline config objects. Currently only FixedBaseline is implemented though SlidingBaseline and others are planned.

BaselineConfig()
50    def __init__(self):
51        pass
def to_json(self) -> str:
53    def to_json(self) -> str:
54        return json.dumps(self, indent=4, default=ConfigEncoder)
class FixedBaseline(BaselineConfig):
57class FixedBaseline(BaselineConfig):
58    """The FixedBaseline is calculated from the inferences from a
59    specific time window."""
60
61    def __init__(
62        self, pipeline_name: str, model_name: str, start: datetime, end: datetime
63    ):
64        self.Fixed = {
65            "pipeline": pipeline_name,
66            "model": model_name,
67            "start_at": start.isoformat(),
68            "end_at": end.isoformat(),
69        }

The FixedBaseline is calculated from the inferences from a specific time window.

FixedBaseline( pipeline_name: str, model_name: str, start: datetime.datetime, end: datetime.datetime)
61    def __init__(
62        self, pipeline_name: str, model_name: str, start: datetime, end: datetime
63    ):
64        self.Fixed = {
65            "pipeline": pipeline_name,
66            "model": model_name,
67            "start_at": start.isoformat(),
68            "end_at": end.isoformat(),
69        }
Inherited Members
BaselineConfig
to_json
class BaselineBuilder(abc.ABC):
72class BaselineBuilder(ABC):
73    @abstractmethod
74    def build(self) -> BaselineConfig:
75        pass
76
77    def to_json(self) -> str:
78        return json.dumps(self, indent=4, default=ConfigEncoder)

Helper class that provides a standard way to create an ABC using inheritance.

@abstractmethod
def build(self) -> wallaroo.assay_config.BaselineConfig:
73    @abstractmethod
74    def build(self) -> BaselineConfig:
75        pass
def to_json(self) -> str:
77    def to_json(self) -> str:
78        return json.dumps(self, indent=4, default=ConfigEncoder)
def ensure_tz(d: datetime.datetime) -> datetime.datetime:
81def ensure_tz(d: datetime) -> datetime:
82    """Ensure the date is tz aware. If naive, assume it is in UTC."""
83    if d.tzinfo:
84        return d
85    else:
86        return d.astimezone(tz=timezone.utc)

Ensure the date is tz aware. If naive, assume it is in UTC.

class FixedBaselineBuilder(BaselineBuilder):
 89class FixedBaselineBuilder(BaselineBuilder):
 90    """Helps to easily create the config object for a FixedBaseline."""
 91
 92    def __init__(self, pipeline_name: str):
 93        self.pipeline_name = pipeline_name
 94        self.model_name: Optional[str] = None
 95        self.start: Optional[datetime] = None
 96        self.end: Optional[datetime] = None
 97
 98    def add_model_name(self, model_name: str):
 99        """Specify the model to use in the baseline"""
100        self.model_name = model_name
101        return self
102
103    def add_start(self, start: datetime):
104        """Specify the start of the window for the baseline"""
105        self.start = start
106        return self
107
108    def add_end(self, end: datetime):
109        """Specify the end of the window for the baseline"""
110        self.end = end
111        return self
112
113    def build(self) -> FixedBaseline:
114        """Create the FixedBaseline object."""
115        start = ensure_tz(unwrap(self.start))
116        end = ensure_tz(unwrap(self.end))
117
118        return FixedBaseline(self.pipeline_name, unwrap(self.model_name), start, end)

Helps to easily create the config object for a FixedBaseline.
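
Example (a minimal sketch; the pipeline name, model name and dates below are placeholders, not values from this module):

from datetime import datetime, timezone
from wallaroo.assay_config import FixedBaselineBuilder

baseline = (
    FixedBaselineBuilder("my-pipeline")                    # hypothetical pipeline name
    .add_model_name("my-model")                            # hypothetical model name
    .add_start(datetime(2023, 1, 1, tzinfo=timezone.utc))
    .add_end(datetime(2023, 1, 8, tzinfo=timezone.utc))
    .build()
)
print(baseline.to_json())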

FixedBaselineBuilder(pipeline_name: str)
92    def __init__(self, pipeline_name: str):
93        self.pipeline_name = pipeline_name
94        self.model_name: Optional[str] = None
95        self.start: Optional[datetime] = None
96        self.end: Optional[datetime] = None
def add_model_name(self, model_name: str):
 98    def add_model_name(self, model_name: str):
 99        """Specify the model to use in the baseline"""
100        self.model_name = model_name
101        return self

Specify the model to use in the baseline

def add_start(self, start: datetime.datetime):
103    def add_start(self, start: datetime):
104        """Specify the start of the window for the baseline"""
105        self.start = start
106        return self

Specify the start of the window for the baseline

def add_end(self, end: datetime.datetime):
108    def add_end(self, end: datetime):
109        """Specify the end of the window for the baseline"""
110        self.end = end
111        return self

Specify the end of the window for the baseline

def build(self) -> wallaroo.assay_config.FixedBaseline:
113    def build(self) -> FixedBaseline:
114        """Create the FixedBaseline object."""
115        start = ensure_tz(unwrap(self.start))
116        end = ensure_tz(unwrap(self.end))
117
118        return FixedBaseline(self.pipeline_name, unwrap(self.model_name), start, end)

Create the FixedBaseline object.

Inherited Members
BaselineBuilder
to_json
class SummarizerConfig:
121class SummarizerConfig(object):
122    """The summarizer specifies how the bins of the baseline and
123    window should be compared."""
124
125    def __init__(self):
126        pass
127
128    def to_json(self) -> str:
129        return json.dumps(self, indent=4, default=ConfigEncoder)

The summarizer specifies how the bins of the baseline and window should be compared.

SummarizerConfig()
125    def __init__(self):
126        pass
def to_json(self) -> str:
128    def to_json(self) -> str:
129        return json.dumps(self, indent=4, default=ConfigEncoder)
class BinMode(builtins.str, enum.Enum):
132class BinMode(str, Enum):
133    """How should we calculate the bins.
134    NONE - no bins. Only useful if we only care about the mean, median, etc.
135    EQUAL - evenly spaced bins of width (max - min) / num_bins
136    QUANTILE - based on percentages. If num_bins is 5 then quintiles
137    so bins are created at the 20%, 40%, 60%, 80% and 100% points.
138    PROVIDED - user provides the edge points for the bins.
139    """
140
141    NONE = "None"
142    EQUAL = "Equal"
143    QUANTILE = "Quantile"
144    PROVIDED = "Provided"

How should we calculate the bins. NONE - no bins. Only useful if we only care about the mean, median, etc. EQUAL - evenly spaced bins of width (max - min) / num_bins. QUANTILE - based on percentages. If num_bins is 5 then quintiles, so bins are created at the 20%, 40%, 60%, 80% and 100% points. PROVIDED - user provides the edge points for the bins.
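
To illustrate EQUAL vs QUANTILE binning, here is a standalone numpy sketch of the idea (not the code the assay engine itself runs):

import numpy as np

values = np.random.default_rng(0).normal(size=1_000)
num_bins = 5

# EQUAL: evenly spaced edges between the min and max of the baseline values
equal_edges = np.linspace(values.min(), values.max(), num_bins + 1)

# QUANTILE: edges at the 20%, 40%, 60%, 80% and 100% points
quantile_edges = np.quantile(values, np.linspace(0.0, 1.0, num_bins + 1))

print(equal_edges)
print(quantile_edges)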

NONE = <BinMode.NONE: 'None'>
EQUAL = <BinMode.EQUAL: 'Equal'>
QUANTILE = <BinMode.QUANTILE: 'Quantile'>
PROVIDED = <BinMode.PROVIDED: 'Provided'>
Inherited Members
enum.Enum
name
value
class Aggregation(builtins.str, enum.Enum):
147class Aggregation(str, Enum):
148    """What we use to calculate the score.
149    EDGES - distances between the edges.
150    DENSITY - percentage of values that fall in each bin.
151    CUMULATIVE - cumulative percentage that fall in the bins."""
152
153    EDGES = "Edges"
154    DENSITY = "Density"
155    CUMULATIVE = "Cumulative"

What we use to calculate the score. EDGES - distances between the edges. DENSITY - percentage of values that fall in each bin. CUMULATIVE - cumulative percentage that fall in the bins.
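
A small sketch of how DENSITY and CUMULATIVE relate to raw bin counts (illustrative only; the assay engine computes these from the baseline and window histograms):

counts = [5, 20, 50, 20, 5]
total = sum(counts)

density = [c / total for c in counts]  # fraction of values in each bin
cumulative = []                        # running sum of the densities
running = 0.0
for d in density:
    running += d
    cumulative.append(running)

print(density)     # [0.05, 0.2, 0.5, 0.2, 0.05]
print(cumulative)  # [0.05, 0.25, 0.75, 0.95, 1.0]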

EDGES = <Aggregation.EDGES: 'Edges'>
DENSITY = <Aggregation.DENSITY: 'Density'>
CUMULATIVE = <Aggregation.CUMULATIVE: 'Cumulative'>
Inherited Members
enum.Enum
name
value
class Metric(builtins.str, enum.Enum):
158class Metric(str, Enum):
159    """How we calculate the score.
160    MAXDIFF - maximum difference between corresponding bins.
161    SUMDIFF - sum of differences between corresponding bins.
162    PSI - Population Stability Index"""
163
164    MAXDIFF = "MaxDiff"
165    SUMDIFF = "SumDiff"
166    PSI = "PSI"

How we calculate the score. MAXDIFF - maximum difference between corresponding bins. SUMDIFF - sum of differences between corresponding bins. PSI - Population Stability Index
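
A minimal sketch of how these scores can be computed from baseline and window bin densities (the assay engine's exact handling of empty bins may differ):

import math

baseline_density = [0.05, 0.20, 0.50, 0.20, 0.05]
window_density = [0.10, 0.25, 0.40, 0.20, 0.05]

pairs = list(zip(baseline_density, window_density))
max_diff = max(abs(w - b) for b, w in pairs)                              # MAXDIFF
sum_diff = sum(abs(w - b) for b, w in pairs)                              # SUMDIFF
psi = sum((w - b) * math.log(w / b) for b, w in pairs if b > 0 and w > 0) # PSI, skipping empty bins

print(max_diff, sum_diff, round(psi, 4))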

MAXDIFF = <Metric.MAXDIFF: 'MaxDiff'>
SUMDIFF = <Metric.SUMDIFF: 'SumDiff'>
PSI = <Metric.PSI: 'PSI'>
Inherited Members
enum.Enum
name
value
class UnivariateContinousSummarizerConfig(SummarizerConfig):
169class UnivariateContinousSummarizerConfig(SummarizerConfig):
170    """The UnivariateContinousSummarizer analyzes one input or output feature
171    (Univariate) at a time. Expects the values to be continuous or at least numerous
172    enough to fall in various/all the bins."""
173
174    def __init__(
175        self,
176        bin_mode: BinMode,
177        aggregation: Aggregation,
178        metric: Metric,
179        num_bins: int,
180        bin_weights: Optional[List[float]] = None,
181        bin_width: Optional[float] = None,
182        provided_edges: Optional[List[float]] = None,
183        add_outlier_edges: bool = True,
184    ):
185        self.type = "UnivariateContinuous"
186        self.bin_mode = bin_mode
187        self.aggregation = aggregation
188        self.metric = metric
189        self.num_bins = num_bins
190        self.bin_weights = bin_weights
191        self.bin_width = bin_width
192        self.provided_edges = provided_edges
193        self.add_outlier_edges = add_outlier_edges

The UnivariateContinousSummarizer analyzes one input or output feature (Univariate) at a time. Expects the values to be continuous or at least numerous enough to fall in various/all the bins.

UnivariateContinousSummarizerConfig( bin_mode: wallaroo.assay_config.BinMode, aggregation: wallaroo.assay_config.Aggregation, metric: wallaroo.assay_config.Metric, num_bins: int, bin_weights: Optional[List[float]] = None, bin_width: Optional[float] = None, provided_edges: Optional[List[float]] = None, add_outlier_edges: bool = True)
174    def __init__(
175        self,
176        bin_mode: BinMode,
177        aggregation: Aggregation,
178        metric: Metric,
179        num_bins: int,
180        bin_weights: Optional[List[float]] = None,
181        bin_width: Optional[float] = None,
182        provided_edges: Optional[List[float]] = None,
183        add_outlier_edges: bool = True,
184    ):
185        self.type = "UnivariateContinuous"
186        self.bin_mode = bin_mode
187        self.aggregation = aggregation
188        self.metric = metric
189        self.num_bins = num_bins
190        self.bin_weights = bin_weights
191        self.bin_width = bin_width
192        self.provided_edges = provided_edges
193        self.add_outlier_edges = add_outlier_edges
Inherited Members
SummarizerConfig
to_json
class SummarizerBuilder(abc.ABC):
196class SummarizerBuilder(ABC):
197    @abstractmethod
198    def build(self) -> SummarizerConfig:
199        pass

Helper class that provides a standard way to create an ABC using inheritance.

@abstractmethod
def build(self) -> wallaroo.assay_config.SummarizerConfig:
197    @abstractmethod
198    def build(self) -> SummarizerConfig:
199        pass
class UnivariateContinousSummarizerBuilder(SummarizerBuilder):
202class UnivariateContinousSummarizerBuilder(SummarizerBuilder):
203    """Builds the UnivariateContinousSummarizerConfig."""
204
205    def __init__(self):
206        self.bin_mode = BinMode.QUANTILE
207        self.aggregation = Aggregation.DENSITY
208        self.metric = Metric.PSI
209        self.num_bins = 5
210        self.bin_weights: Optional[List[float]] = None
211        self.bin_width: Optional[float] = None
212        self.provided_edges: Optional[List[float]] = None
213        self.add_outlier_edges = True
214
215    def build(self) -> UnivariateContinousSummarizerConfig:
216        if self.bin_mode == BinMode.PROVIDED:
217            if self.provided_edges is None:
218                raise ValueError("Edges must be provided with BinMode.PROVIDED")
219        else:
220            if self.provided_edges is not None:
221                raise ValueError(
222                    f"Edges may not be provided with bin mode {self.bin_mode}"
223                )
224
225        sum = UnivariateContinousSummarizerConfig(
226            self.bin_mode,
227            self.aggregation,
228            self.metric,
229            self.num_bins,
230            self.bin_weights,
231            self.bin_width,
232            self.provided_edges,
233            self.add_outlier_edges,
234        )
235        return sum
236
237    def add_bin_mode(self, bin_mode: BinMode, edges: Optional[List[float]] = None):
238        """Sets the binning mode. If BinMode.PROVIDED is specified a list of edges
239        is also required."""
240        if bin_mode == BinMode.PROVIDED:
241            if edges is None:
242                raise ValueError("Edges must be provided with BinMode.PROVIDED")
243
244        self.bin_mode = bin_mode
245        self.add_bin_edges(edges)
246        return self
247
248    def add_num_bins(self, num_bins: int):
249        """Sets the number of bins. If weights have been previously set they
250        must be set to none to allow changing the number of bins."""
251
252        if num_bins != self.num_bins and self.bin_weights is not None:
253            if num_bins + 2 != len(self.bin_weights):
254                msg = (
255                    f"({len(self.bin_weights)}) bin weights have already been set. "
256                    + f"Please set them to None before changing the number of bins."
257                )
258                raise ValueError(msg)
259
260        if num_bins != self.num_bins and self.provided_edges is not None:
261            if not (
262                len(self.provided_edges) == num_bins
263                or len(self.provided_edges) == num_bins + 1
264            ):
265                msg = (
266                    f"({len(self.provided_edges)}) bin edges have already been set. "
267                    + f"Please set them to None before changing the number of bins."
268                )
269                raise ValueError(msg)
270
271        self.num_bins = num_bins
272        return self
273
274    def add_bin_weights(self, weights: Union[List[float], None]):
275        """Specifies the weighting to be given to the bins. The number of weights
276    must be 2 larger than the number of bins to accommodate outliers smaller
277        and outliers larger than values seen in the baseline.
278        The passed in values can be whole or real numbers and do not need to add
279        up to 1 or any other specific value as they will be normalized during the
280        score calculation phase.
281        The weights passed in can be none to remove previously specified weights
282        and to allow changing of the number of bins."""
283
284        if weights is not None:
285            if self.num_bins + 2 != len(weights):
286                msg = (
287                    f"The number of weights ({len(weights)}) "
288                    + f"must be 2 more ({self.num_bins + 2}) than the "
289                    + f"number of bins ({self.num_bins}) to allow for the "
290                    + f"left and right outlier bins."
291                )
292                raise ValueError(msg)
293        self.bin_weights = weights
294        return self
295
296    def add_metric(self, metric: Metric):
297        """Sets the metric mode."""
298        self.metric = metric
299        return self
300
301    def add_aggregation(self, aggregation: Aggregation):
302        """Sets the aggregation style."""
303        self.aggregation = aggregation
304        return self
305
306    def add_bin_edges(self, edges: Union[List[float], None]):
307        """Specifies the right hand side (max value) of the bins. The number
308        of edges must be equal to or one more than the number of bins. When
309        equal to the number of bins the edge for the left outlier bin is
310    calculated from the baseline. When an additional edge is provided (one more
311    than the number of bins), that first (lowest) value is used as the max value
312    for the left outlier bin. The max value for the right hand outlier bin is
313    always Float MAX.
314        """
315
316        if edges is not None:
317            if not (len(edges) == self.num_bins or len(edges) == self.num_bins + 1):
318                msg = (
319                    f"The number of edges ({len(edges)}) "
320                    + f"must be equal to ({self.num_bins}) or one more "
321                    + f"({self.num_bins + 1 }) than the number of bins to account "
322                    + f"for the left outlier bin."
323                )
324                raise ValueError(msg)
325            edges = sorted(edges)
326
327        self.provided_edges = edges
328        return self

Builds the UnivariateContinousSummarizerConfig.
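
A usage sketch, chaining the builder's methods (the defaults shown in __init__ above are quantile bins, density aggregation and the PSI metric):

from wallaroo.assay_config import (
    Aggregation,
    BinMode,
    Metric,
    UnivariateContinousSummarizerBuilder,
)

summarizer = (
    UnivariateContinousSummarizerBuilder()
    .add_num_bins(6)
    .add_bin_mode(BinMode.EQUAL)
    .add_aggregation(Aggregation.CUMULATIVE)
    .add_metric(Metric.SUMDIFF)
    .build()
)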

UnivariateContinousSummarizerBuilder()
205    def __init__(self):
206        self.bin_mode = BinMode.QUANTILE
207        self.aggregation = Aggregation.DENSITY
208        self.metric = Metric.PSI
209        self.num_bins = 5
210        self.bin_weights: Optional[List[float]] = None
211        self.bin_width: Optional[float] = None
212        self.provided_edges: Optional[List[float]] = None
213        self.add_outlier_edges = True
215    def build(self) -> UnivariateContinousSummarizerConfig:
216        if self.bin_mode == BinMode.PROVIDED:
217            if self.provided_edges is None:
218                raise ValueError("Edges must be provided with BinMode.PROVIDED")
219        else:
220            if self.provided_edges is not None:
221                raise ValueError(
222                    f"Edges may not be provided with bin mode {self.bin_mode}"
223                )
224
225        sum = UnivariateContinousSummarizerConfig(
226            self.bin_mode,
227            self.aggregation,
228            self.metric,
229            self.num_bins,
230            self.bin_weights,
231            self.bin_width,
232            self.provided_edges,
233            self.add_outlier_edges,
234        )
235        return sum
def add_bin_mode( self, bin_mode: wallaroo.assay_config.BinMode, edges: Optional[List[float]] = None):
237    def add_bin_mode(self, bin_mode: BinMode, edges: Optional[List[float]] = None):
238        """Sets the binning mode. If BinMode.PROVIDED is specified a list of edges
239        is also required."""
240        if bin_mode == BinMode.PROVIDED:
241            if edges is None:
242                raise ValueError("Edges must be provided with BinMode.PROVIDED")
243
244        self.bin_mode = bin_mode
245        self.add_bin_edges(edges)
246        return self

Sets the binning mode. If BinMode.PROVIDED is specified a list of edges is also required.
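
For example, with user-supplied edges (the edge values below are arbitrary placeholders); note that the bin count is set before the edges so the length check passes:

from wallaroo.assay_config import BinMode, UnivariateContinousSummarizerBuilder

builder = UnivariateContinousSummarizerBuilder()
builder.add_num_bins(4)
builder.add_bin_mode(BinMode.PROVIDED, [1.0, 2.5, 5.0, 10.0])  # 4 bins, 4 edges
summarizer = builder.build()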

def add_num_bins(self, num_bins: int):
248    def add_num_bins(self, num_bins: int):
249        """Sets the number of bins. If weights have been previously set they
250        must be set to none to allow changing the number of bins."""
251
252        if num_bins != self.num_bins and self.bin_weights is not None:
253            if num_bins + 2 != len(self.bin_weights):
254                msg = (
255                    f"({len(self.bin_weights)}) bin weights have already been set. "
256                    + f"Please set them to None before changing the number of bins."
257                )
258                raise ValueError(msg)
259
260        if num_bins != self.num_bins and self.provided_edges is not None:
261            if not (
262                len(self.provided_edges) == num_bins
263                or len(self.provided_edges) == num_bins + 1
264            ):
265                msg = (
266                    f"({len(self.provided_edges)}) bin edges have already been set. "
267                    + f"Please set them to None before changing the number of bins."
268                )
269                raise ValueError(msg)
270
271        self.num_bins = num_bins
272        return self

Sets the number of bins. If weights have been previously set they must be set to none to allow changing the number of bins.

def add_bin_weights(self, weights: Optional[List[float]]):
274    def add_bin_weights(self, weights: Union[List[float], None]):
275        """Specifies the weighting to be given to the bins. The number of weights
276    must be 2 larger than the number of bins to accommodate outliers smaller
277        and outliers larger than values seen in the baseline.
278        The passed in values can be whole or real numbers and do not need to add
279        up to 1 or any other specific value as they will be normalized during the
280        score calculation phase.
281        The weights passed in can be none to remove previously specified weights
282        and to allow changing of the number of bins."""
283
284        if weights is not None:
285            if self.num_bins + 2 != len(weights):
286                msg = (
287                    f"The number of weights ({len(weights)}) "
288                    + f"must be 2 more ({self.num_bins + 2}) than the "
289                    + f"number of bins ({self.num_bins}) to allow for the "
290                    + f"left and right outlier bins."
291                )
292                raise ValueError(msg)
293        self.bin_weights = weights
294        return self

Specifies the weighting to be given to the bins. The number of weights must be 2 larger than the number of bins to accommodate outliers smaller and outliers larger than values seen in the baseline. The passed in values can be whole or real numbers and do not need to add up to 1 or any other specific value as they will be normalized during the score calculation phase. The weights passed in can be none to remove previously specified weights and to allow changing of the number of bins.
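
For example, with the default of 5 bins, 7 weights are expected (one extra for each outlier bin); the weights below are arbitrary:

from wallaroo.assay_config import UnivariateContinousSummarizerBuilder

builder = UnivariateContinousSummarizerBuilder()
builder.add_bin_weights([0, 1, 2, 2, 2, 1, 0])  # 5 bins plus the left and right outlier bins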

def add_metric(self, metric: wallaroo.assay_config.Metric):
296    def add_metric(self, metric: Metric):
297        """Sets the metric mode."""
298        self.metric = metric
299        return self

Sets the metric mode.

def add_aggregation(self, aggregation: wallaroo.assay_config.Aggregation):
301    def add_aggregation(self, aggregation: Aggregation):
302        """Sets the aggregation style."""
303        self.aggregation = aggregation
304        return self

Sets the aggregation style.

def add_bin_edges(self, edges: Optional[List[float]]):
306    def add_bin_edges(self, edges: Union[List[float], None]):
307        """Specifies the right hand side (max value) of the bins. The number
308        of edges must be equal to or one more than the number of bins. When
309        equal to the number of bins the edge for the left outlier bin is
310    calculated from the baseline. When an additional edge is provided (one more
311    than the number of bins), that first (lowest) value is used as the max value
312    for the left outlier bin. The max value for the right hand outlier bin is
313    always Float MAX.
314        """
315
316        if edges is not None:
317            if not (len(edges) == self.num_bins or len(edges) == self.num_bins + 1):
318                msg = (
319                    f"The number of edges ({len(edges)}) "
320                    + f"must be equal to ({self.num_bins}) or one more "
321                    + f"({self.num_bins + 1 }) than the number of bins to account "
322                    + f"for the left outlier bin."
323                )
324                raise ValueError(msg)
325            edges = sorted(edges)
326
327        self.provided_edges = edges
328        return self

Specifies the right hand side (max value) of the bins. The number of edges must be equal to or one more than the number of bins. When equal to the number of bins, the edge for the left outlier bin is calculated from the baseline. When an additional edge is provided (one more than the number of bins), that first (lowest) value is used as the max value for the left outlier bin. The max value for the right hand outlier bin is always Float MAX.
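
For example, with the default of 5 bins, either 5 or 6 edges are accepted (the values below are arbitrary placeholders; in practice edges are usually supplied through add_bin_mode with BinMode.PROVIDED):

from wallaroo.assay_config import UnivariateContinousSummarizerBuilder

builder = UnivariateContinousSummarizerBuilder()
builder.add_bin_edges([1.0, 2.0, 3.0, 4.0, 5.0])        # 5 edges: the left outlier edge is taken from the baseline
builder.add_bin_edges([0.5, 1.0, 2.0, 3.0, 4.0, 5.0])   # 6 edges: 0.5 caps the left outlier bin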

class WindowConfig:
331class WindowConfig(object):
332    """Configures a window to be compared against the baseline."""
333
334    def __init__(
335        self,
336        pipeline_name: str,
337        model_name: str,
338        width: str,
339        start: Optional[datetime] = None,
340        interval: Optional[str] = None,
341    ):
342        self.pipeline = pipeline_name
343        self.model = model_name
344        self.width = width
345        self.start = start
346        self.interval = interval
347
348    def to_json(self) -> str:
349        return json.dumps(self, indent=4, default=ConfigEncoder)

Configures a window to be compared against the baseline.

WindowConfig( pipeline_name: str, model_name: str, width: str, start: Optional[datetime.datetime] = None, interval: Optional[str] = None)
334    def __init__(
335        self,
336        pipeline_name: str,
337        model_name: str,
338        width: str,
339        start: Optional[datetime] = None,
340        interval: Optional[str] = None,
341    ):
342        self.pipeline = pipeline_name
343        self.model = model_name
344        self.width = width
345        self.start = start
346        self.interval = interval
def to_json(self) -> str:
348    def to_json(self) -> str:
349        return json.dumps(self, indent=4, default=ConfigEncoder)
class WindowBuilder:
352class WindowBuilder(object):
353    """Helps build a WindowConfig. model and width are required but there are no
354    good default values for them because they depend on the baseline. We leave it
355    up to the assay builder to configure the window correctly after it is created.
356    """
357
358    def __init__(self, pipeline_name: str):
359        self.pipeline = pipeline_name
360        self.model: Optional[str] = None
361        self.width: Optional[str] = "24 hours"
362        self.start: Optional[datetime] = None
363        self.interval: Optional[str] = None
364
365    def add_model_name(self, model_name: str):
366        """The model name (model_id) that the window should analyze."""
367        self.model = model_name
368        return self
369
370    def _duration_kw_to_str(self, **kwargs) -> str:
371        interval_names = ["minute", "hour", "day", "week"]
372        duration_str = None
373        kw_count = 0
374
375        for interval_name in interval_names:
376            plural = interval_name + "s"
377
378            for kw in [interval_name, plural]:
379                if kw in kwargs:
380                    duration_str = f"{kwargs[kw]} {plural}"
381                    kw_count += 1
382
383        if kw_count == 0:
384            raise Exception(
385                "Please specify one of 'minutes', 'hours', 'days' or 'weeks' keyword args"
386            )
387
388        elif kw_count > 1:
389            raise Exception(
390                "Please specify only one of 'minutes', 'hours', 'days' or 'weeks' keyword args"
391            )
392        else:
393            return unwrap(duration_str)
394
395    def add_width(self, **kwargs: int):
396        """The width of the window to use when collecting data for analysis."""
397        self.width = self._duration_kw_to_str(**kwargs)
398        return self
399
400    def add_interval(self, **kwargs: int):
401        """How often to run the analysis, i.e. the interval between analysis windows."""
402        self.interval = self._duration_kw_to_str(**kwargs)
403        return self
404
405    def add_start(self, start: datetime):
406        self.start = start
407        return self
408
409    def build(self) -> WindowConfig:
410        start = ensure_tz(self.start) if self.start else None
411
412        return WindowConfig(
413            self.pipeline,
414            unwrap(self.model),
415            unwrap(self.width),
416            start,
417            self.interval,
418        )

Helps build a WindowConfig. model and width are required but there are no good default values for them because they depend on the baseline. We leave it up to the assay builder to configure the window correctly after it is created.
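
A hedged usage sketch of the builder; the pipeline name, model name, and start date are placeholders.

    from datetime import datetime, timezone

    window = (
        WindowBuilder("my-pipeline")
        .add_model_name("my-model")
        .add_width(hours=12)     # each analysis window covers 12 hours of inferences
        .add_interval(hours=4)   # a new window starts every 4 hours
        .add_start(datetime(2023, 1, 1, tzinfo=timezone.utc))
        .build()
    )
    print(window.to_json())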

WindowBuilder(pipeline_name: str)
358    def __init__(self, pipeline_name: str):
359        self.pipeline = pipeline_name
360        self.model: Optional[str] = None
361        self.width: Optional[str] = "24 hours"
362        self.start: Optional[datetime] = None
363        self.interval: Optional[str] = None
def add_model_name(self, model_name: str):
365    def add_model_name(self, model_name: str):
366        """The model name (model_id) that the window should analyze."""
367        self.model = model_name
368        return self

The model name (model_id) that the window should analyze.

def add_width(self, **kwargs: int):
395    def add_width(self, **kwargs: int):
396        """The width of the window to use when collecting data for analysis."""
397        self.width = self._duration_kw_to_str(**kwargs)
398        return self

The width of the window to use when collecting data for analysis.

def add_interval(self, **kwargs: int):
400    def add_interval(self, **kwargs: int):
401        """How often to run the analysis, i.e. the interval between analysis windows."""
402        self.interval = self._duration_kw_to_str(**kwargs)
403        return self

How often to run the analysis, i.e. the interval between analysis windows.

def add_start(self, start: datetime.datetime):
405    def add_start(self, start: datetime):
406        self.start = start
407        return self
def build(self) -> wallaroo.assay_config.WindowConfig:
409    def build(self) -> WindowConfig:
410        start = ensure_tz(self.start) if self.start else None
411
412        return WindowConfig(
413            self.pipeline,
414            unwrap(self.model),
415            unwrap(self.width),
416            start,
417            self.interval,
418        )
def ConfigEncoder(o):
421def ConfigEncoder(o):
422    """Used to format datetimes as we need when encoding to JSON"""
423    if isinstance(o, datetime):
424        return o.isoformat()
425    else:
426        return o.__dict__

Used to format datetimes as we need when encoding to JSON
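
A small sketch of the encoder in use with json.dumps; the payload is illustrative.

    from datetime import datetime, timezone
    import json

    payload = {"start_at": datetime(2023, 1, 1, tzinfo=timezone.utc)}
    print(json.dumps(payload, indent=4, default=ConfigEncoder))
    # {
    #     "start_at": "2023-01-01T00:00:00+00:00"
    # }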

class AssayConfig:
429class AssayConfig(object):
430    """Configuration for an Assay record."""
431
432    def __init__(
433        self,
434        client: Optional["Client"],
435        name: str,
436        pipeline_id: int,
437        pipeline_name: str,
438        active: bool,
439        status: str,
440        iopath: str,
441        baseline: BaselineConfig,
442        window: WindowConfig,
443        summarizer: SummarizerConfig,
444        warning_threshold: Optional[float],
445        alert_threshold: float,
446        run_until: Optional[datetime],
447        workspace_id: Optional[int],
448    ):
449        self.client = client
450        self.name = name
451        self.pipeline_id = pipeline_id
452        self.pipeline_name = pipeline_name
453        self.active = active
454        self.status = status
455        self.iopath = iopath
456        self.baseline = baseline
457        self.window = window
458        self.summarizer = summarizer
459        self.warning_threshold = warning_threshold
460        self.alert_threshold = alert_threshold
461        self.run_until = run_until
462        self.workspace_id = workspace_id
463
464    def to_json(self) -> str:
465        payload = self.__dict__.copy()
466        payload.pop("client", None)
467        payload.pop("model_insights_url", None)
468        return json.dumps(payload, indent=4, default=ConfigEncoder)
469
470    def interactive_run(self) -> AssayAnalysisList:
471        """Runs this assay interactively. The assay is not saved to the database
472        nor are analysis records saved to a Plateau topic. Useful for exploring
473        pipeline inference data and experimenting with thresholds."""
474
475        client = unwrap(self.client)
476        payload = {
477            **json.loads(self.to_json()),
478            "created_at": datetime.now(timezone.utc).isoformat(),
479        }
480        mlops_client = client.mlops()
481        mlops_client.timeout = 5 * 60
482        ret = assays_run_interactive.sync(
483            client=mlops_client,
484            json_body=AssaysRunInteractiveJsonBody.from_dict(payload),
485        )
486
487        analysis_list = []
488        if ret is not None:
489            if not isinstance(ret, List):
490                raise Exception(ret.msg)
491
492            analysis_list = [AssayAnalysis(ar.to_dict()) for ar in ret]
493
494        return AssayAnalysisList(analysis_list)
495
496    def interactive_baseline_run(self) -> Optional[AssayAnalysis]:
497        """Runs the assay against its baseline interactively. Nothing is saved to the database."""
498        client = unwrap(self.client)
499        payload = {
500            **json.loads(self.to_json()),
501            "created_at": datetime.now(timezone.utc).isoformat(),
502        }
503        ret = assays_run_interactive_baseline.sync(
504            client=client.mlops(),
505            json_body=AssaysRunInteractiveBaselineJsonBody.from_dict(payload),
506        )
507
508        if ret is not None:
509            if not isinstance(ret, AssaysRunInteractiveBaselineResponse200):
510                raise Exception(ret.msg)
511
512            aa = ret.to_dict()
513            return AssayAnalysis(aa)
514
515        return None
516
517    def interactive_input_run(
518        self, inferences: List[Dict], labels: Optional[List[str]]
519    ) -> AssayAnalysisList:
520        """Analyzes the inputs given to create an interactive run for each feature
521        column. The assay is not saved to the database nor are analysis records saved
522        to a Plateau topic. Useful for exploring inputs for possible causes when a
523        difference is detected in the output."""
524
525        all_assays = []
526        inference = inferences[0]
527
528        print(f"input column distinct_vals label           largest_pct")
529        # TODO extend this to work for any input shape
530        inputs = inference["original_data"]["tensor"]
531        for idx0, _ in enumerate(inputs):
532            if labels and len(inputs[idx0]) != len(labels):
533                print(
534                    f"Labels are not the same len {len(labels)} as inputs {len(inputs[idx0])}"
535                )
536            for idx1, _ in enumerate(inputs[idx0]):
537                values = []
538                for inf in inferences:
539                    values.append(inf["original_data"]["tensor"][idx0][idx1])
540                counter = Counter(values)
541                value_pct = [c / len(values) for c in counter.values()]
542                value_pct.sort()
543                largest_pct = value_pct[-1]
544                distinct_values = len(counter.keys())
545                label = labels[idx1] if labels else ""
546                # TODO: Rule of thumb may need better way to distinguish
547                msg = (
548                    "*** May not be continuous feature"
549                    if distinct_values < 5 or largest_pct > 0.90
550                    else ""
551                )
552                print(
553                    f"{idx0:5} {idx1:5} {distinct_values:14} {label:15} {largest_pct:0.4f} {msg}"
554                )
555
556                iopath = f"inputs {idx0} {idx1}"
557                self.iopath = iopath
558
559                assays = self.interactive_run()
560                all_assays.extend(assays.raw)
561
562        return AssayAnalysisList(all_assays)

Configuration for an Assay record.

AssayConfig( client: Optional[wallaroo.client.Client], name: str, pipeline_id: int, pipeline_name: str, active: bool, status: str, iopath: str, baseline: wallaroo.assay_config.BaselineConfig, window: wallaroo.assay_config.WindowConfig, summarizer: wallaroo.assay_config.SummarizerConfig, warning_threshold: Optional[float], alert_threshold: float, run_until: Optional[datetime.datetime], workspace_id: Optional[int])
432    def __init__(
433        self,
434        client: Optional["Client"],
435        name: str,
436        pipeline_id: int,
437        pipeline_name: str,
438        active: bool,
439        status: str,
440        iopath: str,
441        baseline: BaselineConfig,
442        window: WindowConfig,
443        summarizer: SummarizerConfig,
444        warning_threshold: Optional[float],
445        alert_threshold: float,
446        run_until: Optional[datetime],
447        workspace_id: Optional[int],
448    ):
449        self.client = client
450        self.name = name
451        self.pipeline_id = pipeline_id
452        self.pipeline_name = pipeline_name
453        self.active = active
454        self.status = status
455        self.iopath = iopath
456        self.baseline = baseline
457        self.window = window
458        self.summarizer = summarizer
459        self.warning_threshold = warning_threshold
460        self.alert_threshold = alert_threshold
461        self.run_until = run_until
462        self.workspace_id = workspace_id
def to_json(self) -> str:
464    def to_json(self) -> str:
465        payload = self.__dict__.copy()
466        payload.pop("client", None)
467        payload.pop("model_insights_url", None)
468        return json.dumps(payload, indent=4, default=ConfigEncoder)
def interactive_run(self) -> wallaroo.assay.AssayAnalysisList:
470    def interactive_run(self) -> AssayAnalysisList:
471        """Runs this assay interactively. The assay is not saved to the database
472        nor are analysis records saved to a Plateau topic. Useful for exploring
473        pipeline inference data and experimenting with thresholds."""
474
475        client = unwrap(self.client)
476        payload = {
477            **json.loads(self.to_json()),
478            "created_at": datetime.now(timezone.utc).isoformat(),
479        }
480        mlops_client = client.mlops()
481        mlops_client.timeout = 5 * 60
482        ret = assays_run_interactive.sync(
483            client=mlops_client,
484            json_body=AssaysRunInteractiveJsonBody.from_dict(payload),
485        )
486
487        analysis_list = []
488        if ret is not None:
489            if not isinstance(ret, List):
490                raise Exception(ret.msg)
491
492            analysis_list = [AssayAnalysis(ar.to_dict()) for ar in ret]
493
494        return AssayAnalysisList(analysis_list)

Runs this assay interactively. The assay is not saved to the database nor are analysis records saved to a Plateau topic. Useful for exploring pipeline inference data and experimenting with thresholds.
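
A hedged usage sketch; the client object (wl), names, ids, and dates are placeholders, and the builder's defaults (such as iopath 'output 0 0') are left in place.

    from datetime import datetime, timezone

    builder = AssayBuilder(
        client=wl,  # an authenticated wallaroo.client.Client (placeholder)
        name="output-drift-check",
        pipeline_id=1,
        pipeline_name="my-pipeline",
        model_name="my-model",
        baseline_start=datetime(2023, 1, 1, tzinfo=timezone.utc),
        baseline_end=datetime(2023, 1, 2, tzinfo=timezone.utc),
    )
    config = builder.add_run_until(datetime(2023, 1, 3, tzinfo=timezone.utc)).build()
    results = config.interactive_run()  # AssayAnalysisList; nothing is persisted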

def interactive_baseline_run(self) -> Optional[wallaroo.assay.AssayAnalysis]:
496    def interactive_baseline_run(self) -> Optional[AssayAnalysis]:
497        """Runs the assay against its baseline interactively. Nothing is saved to the database."""
498        client = unwrap(self.client)
499        payload = {
500            **json.loads(self.to_json()),
501            "created_at": datetime.now(timezone.utc).isoformat(),
502        }
503        ret = assays_run_interactive_baseline.sync(
504            client=client.mlops(),
505            json_body=AssaysRunInteractiveBaselineJsonBody.from_dict(payload),
506        )
507
508        if ret is not None:
509            if not isinstance(ret, AssaysRunInteractiveBaselineResponse200):
510                raise Exception(ret.msg)
511
512            aa = ret.to_dict()
513            return AssayAnalysis(aa)
514
515        return None
def interactive_input_run( self, inferences: List[Dict], labels: Optional[List[str]]) -> wallaroo.assay.AssayAnalysisList:
517    def interactive_input_run(
518        self, inferences: List[Dict], labels: Optional[List[str]]
519    ) -> AssayAnalysisList:
520        """Analyzes the inputs given to create an interactive run for each feature
521        column. The assay is not saved to the database nor are analysis records saved
522        to a Plateau topic. Useful for exploring inputs for possible causes when a
523        difference is detected in the output."""
524
525        all_assays = []
526        inference = inferences[0]
527
528        print(f"input column distinct_vals label           largest_pct")
529        # TODO extend this to work for any input shape
530        inputs = inference["original_data"]["tensor"]
531        for idx0, _ in enumerate(inputs):
532            if labels and len(inputs[idx0]) != len(labels):
533                print(
534                    f"Labels are not the same len {len(labels)} as inputs {len(inputs[idx0])}"
535                )
536            for idx1, _ in enumerate(inputs[idx0]):
537                values = []
538                for inf in inferences:
539                    values.append(inf["original_data"]["tensor"][idx0][idx1])
540                counter = Counter(values)
541                value_pct = [c / len(values) for c in counter.values()]
542                value_pct.sort()
543                largest_pct = value_pct[-1]
544                distinct_values = len(counter.keys())
545                label = labels[idx1] if labels else ""
546                # TODO: Rule of thumb may need better way to distinguish
547                msg = (
548                    "*** May not be continuous feature"
549                    if distinct_values < 5 or largest_pct > 0.90
550                    else ""
551                )
552                print(
553                    f"{idx0:5} {idx1:5} {distinct_values:14} {label:15} {largest_pct:0.4f} {msg}"
554                )
555
556                iopath = f"inputs {idx0} {idx1}"
557                self.iopath = iopath
558
559                assays = self.interactive_run()
560                all_assays.extend(assays.raw)
561
562        return AssayAnalysisList(all_assays)

Analyzes the inputs given to create an interactive run for each feature column. The assay is not saved to the database nor are analysis records saved to a Plateau topic. Useful for exploring inputs for possible causes when a difference is detected in the output.
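
A hedged sketch of the inference record shape this method reads, based on the original_data/tensor access in the source above; the values and labels are illustrative, and assay_config is an AssayConfig built elsewhere.

    inferences = [
        {"original_data": {"tensor": [[0.12, 3.40, 1.0]]}},
        {"original_data": {"tensor": [[0.50, 2.90, 0.0]]}},
    ]
    labels = ["bedrooms", "bathrooms", "has_garage"]  # one label per input column
    analyses = assay_config.interactive_input_run(inferences, labels)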

class AssayBuilder:
565class AssayBuilder(object):
566    """Helps build an AssayConfig"""
567
568    def __init__(
569        self,
570        client: Optional["Client"],
571        name: str,
572        pipeline_id: int,
573        pipeline_name: str,
574        model_name: str,
575        baseline_start: datetime,
576        baseline_end: datetime,
577    ):
578        self.client = client
579        self.name = name
580        self.pipeline_id = pipeline_id
581        self.pipeline_name: str = pipeline_name
582        self.active = True
583        self.status = "created"
584        self.iopath: str = "output 0 0"
585        self.baseline: Optional[BaselineConfig] = None
586        self.window: Optional[WindowConfig] = None
587        self.summarizer: Optional[SummarizerConfig] = None
588        self.warning_threshold: Optional[float] = None
589        self.alert_threshold: float = 0.25
590        self.run_until: Optional[datetime] = None
591        self.workspace_id = (
592            None if self.client is None else self.client.get_current_workspace().id()
593        )
594
595        self.baseline_builder = (
596            FixedBaselineBuilder(self.pipeline_name)
597            .add_model_name(model_name)
598            .add_start(baseline_start)
599            .add_end(baseline_end)
600        )
601        self.window_builder_ = WindowBuilder(self.pipeline_name).add_model_name(
602            model_name
603        )
604
605        self.summarizer_builder = UnivariateContinousSummarizerBuilder()
606
607        self._baseline_df: Optional[pd.DataFrame] = None
608
609    def baseline_dataframe(self):
610        if self._baseline_df is None:
611            client = unwrap(self.client)
612            self._baseline_df = client.get_pipeline_inference_dataframe(
613                client.get_topic_name(self.pipeline_id),
614                unwrap(self.baseline_builder.start),
615                unwrap(self.baseline_builder.end),
616                self.baseline_builder.model_name,
617            )
618        return self._baseline_df
619
620    def baseline_histogram(
621        self, bins: Optional[Union[str, int]] = None, log_scale: bool = False
622    ):
623
624        df = self.baseline_dataframe()
625
626        n_bins = calc_bins(df.shape[0], bins)
627
628        col_name = self.iopath.replace(" ", "_")
629
630        # type inference for the bins param to histplot is incorrect: str vs str|int.
631        sns.histplot(data=df, x=col_name, bins=n_bins, log_scale=log_scale).set(  # type: ignore
632            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
633        )
634        plt.show()
635
636    def baseline_kde(self, log_scale: bool = False):
637        df = self.baseline_dataframe()
638
639        col_name = self.iopath.replace(" ", "_")
640
641        sns.kdeplot(data=df, x=col_name, log_scale=log_scale).set(
642            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
643        )
644        plt.grid()
645        plt.show()
646
647    def baseline_ecdf(self, log_scale: bool = False):
648        df = self.baseline_dataframe()
649
650        col_name = self.iopath.replace(" ", "_")
651
652        sns.ecdfplot(data=df, x=col_name, log_scale=log_scale).set(
653            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
654        )
655        plt.grid()
656        plt.show()
657
658    def build(self) -> AssayConfig:
659        self.baseline = self.baseline_builder.build()
660        self.window = self.window_builder_.build()
661        self.summarizer = self.summarizer_builder.build()
662
663        run_until = ensure_tz(self.run_until) if self.run_until else None
664
665        return AssayConfig(
666            self.client,
667            self.name,
668            self.pipeline_id,
669            self.pipeline_name,
670            self.active,
671            self.status,
672            self.iopath,
673            unwrap(self.baseline),
674            unwrap(self.window),
675            unwrap(self.summarizer),
676            self.warning_threshold,
677            self.alert_threshold,
678            run_until,
679            self.workspace_id,
680        )
681
682    def upload(self) -> int:
683        config = self.build()
684
685        if self.client:
686            res = self.client.upload_assay(config)  # type: ignore
687            return res
688        raise RuntimeError(
689            "Assay config was created for standalone and may only be used to generate configuration"
690        )
691
692    def add_name(self, name: str):
693        """Specify the assay name"""
694        self.name = name
695        return self
696
697    def add_active(self, active: bool):
698        """Specify if the assay is active or not"""
699        self.active = active
700        return self
701
702    def add_iopath(self, iopath: str):
703        """Specify what the assay should analyze. Should start with input or output and have
704        indexes (zero based) into row and column: For example 'input 0 1' specifies the second
705        column of the first input."""
706
707        iopath = iopath.strip()
708        assert iopath.lower().startswith("input") or iopath.lower().startswith("output")
709        self.iopath = iopath
710        self._baseline_df = None
711        return self
712
713    def fixed_baseline_builder(self):
714        """Creates a fixed baseline builder for this assay builder."""
715
716        bb = FixedBaselineBuilder(unwrap(self.pipeline_name))
717        self.baseline_builder = bb
718        return bb
719
720    def add_baseline(self, baseline: BaselineConfig):
721        """Adds a specific baseline created elsewhere."""
722        self.baseline = baseline
723        self._baseline_df = None
724        return self
725
726    def window_builder(self):
727        """Returns this assay builder's window builder."""
728        return self.window_builder_
729
730    def add_window(self, window: WindowConfig):
731        """Adds a window created elsewhere."""
732        self.window = window
733        return self
734
735    def univariate_continuous_summarizer(self) -> UnivariateContinousSummarizerBuilder:
736        """Creates and adds an UCS to this assay builder."""
737        ucsb = UnivariateContinousSummarizerBuilder()
738        self.summarizer_builder = ucsb
739        return ucsb
740
741    def add_summarizer(self, summarizer: SummarizerConfig):
742        """Adds the summarizer created elsewhere to this builder."""
743        self.summarizer = summarizer
744        return self
745
746    def add_warning_threshold(self, warning_threshold: float):
747        """Specify the warning threshold for this assay."""
748        self.warning_threshold = warning_threshold
749        return self
750
751    def add_alert_threshold(self, alert_threshold: float):
752        """Specify the alert threshold for this assay."""
753        self.alert_threshold = alert_threshold
754        return self
755
756    def add_run_until(self, run_until: datetime):
757        """Specify how long this assay should run. Primarily useful for
758        interactive runs to limit the number of analyses."""
759        self.run_until = run_until
760        return self

Helps build an AssayConfig
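
A hedged sketch of customizing and uploading an assay; builder is an AssayBuilder constructed as in the signature below, and the thresholds, window sizes, and iopath are illustrative.

    builder.add_name("output-drift-check")
    builder.add_iopath("output 0 0")  # analyze the first output column
    builder.window_builder().add_width(hours=24).add_interval(hours=24)
    builder.add_warning_threshold(0.1)
    builder.add_alert_threshold(0.25)
    assay_id = builder.upload()  # builds the config, saves it via the client, and returns the assay id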

AssayBuilder( client: Optional[wallaroo.client.Client], name: str, pipeline_id: int, pipeline_name: str, model_name: str, baseline_start: datetime.datetime, baseline_end: datetime.datetime)
568    def __init__(
569        self,
570        client: Optional["Client"],
571        name: str,
572        pipeline_id: int,
573        pipeline_name: str,
574        model_name: str,
575        baseline_start: datetime,
576        baseline_end: datetime,
577    ):
578        self.client = client
579        self.name = name
580        self.pipeline_id = pipeline_id
581        self.pipeline_name: str = pipeline_name
582        self.active = True
583        self.status = "created"
584        self.iopath: str = "output 0 0"
585        self.baseline: Optional[BaselineConfig] = None
586        self.window: Optional[WindowConfig] = None
587        self.summarizer: Optional[SummarizerConfig] = None
588        self.warning_threshold: Optional[float] = None
589        self.alert_threshold: float = 0.25
590        self.run_until: Optional[datetime] = None
591        self.workspace_id = (
592            None if self.client is None else self.client.get_current_workspace().id()
593        )
594
595        self.baseline_builder = (
596            FixedBaselineBuilder(self.pipeline_name)
597            .add_model_name(model_name)
598            .add_start(baseline_start)
599            .add_end(baseline_end)
600        )
601        self.window_builder_ = WindowBuilder(self.pipeline_name).add_model_name(
602            model_name
603        )
604
605        self.summarizer_builder = UnivariateContinousSummarizerBuilder()
606
607        self._baseline_df: Optional[pd.DataFrame] = None
def baseline_dataframe(self):
609    def baseline_dataframe(self):
610        if self._baseline_df is None:
611            client = unwrap(self.client)
612            self._baseline_df = client.get_pipeline_inference_dataframe(
613                client.get_topic_name(self.pipeline_id),
614                unwrap(self.baseline_builder.start),
615                unwrap(self.baseline_builder.end),
616                self.baseline_builder.model_name,
617            )
618        return self._baseline_df
def baseline_histogram( self, bins: Union[int, str, NoneType] = None, log_scale: bool = False):
620    def baseline_histogram(
621        self, bins: Optional[Union[str, int]] = None, log_scale: bool = False
622    ):
623
624        df = self.baseline_dataframe()
625
626        n_bins = calc_bins(df.shape[0], bins)
627
628        col_name = self.iopath.replace(" ", "_")
629
630        # type inference for the bins param to histplot is incorrect: str vs str|int.
631        sns.histplot(data=df, x=col_name, bins=n_bins, log_scale=log_scale).set(  # type: ignore
632            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
633        )
634        plt.show()
def baseline_kde(self, log_scale: bool = False):
636    def baseline_kde(self, log_scale: bool = False):
637        df = self.baseline_dataframe()
638
639        col_name = self.iopath.replace(" ", "_")
640
641        sns.kdeplot(data=df, x=col_name, log_scale=log_scale).set(
642            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
643        )
644        plt.grid()
645        plt.show()
def baseline_ecdf(self, log_scale: bool = False):
647    def baseline_ecdf(self, log_scale: bool = False):
648        df = self.baseline_dataframe()
649
650        col_name = self.iopath.replace(" ", "_")
651
652        sns.ecdfplot(data=df, x=col_name, log_scale=log_scale).set(
653            title=f"Baseline '{self.iopath}' {self.baseline_builder.start} - {self.baseline_builder.end}"
654        )
655        plt.grid()
656        plt.show()
def build(self) -> wallaroo.assay_config.AssayConfig:
658    def build(self) -> AssayConfig:
659        self.baseline = self.baseline_builder.build()
660        self.window = self.window_builder_.build()
661        self.summarizer = self.summarizer_builder.build()
662
663        run_until = ensure_tz(self.run_until) if self.run_until else None
664
665        return AssayConfig(
666            self.client,
667            self.name,
668            self.pipeline_id,
669            self.pipeline_name,
670            self.active,
671            self.status,
672            self.iopath,
673            unwrap(self.baseline),
674            unwrap(self.window),
675            unwrap(self.summarizer),
676            self.warning_threshold,
677            self.alert_threshold,
678            run_until,
679            self.workspace_id,
680        )
def upload(self) -> int:
682    def upload(self) -> int:
683        config = self.build()
684
685        if self.client:
686            res = self.client.upload_assay(config)  # type: ignore
687            return res
688        raise RuntimeError(
689            "Assay config was created for standalone and may only be used to generate configuration"
690        )
def add_name(self, name: str):
692    def add_name(self, name: str):
693        """Specify the assay name"""
694        self.name = name
695        return self

Specify the assay name

def add_active(self, active: bool):
697    def add_active(self, active: bool):
698        """Specify if the assay is active or not"""
699        self.active = active
700        return self

Specify if the assay is active or not

def add_iopath(self, iopath: str):
702    def add_iopath(self, iopath: str):
703        """Specify what the assay should analyze. Should start with input or output and have
704        indexes (zero based) into row and column: For example 'input 0 1' specifies the second
705        column of the first input."""
706
707        iopath = iopath.strip()
708        assert iopath.lower().startswith("input") or iopath.lower().startswith("output")
709        self.iopath = iopath
710        self._baseline_df = None
711        return self

Specify what the assay should analyze. Should start with input or output and have indexes (zero based) into row and column: For example 'input 0 1' specifies the second column of the first input.
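
Illustrative iopath values; builder is an AssayBuilder as above. Each call replaces the previous setting and clears any cached baseline dataframe.

    builder.add_iopath("output 0 0")  # first column of the first output (the default)
    builder.add_iopath("input 0 1")   # second column of the first input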

def fixed_baseline_builder(self):
713    def fixed_baseline_builder(self):
714        """Creates a fixed baseline builder for this assay builder."""
715
716        bb = FixedBaselineBuilder(unwrap(self.pipeline_name))
717        self.baseline_builder = bb
718        return bb

Creates a fixed baseline builder for this assay builder.

def add_baseline(self, baseline: wallaroo.assay_config.BaselineConfig):
720    def add_baseline(self, baseline: BaselineConfig):
721        """Adds a specific baseline created elsewhere."""
722        self.baseline = baseline
723        self._baseline_df = None
724        return self

Adds a specific baseline created elsewhere.

def window_builder(self):
726    def window_builder(self):
727        """Returns this assay builder's window builder."""
728        return self.window_builder_

Returns this assay builder's window builder.

def add_window(self, window: wallaroo.assay_config.WindowConfig):
730    def add_window(self, window: WindowConfig):
731        """Adds a window created elsewhere."""
732        self.window = window
733        return self

Adds a window created elsewhere.

def univariate_continuous_summarizer(self) -> wallaroo.assay_config.UnivariateContinousSummarizerBuilder:
735    def univariate_continuous_summarizer(self) -> UnivariateContinousSummarizerBuilder:
736        """Creates and adds an UCS to this assay builder."""
737        ucsb = UnivariateContinousSummarizerBuilder()
738        self.summarizer_builder = ucsb
739        return ucsb

Creates and adds an UCS to this assay builder.

def add_summarizer(self, summarizer: wallaroo.assay_config.SummarizerConfig):
741    def add_summarizer(self, summarizer: SummarizerConfig):
742        """Adds the summarizer created elsewhere to this builder."""
743        self.summarizer = summarizer
744        return self

Adds the summarizer created elsewhere to this builder.

def add_warning_threshold(self, warning_threshold: float):
746    def add_warning_threshold(self, warning_threshold: float):
747        """Specify the warning threshold for this assay."""
748        self.warning_threshold = warning_threshold
749        return self

Specify the warning threshold for this assay.

def add_alert_threshold(self, alert_threshold: float):
751    def add_alert_threshold(self, alert_threshold: float):
752        """Specify the alert threshold for this assay."""
753        self.alert_threshold = alert_threshold
754        return self

Specify the alert threshold for this assay.

def add_run_until(self, run_until: datetime.datetime):
756    def add_run_until(self, run_until: datetime):
757        """Specify how long this assay should run. Primarily useful for
758        interactive runs to limit the number of analyses."""
759        self.run_until = run_until
760        return self

"How long should this assay run. Primarily useful for interactive runs to limit the number of analysis.

def calc_bins(num_samples: int, bins: Union[int, str, NoneType]) -> Union[str, int]:
763def calc_bins(num_samples: int, bins: Optional[Union[str, int]]) -> Union[str, int]:
764    """If the user specifies a number of bins or a strategy for calculating
765    them, use that. Otherwise use the square root of the number of samples, capped at 50."""
766
767    if bins is None:
768        return min(int(math.sqrt(num_samples)), 50)
769    else:
770        return bins

If the user specifies a number of bins or a strategy for calculating them, use that. Otherwise use the square root of the number of samples, capped at 50.
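
A few illustrative calls showing the behavior described above.

    calc_bins(100, None)        # -> 10  (square root of the sample count)
    calc_bins(10_000, None)     # -> 50  (capped at 50)
    calc_bins(10_000, 25)       # -> 25  (an explicit bin count is passed through)
    calc_bins(10_000, "doane")  # -> "doane"  (a named strategy is passed through unchanged)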