wallaroo.pipeline

import datetime
import pathlib
import time
from dataclasses import asdict
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    List,
    Optional,
    Sequence,
    Tuple,
    Union,
    cast,
)

import pandas as pd
import pyarrow as pa  # type: ignore
from dateutil import parser as dateparse

from wallaroo import notify

from . import queries
from .checks import Alert, Expression, require_dns_compliance
from .deployment import Deployment
from .deployment_config import DeploymentConfig
from .explainability import ExplainabilityConfig, ExplainabilityConfigList
from .inference_result import InferenceResult
from .logs import LogEntries, LogEntriesShadowDeploy
from .model import Model
from .model_config import ModelConfig
from .object import *  # re-exports Object, DehydratedValue, rehydrate, gql, etc. used below
from .pipeline_config import PipelineConfigBuilder, Step
from .unwrap import unwrap
from .visibility import _Visibility

if TYPE_CHECKING:
    # Imports that happen below in methods to fix circular import dependency
    # issues need to also be specified here to satisfy mypy type checking.
    from .client import Client
    from .deployment import Deployment
    from .pipeline_variant import PipelineVariant
    from .tag import Tag


def update_timestamp(f):
    """Decorator that records the timestamp of the newest inference result
    returned by `f`, so that `logs()` can wait for log processing to catch up."""

    def _inner(self, *args, **kwargs):
        results = f(self, *args, **kwargs)
        if isinstance(results, list) and results:
            self._last_infer_time = max(r.timestamp() for r in results)
        elif isinstance(results, pd.DataFrame):
            if "time" in results:
                self._last_infer_time = results["time"].max()
        elif isinstance(results, pa.Table):
            if "time" in results.column_names:
                min_max_time = pa.compute.min_max(results["time"])
                self._last_infer_time = min_max_time["max"].as_py()

        return results

    return _inner


class Pipeline(Object):
    """A pipeline is an execution context for models. Pipelines contain Steps, which are often Models. Pipelines can be deployed or undeployed."""

    def __init__(
        self,
        client: Optional["Client"],
        data: Dict[str, Any],
    ) -> None:
        from .pipeline_config import PipelineConfigBuilder  # avoids circular imports

        self.client = client
        assert client is not None

        # We track the last timestamp received as a hack, so that we can wait for logs
        # that are still being processed.
        self._last_infer_time = None

        # We shim through to all builder methods but return self so we can chain
        # pipeline calls. See "Shims" below. Multiple inheritance from
        # PipelineConfigBuilder was also considered, but shims let us adjust
        # arguments individually where needed.
        self._builder = None
        self._deployment = None

        super().__init__(gql_client=client._gql_client, data=data)

    def __repr__(self) -> str:
        return str(
            {
                "name": self.name(),
                "create_time": self.create_time(),
                "definition": self.definition(),
            }
        )

    def _html_steptable(self) -> str:
        models = self._fetch_models()
        return ", ".join(models)

        # Yes this is biased towards models only
        # TODO: other types of steps
        # steps = self.steps()
        # steptable = ""
        # if steps:
        #     rows = ""
        #     for step in steps:
        #         rows += step._repr_html_()
        #     steptable = f"<table>{rows}</table>"
        # else:
        #     steptable = "(no steps)"
        # return steptable

    def _repr_html_(self) -> str:
        tags = ", ".join([tag.tag() for tag in self.tags()])
        deployment = self._deployment_for_pipeline()
        deployed = "(none)" if deployment is None else deployment.deployed()
        variants = ", ".join([variant.name() for variant in self.variants()])

        return (
            f"<table>"
            f"<tr><th>name</th> <td>{self.name()}</td></tr>"
            f"<tr><th>created</th> <td>{self.create_time()}</td></tr>"
            f"<tr><th>last_updated</th> <td>{self.last_update_time()}</td></tr>"
            f"<tr><th>deployed</th> <td>{deployed}</td></tr>"
            f"<tr><th>tags</th> <td>{tags}</td></tr>"
            f"<tr><th>versions</th> <td>{variants}</td></tr>"
            f"<tr><th>steps</th> <td>{self._html_steptable()}</td></tr>"
            f"</table>"
        )

    def _is_named(self) -> bool:
        try:
            self.name()
            return True
        except Exception:
            return False

    def builder(self) -> "PipelineConfigBuilder":
        if self._builder is None:
            self._builder = PipelineConfigBuilder(
                self.client,
                pipeline_name=self.name(),
                standalone=False,
            )
        return cast(PipelineConfigBuilder, self._builder)

    def _fill(self, data: Dict[str, Any]) -> None:
        from .pipeline_variant import PipelineVariant  # avoids circular imports
        from .tag import Tag

        for required_attribute in ["id"]:
            if required_attribute not in data:
                raise RequiredAttributeMissing(
                    self.__class__.__name__, required_attribute
                )
        self._id = data["id"]

        # Optional
        self._owner_id = value_if_present(data, "owner_id")

        # Optional
        self._tags = (
            [Tag(self.client, tag["tag"]) for tag in data["pipeline_tags"]]
            if "pipeline_tags" in data
            else DehydratedValue()
        )
        self._create_time = (
            dateparse.isoparse(data["created_at"])
            if "created_at" in data
            else DehydratedValue()
        )
        self._last_update_time = (
            dateparse.isoparse(data["updated_at"])
            if "updated_at" in data
            else DehydratedValue()
        )
        self._name = value_if_present(data, "pipeline_id")
        self._variants = (
            [PipelineVariant(self.client, elem) for elem in data["pipeline_versions"]]
            if "pipeline_versions" in data
            else DehydratedValue()
        )

    def _fetch_attributes(self) -> Dict[str, Any]:
        assert self.client is not None
        return self.client._gql_client.execute(
            gql.gql(
                """
            query PipelineById($pipeline_id: bigint!) {
                pipeline_by_pk(id: $pipeline_id) {
                    id
                    pipeline_id
                    created_at
                    updated_at
                    visibility
                    owner_id
                    pipeline_versions(order_by: {id: desc}) {
                        id
                    }
                    pipeline_tags {
                      tag {
                        id
                        tag
                      }
                    }
                }
            }
                """
            ),
            variable_values={
                "pipeline_id": self._id,
            },
        )["pipeline_by_pk"]

    def _update_visibility(self, visibility: _Visibility):
        assert self.client is not None
        return self._fill(
            self.client._gql_client.execute(
                gql.gql(
                    """
                mutation UpdatePipelineVisibility(
                    $pipeline_id: bigint!,
                    $visibility: String
                ) {
                  update_pipeline(
                    where: {id: {_eq: $pipeline_id}},
                    _set: {visibility: $visibility}) {
                      returning {
                          id
                          pipeline_id
                          created_at
                          updated_at
                          visibility
                          owner_id
                          pipeline_versions(order_by: {id: desc}) {
                              id
                          }
                      }
                    }
                }
                """
                ),
                variable_values={
                    "pipeline_id": self._id,
                    "visibility": visibility,
                },
            )["update_pipeline"]["returning"][0]
        )

    def _fetch_models(self):
        """Load deployment and any models associated; used only for listing and searching cases."""
        data = self._gql_client.execute(
            gql.gql(queries.named("PipelineModels")),
            variable_values={"pipeline_id": self.id()},
        )
        names = []
        try:
            mc_nodes = data["pipeline_by_pk"]["deployment"][
                "deployment_model_configs_aggregate"
            ]["nodes"]
            names = [mc["model_config"]["model"]["model"]["name"] for mc in mc_nodes]
        except Exception:
            pass
        return names

    def id(self) -> int:
        return self._id

    @rehydrate("_owner_id")
    def owner_id(self) -> str:
        return cast(str, self._owner_id)

    @rehydrate("_create_time")
    def create_time(self) -> datetime.datetime:
        return cast(datetime.datetime, self._create_time)

    @rehydrate("_last_update_time")
    def last_update_time(self) -> datetime.datetime:
        return cast(datetime.datetime, self._last_update_time)

    @rehydrate("_name")
    def name(self) -> str:
        return cast(str, self._name)

    @rehydrate("_variants")
    def variants(self) -> List["PipelineVariant"]:
        from .pipeline_variant import PipelineVariant  # avoids import cycles

        return cast(List[PipelineVariant], self._variants)

    @rehydrate("_tags")
    def tags(self) -> List["Tag"]:
        from .tag import Tag

        return cast(List[Tag], self._tags)

    def logs(self, limit: int = 100, valid: Optional[bool] = None) -> LogEntries:
        topic = self.get_topic_name()

        if valid is False:
            topic += "-failures"
        assert self.client is not None

        entries, status = self.client.get_logs(topic, limit)

        # XXX: hack to attempt to align logs with received inference results.
        # Ideally we'd use indices from plateau directly for querying, but the
        # engine currently does not support that.
        if self._last_infer_time is not None:
            for _ in range(5):
                if entries and self._last_infer_time <= max(
                    e.timestamp for e in entries
                ):
                    break

                time.sleep(1)
                entries, status = self.client.get_logs(topic, limit)

        if status == "ByteLimited":
            returned = len(entries)
            print(
                f"Warning: only displaying {returned} log messages (of {limit} requested) due to payload size limitations."
            )

        return entries

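    # Example: a minimal sketch of reading recent logs (assumes this pipeline
    # has been deployed and has served inferences; names are illustrative):
    #
    #   recent = pipeline.logs(limit=50)       # most recent log entries
    #   failures = pipeline.logs(valid=False)  # read the "-failures" topic instead
    #   for entry in recent:
    #       print(entry.timestamp)
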
    def logs_shadow_deploy(self):
        logs = self.logs()
        return LogEntriesShadowDeploy(logs)

    def url(self) -> str:
        """Returns the inference URL for this pipeline."""
        deployment = self._deployment_for_pipeline()
        if deployment is None:
            raise RuntimeError("Pipeline has not been deployed and has no URL")
        else:
            return deployment.url()

    def deploy(
        self,
        pipeline_name: Optional[str] = None,
        deployment_config: Optional[DeploymentConfig] = None,
    ) -> "Pipeline":
        """Deploy pipeline. `pipeline_name` is optional if deploy was called previously.
        When specified, `pipeline_name` must consist of ASCII alphanumeric characters
        and dashes (-) only."""
        if pipeline_name is not None:
            require_dns_compliance(pipeline_name)
        self._deploy_upload_optional(pipeline_name, deployment_config)
        return self

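    # Example: a minimal sketch of building and deploying a pipeline (assumes
    # a wallaroo.Client `wl` and an uploaded `model`; both names are
    # illustrative, not part of this module):
    #
    #   pipeline = wl.build_pipeline("my-pipeline")  # assumed Client helper
    #   pipeline.add_model_step(model)
    #   pipeline.deploy("my-pipeline")               # name must be DNS-compliant
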
    def definition(self) -> str:
        """Get the current definition of the pipeline as a string"""
        return str(self.builder().steps)

    def _deploy_upload_optional(
        self,
        pipeline_name: Optional[str] = None,
        deployment_config: Optional[DeploymentConfig] = None,
        upload: bool = True,
    ) -> "Pipeline":
        """INTERNAL USE ONLY: This is used in convenience methods that create pipelines"""

        if pipeline_name is None:
            if not self._is_named():
                raise RuntimeError(
                    "pipeline_name is required when pipeline was not previously deployed."
                )
            else:
                pipeline_name = self.name()
        if upload:
            self._upload()

        self._deployment = self.variants()[0].deploy(
            deployment_name=pipeline_name,
            model_configs=self.builder()._model_configs(),
            config=deployment_config,
        )
        return self

    def _deployment_for_pipeline(self) -> Optional["Deployment"]:
        """Fetch a pipeline's deployment."""
        if self._deployment is not None:
            if not isinstance(self._deployment, DehydratedValue):
                self._deployment._rehydrate()
            return self._deployment

        res = self._gql_client.execute(
            gql.gql(
                """
            query GetDeploymentForPipeline($pipeline_id: bigint!) {
              pipeline_by_pk(id: $pipeline_id) {
                deployment {
                  id
                  deploy_id
                  deployed
                }
              }
            }"""
            ),
            variable_values={
                "pipeline_id": self.id(),
            },
        )
        if not res["pipeline_by_pk"]:
            raise EntityNotFoundError("Pipeline", {"pipeline_id": str(self.id())})

        if res["pipeline_by_pk"]["deployment"]:
            self._deployment = Deployment(
                client=self.client,
                data=res["pipeline_by_pk"]["deployment"],
            )
        return self._deployment

    def get_topic_name(self) -> str:
        if self.client is None:
            return f"pipeline-{self.name()}-inference"
        return self.client.get_topic_name(self.id())

    # -----------------------------------------------------------------------------
    # Shims for Deployment methods
    # -----------------------------------------------------------------------------

    def undeploy(self) -> "Pipeline":
        """Undeploy this pipeline's current deployment, if any."""
        assert self.client is not None
        deployment = self._deployment_for_pipeline()
        if deployment:
            deployment.undeploy()
        return self

    @update_timestamp
    def infer(
        self,
        tensor: Union[Dict[str, Any], pd.DataFrame, pa.Table],
        timeout: Optional[Union[int, float]] = None,
        dataset: Optional[Union[Sequence[str], str]] = None,
        dataset_exclude: Optional[Union[Sequence[str], str]] = None,
        dataset_separator: Optional[str] = None,
    ) -> Union[List[InferenceResult], pd.DataFrame, pa.Table]:
        """
        Returns an inference result on this deployment, given a tensor.

        :param tensor: Union[Dict[str, Any], pd.DataFrame, pa.Table] Inference data,
            as a dictionary, pandas DataFrame, or Arrow Table.
        :param timeout: Optional[Union[int, float]] Infer requests will time out
            after this number of seconds is exceeded; defaults to 15 seconds.
        :param dataset: Optional[Union[Sequence[str], str]] Defaults to
            ["time", "out"]. Other available options: "check_failures", "metadata".
        :param dataset_exclude: Optional[Union[Sequence[str], str]] If set, excludes
            the named parts of the dataset from the result.
        :param dataset_separator: Optional[str] If set to ".", the returned dataset
            will be flattened.
        :return: InferenceResult in dictionary, dataframe or arrow format.
        """
        deployment = self._deployment_for_pipeline()
        if deployment:
            return deployment.infer(
                tensor, timeout, dataset, dataset_exclude, dataset_separator
            )
        else:
            raise RuntimeError(f"Pipeline {self.name()} is not deployed")

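    # Example: a minimal sketch of running inference against this pipeline
    # (assumes it is deployed; the tensor layout is illustrative and
    # model-dependent):
    #
    #   result = pipeline.infer({"tensor": [[1.0, 2.0, 3.0]]}, timeout=30)
    #   # A pandas DataFrame or Arrow Table may be passed the same way.
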
    @update_timestamp
    def infer_from_file(
        self,
        filename: Union[str, pathlib.Path],
        timeout: Optional[Union[int, float]] = None,
        dataset: Optional[Sequence[str]] = None,
        exclude: Optional[Sequence[str]] = None,
        dataset_separator: Optional[str] = None,
    ) -> List[InferenceResult]:
        """Returns an inference result on this deployment, given tensors in a file."""

        deployment = self._deployment_for_pipeline()
        if deployment:
            return deployment.infer_from_file(
                filename, timeout, dataset, exclude, dataset_separator
            )
        else:
            raise RuntimeError(f"Pipeline {self.name()} is not deployed")

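    # Example: a minimal sketch (the path is illustrative; the file must hold
    # tensor data in the format the deployed model expects):
    #
    #   results = pipeline.infer_from_file("./data/input.json", timeout=30)
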
    async def batch_infer_from_file(
        self,
        filename: Union[str, pathlib.Path],
        data_key: str = "tensor",
        batch_size: int = 1000,
        connector_limit: int = 4,
    ) -> List[InferenceResult]:
        """Async method to run batched inference on a data file for a given deployment.

        :param str filename: path to an existing file with tensor data in JSON format.
        :param str data_key: key which the tensor data is under within the JSON; defaults to "tensor".
        :param int batch_size: batch size to use when sending requests to the engine; defaults to 1000.
        :param int connector_limit: limit for the number of TCP connections; defaults to 4.
        :return: List of InferenceResults.
        :rtype: List[InferenceResult]
        """
        deployment = self._deployment_for_pipeline()
        if deployment:
            return await deployment.batch_infer_from_file(
                filename, data_key, batch_size, connector_limit
            )
        else:
            raise RuntimeError(f"Pipeline {self.name()} is not deployed")

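    # Example: a minimal sketch of driving the async batch API from a script
    # (the path is illustrative):
    #
    #   import asyncio
    #
    #   results = asyncio.run(
    #       pipeline.batch_infer_from_file("./data/batch.json", batch_size=500)
    #   )
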
    def status(self) -> Dict[str, Any]:
        """Status of pipeline"""
        deployment = self._deployment_for_pipeline()
        if deployment:
            return deployment.status()
        else:
            return {"status": f"Pipeline {self.name()} is not deployed"}

    # -----------------------------------------------------------------------------
    # Accessors for PipelineConfigBuilder attributes. Not exactly shims, and they
    # may be changing a contract elsewhere.
    # -----------------------------------------------------------------------------

    def steps(self) -> List[Step]:
        """Returns a list of the steps of a pipeline. Not exactly a shim"""
        return self.builder().steps

    def model_configs(self) -> List[ModelConfig]:
        """Returns a list of the model configs of a pipeline. Not exactly a shim"""
        return self.builder()._model_configs()

    # -----------------------------------------------------------------------------
    # Shims for PipelineConfigBuilder methods
    # -----------------------------------------------------------------------------

    def _upload(self) -> "Pipeline":
        assert self.client is not None

        # Special case: deploying an existing pipeline whose steps are of type
        # ModelInference. The builder doesn't get repopulated, so we do that here.
        if self.builder().steps == []:
            for step in self.variants()[0].definition()["steps"]:
                if "ModelInference" in step:
                    name = step["ModelInference"]["models"][0]["name"]
                    version = step["ModelInference"]["models"][0]["version"]
                    model = self.client.model_by_name(
                        model_class=name, model_name=version
                    )
                    self.add_model_step(model)

        new_pipeline = self.builder().upload()
        self._fill({"id": new_pipeline.id()})
        return self

    def remove_step(self, index: int) -> "Pipeline":
        """Remove a step at a given index"""
        self.builder().remove_step(index)
        return self

    def add_model_step(self, model: Model) -> "Pipeline":
        """Perform inference with a single model."""
        self.builder().add_model_step(model)
        return self

    def replace_with_model_step(self, index: int, model: Model) -> "Pipeline":
        """Replaces the step at the given index with a model step"""
        self.builder().replace_with_model_step(index, model)
        return self

    def add_multi_model_step(self, models: Iterable[Model]) -> "Pipeline":
        """Perform inference on the same input data for any number of models."""
        self.builder().add_multi_model_step(models)
        return self

    def replace_with_multi_model_step(
        self, index: int, models: Iterable[Model]
    ) -> "Pipeline":
        """Replaces the step at the index with a multi model step"""
        self.builder().replace_with_multi_model_step(index, models)
        return self

    def add_audit(self, slice) -> "Pipeline":
        """Run audit logging on a specified `slice` of model outputs.

        The slice must be in python-like format. `start:`, `start:end`, and
        `:end` are supported.
        """
        self.builder().add_audit(slice)
        return self

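    # Example: a minimal sketch auditing only the first model output, using
    # the python-like slice format described above:
    #
    #   pipeline.add_audit("0:1")
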
588    def replace_with_audit(self, index: int, audit_slice: str) -> "Pipeline":
589        """Replaces the step at the index with an audit step"""
590        self.builder().replace_with_audit(index, audit_slice)
591        return self
592
593    def add_select(self, index: int) -> "Pipeline":
594        """Select only the model output with the given `index` from an array of
595        outputs.
596        """
597        self.builder().add_select(index)
598        return self
599
600    def replace_with_select(self, step_index: int, select_index: int) -> "Pipeline":
601        """Replaces the step at the index with a select step"""
602        self.builder().replace_with_select(step_index, select_index)
603        return self
604
605    def add_key_split(
606        self, default: Model, meta_key: str, options: Dict[str, Model]
607    ) -> "Pipeline":
608        """Split traffic based on the value at a given `meta_key` in the input data,
609        routing to the appropriate model.
610
611        If the resulting value is a key in `options`, the corresponding model is used.
612        Otherwise, the `default` model is used for inference.
613        """
614        self.builder().add_key_split(default, meta_key, options)
615        return self
616
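    # Example: a minimal sketch of key-based routing (`default_model`,
    # `eu_model`, and `us_model` are illustrative Model objects, not part of
    # this module):
    #
    #   pipeline.add_key_split(
    #       default_model, "region", {"eu": eu_model, "us": us_model}
    #   )
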
    def replace_with_key_split(
        self, index: int, default: Model, meta_key: str, options: Dict[str, Model]
    ) -> "Pipeline":
        """Replace the step at the index with a key split step"""
        self.builder().replace_with_key_split(index, default, meta_key, options)
        return self

    def add_random_split(
        self,
        weighted: Iterable[Tuple[float, Model]],
        hash_key: Optional[str] = None,
    ) -> "Pipeline":
        """Routes inputs to a single model, randomly chosen from the list of
        `weighted` options.

        Each model receives inputs that are approximately proportional to the
        weight it is assigned. For example, with two models having weights 1
        and 1, each will receive roughly equal amounts of inference inputs. If
        the weights were changed to 1 and 2, the models would receive roughly
        33% and 66% respectively instead.

        When choosing the model to use, a random number between 0.0 and 1.0 is
        generated. The weighted inputs are mapped to that range, and the random
        input is then used to select the model to use. For example, for the
        two-models equal-weight case, a random key of 0.4 would route to the
        first model and 0.6 would route to the second.

        To support consistent assignment to a model, a `hash_key` can be
        specified. The value found at this key in the input data must be
        between 0.0 and 1.0 and, when present, is used instead of a random
        number for model selection.
        """
        self.builder().add_random_split(weighted, hash_key)
        return self

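    # Example: a minimal sketch of a 2:1 A/B split with sticky assignment by
    # session (`model_a` and `model_b` are illustrative Model objects):
    #
    #   pipeline.add_random_split([(2.0, model_a), (1.0, model_b)], "session_id")
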
    def replace_with_random_split(
        self,
        index: int,
        weighted: Iterable[Tuple[float, Model]],
        hash_key: Optional[str] = None,
    ) -> "Pipeline":
        """Replace the step at the index with a random split step"""
        self.builder().replace_with_random_split(index, weighted, hash_key)
        return self

    def add_shadow_deploy(
        self, champion: Model, challengers: Iterable[Model]
    ) -> "Pipeline":
        """Create a "shadow deployment" experiment pipeline. The `champion`
        model and all `challengers` are run for each input. The result data for
        all models is logged, but only the output of the `champion` is
        returned.

        This is particularly useful for "burn-in" testing a new model with real
        world data without displacing the currently proven model.

        This is currently implemented as three steps: a multi model step, an
        audit step, and a select step. To remove or replace this step, you need
        to remove or replace all three, e.g. with `pipeline.remove_step`.
        """
        self.builder().add_shadow_deploy(champion, challengers)
        return self

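    # Example: a minimal sketch of shadow-testing two challengers against a
    # champion (all three are illustrative Model objects):
    #
    #   pipeline.add_shadow_deploy(champion, [challenger_a, challenger_b])
    #   # Per-model results can later be inspected via logs_shadow_deploy().
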
    def replace_with_shadow_deploy(
        self, index: int, champion: Model, challengers: Iterable[Model]
    ) -> "Pipeline":
        """Replace a given step with a shadow deployment"""
        self.builder().replace_with_shadow_deploy(index, champion, challengers)
        return self

    def add_validation(self, name: str, validation: Expression) -> "Pipeline":
        """Add a `validation` with the given `name`. All validations are run on
        all outputs, and all failures are logged.
        """
        self.builder().add_validation(name, validation)
        return self

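    # Example: a minimal sketch of a validation (assumes `model` is a Model
    # whose first output supports comparison expressions; the name and bound
    # are illustrative):
    #
    #   pipeline.add_validation("price_bound", model.outputs[0][0] < 100.0)
    #   # Failures surface in the "check_failures" dataset of inference results.
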
    def add_alert(
        self, name: str, alert: Alert, notifications: List[notify.Notification]
    ) -> "Pipeline":
        """Add the named `alert`, sending the given `notifications` when it
        triggers."""
        self.builder().add_alert(name, alert, notifications)
        return self

    def replace_with_alert(
        self,
        index: int,
        name: str,
        alert: Alert,
        notifications: List[notify.Notification],
    ) -> "Pipeline":
        """Replace the step at the given index with the specified alert"""
        self.builder().replace_with_alert(index, name, alert, notifications)
        return self

    def clear(self) -> "Pipeline":
        """
        Remove all steps from the pipeline. This might be desirable if replacing models, for example.
        """
        self.builder().clear()
        return self

    def list_explainability_configs(self) -> ExplainabilityConfigList:
        """List the explainability configs we've created."""

        result = unwrap(self.client)._post_rest_api_json(
            "v1/api/explainability/list_configs_by_pipeline",
            {"pipeline_id": self.id()},
        )
        configs = [ExplainabilityConfig(**ec) for ec in result]
        for ec in configs:
            ec.client = self.client  # type: ignore
        return ExplainabilityConfigList(configs)

    def get_explainability_config(
        self, expr: Union[str, ExplainabilityConfig]
    ) -> ExplainabilityConfig:
        """Get the details of an explainability config."""

        if isinstance(expr, str):
            explainability_config_id = expr
        else:
            explainability_config_id = str(expr.id)

        result = unwrap(self.client)._post_rest_api_json(
            "v1/api/explainability/get_config",
            {"explainability_config_id": explainability_config_id},
        )

        exp_cfg = ExplainabilityConfig(**result)
        exp_cfg.client = self.client  # type: ignore
        return exp_cfg

    def create_explainability_config(
        self, feature_names: Sequence[str], num_points: int = 10
    ) -> ExplainabilityConfig:
        """Create a shap config to be used later for reference and ad hoc requests."""

        output_names = ["output_0"]
        feature_name_list = list(feature_names)
        reference_version = self.variants()[0].name()
        workspace_id = unwrap(self.client).get_current_workspace().id()

        shap_config = ExplainabilityConfig(
            id=None,
            workspace_id=workspace_id,
            reference_pipeline_version=reference_version,
            explainability_pipeline_version=None,
            status={},
            feature_bounds={},
            num_points=num_points,
            feature_names=feature_name_list,
            output_names=output_names,
        )

        result = unwrap(self.client)._post_rest_api_json(
            "v1/api/explainability/create_config", asdict(shap_config)
        )
        exp_id = result["id"]
        return self.get_explainability_config(exp_id)

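    # Example: a minimal sketch of the explainability workflow (feature names
    # are illustrative):
    #
    #   cfg = pipeline.create_explainability_config(["age", "income"])
    #   print(pipeline.list_explainability_configs())
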

class Pipelines(List[Pipeline]):
    """Wraps a list of pipelines for display in a display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        def row(pipeline):
            steptable = pipeline._html_steptable()
            fmt = pipeline.client._time_format
            tags = ", ".join([tag.tag() for tag in pipeline.tags()])
            deployment = pipeline._deployment_for_pipeline()
            depstr = "(unknown)" if deployment is None else deployment.deployed()
            variants = ", ".join([variant.name() for variant in pipeline.variants()])

            return (
                "<tr>"
                + f"<td>{pipeline.name()}</td>"
                + f"<td>{pipeline.create_time().strftime(fmt)}</td>"
                + f"<td>{pipeline.last_update_time().strftime(fmt)}</td>"
                + f"<td>{depstr}</td>"
                + f"<td>{tags}</td>"
                + f"<td>{variants}</td>"
                + f"<td>{steptable}</td>"
                + "</tr>"
            )

        fields = [
            "name",
            "created",
            "last_updated",
            "deployed",
            "tags",
            "versions",
            "steps",
        ]

        if self == []:
            return "(no pipelines)"
        else:
            return (
                "<table>"
                + "<tr><th>"
                + "</th><th>".join(fields)
                + "</th></tr>"
                + ("".join([row(p) for p in self]))
                + "</table>"
            )
def update_timestamp(f):
47def update_timestamp(f):
48    def _inner(self, *args, **kwargs):
49        results = f(self, *args, **kwargs)
50        if isinstance(results, list):
51            self._last_infer_time = max(r.timestamp() for r in results)
52        elif isinstance(results, pd.DataFrame):
53            if "time" in results:
54                self._last_infer_time = results["time"].max()
55        elif isinstance(results, pa.Table):
56            if "time" in results:
57                min_max_time = pa.compute.min_max(results["time"])
58                self._last_infer_time = min_max_time["max"].as_py()
59
60        return results
61
62    return _inner
class Pipeline(wallaroo.object.Object):
 65class Pipeline(Object):
 66    """A pipeline is an execution context for models. Pipelines contain Steps, which are often Models. Pipelines can be deployed or undeployed."""
 67
 68    def __init__(
 69        self,
 70        client: Optional["Client"],
 71        data: Dict[str, Any],
 72    ) -> None:
 73        from .pipeline_config import PipelineConfigBuilder  # avoids circular imports
 74
 75        self.client = client
 76        assert client is not None
 77
 78        # We track the last timestamp received as a hack, so that we can wait for logs
 79        # that are still being processed.
 80        self._last_infer_time = None
 81
 82        # We will shim through to all builder methods but return self so we can chain pipeline
 83        # calls. See "Shims" below. Using multiple inheritance from the PipelineConfigBuilder was
 84        # another option considered, and maybe it's an option, but shims let us fiddle with args
 85        # individually if needed.
 86        self._builder = None
 87        self._deployment = None
 88
 89        super().__init__(gql_client=client._gql_client, data=data)
 90
 91    def __repr__(self) -> str:
 92        return str(
 93            {
 94                "name": self.name(),
 95                "create_time": self.create_time(),
 96                "definition": self.definition(),
 97            }
 98        )
 99
100    def _html_steptable(self) -> str:
101        models = self._fetch_models()
102        return ", ".join(models)
103
104        # Yes this is biased towards models only
105        # TODO: other types of steps
106        # steps = self.steps()
107        # steptable = ""
108        # if steps:
109        #     rows = ""
110        #     for step in steps:
111        #         rows += step._repr_html_()
112        #     steptable = f"<table>{rows}</table>"
113        # else:
114        #     steptable = "(no steps)"
115        # return steptable
116
117    def _repr_html_(self) -> str:
118        tags = ", ".join([tag.tag() for tag in self.tags()])
119        deployment = self._deployment_for_pipeline()
120        deployed = "(none)" if deployment is None else deployment.deployed()
121        variants = ", ".join([variant.name() for variant in self.variants()])
122
123        return (
124            f"<table>"
125            f"<tr><th>name</th> <td>{self.name()}</td></tr>"
126            f"<tr><th>created</th> <td>{self.create_time()}</td></tr>"
127            f"<tr><th>last_updated</th> <td>{self.last_update_time()}</td></tr>"
128            f"<tr><th>deployed</th> <td>{deployed}</td></tr>"
129            f"<tr><th>tags</th> <td>{tags}</td></tr>"
130            f"<tr><th>versions</th> <td>{variants}</td></tr>"
131            f"<tr><th>steps</th> <td>{self._html_steptable()}</td></tr>"
132            f"</table>"
133        )
134
135    def _is_named(self) -> bool:
136        try:
137            self.name()
138            return True
139        except:
140            return False
141
142    def builder(self) -> "PipelineConfigBuilder":
143        if self._builder is None:
144            self._builder = PipelineConfigBuilder(
145                self.client,
146                pipeline_name=self.name(),
147                standalone=False,
148            )
149        return cast(PipelineConfigBuilder, self._builder)
150
151    def _fill(self, data: Dict[str, Any]) -> None:
152        from .pipeline_variant import PipelineVariant  # avoids circular imports
153        from .tag import Tag
154
155        for required_attribute in ["id"]:
156            if required_attribute not in data:
157                raise RequiredAttributeMissing(
158                    self.__class__.__name__, required_attribute
159                )
160        self._id = data["id"]
161
162        # Optional
163        self._owner_id = value_if_present(data, "owner_id")
164
165        # Optional
166        self._tags = (
167            [Tag(self.client, tag["tag"]) for tag in data["pipeline_tags"]]
168            if "pipeline_tags" in data
169            else DehydratedValue()
170        )
171        self._create_time = (
172            dateparse.isoparse(data["created_at"])
173            if "created_at" in data
174            else DehydratedValue()
175        )
176        self._last_update_time = (
177            dateparse.isoparse(data["updated_at"])
178            if "updated_at" in data
179            else DehydratedValue()
180        )
181        self._name = value_if_present(data, "pipeline_id")
182        self._variants = (
183            [PipelineVariant(self.client, elem) for elem in data["pipeline_versions"]]
184            if "pipeline_versions" in data
185            else DehydratedValue()
186        )
187
188    def _fetch_attributes(self) -> Dict[str, Any]:
189        assert self.client is not None
190        return self.client._gql_client.execute(
191            gql.gql(
192                """
193            query PipelineById($pipeline_id: bigint!) {
194                pipeline_by_pk(id: $pipeline_id) {
195                    id
196                    pipeline_id
197                    created_at
198                    updated_at
199                    visibility
200                    owner_id
201                    pipeline_versions(order_by: {id: desc}) {
202                        id
203                    }
204                    pipeline_tags {
205                      tag {
206                        id
207                        tag
208                      }
209                    }
210                }
211            }
212                """
213            ),
214            variable_values={
215                "pipeline_id": self._id,
216            },
217        )["pipeline_by_pk"]
218
219    def _update_visibility(self, visibility: _Visibility):
220        assert self.client is not None
221        return self._fill(
222            self.client._gql_client.execute(
223                gql.gql(
224                    """
225                mutation UpdatePipelineVisibility(
226                    $pipeline_id: bigint!,
227                    $visibility: String
228                ) {
229                  update_pipeline(
230                    where: {id: {_eq: $pipeline_id}},
231                    _set: {visibility: $visibility}) {
232                      returning  {
233                          id
234                          pipeline_id
235                          created_at
236                          updated_at
237                          visibility
238                          owner_id
239                          pipeline_versions(order_by: {id: desc}) {
240                                id
241                            }
242                        }
243                    }
244                }
245                """
246                ),
247                variable_values={
248                    "pipeline_id": self._id,
249                    "visibility": visibility,
250                },
251            )["update_pipeline"]["returning"][0]
252        )
253
254    def _fetch_models(self):
255        """Load deployment and any models associated, used only for listing and searching cases."""
256        data = self._gql_client.execute(
257            gql.gql(queries.named("PipelineModels")),
258            variable_values={"pipeline_id": self.id()},
259        )
260        names = []
261        try:
262            mc_nodes = data["pipeline_by_pk"]["deployment"][
263                "deployment_model_configs_aggregate"
264            ]["nodes"]
265            names = [mc["model_config"]["model"]["model"]["name"] for mc in mc_nodes]
266        except Exception:
267            pass
268        return names
269
270    def id(self) -> int:
271        return self._id
272
273    @rehydrate("_owner_id")
274    def owner_id(self) -> str:
275        return cast(str, self._owner_id)
276
277    @rehydrate("_create_time")
278    def create_time(self) -> datetime.datetime:
279        return cast(datetime.datetime, self._create_time)
280
281    @rehydrate("_last_update_time")
282    def last_update_time(self) -> datetime.datetime:
283        return cast(datetime.datetime, self._last_update_time)
284
285    @rehydrate("_name")
286    def name(self) -> str:
287        return cast(str, self._name)
288
289    @rehydrate("_variants")
290    def variants(self) -> List["PipelineVariant"]:
291        from .pipeline_variant import PipelineVariant  # avoids import cycles
292
293        return cast(List[PipelineVariant], self._variants)
294
295    @rehydrate("_tags")
296    def tags(self) -> List["Tag"]:
297        from .tag import Tag
298
299        return cast(List[Tag], self._tags)
300
301    def logs(self, limit: int = 100, valid: Optional[bool] = None) -> LogEntries:
302        topic = self.get_topic_name()
303
304        if valid is False:
305            topic += "-failures"
306        assert self.client is not None
307
308        [entries, status] = self.client.get_logs(topic, limit)
309
310        # XXX: hack to attempt to align logs with received inference results.
311        # Ideally we'd use indices from plateau directly for querying, but the
312        # engine currently does not support that.
313        if self._last_infer_time is not None:
314            for ix in range(5):
315                if entries and self._last_infer_time <= max(
316                    e.timestamp for e in entries
317                ):
318                    break
319
320                time.sleep(1)
321                [entries, status] = self.client.get_logs(topic, limit)
322
323        if status == "ByteLimited":
324            returned = len(entries)
325            print(
326                f"Warning: only displaying {returned} log messages (of {limit} requested) due to payload size limitations."
327            )
328
329        return entries
330
331    def logs_shadow_deploy(self):
332        logs = self.logs()
333        return LogEntriesShadowDeploy(logs)
334
335    def url(self) -> str:
336        """Returns the inference URL for this pipeline."""
337        deployment = self._deployment_for_pipeline()
338        if deployment is None:
339            raise RuntimeError("Pipeline has not been deployed and has no url")
340        else:
341            return deployment.url()
342
343    def deploy(
344        self,
345        pipeline_name: Optional[str] = None,
346        deployment_config: Optional[DeploymentConfig] = None,
347    ) -> "Pipeline":
348        """Deploy pipeline. `pipeline_name` is optional if deploy was called previously. When specified,
349        `pipeline_name` must be ASCII alpha-numeric characters, plus dash (-) only."""
350        if pipeline_name is not None:
351            require_dns_compliance(pipeline_name)
352        self._deploy_upload_optional(pipeline_name, deployment_config)
353        return self
354
355    def definition(self) -> str:
356        """Get the current definition of the pipeline as a string"""
357        return str(self.builder().steps)
358
359    def _deploy_upload_optional(
360        self,
361        pipeline_name: Optional[str] = None,
362        deployment_config: Optional[DeploymentConfig] = None,
363        upload: bool = True,
364    ) -> "Pipeline":
365        """INTERNAL USE ONLY: This is used in convenience methods that create pipelines"""
366
367        if pipeline_name is None:
368            if not self._is_named():
369                raise RuntimeError(
370                    "pipeline_name is required when pipeline was not previously deployed."
371                )
372            else:
373                pipeline_name = self.name()
374        if upload:
375            self._upload()
376
377        self._deployment = self.variants()[0].deploy(
378            deployment_name=pipeline_name,
379            model_configs=self.builder()._model_configs(),
380            config=deployment_config,
381        )
382        return self
383
384    def _deployment_for_pipeline(self) -> Optional["Deployment"]:
385        """Fetch a pipeline's deployment."""
386        if self._deployment is not None:
387            if not isinstance(self._deployment, DehydratedValue):
388                self._deployment._rehydrate()
389            return self._deployment
390
391        res = self._gql_client.execute(
392            gql.gql(
393                """
394		query GetDeploymentForPipeline($pipeline_id: bigint!) {
395		  pipeline_by_pk(id: $pipeline_id) {
396		    deployment {
397		      id
398		      deploy_id
399		      deployed
400		    }
401		  }
402		}"""
403            ),
404            variable_values={
405                "pipeline_id": self.id(),
406            },
407        )
408        if not res["pipeline_by_pk"]:
409            raise EntityNotFoundError("Pipeline", {"pipeline_id": str(self.id())})
410
411        if res["pipeline_by_pk"]["deployment"]:
412            self._deployment = Deployment(
413                client=self.client,
414                data=res["pipeline_by_pk"]["deployment"],
415            )
416        return self._deployment
417
418    def get_topic_name(self) -> str:
419        if self.client is None:
420            return f"pipeline-{self.name()}-inference"
421        return self.client.get_topic_name(self.id())
422
423    # -----------------------------------------------------------------------------
424    # Shims for Deployment methods
425    # -----------------------------------------------------------------------------
426
427    def undeploy(self) -> "Pipeline":
428
429        assert self.client is not None
430        deployment = self._deployment_for_pipeline()
431        if deployment:
432            deployment.undeploy()
433        return self
434
435    @update_timestamp
436    def infer(
437        self,
438        tensor: Union[Dict[str, Any], pd.DataFrame, pa.Table],
439        timeout: Optional[Union[int, float]] = None,
440        dataset: Optional[Union[Sequence[str], str]] = None,
441        dataset_exclude: Optional[Union[Sequence[str], str]] = None,
442        dataset_separator: Optional[str] = None,
443    ) -> Union[List[InferenceResult], pd.DataFrame, pa.Table]:
444        """
445        Returns an inference result on this deployment, given a tensor.
446        :param: tensor: Union[Dict[str, Any], pd.DataFrame, pa.Table] Inference data. Should be a dictionary.
447        Future improvement: will be a pandas dataframe or arrow table
448        :param: timeout: Optional[Union[int, float]] infer requests will time out after
449            the amount of seconds provided are exceeded. timeout defaults
450            to 15 secs.
451        :param: dataset: Optional[list] By default this is set to return, ["time", "out"].
452            Other available options "check_failures", "metadata"
453        :param: dataset_exclude: Optional[Union[Sequence[str], str]] If set, allows user to exclude parts of dataset.
454        :param: dataset_separator: Optional[Union[Sequence[str], str]] If set to ".", return dataset will be flattened.
455        :return: InferenceResult in dictionary, dataframe or arrow format.
456        """
457        deployment = self._deployment_for_pipeline()
458        if deployment:
459            return deployment.infer(
460                tensor, timeout, dataset, dataset_exclude, dataset_separator
461            )
462        else:
463            raise RuntimeError("Pipeline {self.name} is not deployed")
464
465    @update_timestamp
466    def infer_from_file(
467        self,
468        filename: Union[str, pathlib.Path],
469        timeout: Optional[Union[int, float]] = None,
470        dataset: Optional[Sequence[str]] = None,
471        exclude: Optional[Sequence[str]] = None,
472        dataset_separator: Optional[str] = None,
473    ) -> List[InferenceResult]:
474        """Returns an inference result on this deployment, given tensors in a file."""
475
476        deployment = self._deployment_for_pipeline()
477        if deployment:
478            return deployment.infer_from_file(
479                filename, timeout, dataset, exclude, dataset_separator
480            )
481        else:
482            raise RuntimeError("Pipeline {self.name} is not deployed")
483
484    async def batch_infer_from_file(
485        self,
486        filename: Union[str, pathlib.Path],
487        data_key: str = "tensor",
488        batch_size: int = 1000,
489        connector_limit: int = 4,
490    ) -> List[InferenceResult]:
491        """Async method to run batched inference on a data file for a given deployment.
492
493        :param str filename: path to an existing file with tensor data in JSON format.
494        :param str data_key: key which the tensor data is under within the JSON. defaults to "tensor".
495        :param int batch_size: batch size to use when sending requests to the engine. defaults to 1000.
496        :param int connector_limit: limit for the amount of TCP connections. defaults to 4.
497        :return: List of InferenceResult's.
498        :rtype: List[InferenceResult]
499        """
500        deployment = self._deployment_for_pipeline()
501        if deployment:
502            return await deployment.batch_infer_from_file(
503                filename, data_key, batch_size, connector_limit
504            )
505        else:
506            raise RuntimeError("Pipeline {self.name} is not deployed")
507
508    def status(self) -> Dict[str, Any]:
509        """Status of pipeline"""
510        deployment = self._deployment_for_pipeline()
511        if deployment:
512            return deployment.status()
513        else:
514            return {"status": f"Pipeline {self.name()} is not deployed"}
515
516    # -----------------------------------------------------------------------------
517    # Accessors for PipelineConfigBuilder attributes. Not exactly shims and they may be changing a
518    # contract elsewhere.
519    # -----------------------------------------------------------------------------
520
521    def steps(self) -> List[Step]:
522        """Returns a list of the steps of a pipeline. Not exactly a shim"""
523        return self.builder().steps
524
525    def model_configs(self) -> List[ModelConfig]:
526        """Returns a list of the model configs of a pipeline. Not exactly a shim"""
527        return self.builder()._model_configs()
528
529    # -----------------------------------------------------------------------------
530    # Shims for PipelineConfigBuilder methods
531    # -----------------------------------------------------------------------------
532
533    def _upload(self) -> "Pipeline":
534        assert self.client is not None
535
536        # Special case: deploying an existing pipeline where pipeline steps are of type ModelInference
537        # The builder doesn't get repopulated so we do that here.
538
539        if self.builder().steps == []:
540            for step in self.variants()[0].definition()["steps"]:
541                if "ModelInference" in step:
542                    name = step["ModelInference"]["models"][0]["name"]
543                    version = step["ModelInference"]["models"][0]["version"]
544                    model = self.client.model_by_name(
545                        model_class=name, model_name=version
546                    )
547                    self.add_model_step(model)
548
549        new_pipeline = self.builder().upload()
550        self._fill({"id": new_pipeline.id()})
551        return self
552
553    def remove_step(self, index: int) -> "Pipeline":
554        """Remove a step at a given index"""
555        self.builder().remove_step(index)
556        return self
557
558    def add_model_step(self, model: Model) -> "Pipeline":
559        """Perform inference with a single model."""
560        self.builder().add_model_step(model)
561        return self
562
563    def replace_with_model_step(self, index: int, model: Model) -> "Pipeline":
564        """Replaces the step at the given index with a model step"""
565        self.builder().replace_with_model_step(index, model)
566        return self
567
568    def add_multi_model_step(self, models: Iterable[Model]) -> "Pipeline":
569        """Perform inference on the same input data for any number of models."""
570        self.builder().add_multi_model_step(models)
571        return self
572
573    def replace_with_multi_model_step(
574        self, index: int, models: Iterable[Model]
575    ) -> "Pipeline":
576        """Replaces the step at the index with a multi model step"""
577        self.builder().replace_with_multi_model_step(index, models)
578        return self
579
580    def add_audit(self, slice) -> "Pipeline":
581        """Run audit logging on a specified `slice` of model outputs.
582
583        The slice must be in python-like format. `start:`, `start:end`, and
584        `:end` are supported.
585        """
586        self.builder().add_audit(slice)
587        return self
588
589    def replace_with_audit(self, index: int, audit_slice: str) -> "Pipeline":
590        """Replaces the step at the index with an audit step"""
591        self.builder().replace_with_audit(index, audit_slice)
592        return self
593
594    def add_select(self, index: int) -> "Pipeline":
595        """Select only the model output with the given `index` from an array of
596        outputs.
597        """
598        self.builder().add_select(index)
599        return self
600
601    def replace_with_select(self, step_index: int, select_index: int) -> "Pipeline":
602        """Replaces the step at the index with a select step"""
603        self.builder().replace_with_select(step_index, select_index)
604        return self
605
606    def add_key_split(
607        self, default: Model, meta_key: str, options: Dict[str, Model]
608    ) -> "Pipeline":
609        """Split traffic based on the value at a given `meta_key` in the input data,
610        routing to the appropriate model.
611
612        If the resulting value is a key in `options`, the corresponding model is used.
613        Otherwise, the `default` model is used for inference.
614        """
615        self.builder().add_key_split(default, meta_key, options)
616        return self
617
618    def replace_with_key_split(
619        self, index: int, default: Model, meta_key: str, options: Dict[str, Model]
620    ) -> "Pipeline":
621        """Replace the step at the index with a key split step"""
622        self.builder().replace_with_key_split(index, default, meta_key, options)
623        return self
624
625    def add_random_split(
626        self,
627        weighted: Iterable[Tuple[float, Model]],
628        hash_key: Optional[str] = None,
629    ) -> "Pipeline":
630        """Routes inputs to a single model, randomly chosen from the list of
631        `weighted` options.
632
633        Each model receives inputs that are approximately proportional to the
634        weight it is assigned.  For example, with two models having weights 1
635        and 1, each will receive roughly equal amounts of inference inputs. If
636        the weights were changed to 1 and 2, the models would receive roughly
637        33% and 66% respectively instead.
638
639        When choosing the model to use, a random number between 0.0 and 1.0 is
640        generated. The weighted inputs are mapped to that range, and the random
641        input is then used to select the model to use. For example, for the
642        two-models equal-weight case, a random key of 0.4 would route to the
643        first model. 0.6 would route to the second.
644
645        To support consistent assignment to a model, a `hash_key` can be
646        specified. This must be between 0.0 and 1.0. The value at this key, when
647        present in the input data, will be used instead of a random number for
648        model selection.
649        """
650        self.builder().add_random_split(weighted, hash_key)
651        return self
652
653    def replace_with_random_split(
654        self,
655        index: int,
656        weighted: Iterable[Tuple[float, Model]],
657        hash_key: Optional[str] = None,
658    ) -> "Pipeline":
659        """Replace the step at the index with a random split step"""
660        self.builder().replace_with_random_split(index, weighted, hash_key)
661        return self
662
663    def add_shadow_deploy(
664        self, champion: Model, challengers: Iterable[Model]
665    ) -> "Pipeline":
666        """Create a "shadow deployment" experiment pipeline. The `champion`
667        model and all `challengers` are run for each input. The result data for
668        all models is logged, but the output of the `champion` is the only
669        result returned.
670
671        This is particularly useful for "burn-in" testing a new model with real
672        world data without displacing the currently proven model.
673
674        This is currently implemented as three steps: A multi model step, an audit step, and
675        a select step. To remove or replace this step, you need to remove or replace
676        all three. You can remove steps using pipeline.remove_step
677        """
678        self.builder().add_shadow_deploy(champion, challengers)
679        return self
680
681    def replace_with_shadow_deploy(
682        self, index: int, champion: Model, challengers: Iterable[Model]
683    ) -> "Pipeline":
684        """Replace a given step with a shadow deployment"""
685        self.builder().replace_with_shadow_deploy(index, champion, challengers)
686        return self
687
688    def add_validation(self, name: str, validation: Expression) -> "Pipeline":
689        """Add a `validation` with the given `name`. All validations are run on
690        all outputs, and all failures are logged.
691        """
692        self.builder().add_validation(name, validation)
693        return self
694
695    def add_alert(
696        self, name: str, alert: Alert, notifications: List[notify.Notification]
697    ) -> "Pipeline":
698        self.builder().add_alert(name, alert, notifications)
699        return self
700
701    def replace_with_alert(
702        self,
703        index: int,
704        name: str,
705        alert: Alert,
706        notifications: List[notify.Notification],
707    ) -> "Pipeline":
708        """Replace the step at the given index with the specified alert"""
709        self.builder().replace_with_alert(index, name, alert, notifications)
710        return self
711
712    def clear(self) -> "Pipeline":
713        """
714        Remove all steps from the pipeline. This might be desirable if replacing models, for example.
715        """
716        self.builder().clear()
717        return self
718
719    def list_explainability_configs(self) -> List[ExplainabilityConfig]:
720        """List the explainability configs we've created."""
721
722        result = unwrap(self.client)._post_rest_api_json(
723            f"v1/api/explainability/list_configs_by_pipeline",
724            {"pipeline_id": self.id()},
725        )
726        l = [ExplainabilityConfig(**ec) for ec in result]
727        for ec in l:
728            ec.client = self.client  # type: ignore
729        return ExplainabilityConfigList(l)
730
731    def get_explainability_config(
732        self, expr: Union[str, ExplainabilityConfig]
733    ) -> ExplainabilityConfig:
734        """Get the details of an explainability config."""
735
736        if isinstance(expr, str):
737            explainability_config_id = expr
738        else:
739            explainability_config_id = str(expr.id)
740
741        result = unwrap(self.client)._post_rest_api_json(
742            f"v1/api/explainability/get_config",
743            {"explainability_config_id": explainability_config_id},
744        )
745
746        exp_cfg = ExplainabilityConfig(**result)
747        exp_cfg.client = self.client  # type: ignore
748        return exp_cfg
749
750    def create_explainability_config(self, feature_names: Sequence[str], num_points=10):
751        """Create a SHAP config to be used later for reference and ad hoc requests."""
752
753        output_names = ["output_0"]
754        feature_name_list = list(feature_names)
755        reference_version = self.variants()[0].name()
756        workspace_id = unwrap(self.client).get_current_workspace().id()
757
758        shap_config = ExplainabilityConfig(
759            id=None,
760            workspace_id=workspace_id,
761            reference_pipeline_version=reference_version,
762            explainability_pipeline_version=None,
763            status={},
764            feature_bounds={},
765            num_points=num_points,
766            feature_names=feature_name_list,
767            output_names=output_names,
768        )
769
770        result = unwrap(self.client)._post_rest_api_json(
771            f"v1/api/explainability/create_config", asdict(shap_config)
772        )
773        exp_id = result["id"]
774        return self.get_explainability_config(exp_id)

A pipeline is an execution context for models. Pipelines contain Steps, which are often Models. Pipelines can be deployed or undeployed.

Pipeline(client: Optional[wallaroo.client.Client], data: Dict[str, Any])
68    def __init__(
69        self,
70        client: Optional["Client"],
71        data: Dict[str, Any],
72    ) -> None:
73        from .pipeline_config import PipelineConfigBuilder  # avoids circular imports
74
75        self.client = client
76        assert client is not None
77
78        # We track the last timestamp received as a hack, so that we can wait for logs
79        # that are still being processed.
80        self._last_infer_time = None
81
82        # We will shim through to all builder methods but return self so we can chain pipeline
83        # calls. See "Shims" below. Using multiple inheritance from the PipelineConfigBuilder was
84        # another option considered, and maybe it's an option, but shims let us fiddle with args
85        # individually if needed.
86        self._builder = None
87        self._deployment = None
88
89        super().__init__(gql_client=client._gql_client, data=data)

Base constructor.

Each object requires:

  • a GraphQL client - in order to fill its missing members dynamically
  • an initial data blob - typically from deserialized JSON, containing at least the data for required members (typically the object's primary key) and optionally other data members.
def builder(self) -> wallaroo.pipeline_config.PipelineConfigBuilder:
142    def builder(self) -> "PipelineConfigBuilder":
143        if self._builder is None:
144            self._builder = PipelineConfigBuilder(
145                self.client,
146                pipeline_name=self.name(),
147                standalone=False,
148            )
149        return cast(PipelineConfigBuilder, self._builder)
def id(self) -> int:
270    def id(self) -> int:
271        return self._id
def owner_id(*args, **kwargs):
def create_time(*args, **kwargs):
def last_update_time(*args, **kwargs):
def name(*args, **kwargs):
def variants(*args, **kwargs):
def tags(*args, **kwargs):
def logs( self, limit: int = 100, valid: Optional[bool] = None) -> wallaroo.logs.LogEntries:
301    def logs(self, limit: int = 100, valid: Optional[bool] = None) -> LogEntries:
302        topic = self.get_topic_name()
303
304        if valid is False:
305            topic += "-failures"
306        assert self.client is not None
307
308        [entries, status] = self.client.get_logs(topic, limit)
309
310        # XXX: hack to attempt to align logs with received inference results.
311        # Ideally we'd use indices from plateau directly for querying, but the
312        # engine currently does not support that.
313        if self._last_infer_time is not None:
314            for ix in range(5):
315                if entries and self._last_infer_time <= max(
316                    e.timestamp for e in entries
317                ):
318                    break
319
320                time.sleep(1)
321                [entries, status] = self.client.get_logs(topic, limit)
322
323        if status == "ByteLimited":
324            returned = len(entries)
325            print(
326                f"Warning: only displaying {returned} log messages (of {limit} requested) due to payload size limitations."
327            )
328
329        return entries
def logs_shadow_deploy(self):
331    def logs_shadow_deploy(self):
332        logs = self.logs()
333        return LogEntriesShadowDeploy(logs)
def url(self) -> str:
335    def url(self) -> str:
336        """Returns the inference URL for this pipeline."""
337        deployment = self._deployment_for_pipeline()
338        if deployment is None:
339            raise RuntimeError("Pipeline has not been deployed and has no url")
340        else:
341            return deployment.url()

Returns the inference URL for this pipeline.

def deploy( self, pipeline_name: Optional[str] = None, deployment_config: Optional[wallaroo.deployment_config.DeploymentConfig] = None) -> wallaroo.pipeline.Pipeline:
343    def deploy(
344        self,
345        pipeline_name: Optional[str] = None,
346        deployment_config: Optional[DeploymentConfig] = None,
347    ) -> "Pipeline":
348        """Deploy pipeline. `pipeline_name` is optional if deploy was called previously. When specified,
349        `pipeline_name` must be ASCII alpha-numeric characters, plus dash (-) only."""
350        if pipeline_name is not None:
351            require_dns_compliance(pipeline_name)
352        self._deploy_upload_optional(pipeline_name, deployment_config)
353        return self

Deploy pipeline. pipeline_name is optional if deploy was called previously. When specified, pipeline_name must consist of ASCII alphanumeric characters and dashes (-) only.
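
For example, a minimal sketch, assuming pipeline is an existing Pipeline handle and my_deployment_config is a hypothetical DeploymentConfig:

    # Deploy under an explicit, DNS-compliant name; later calls may omit it.
    pipeline = pipeline.deploy("my-pipeline")
    print(pipeline.status())   # confirm the deployment came up
    print(pipeline.url())      # inference endpoint once deployed
    # Resources can be tuned by passing a deployment config:
    # pipeline.deploy("my-pipeline", deployment_config=my_deployment_config)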

def definition(self) -> str:
355    def definition(self) -> str:
356        """Get the current definition of the pipeline as a string"""
357        return str(self.builder().steps)

Get the current definition of the pipeline as a string

def get_topic_name(self) -> str:
418    def get_topic_name(self) -> str:
419        if self.client is None:
420            return f"pipeline-{self.name()}-inference"
421        return self.client.get_topic_name(self.id())
def undeploy(self) -> wallaroo.pipeline.Pipeline:
427    def undeploy(self) -> "Pipeline":
428
429        assert self.client is not None
430        deployment = self._deployment_for_pipeline()
431        if deployment:
432            deployment.undeploy()
433        return self
def infer(self, *args, **kwargs):

Returns an inference result on this deployment, given a tensor.

Parameters
  • tensor: Union[Dict[str, Any], pd.DataFrame, pa.Table] Inference data. Should currently be a dictionary; pandas DataFrame and Arrow Table support is a planned improvement.
  • timeout: Optional[Union[int, float]] Infer requests time out once the provided number of seconds is exceeded. Defaults to 15 seconds.
  • dataset: Optional[list] Fields to return; defaults to ["time", "out"]. Other available options: "check_failures", "metadata".
  • dataset_exclude: Optional[Union[Sequence[str], str]] If set, allows the user to exclude parts of the dataset.
  • dataset_separator: Optional[Union[Sequence[str], str]] If set to ".", the returned dataset will be flattened.
Returns

InferenceResult in dictionary, DataFrame, or Arrow format.
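
For example, a sketch assuming pipeline is already deployed; the tensor shape and values are purely illustrative:

    result = pipeline.infer(
        {"tensor": [[1.0, 2.0, 3.0]]},              # dictionary input
        timeout=30,                                 # override the 15 s default
        dataset=["time", "out", "check_failures"],  # request extra fields
    )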

def infer_from_file(self, *args, **kwargs):

Returns an inference result on this deployment, given tensors in a file.

async def batch_infer_from_file( self, filename: Union[str, pathlib.Path], data_key: str = 'tensor', batch_size: int = 1000, connector_limit: int = 4) -> List[wallaroo.inference_result.InferenceResult]:
484    async def batch_infer_from_file(
485        self,
486        filename: Union[str, pathlib.Path],
487        data_key: str = "tensor",
488        batch_size: int = 1000,
489        connector_limit: int = 4,
490    ) -> List[InferenceResult]:
491        """Async method to run batched inference on a data file for a given deployment.
492
493        :param str filename: path to an existing file with tensor data in JSON format.
494        :param str data_key: key which the tensor data is under within the JSON. defaults to "tensor".
495        :param int batch_size: batch size to use when sending requests to the engine. defaults to 1000.
496        :param int connector_limit: limit for the amount of TCP connections. defaults to 4.
497        :return: List of InferenceResults.
498        :rtype: List[InferenceResult]
499        """
500        deployment = self._deployment_for_pipeline()
501        if deployment:
502            return await deployment.batch_infer_from_file(
503                filename, data_key, batch_size, connector_limit
504            )
505        else:
506            raise RuntimeError(f"Pipeline {self.name()} is not deployed")

Async method to run batched inference on a data file for a given deployment.

Parameters
  • str filename: path to an existing file with tensor data in JSON format.
  • str data_key: key under which the tensor data is stored within the JSON. Defaults to "tensor".
  • int batch_size: batch size to use when sending requests to the engine. Defaults to 1000.
  • int connector_limit: limit for the number of TCP connections. Defaults to 4.
Returns

List of InferenceResults.
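
A usage sketch, assuming pipeline is deployed and ./tensors.json is a hypothetical file holding records under the "tensor" key:

    import asyncio

    async def run_batch():
        # Send batches of 500 records over at most 2 TCP connections.
        return await pipeline.batch_infer_from_file(
            "./tensors.json", data_key="tensor", batch_size=500, connector_limit=2
        )

    results = asyncio.run(run_batch())
    print(f"received {len(results)} results")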

def status(self) -> Dict[str, Any]:
508    def status(self) -> Dict[str, Any]:
509        """Status of pipeline"""
510        deployment = self._deployment_for_pipeline()
511        if deployment:
512            return deployment.status()
513        else:
514            return {"status": f"Pipeline {self.name()} is not deployed"}

Status of pipeline

def steps(self) -> List[wallaroo.pipeline_config.Step]:
521    def steps(self) -> List[Step]:
522        """Returns a list of the steps of a pipeline. Not exactly a shim"""
523        return self.builder().steps

Returns a list of the steps of a pipeline. Not exactly a shim

def model_configs(self) -> List[wallaroo.model_config.ModelConfig]:
525    def model_configs(self) -> List[ModelConfig]:
526        """Returns a list of the model configs of a pipeline. Not exactly a shim"""
527        return self.builder()._model_configs()

Returns a list of the model configs of a pipeline. Not exactly a shim

def remove_step(self, index: int) -> wallaroo.pipeline.Pipeline:
553    def remove_step(self, index: int) -> "Pipeline":
554        """Remove a step at a given index"""
555        self.builder().remove_step(index)
556        return self

Remove a step at a given index

def add_model_step(self, model: wallaroo.model.Model) -> wallaroo.pipeline.Pipeline:
558    def add_model_step(self, model: Model) -> "Pipeline":
559        """Perform inference with a single model."""
560        self.builder().add_model_step(model)
561        return self

Perform inference with a single model.
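
For instance, assuming model is a previously uploaded Model and the pipeline name is hypothetical:

    # Step-building calls return the pipeline, so they can be chained.
    pipeline = pipeline.add_model_step(model)
    pipeline = pipeline.deploy("single-model-pipeline")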

def replace_with_model_step( self, index: int, model: wallaroo.model.Model) -> wallaroo.pipeline.Pipeline:
563    def replace_with_model_step(self, index: int, model: Model) -> "Pipeline":
564        """Replaces the step at the given index with a model step"""
565        self.builder().replace_with_model_step(index, model)
566        return self

Replaces the step at the given index with a model step

def add_multi_model_step( self, models: Iterable[wallaroo.model.Model]) -> wallaroo.pipeline.Pipeline:
568    def add_multi_model_step(self, models: Iterable[Model]) -> "Pipeline":
569        """Perform inference on the same input data for any number of models."""
570        self.builder().add_multi_model_step(models)
571        return self

Perform inference on the same input data for any number of models.
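
A sketch, assuming model_a and model_b are uploaded Models:

    # Every input is sent to both models; the output is an array of results.
    pipeline = pipeline.add_multi_model_step([model_a, model_b])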

def replace_with_multi_model_step( self, index: int, models: Iterable[wallaroo.model.Model]) -> wallaroo.pipeline.Pipeline:
573    def replace_with_multi_model_step(
574        self, index: int, models: Iterable[Model]
575    ) -> "Pipeline":
576        """Replaces the step at the index with a multi model step"""
577        self.builder().replace_with_multi_model_step(index, models)
578        return self

Replaces the step at the index with a multi model step

def add_audit(self, slice) -> wallaroo.pipeline.Pipeline:
580    def add_audit(self, slice) -> "Pipeline":
581        """Run audit logging on a specified `slice` of model outputs.
582
583        The slice must be in python-like format. `start:`, `start:end`, and
584        `:end` are supported.
585        """
586        self.builder().add_audit(slice)
587        return self

Run audit logging on a specified slice of model outputs.

The slice must be in Python-like format. start:, start:end, and :end are supported.
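
For example, with illustrative slice values:

    # Log only the first two outputs of the preceding multi-model step.
    pipeline = pipeline.add_audit("0:2")
    # Equally valid: "1:" (second output onward) or ":1" (first output only).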

def replace_with_audit(self, index: int, audit_slice: str) -> wallaroo.pipeline.Pipeline:
589    def replace_with_audit(self, index: int, audit_slice: str) -> "Pipeline":
590        """Replaces the step at the index with an audit step"""
591        self.builder().replace_with_audit(index, audit_slice)
592        return self

Replaces the step at the index with an audit step

def add_select(self, index: int) -> wallaroo.pipeline.Pipeline:
594    def add_select(self, index: int) -> "Pipeline":
595        """Select only the model output with the given `index` from an array of
596        outputs.
597        """
598        self.builder().add_select(index)
599        return self

Select only the model output with the given index from an array of outputs.
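
For example, to keep only the first model's output from an upstream multi-model step:

    pipeline = pipeline.add_select(0)  # drop all outputs except index 0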

def replace_with_select(self, step_index: int, select_index: int) -> wallaroo.pipeline.Pipeline:
601    def replace_with_select(self, step_index: int, select_index: int) -> "Pipeline":
602        """Replaces the step at the index with a select step"""
603        self.builder().replace_with_select(step_index, select_index)
604        return self

Replaces the step at the index with a select step

def add_key_split( self, default: wallaroo.model.Model, meta_key: str, options: Dict[str, wallaroo.model.Model]) -> wallaroo.pipeline.Pipeline:
606    def add_key_split(
607        self, default: Model, meta_key: str, options: Dict[str, Model]
608    ) -> "Pipeline":
609        """Split traffic based on the value at a given `meta_key` in the input data,
610        routing to the appropriate model.
611
612        If the resulting value is a key in `options`, the corresponding model is used.
613        Otherwise, the `default` model is used for inference.
614        """
615        self.builder().add_key_split(default, meta_key, options)
616        return self

Split traffic based on the value at a given meta_key in the input data, routing to the appropriate model.

If the resulting value is a key in options, the corresponding model is used. Otherwise, the default model is used for inference.
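
A sketch in which the meta key and model handles are hypothetical:

    # Inputs whose "card_type" value is "gold" go to gold_model, "standard"
    # to standard_model; any other value falls back to default_model.
    pipeline = pipeline.add_key_split(
        default=default_model,
        meta_key="card_type",
        options={"gold": gold_model, "standard": standard_model},
    )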

def replace_with_key_split( self, index: int, default: wallaroo.model.Model, meta_key: str, options: Dict[str, wallaroo.model.Model]) -> wallaroo.pipeline.Pipeline:
618    def replace_with_key_split(
619        self, index: int, default: Model, meta_key: str, options: Dict[str, Model]
620    ) -> "Pipeline":
621        """Replace the step at the index with a key split step"""
622        self.builder().replace_with_key_split(index, default, meta_key, options)
623        return self

Replace the step at the index with a key split step

def add_random_split( self, weighted: Iterable[Tuple[float, wallaroo.model.Model]], hash_key: Optional[str] = None) -> wallaroo.pipeline.Pipeline:
625    def add_random_split(
626        self,
627        weighted: Iterable[Tuple[float, Model]],
628        hash_key: Optional[str] = None,
629    ) -> "Pipeline":
630        """Routes inputs to a single model, randomly chosen from the list of
631        `weighted` options.
632
633        Each model receives inputs that are approximately proportional to the
634        weight it is assigned.  For example, with two models having weights 1
635        and 1, each will receive roughly equal amounts of inference inputs. If
636        the weights were changed to 1 and 2, the models would receive roughly
637        33% and 66% respectively instead.
638
639        When choosing the model to use, a random number between 0.0 and 1.0 is
640        generated. The weighted inputs are mapped to that range, and the random
641        input is then used to select the model to use. For example, for the
642        two-models equal-weight case, a random key of 0.4 would route to the
643        first model. 0.6 would route to the second.
644
645        To support consistent assignment to a model, a `hash_key` can be
646        specified. This must be between 0.0 and 1.0. The value at this key, when
647        present in the input data, will be used instead of a random number for
648        model selection.
649        """
650        self.builder().add_random_split(weighted, hash_key)
651        return self

Routes inputs to a single model, randomly chosen from the list of weighted options.

Each model receives inputs that are approximately proportional to the weight it is assigned. For example, with two models having weights 1 and 1, each will receive roughly equal amounts of inference inputs. If the weights were changed to 1 and 2, the models would receive roughly 33% and 66% respectively instead.

When choosing the model to use, a random number between 0.0 and 1.0 is generated. The weighted inputs are mapped to that range, and the random input is then used to select the model to use. For example, for the two-models equal-weight case, a random key of 0.4 would route to the first model. 0.6 would route to the second.

To support consistent assignment to a model, a hash_key can be specified. This must be between 0.0 and 1.0. The value at this key, when present in the input data, will be used instead of a random number for model selection.
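
For example, a two-to-one split; the model handles and hash key are hypothetical, and weights need not sum to 1:

    # model_a receives roughly two thirds of inputs, model_b roughly one third.
    pipeline = pipeline.add_random_split(
        [(2.0, model_a), (1.0, model_b)],
        hash_key="session_id",  # optional; the 0.0-1.0 value stored at this key,
    )                           # when present, routes the input deterministically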

def replace_with_random_split( self, index: int, weighted: Iterable[Tuple[float, wallaroo.model.Model]], hash_key: Optional[str] = None) -> wallaroo.pipeline.Pipeline:
653    def replace_with_random_split(
654        self,
655        index: int,
656        weighted: Iterable[Tuple[float, Model]],
657        hash_key: Optional[str] = None,
658    ) -> "Pipeline":
659        """Replace the step at the index with a random split step"""
660        self.builder().replace_with_random_split(index, weighted, hash_key)
661        return self

Replace the step at the index with a random split step

def add_shadow_deploy( self, champion: wallaroo.model.Model, challengers: Iterable[wallaroo.model.Model]) -> wallaroo.pipeline.Pipeline:
663    def add_shadow_deploy(
664        self, champion: Model, challengers: Iterable[Model]
665    ) -> "Pipeline":
666        """Create a "shadow deployment" experiment pipeline. The `champion`
667        model and all `challengers` are run for each input. The result data for
668        all models is logged, but the output of the `champion` is the only
669        result returned.
670
671        This is particularly useful for "burn-in" testing a new model with real
672        world data without displacing the currently proven model.
673
674        This is currently implemented as three steps: A multi model step, an audit step, and
675        a select step. To remove or replace this step, you need to remove or replace
676        all three. You can remove steps using pipeline.remove_step
677        """
678        self.builder().add_shadow_deploy(champion, challengers)
679        return self

Create a "shadow deployment" experiment pipeline. The champion model and all challengers are run for each input. The result data for all models is logged, but the output of the champion is the only result returned.

This is particularly useful for "burn-in" testing a new model with real world data without displacing the currently proven model.

This is currently implemented as three steps: a multi-model step, an audit step, and a select step. To remove or replace the shadow deployment, you must remove or replace all three. Steps can be removed with pipeline.remove_step.
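
A sketch, assuming champion_model and the challenger handles are uploaded Models:

    pipeline = pipeline.add_shadow_deploy(champion_model, [challenger_a, challenger_b])
    # After running inferences, inspect every model's results side by side:
    logs = pipeline.logs_shadow_deploy()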

def replace_with_shadow_deploy( self, index: int, champion: wallaroo.model.Model, challengers: Iterable[wallaroo.model.Model]) -> wallaroo.pipeline.Pipeline:
681    def replace_with_shadow_deploy(
682        self, index: int, champion: Model, challengers: Iterable[Model]
683    ) -> "Pipeline":
684        """Replace a given step with a shadow deployment"""
685        self.builder().replace_with_shadow_deploy(index, champion, challengers)
686        return self

Replace a given step with a shadow deployment

def add_validation( self, name: str, validation: wallaroo.checks.Expression) -> wallaroo.pipeline.Pipeline:
688    def add_validation(self, name: str, validation: Expression) -> "Pipeline":
689        """Add a `validation` with the given `name`. All validations are run on
690        all outputs, and all failures are logged.
691        """
692        self.builder().add_validation(name, validation)
693        return self

Add a validation with the given name. All validations are run on all outputs, and all failures are logged.
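
A hedged sketch; the expression construction below (indexing model.outputs) follows common SDK examples and should be treated as an assumption:

    # Log a check failure whenever the model's first output reaches 0.95 or more.
    pipeline = pipeline.add_validation("no_high_fraud", model.outputs[0][0] < 0.95)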

def add_alert( self, name: str, alert: wallaroo.checks.Alert, notifications: List[wallaroo.notify.Notification]) -> wallaroo.pipeline.Pipeline:
695    def add_alert(
696        self, name: str, alert: Alert, notifications: List[notify.Notification]
697    ) -> "Pipeline":
698        self.builder().add_alert(name, alert, notifications)
699        return self
def replace_with_alert( self, index: int, name: str, alert: wallaroo.checks.Alert, notifications: List[wallaroo.notify.Notification]) -> wallaroo.pipeline.Pipeline:
701    def replace_with_alert(
702        self,
703        index: int,
704        name: str,
705        alert: Alert,
706        notifications: List[notify.Notification],
707    ) -> "Pipeline":
708        """Replace the step at the given index with the specified alert"""
709        self.builder().replace_with_alert(index, name, alert, notifications)
710        return self

Replace the step at the given index with the specified alert

def clear(self) -> wallaroo.pipeline.Pipeline:
712    def clear(self) -> "Pipeline":
713        """
714        Remove all steps from the pipeline. This might be desirable if replacing models, for example.
715        """
716        self.builder().clear()
717        return self

Remove all steps from the pipeline. This might be desirable if replacing models, for example.

def list_explainability_configs(self) -> List[wallaroo.explainability.ExplainabilityConfig]:
719    def list_explainability_configs(self) -> List[ExplainabilityConfig]:
720        """List the explainability configs we've created."""
721
722        result = unwrap(self.client)._post_rest_api_json(
723            f"v1/api/explainability/list_configs_by_pipeline",
724            {"pipeline_id": self.id()},
725        )
726        l = [ExplainabilityConfig(**ec) for ec in result]
727        for ec in l:
728            ec.client = self.client  # type: ignore
729        return ExplainabilityConfigList(l)

List the explainability configs we've created.

def get_explainability_config( self, expr: Union[str, wallaroo.explainability.ExplainabilityConfig]) -> wallaroo.explainability.ExplainabilityConfig:
731    def get_explainability_config(
732        self, expr: Union[str, ExplainabilityConfig]
733    ) -> ExplainabilityConfig:
734        """Get the details of an explainability config."""
735
736        if isinstance(expr, str):
737            explainability_config_id = expr
738        else:
739            explainability_config_id = str(expr.id)
740
741        result = unwrap(self.client)._post_rest_api_json(
742            f"v1/api/explainability/get_config",
743            {"explainability_config_id": explainability_config_id},
744        )
745
746        exp_cfg = ExplainabilityConfig(**result)
747        exp_cfg.client = self.client  # type: ignore
748        return exp_cfg

Get the details of an explainability config.

def create_explainability_config(self, feature_names: Sequence[str], num_points=10):
750    def create_explainability_config(self, feature_names: Sequence[str], num_points=10):
751        """Create a SHAP config to be used later for reference and ad hoc requests."""
752
753        output_names = ["output_0"]
754        feature_name_list = list(feature_names)
755        reference_version = self.variants()[0].name()
756        workspace_id = unwrap(self.client).get_current_workspace().id()
757
758        shap_config = ExplainabilityConfig(
759            id=None,
760            workspace_id=workspace_id,
761            reference_pipeline_version=reference_version,
762            explainability_pipeline_version=None,
763            status={},
764            feature_bounds={},
765            num_points=num_points,
766            feature_names=feature_name_list,
767            output_names=output_names,
768        )
769
770        result = unwrap(self.client)._post_rest_api_json(
771            f"v1/api/explainability/create_config", asdict(shap_config)
772        )
773        exp_id = result["id"]
774        return self.get_explainability_config(exp_id)

Create a SHAP config to be used later for reference and ad hoc requests.
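
A sketch tying the three explainability calls together; the feature names are hypothetical:

    cfg = pipeline.create_explainability_config(["bedrooms", "bathrooms"], num_points=10)
    print(pipeline.list_explainability_configs())  # includes the new config
    cfg = pipeline.get_explainability_config(cfg)  # accepts an id string or a config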

class Pipelines(typing.List[wallaroo.pipeline.Pipeline]):
777class Pipelines(List[Pipeline]):
778    """Wraps a list of pipelines for display in a display-aware environment like Jupyter."""
779
780    def _repr_html_(self) -> str:
781        def row(pipeline):
782            steptable = pipeline._html_steptable()
783            fmt = pipeline.client._time_format
784            tags = ", ".join([tag.tag() for tag in pipeline.tags()])
785            deployment = pipeline._deployment_for_pipeline()
786            depstr = "(unknown)" if deployment is None else deployment.deployed()
787            variants = ", ".join([variant.name() for variant in pipeline.variants()])
788
789            return (
790                "<tr>"
791                + f"<td>{pipeline.name()}</td>"
792                + f"<td>{pipeline.create_time().strftime(fmt)}</td>"
793                + f"<td>{pipeline.last_update_time().strftime(fmt)}</td>"
794                + f"<td>{depstr}</td>"
795                + f"<td>{tags}</td>"
796                + f"<td>{variants}</td>"
797                + f"<td>{steptable}</td>"
798                + "</tr>"
799            )
800
801        fields = [
802            "name",
803            "created",
804            "last_updated",
805            "deployed",
806            "tags",
807            "versions",
808            "steps",
809        ]
810
811        if self == []:
812            return "(no pipelines)"
813        else:
814            return (
815                "<table>"
816                + "<tr><th>"
817                + "</th><th>".join(fields)
818                + "</th></tr>"
819                + ("".join([row(p) for p in self]))
820                + "</table>"
821            )

Wraps a list of pipelines for display in a display-aware environment like Jupyter.

Inherited Members
builtins.list
list
clear
copy
append
insert
extend
pop
remove
index
count
reverse
sort