wallaroo.pipeline_variant

  1import datetime
  2from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
  3
  4from dateutil import parser as dateparse
  5
  6from wallaroo import queries
  7
  8from .deployment import Deployment
  9from .deployment_config import DeploymentConfig, DeploymentConfigBuilder
 10from .model_config import ModelConfig
 11from .object import *
 12from .wallaroo_ml_ops_api_client.api.pipeline import pipelines_deploy
 13from .wallaroo_ml_ops_api_client.models import (
 14    pipelines_deploy_json_body,
 15    pipelines_deploy_json_body_engine_config,
 16)
 17from .wallaroo_ml_ops_api_client.models.pipelines_deploy_response_500 import (
 18    PipelinesDeployResponse500,
 19)
 20from .wallaroo_ml_ops_api_client.types import UNSET
 21
 22if TYPE_CHECKING:
 23    # Imports that happen below in methods to fix circular import dependency
 24    # issues need to also be specified here to satisfy mypy type checking.
 25    from .client import Client
 26    from .pipeline import Pipeline
 27
 28
 29class PipelineVariant(Object):
 30    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
 31        self.client = client
 32        assert client is not None
 33        super().__init__(gql_client=client._gql_client, data=data)
 34
 35    def _fill(self, data: Dict[str, Any]) -> None:
 36        from .pipeline import Pipeline  # avoids circular imports
 37
 38        for required_attribute in ["id"]:
 39            if required_attribute not in data:
 40                raise RequiredAttributeMissing(
 41                    self.__class__.__name__, required_attribute
 42                )
 43        self._id = data["id"]
 44
 45        self._create_time = (
 46            dateparse.isoparse(data["created_at"])
 47            if "created_at" in data
 48            else DehydratedValue()
 49        )
 50        self._last_update_time = (
 51            dateparse.isoparse(data["updated_at"])
 52            if "updated_at" in data
 53            else DehydratedValue()
 54        )
 55        self._name = value_if_present(data, "version")
 56        self._definition = value_if_present(data, "definition")
 57        self._pipeline = (
 58            Pipeline(client=self.client, data=data["pipeline"])
 59            if "pipeline" in data
 60            else DehydratedValue()
 61        )
 62        self._deployments = (
 63            [
 64                Deployment(
 65                    client=self.client,
 66                    data=elem["deployment"],
 67                )
 68                for elem in data["deployment_pipeline_versions"]
 69            ]
 70            if "deployment_pipeline_versions" in data
 71            else DehydratedValue()
 72        )
 73        self._model_configs = (
 74            [
 75                ModelConfig(
 76                    client=self.client,
 77                    data=elem["model_config"],
 78                )
 79                for elem in data["deployment_model_configs"]
 80            ]
 81            if "deployment_model_configs" in data
 82            else DehydratedValue()
 83        )
 84
 85    def _fetch_attributes(self) -> Dict[str, Any]:
 86        return self._gql_client.execute(
 87            gql.gql(queries.named("PipelineVariantById")),
 88            variable_values={
 89                "variant_id": self._id,
 90            },
 91        )["pipeline_version_by_pk"]
 92
 93    def id(self) -> int:
 94        return self._id
 95
 96    @rehydrate("_create_time")
 97    def create_time(self) -> datetime.datetime:
 98        return cast(datetime.datetime, self._create_time)
 99
100    @rehydrate("_last_update_time")
101    def last_update_time(self) -> datetime.datetime:
102        return cast(datetime.datetime, self._last_update_time)
103
104    @rehydrate("_name")
105    def name(self) -> str:
106        return cast(str, self._name)
107
108    @rehydrate("_definition")
109    def definition(self) -> Dict[str, Any]:
110        return cast(Dict[str, Any], self._definition)
111
112    @rehydrate("_pipeline")
113    def pipeline(self) -> "Pipeline":
114        from .pipeline import Pipeline
115
116        return cast(Pipeline, self._pipeline)
117
118    @rehydrate("_deployments")
119    def deployments(self) -> List[Deployment]:
120        return cast(List[Deployment], self._deployments)
121
122    @rehydrate("_model_configs")
123    def model_configs(self) -> List[ModelConfig]:
124        return cast(List[ModelConfig], self._model_configs)
125
126    def deploy(
127        self,
128        deployment_name: str,
129        model_configs: List[ModelConfig],
130        config: Optional[DeploymentConfig] = None,
131    ) -> Deployment:
132        """Deploys this PipelineVariant.
133
134        :param str deployment_name: Name of the new Deployment. Must be unique
135            across all deployments.
136        :param List[ModelConfig] model_configs: List of the configured models to
137        use. These must be the same ModelConfigs used when creating the
138            Pipeline.
139        :param Optional[DeploymentConfig] config: Deployment configuration to use.
140        :return: A Deployment object for the resulting deployment.
141        :rtype: Deployment
142        """
143        workspace_id = (
144            None if self.client is None else self.client.get_current_workspace().id()
145        )
146        if config is None:
147            config = DeploymentConfigBuilder(workspace_id=workspace_id).build()
148        else:
149            config.guarantee_workspace_id(workspace_id=workspace_id)
150
151        assert self.client is not None
152
153        engine = pipelines_deploy_json_body_engine_config.PipelinesDeployJsonBodyEngineConfig.from_dict(
154            config
155        )
156        model_config_ids = [mc.id() for mc in model_configs]
157        body = pipelines_deploy_json_body.PipelinesDeployJsonBody(
158            deployment_name,
159            self.id(),
160            self.pipeline().id(),
161            engine,
162            model_config_ids,
163            UNSET,
164            UNSET,
165        )
166
167        data = pipelines_deploy.sync(client=self.client.mlops(), json_body=body)
168        if isinstance(data, PipelinesDeployResponse500):
169            raise Exception(data.msg)
170
171        if data is None:
172            raise Exception("Failed to deploy.")
173
174        deployment = Deployment(client=self.client, data=data.to_dict())
175
176        deployment._rehydrate()
177
178        # Increase timeout for each MLFlow image, up to a limit. Based on very limited testing,
179        # multiple MLFlow images do require more time than a single image due to some network
180        # saturation.
181        timeout_multiplier = 1
182        for mc in model_configs:
183            if mc.runtime() == "mlflow" and timeout_multiplier < 3:
184                timeout_multiplier += 1
185
186        return deployment.wait_for_running(self.client.timeout * timeout_multiplier)
187
188
class PipelineVariants(List[PipelineVariant]):
    """Wraps a list of pipeline variants for display in a display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        """Render the variants as an HTML table, one row per variant.

        Every cell value is HTML-escaped, so names, tags and model names that
        contain ``<``, ``>``, ``&`` or quotes are shown as text rather than
        being interpreted as markup.

        :return: The table markup, or the plain text ``(no pipelines)`` when
            the list is empty.
        :rtype: str
        """
        # Function-scope import, like the other local imports in this module.
        from html import escape

        def cell(value) -> str:
            return f"<td>{escape(str(value))}</td>"

        def row(pipeline_variant) -> str:
            # TODO: we shouldn't be accessing a protected member. No side effects now, so deal with it later.
            fmt = pipeline_variant.client._time_format
            pipeline = pipeline_variant.pipeline()
            tags = ", ".join(tag.tag() for tag in pipeline.tags())
            # Fetch the deployments once and reuse the result.
            deployments = pipeline_variant.deployments()
            deployed = deployments[0].deployed() if deployments else "(unknown)"
            steps = ", ".join(
                mc.model().name() for mc in pipeline_variant.model_configs()
            )
            values = [
                pipeline.name(),
                pipeline_variant.name(),
                pipeline_variant.create_time().strftime(fmt),
                pipeline_variant.last_update_time().strftime(fmt),
                deployed,
                tags,
                steps,
            ]
            return "<tr>" + "".join(cell(value) for value in values) + "</tr>"

        fields = [
            "name",
            "version",
            "creation_time",
            "last_updated_time",
            "deployed",
            "tags",
            "steps",
        ]

        if not self:
            return "(no pipelines)"

        header = "<tr><th>" + "</th><th>".join(fields) + "</th></tr>"
        body = "".join(row(variant) for variant in self)
        return "<table>" + header + body + "</table>"
class PipelineVariant(wallaroo.object.Object):
 30class PipelineVariant(Object):
 31    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
 32        self.client = client
 33        assert client is not None
 34        super().__init__(gql_client=client._gql_client, data=data)
 35
 36    def _fill(self, data: Dict[str, Any]) -> None:
 37        from .pipeline import Pipeline  # avoids circular imports
 38
 39        for required_attribute in ["id"]:
 40            if required_attribute not in data:
 41                raise RequiredAttributeMissing(
 42                    self.__class__.__name__, required_attribute
 43                )
 44        self._id = data["id"]
 45
 46        self._create_time = (
 47            dateparse.isoparse(data["created_at"])
 48            if "created_at" in data
 49            else DehydratedValue()
 50        )
 51        self._last_update_time = (
 52            dateparse.isoparse(data["updated_at"])
 53            if "updated_at" in data
 54            else DehydratedValue()
 55        )
 56        self._name = value_if_present(data, "version")
 57        self._definition = value_if_present(data, "definition")
 58        self._pipeline = (
 59            Pipeline(client=self.client, data=data["pipeline"])
 60            if "pipeline" in data
 61            else DehydratedValue()
 62        )
 63        self._deployments = (
 64            [
 65                Deployment(
 66                    client=self.client,
 67                    data=elem["deployment"],
 68                )
 69                for elem in data["deployment_pipeline_versions"]
 70            ]
 71            if "deployment_pipeline_versions" in data
 72            else DehydratedValue()
 73        )
 74        self._model_configs = (
 75            [
 76                ModelConfig(
 77                    client=self.client,
 78                    data=elem["model_config"],
 79                )
 80                for elem in data["deployment_model_configs"]
 81            ]
 82            if "deployment_model_configs" in data
 83            else DehydratedValue()
 84        )
 85
 86    def _fetch_attributes(self) -> Dict[str, Any]:
 87        return self._gql_client.execute(
 88            gql.gql(queries.named("PipelineVariantById")),
 89            variable_values={
 90                "variant_id": self._id,
 91            },
 92        )["pipeline_version_by_pk"]
 93
 94    def id(self) -> int:
 95        return self._id
 96
 97    @rehydrate("_create_time")
 98    def create_time(self) -> datetime.datetime:
 99        return cast(datetime.datetime, self._create_time)
100
101    @rehydrate("_last_update_time")
102    def last_update_time(self) -> datetime.datetime:
103        return cast(datetime.datetime, self._last_update_time)
104
105    @rehydrate("_name")
106    def name(self) -> str:
107        return cast(str, self._name)
108
109    @rehydrate("_definition")
110    def definition(self) -> Dict[str, Any]:
111        return cast(Dict[str, Any], self._definition)
112
113    @rehydrate("_pipeline")
114    def pipeline(self) -> "Pipeline":
115        from .pipeline import Pipeline
116
117        return cast(Pipeline, self._pipeline)
118
119    @rehydrate("_deployments")
120    def deployments(self) -> List[Deployment]:
121        return cast(List[Deployment], self._deployments)
122
123    @rehydrate("_model_configs")
124    def model_configs(self) -> List[ModelConfig]:
125        return cast(List[ModelConfig], self._model_configs)
126
127    def deploy(
128        self,
129        deployment_name: str,
130        model_configs: List[ModelConfig],
131        config: Optional[DeploymentConfig] = None,
132    ) -> Deployment:
133        """Deploys this PipelineVariant.
134
135        :param str deployment_name: Name of the new Deployment. Must be unique
136            across all deployments.
137        :param List[ModelConfig] model_configs: List of the configured models to
138        use. These must be the same ModelConfigs used when creating the
139            Pipeline.
140        :param Optional[DeploymentConfig] config: Deployment configuration to use.
141        :return: A Deployment object for the resulting deployment.
142        :rtype: Deployment
143        """
144        workspace_id = (
145            None if self.client is None else self.client.get_current_workspace().id()
146        )
147        if config is None:
148            config = DeploymentConfigBuilder(workspace_id=workspace_id).build()
149        else:
150            config.guarantee_workspace_id(workspace_id=workspace_id)
151
152        assert self.client is not None
153
154        engine = pipelines_deploy_json_body_engine_config.PipelinesDeployJsonBodyEngineConfig.from_dict(
155            config
156        )
157        model_config_ids = [mc.id() for mc in model_configs]
158        body = pipelines_deploy_json_body.PipelinesDeployJsonBody(
159            deployment_name,
160            self.id(),
161            self.pipeline().id(),
162            engine,
163            model_config_ids,
164            UNSET,
165            UNSET,
166        )
167
168        data = pipelines_deploy.sync(client=self.client.mlops(), json_body=body)
169        if isinstance(data, PipelinesDeployResponse500):
170            raise Exception(data.msg)
171
172        if data is None:
173            raise Exception("Failed to deploy.")
174
175        deployment = Deployment(client=self.client, data=data.to_dict())
176
177        deployment._rehydrate()
178
179        # Increase timeout for each MLFlow image, up to a limit. Based on very limited testing,
180        # multiple MLFlow images do require more time than a single image due to some network
181        # saturation.
182        timeout_multiplier = 1
183        for mc in model_configs:
184            if mc.runtime() == "mlflow" and timeout_multiplier < 3:
185                timeout_multiplier += 1
186
187        return deployment.wait_for_running(self.client.timeout * timeout_multiplier)

Base class for all backend GraphQL API objects.

This class serves as a framework for API objects to be constructed based on a partially-complete JSON response, and to fill in their remaining members dynamically if needed.

PipelineVariant(client: Optional[wallaroo.client.Client], data: Dict[str, Any])
31    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
32        self.client = client
33        assert client is not None
34        super().__init__(gql_client=client._gql_client, data=data)

Base constructor.

Each object requires:

  • a GraphQL client - in order to fill its missing members dynamically
  • an initial data blob - typically from unserialized JSON, contains at least the data for required members (typically the object's primary key) and optionally other data members.
def id(self) -> int:
94    def id(self) -> int:
95        return self._id
def create_time(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def last_update_time(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def name(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def definition(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def pipeline(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def deployments(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def model_configs(*args, **kwargs):
41        def wrapper(*args, **kwargs):
42            obj = args[0]
43            if not getattr(obj, "_standalone", None):
44                present = getattr(obj, attr) != DehydratedValue()
45                # Uncomment to debug while testing
46                # print(
47                #    "rehydrate: {} -> {}".format(
48                #        attr, "present" if present else "not present"
49                #    )
50                # )
51                if not present:
52                    obj._rehydrate()
53            result = fn(*args, **kwargs)
54            return result
def deploy( self, deployment_name: str, model_configs: List[wallaroo.model_config.ModelConfig], config: Optional[wallaroo.deployment_config.DeploymentConfig] = None) -> wallaroo.deployment.Deployment:
127    def deploy(
128        self,
129        deployment_name: str,
130        model_configs: List[ModelConfig],
131        config: Optional[DeploymentConfig] = None,
132    ) -> Deployment:
133        """Deploys this PipelineVariant.
134
135        :param str deployment_name: Name of the new Deployment. Must be unique
136            across all deployments.
137        :param List[ModelConfig] model_configs: List of the configured models to
138        use. These must be the same ModelConfigs used when creating the
139            Pipeline.
140        :param Optional[DeploymentConfig] config: Deployment configuration to use.
141        :return: A Deployment object for the resulting deployment.
142        :rtype: Deployment
143        """
144        workspace_id = (
145            None if self.client is None else self.client.get_current_workspace().id()
146        )
147        if config is None:
148            config = DeploymentConfigBuilder(workspace_id=workspace_id).build()
149        else:
150            config.guarantee_workspace_id(workspace_id=workspace_id)
151
152        assert self.client is not None
153
154        engine = pipelines_deploy_json_body_engine_config.PipelinesDeployJsonBodyEngineConfig.from_dict(
155            config
156        )
157        model_config_ids = [mc.id() for mc in model_configs]
158        body = pipelines_deploy_json_body.PipelinesDeployJsonBody(
159            deployment_name,
160            self.id(),
161            self.pipeline().id(),
162            engine,
163            model_config_ids,
164            UNSET,
165            UNSET,
166        )
167
168        data = pipelines_deploy.sync(client=self.client.mlops(), json_body=body)
169        if isinstance(data, PipelinesDeployResponse500):
170            raise Exception(data.msg)
171
172        if data is None:
173            raise Exception("Failed to deploy.")
174
175        deployment = Deployment(client=self.client, data=data.to_dict())
176
177        deployment._rehydrate()
178
179        # Increase timeout for each MLFlow image, up to a limit. Based on very limited testing,
180        # multiple MLFlow images do require more time than a single image due to some network
181        # saturation.
182        timeout_multiplier = 1
183        for mc in model_configs:
184            if mc.runtime() == "mlflow" and timeout_multiplier < 3:
185                timeout_multiplier += 1
186
187        return deployment.wait_for_running(self.client.timeout * timeout_multiplier)

Deploys this PipelineVariant.

Parameters
  • str deployment_name: Name of the new Deployment. Must be unique across all deployments.
  • List[ModelConfig] model_configs: List of the configured models to use. These must be the same ModelConfigs used when creating the Pipeline.
  • Optional[DeploymentConfig] config: Deployment configuration to use.
Returns

A Deployment object for the resulting deployment.

class PipelineVariants(typing.List[wallaroo.pipeline_variant.PipelineVariant]):
class PipelineVariants(List[PipelineVariant]):
    """Wraps a list of pipeline variants for display in a display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        """Render the variants as an HTML table, one row per variant.

        Every cell value is HTML-escaped, so names, tags and model names that
        contain ``<``, ``>``, ``&`` or quotes are shown as text rather than
        being interpreted as markup.

        :return: The table markup, or the plain text ``(no pipelines)`` when
            the list is empty.
        :rtype: str
        """
        # Function-scope import, like the other local imports in this module.
        from html import escape

        def cell(value) -> str:
            return f"<td>{escape(str(value))}</td>"

        def row(pipeline_variant) -> str:
            # TODO: we shouldn't be accessing a protected member. No side effects now, so deal with it later.
            fmt = pipeline_variant.client._time_format
            pipeline = pipeline_variant.pipeline()
            tags = ", ".join(tag.tag() for tag in pipeline.tags())
            # Fetch the deployments once and reuse the result.
            deployments = pipeline_variant.deployments()
            deployed = deployments[0].deployed() if deployments else "(unknown)"
            steps = ", ".join(
                mc.model().name() for mc in pipeline_variant.model_configs()
            )
            values = [
                pipeline.name(),
                pipeline_variant.name(),
                pipeline_variant.create_time().strftime(fmt),
                pipeline_variant.last_update_time().strftime(fmt),
                deployed,
                tags,
                steps,
            ]
            return "<tr>" + "".join(cell(value) for value in values) + "</tr>"

        fields = [
            "name",
            "version",
            "creation_time",
            "last_updated_time",
            "deployed",
            "tags",
            "steps",
        ]

        if not self:
            return "(no pipelines)"

        header = "<tr><th>" + "</th><th>".join(fields) + "</th></tr>"
        body = "".join(row(variant) for variant in self)
        return "<table>" + header + body + "</table>"

Wraps a list of pipeline variants for display in a display-aware environment like Jupyter.

Inherited Members
builtins.list
list
clear
copy
append
insert
extend
pop
remove
index
count
reverse
sort