wallaroo.pipeline_variant
"""Pipeline variant objects backed by the Wallaroo GraphQL and MLOps APIs."""

import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast

from dateutil import parser as dateparse

from wallaroo import queries

from .deployment import Deployment
from .deployment_config import DeploymentConfig, DeploymentConfigBuilder
from .model_config import ModelConfig
from .object import *
from .wallaroo_ml_ops_api_client.api.pipeline import pipelines_deploy
from .wallaroo_ml_ops_api_client.models import (
    pipelines_deploy_json_body,
    pipelines_deploy_json_body_engine_config,
)
from .wallaroo_ml_ops_api_client.models.pipelines_deploy_response_500 import (
    PipelinesDeployResponse500,
)
from .wallaroo_ml_ops_api_client.types import UNSET

if TYPE_CHECKING:
    # Imports that happen below in methods to fix circular import dependency
    # issues need to also be specified here to satisfy mypy type checking.
    from .client import Client
    from .pipeline import Pipeline


class PipelineVariant(Object):
    """A single version of a Pipeline.

    Attributes are filled lazily: fields absent from the initial data blob are
    stored as ``DehydratedValue`` and fetched from the GraphQL API on first
    access via the ``@rehydrate`` accessors.
    """

    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
        self.client = client
        # The annotation allows None, but a live client is required in practice.
        assert client is not None
        super().__init__(gql_client=client._gql_client, data=data)

    def _fill(self, data: Dict[str, Any]) -> None:
        """Populate members from ``data``; missing optional keys dehydrate.

        :param data: partially-complete record, typically deserialized JSON.
        :raises RequiredAttributeMissing: if the primary key ``id`` is absent.
        """
        from .pipeline import Pipeline  # avoids circular imports

        # "id" is the only attribute every payload must carry.
        for required_attribute in ["id"]:
            if required_attribute not in data:
                raise RequiredAttributeMissing(
                    self.__class__.__name__, required_attribute
                )
        self._id = data["id"]

        # Timestamps arrive as ISO-8601 strings; parse when present.
        self._create_time = (
            dateparse.isoparse(data["created_at"])
            if "created_at" in data
            else DehydratedValue()
        )
        self._last_update_time = (
            dateparse.isoparse(data["updated_at"])
            if "updated_at" in data
            else DehydratedValue()
        )
        # NOTE: the variant's "name" is stored under the "version" key.
        self._name = value_if_present(data, "version")
        self._definition = value_if_present(data, "definition")
        self._pipeline = (
            Pipeline(client=self.client, data=data["pipeline"])
            if "pipeline" in data
            else DehydratedValue()
        )
        self._deployments = (
            [
                Deployment(
                    client=self.client,
                    data=elem["deployment"],
                )
                for elem in data["deployment_pipeline_versions"]
            ]
            if "deployment_pipeline_versions" in data
            else DehydratedValue()
        )
        self._model_configs = (
            [
                ModelConfig(
                    client=self.client,
                    data=elem["model_config"],
                )
                for elem in data["deployment_model_configs"]
            ]
            if "deployment_model_configs" in data
            else DehydratedValue()
        )

    def _fetch_attributes(self) -> Dict[str, Any]:
        """Fetch this variant's full record from the GraphQL API by its id."""
        return self._gql_client.execute(
            gql.gql(queries.named("PipelineVariantById")),
            variable_values={
                "variant_id": self._id,
            },
        )["pipeline_version_by_pk"]

    def id(self) -> int:
        """Return the variant's primary key."""
        return self._id

    @rehydrate("_create_time")
    def create_time(self) -> datetime.datetime:
        """Return the creation timestamp, fetching it if dehydrated."""
        return cast(datetime.datetime, self._create_time)

    @rehydrate("_last_update_time")
    def last_update_time(self) -> datetime.datetime:
        """Return the last-update timestamp, fetching it if dehydrated."""
        return cast(datetime.datetime, self._last_update_time)

    @rehydrate("_name")
    def name(self) -> str:
        """Return the variant name (the backend's "version" field)."""
        return cast(str, self._name)

    @rehydrate("_definition")
    def definition(self) -> Dict[str, Any]:
        """Return the pipeline definition blob for this variant."""
        return cast(Dict[str, Any], self._definition)

    @rehydrate("_pipeline")
    def pipeline(self) -> "Pipeline":
        """Return the parent Pipeline this variant belongs to."""
        from .pipeline import Pipeline

        return cast(Pipeline, self._pipeline)

    @rehydrate("_deployments")
    def deployments(self) -> List[Deployment]:
        """Return the Deployments associated with this variant."""
        return cast(List[Deployment], self._deployments)

    @rehydrate("_model_configs")
    def model_configs(self) -> List[ModelConfig]:
        """Return the ModelConfigs associated with this variant."""
        return cast(List[ModelConfig], self._model_configs)

    def deploy(
        self,
        deployment_name: str,
        model_configs: List[ModelConfig],
        config: Optional[DeploymentConfig] = None,
    ) -> Deployment:
        """Deploys this PipelineVariant.

        :param str deployment_name: Name of the new Deployment. Must be unique
            across all deployments.
        :param List[ModelConfig] model_configs: List of the configured models to
            use. These must be the same ModelConfigs used when creating the
            Pipeline.
        :param Optional[DeploymentConfig] config: Deployment configuration to use.
        :return: A Deployment object for the resulting deployment.
        :rtype: Deployment
        """
        workspace_id = (
            None if self.client is None else self.client.get_current_workspace().id()
        )
        if config is None:
            config = DeploymentConfigBuilder(workspace_id=workspace_id).build()
        else:
            # Ensure an explicit config is pinned to the current workspace.
            config.guarantee_workspace_id(workspace_id=workspace_id)

        assert self.client is not None

        # NOTE(review): `config` is a DeploymentConfig passed where from_dict
        # expects a mapping — presumably DeploymentConfig is dict-like; confirm.
        engine = pipelines_deploy_json_body_engine_config.PipelinesDeployJsonBodyEngineConfig.from_dict(
            config
        )
        model_config_ids = [mc.id() for mc in model_configs]
        body = pipelines_deploy_json_body.PipelinesDeployJsonBody(
            deployment_name,
            self.id(),
            self.pipeline().id(),
            engine,
            model_config_ids,
            UNSET,
            UNSET,
        )

        data = pipelines_deploy.sync(client=self.client.mlops(), json_body=body)
        if isinstance(data, PipelinesDeployResponse500):
            raise Exception(data.msg)

        if data is None:
            raise Exception("Failed to deploy.")

        deployment = Deployment(client=self.client, data=data.to_dict())

        # Force a fetch so the Deployment is fully populated before waiting.
        deployment._rehydrate()

        # Increase timeout for each MLFlow image, up to a limit. Based on very limited testing,
        # multiple MLFlow images do require more time than a single image due to some network
        # saturation.
        timeout_multiplier = 1
        for mc in model_configs:
            if mc.runtime() == "mlflow" and timeout_multiplier < 3:
                timeout_multiplier += 1

        return deployment.wait_for_running(self.client.timeout * timeout_multiplier)


class PipelineVariants(List[PipelineVariant]):
    """Wraps a list of pipelines for display in a display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        def row(pipeline_variant):
            # Render one <tr> for a single variant.
            # TODO: we shouldn't be accessing a protected member. No side effects now, so deal with it later.
            fmt = pipeline_variant.client._time_format
            pipeline = pipeline_variant.pipeline()
            tags = ", ".join([tag.tag() for tag in pipeline.tags()])
            deployments = pipeline_variant.deployments()
            deployed = (
                "(unknown)"
                if not deployments
                else pipeline_variant.deployments()[0].deployed()
            )
            model_configs = pipeline_variant.model_configs()
            steps = ", ".join([mc.model().name() for mc in model_configs])
            return (
                "<tr>"
                + f"<td>{pipeline.name()}</td>"
                + f"<td>{pipeline_variant.name()}</td>"
                + f"<td>{pipeline_variant.create_time().strftime(fmt)}</td>"
                + f"<td>{pipeline_variant.last_update_time().strftime(fmt)}</td>"
                + f"<td>{deployed}</td>"
                + f"<td>{tags}</td>"
                + f"<td>{steps}</td>"
                + "</tr>"
            )

        # Column headers, in display order.
        fields = [
            "name",
            "version",
            "creation_time",
            "last_updated_time",
            "deployed",
            "tags",
            "steps",
        ]

        if not self:
            return "(no pipelines)"
        else:
            return (
                "<table>"
                + "<tr><th>"
                + "</th><th>".join(fields)
                + "</th></tr>"
                + ("".join([row(p) for p in self]))
                + "</table>"
            )
class PipelineVariant(Object):
    """A single version of a Pipeline.

    Attributes are filled lazily: fields absent from the initial data blob are
    stored as ``DehydratedValue`` and fetched from the GraphQL API on first
    access via the ``@rehydrate`` accessors.
    """

    def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
        self.client = client
        # The annotation allows None, but a live client is required in practice.
        assert client is not None
        super().__init__(gql_client=client._gql_client, data=data)

    def _fill(self, data: Dict[str, Any]) -> None:
        """Populate members from ``data``; missing optional keys dehydrate.

        :param data: partially-complete record, typically deserialized JSON.
        :raises RequiredAttributeMissing: if the primary key ``id`` is absent.
        """
        from .pipeline import Pipeline  # avoids circular imports

        # "id" is the only attribute every payload must carry.
        for required_attribute in ["id"]:
            if required_attribute not in data:
                raise RequiredAttributeMissing(
                    self.__class__.__name__, required_attribute
                )
        self._id = data["id"]

        # Timestamps arrive as ISO-8601 strings; parse when present.
        self._create_time = (
            dateparse.isoparse(data["created_at"])
            if "created_at" in data
            else DehydratedValue()
        )
        self._last_update_time = (
            dateparse.isoparse(data["updated_at"])
            if "updated_at" in data
            else DehydratedValue()
        )
        # NOTE: the variant's "name" is stored under the "version" key.
        self._name = value_if_present(data, "version")
        self._definition = value_if_present(data, "definition")
        self._pipeline = (
            Pipeline(client=self.client, data=data["pipeline"])
            if "pipeline" in data
            else DehydratedValue()
        )
        self._deployments = (
            [
                Deployment(
                    client=self.client,
                    data=elem["deployment"],
                )
                for elem in data["deployment_pipeline_versions"]
            ]
            if "deployment_pipeline_versions" in data
            else DehydratedValue()
        )
        self._model_configs = (
            [
                ModelConfig(
                    client=self.client,
                    data=elem["model_config"],
                )
                for elem in data["deployment_model_configs"]
            ]
            if "deployment_model_configs" in data
            else DehydratedValue()
        )

    def _fetch_attributes(self) -> Dict[str, Any]:
        """Fetch this variant's full record from the GraphQL API by its id."""
        return self._gql_client.execute(
            gql.gql(queries.named("PipelineVariantById")),
            variable_values={
                "variant_id": self._id,
            },
        )["pipeline_version_by_pk"]

    def id(self) -> int:
        """Return the variant's primary key."""
        return self._id

    @rehydrate("_create_time")
    def create_time(self) -> datetime.datetime:
        """Return the creation timestamp, fetching it if dehydrated."""
        return cast(datetime.datetime, self._create_time)

    @rehydrate("_last_update_time")
    def last_update_time(self) -> datetime.datetime:
        """Return the last-update timestamp, fetching it if dehydrated."""
        return cast(datetime.datetime, self._last_update_time)

    @rehydrate("_name")
    def name(self) -> str:
        """Return the variant name (the backend's "version" field)."""
        return cast(str, self._name)

    @rehydrate("_definition")
    def definition(self) -> Dict[str, Any]:
        """Return the pipeline definition blob for this variant."""
        return cast(Dict[str, Any], self._definition)

    @rehydrate("_pipeline")
    def pipeline(self) -> "Pipeline":
        """Return the parent Pipeline this variant belongs to."""
        from .pipeline import Pipeline

        return cast(Pipeline, self._pipeline)

    @rehydrate("_deployments")
    def deployments(self) -> List[Deployment]:
        """Return the Deployments associated with this variant."""
        return cast(List[Deployment], self._deployments)

    @rehydrate("_model_configs")
    def model_configs(self) -> List[ModelConfig]:
        """Return the ModelConfigs associated with this variant."""
        return cast(List[ModelConfig], self._model_configs)

    def deploy(
        self,
        deployment_name: str,
        model_configs: List[ModelConfig],
        config: Optional[DeploymentConfig] = None,
    ) -> Deployment:
        """Deploys this PipelineVariant.

        :param str deployment_name: Name of the new Deployment. Must be unique
            across all deployments.
        :param List[ModelConfig] model_configs: List of the configured models to
            use. These must be the same ModelConfigs used when creating the
            Pipeline.
        :param Optional[DeploymentConfig] config: Deployment configuration to use.
        :return: A Deployment object for the resulting deployment.
        :rtype: Deployment
        """
        workspace_id = (
            None if self.client is None else self.client.get_current_workspace().id()
        )
        if config is None:
            config = DeploymentConfigBuilder(workspace_id=workspace_id).build()
        else:
            # Ensure an explicit config is pinned to the current workspace.
            config.guarantee_workspace_id(workspace_id=workspace_id)

        assert self.client is not None

        # NOTE(review): `config` is a DeploymentConfig passed where from_dict
        # expects a mapping — presumably DeploymentConfig is dict-like; confirm.
        engine = pipelines_deploy_json_body_engine_config.PipelinesDeployJsonBodyEngineConfig.from_dict(
            config
        )
        model_config_ids = [mc.id() for mc in model_configs]
        body = pipelines_deploy_json_body.PipelinesDeployJsonBody(
            deployment_name,
            self.id(),
            self.pipeline().id(),
            engine,
            model_config_ids,
            UNSET,
            UNSET,
        )

        data = pipelines_deploy.sync(client=self.client.mlops(), json_body=body)
        if isinstance(data, PipelinesDeployResponse500):
            raise Exception(data.msg)

        if data is None:
            raise Exception("Failed to deploy.")

        deployment = Deployment(client=self.client, data=data.to_dict())

        # Force a fetch so the Deployment is fully populated before waiting.
        deployment._rehydrate()

        # Increase timeout for each MLFlow image, up to a limit. Based on very limited testing,
        # multiple MLFlow images do require more time than a single image due to some network
        # saturation.
        timeout_multiplier = 1
        for mc in model_configs:
            if mc.runtime() == "mlflow" and timeout_multiplier < 3:
                timeout_multiplier += 1

        return deployment.wait_for_running(self.client.timeout * timeout_multiplier)
Base class for all backend GraphQL API objects.
This class serves as a framework for API objects to be constructed based on a partially-complete JSON response, and to fill in their remaining members dynamically if needed.
PipelineVariant(client: Optional[wallaroo.client.Client], data: Dict[str, Any])
def __init__(self, client: Optional["Client"], data: Dict[str, Any]) -> None:
    """Base constructor: store the client and delegate to Object.__init__.

    :param client: API client used for GraphQL access; must not be None.
    :param data: initial (possibly partial) data blob for this variant.
    """
    self.client = client
    # The annotation allows None, but a live client is required in practice.
    assert client is not None
    super().__init__(gql_client=client._gql_client, data=data)
Base constructor.
Each object requires:
- a GraphQL client - in order to fill its missing members dynamically
- an initial data blob — typically from deserialized JSON — containing at least the data for the required members (typically the object's primary key), and optionally other data members.
def
create_time(self) -> datetime.datetime:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
last_update_time(self) -> datetime.datetime:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
name(self) -> str:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
definition(self) -> Dict[str, Any]:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
pipeline(self) -> wallaroo.pipeline.Pipeline:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
deployments(self) -> List[wallaroo.deployment.Deployment]:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
model_configs(self) -> List[wallaroo.model_config.ModelConfig]:
41 def wrapper(*args, **kwargs): 42 obj = args[0] 43 if not getattr(obj, "_standalone", None): 44 present = getattr(obj, attr) != DehydratedValue() 45 # Uncomment to debug while testing 46 # print( 47 # "rehydrate: {} -> {}".format( 48 # attr, "present" if present else "not present" 49 # ) 50 # ) 51 if not present: 52 obj._rehydrate() 53 result = fn(*args, **kwargs) 54 return result
def
deploy( self, deployment_name: str, model_configs: List[wallaroo.model_config.ModelConfig], config: Optional[wallaroo.deployment_config.DeploymentConfig] = None) -> wallaroo.deployment.Deployment:
def deploy(
    self,
    deployment_name: str,
    model_configs: List[ModelConfig],
    config: Optional[DeploymentConfig] = None,
) -> Deployment:
    """Deploys this PipelineVariant.

    :param str deployment_name: Name of the new Deployment. Must be unique
        across all deployments.
    :param List[ModelConfig] model_configs: List of the configured models to
        use. These must be the same ModelConfigs used when creating the
        Pipeline.
    :param Optional[DeploymentConfig] config: Deployment configuration to use.
    :return: A Deployment object for the resulting deployment.
    :rtype: Deployment
    """
    workspace_id = (
        None if self.client is None else self.client.get_current_workspace().id()
    )
    if config is None:
        config = DeploymentConfigBuilder(workspace_id=workspace_id).build()
    else:
        # Ensure an explicit config is pinned to the current workspace.
        config.guarantee_workspace_id(workspace_id=workspace_id)

    assert self.client is not None

    # NOTE(review): `config` is a DeploymentConfig passed where from_dict
    # expects a mapping — presumably DeploymentConfig is dict-like; confirm.
    engine = pipelines_deploy_json_body_engine_config.PipelinesDeployJsonBodyEngineConfig.from_dict(
        config
    )
    model_config_ids = [mc.id() for mc in model_configs]
    body = pipelines_deploy_json_body.PipelinesDeployJsonBody(
        deployment_name,
        self.id(),
        self.pipeline().id(),
        engine,
        model_config_ids,
        UNSET,
        UNSET,
    )

    data = pipelines_deploy.sync(client=self.client.mlops(), json_body=body)
    if isinstance(data, PipelinesDeployResponse500):
        raise Exception(data.msg)

    if data is None:
        raise Exception("Failed to deploy.")

    deployment = Deployment(client=self.client, data=data.to_dict())

    # Force a fetch so the Deployment is fully populated before waiting.
    deployment._rehydrate()

    # Increase timeout for each MLFlow image, up to a limit. Based on very limited testing,
    # multiple MLFlow images do require more time than a single image due to some network
    # saturation.
    timeout_multiplier = 1
    for mc in model_configs:
        if mc.runtime() == "mlflow" and timeout_multiplier < 3:
            timeout_multiplier += 1

    return deployment.wait_for_running(self.client.timeout * timeout_multiplier)
Deploys this PipelineVariant.
Parameters
- str deployment_name: Name of the new Deployment. Must be unique across all deployments.
- List[ModelConfig] model_configs: List of the configured models to use. These must be the same ModelConfigs used when creating the Pipeline.
- Optional[DeploymentConfig] config: Deployment configuration to use.
Returns
A Deployment object for the resulting deployment.
class
PipelineVariants(typing.List[wallaroo.pipeline_variant.PipelineVariant]):
class PipelineVariants(List[PipelineVariant]):
    """Wraps a list of pipelines for display in a display-aware environment like Jupyter."""

    def _repr_html_(self) -> str:
        # Render the list as an HTML table; called automatically by Jupyter.
        def row(pipeline_variant):
            # Render one <tr> for a single variant.
            # TODO: we shouldn't be accessing a protected member. No side effects now, so deal with it later.
            fmt = pipeline_variant.client._time_format
            pipeline = pipeline_variant.pipeline()
            tags = ", ".join([tag.tag() for tag in pipeline.tags()])
            deployments = pipeline_variant.deployments()
            deployed = (
                "(unknown)"
                if not deployments
                else pipeline_variant.deployments()[0].deployed()
            )
            model_configs = pipeline_variant.model_configs()
            steps = ", ".join([mc.model().name() for mc in model_configs])
            return (
                "<tr>"
                + f"<td>{pipeline.name()}</td>"
                + f"<td>{pipeline_variant.name()}</td>"
                + f"<td>{pipeline_variant.create_time().strftime(fmt)}</td>"
                + f"<td>{pipeline_variant.last_update_time().strftime(fmt)}</td>"
                + f"<td>{deployed}</td>"
                + f"<td>{tags}</td>"
                + f"<td>{steps}</td>"
                + "</tr>"
            )

        # Column headers, in display order.
        fields = [
            "name",
            "version",
            "creation_time",
            "last_updated_time",
            "deployed",
            "tags",
            "steps",
        ]

        if not self:
            return "(no pipelines)"
        else:
            return (
                "<table>"
                + "<tr><th>"
                + "</th><th>".join(fields)
                + "</th></tr>"
                + ("".join([row(p) for p in self]))
                + "</table>"
            )
Wraps a list of pipelines for display in a display-aware environment like Jupyter.
Inherited Members
- builtins.list
- list
- clear
- copy
- append
- insert
- extend
- pop
- remove
- index
- count
- reverse
- sort