wallaroo.pipeline_config
"""Pipeline step definitions, (de)serialization helpers, and the builder
used to assemble a Wallaroo pipeline configuration."""

import json
import re
from enum import Enum
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple

import yaml

from wallaroo import notify

from .checks import Alert, Expression, instrument
from .model import Model
from .model_config import ModelConfig

if TYPE_CHECKING:
    # Imports that happen below in methods to fix circular import dependency
    # issues need to also be specified here to satisfy mypy type checking.
    from .client import Client
    from .pipeline import Pipeline


class ValidDataType(str, Enum):
    """Primitive data types a pipeline may coerce its input tensor to."""

    f32 = "f32"
    f64 = "f64"
    i8 = "i8"
    u8 = "u8"
    i16 = "i16"
    u16 = "u16"
    i32 = "i32"
    u32 = "u32"
    i64 = "i64"
    u64 = "u64"


class ModelConfigsForStep:
    """Groups the ModelConfig objects backing a single pipeline step."""

    def __init__(self, model_configs: List[ModelConfig]):
        self.model_configs = model_configs


class ModelForStep:
    """Identifies a model (name, version, sha) referenced by a step."""

    def __init__(self, name, version, sha):
        self.name = name
        self.version = version
        self.sha = sha

    def to_json(self):
        """Return a JSON-serializable dict for this model reference."""
        return {"name": self.name, "version": self.version, "sha": self.sha}

    @classmethod
    def from_json(cls, json_dict: Dict[str, str]):
        """Build a ModelForStep from the dict produced by :meth:`to_json`."""
        return cls(json_dict["name"], json_dict["version"], json_dict["sha"])

    @classmethod
    def from_model(cls, model: Model):
        """Build a ModelForStep from a live :class:`Model` object."""
        return cls(
            model.name(),
            model.version(),
            model.sha(),
        )

    def __eq__(self, obj):
        return (
            isinstance(obj, ModelForStep)
            and self.name == obj.name
            and self.version == obj.version
            and self.sha == obj.sha
        )

    def __repr__(self):
        return str(self.to_json())


class ModelWeight:
    """A model reference paired with its traffic weight (see RandomSplit)."""

    def __init__(self, weight: float, model: ModelForStep):
        self.weight = weight
        self.model = model

    def to_json(self):
        """Return a JSON-serializable dict for this weighted model."""
        return {"model": self.model.to_json(), "weight": self.weight}

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]):
        """Build a ModelWeight from the dict produced by :meth:`to_json`."""
        return cls(json_dict["weight"], ModelForStep.from_json(json_dict["model"]))

    @classmethod
    def from_tuple(cls, tup: Tuple[float, Model]):
        """Build a ModelWeight from a ``(weight, Model)`` pair."""
        (weight, model) = tup
        return ModelWeight(weight, ModelForStep.from_model(model))

    def __eq__(self, obj):
        return (
            isinstance(obj, ModelWeight)
            and self.weight == obj.weight
            and self.model == obj.model
        )

    def __repr__(self):
        return str(self.to_json())


class RowToModel:
    """Routes a specific input row index to a specific model."""

    def __init__(self, row_index: int, model: ModelForStep):
        self.row_index = row_index
        self.model = model

    def to_json(self):
        """Return a JSON-serializable dict for this row-to-model mapping."""
        return {"row_index": self.row_index, "model": self.model.to_json()}

    @classmethod
    def from_json(cls, json_dict: Dict[str, Any]):
        """Build a RowToModel from the dict produced by :meth:`to_json`."""
        return cls(json_dict["row_index"], ModelForStep.from_json(json_dict["model"]))

    def __eq__(self, obj):
        return (
            isinstance(obj, RowToModel)
            and self.row_index == obj.row_index
            and self.model == obj.model
        )

    def __repr__(self):
        return str(self.to_json())


class Step:
    """Base class for all pipeline steps.

    Subclasses implement ``to_json``/``from_json`` for (de)serialization and
    override ``is_inference_step`` when the step runs model inference.
    """

    def to_json(self):
        """Serialize this step; subclasses must override."""
        pass

    def is_inference_step(self):
        """Return True if this step performs model inference."""
        return False

    def __repr__(self):
        return repr(self.to_json())

    def _repr_html_(self):
        return repr(self.to_json())

    @staticmethod
    def from_json(json_dict: Dict):
        """Deserialize a step from its single-key JSON representation.

        :raises RuntimeError: if the key does not name a known step type.
        """
        step_name = next(iter(json_dict))
        # TODO update this to use a switch statement in 3.10
        from_json_dispatch = {
            "Average": Average,
            "AuditResults": AuditResults,
            "Check": Check,
            "ColumnsSelect": ColumnsSelect,
            "ColumnsToRows": ColumnsToRows,
            "InputDataToType": InputDataToType,
            "ModelInference": ModelInference,
            "RowsToModels": RowsToModels,
            "Nth": Nth,
            "MetaValueSplit": MetaValueSplit,
            "RandomSplit": RandomSplit,
            "MultiOut": MultiOut,
        }
        if step_name not in from_json_dispatch:
            raise RuntimeError(f"An invalid step definition was given {step_name}")
        return from_json_dispatch[step_name].from_json(json_dict[step_name])  # type: ignore


class Average(Step):
    """Averages the outputs of the preceding step."""

    def to_json(self):
        return {"Average": {}}

    @staticmethod
    def from_json(json_dict: Dict):
        return Average()

    def __eq__(self, obj):
        # BUGFIX: arguments were reversed (`isinstance(Average, obj)`),
        # which raised TypeError for non-type operands.
        return isinstance(obj, Average)


class AuditResults(Step):
    """Logs a slice ``[start, end)`` of the model outputs for auditing."""

    def __init__(self, start: int, end: Optional[int] = None):
        self.start = start
        self.end = end

    def to_json(self):
        return {"AuditResults": {"from": self.start, "to": self.end}}

    @staticmethod
    def from_json(json_dict: Dict):
        return AuditResults(start=json_dict["from"], end=json_dict["to"])

    def __eq__(self, obj):
        return (
            isinstance(obj, AuditResults)
            and self.start == obj.start
            and self.end == obj.end
        )


class Check(Step):
    """A validation/alert check, stored as a serialized expression tree."""

    def __init__(self, tree: str):
        self.tree = tree

    @classmethod
    def from_name_and_validation(
        cls, name: str, validation: Expression, gauges: Optional[List[str]] = None
    ):
        """Build a Check from a named validation expression.

        BUGFIX: ``gauges`` previously used a mutable default argument
        (``[]``), shared across calls.
        """
        gauges = [] if gauges is None else gauges
        return cls(str(json.dumps(instrument({name: validation}, gauges, [name]))))

    def to_json(self):
        return {"Check": {"tree": [self.tree]}}

    @staticmethod
    def from_json(json_dict: Dict):
        tree = json_dict["tree"]
        return Check(tree[0])

    def __eq__(self, obj):
        return isinstance(obj, Check) and self.tree == obj.tree


class ColumnsSelect(Step):
    """Selects a subset of output columns by index."""

    def __init__(self, columns: List[int]):
        self.columns = columns

    def to_json(self):
        return {"ColumnsSelect": {"columns": self.columns}}

    @staticmethod
    def from_json(json_dict: Dict):
        return ColumnsSelect(json_dict["columns"])

    def __eq__(self, obj):
        return isinstance(obj, ColumnsSelect) and self.columns == obj.columns


class ColumnsToRows(Step):
    """Transposes columnar output data into rows."""

    def to_json(self):
        return {"ColumnsToRows": {}}

    @staticmethod
    def from_json(json_dict: Dict):
        return ColumnsToRows()

    def __eq__(self, obj):
        return isinstance(obj, ColumnsToRows)


class InputDataToType(Step):
    """Coerces pipeline input data to a given :class:`ValidDataType`."""

    def __init__(self, data_type: ValidDataType):
        self.data_type = data_type

    def to_json(self):
        # Use .value explicitly: f-string formatting of str-mixin enums
        # changed across Python versions ("f32" vs "ValidDataType.f32").
        return {"InputDataToType": {"data_type": self.data_type.value}}

    @staticmethod
    def from_json(json_dict: Dict):
        return InputDataToType(ValidDataType[json_dict["data_type"]])

    def __eq__(self, obj):
        return isinstance(obj, InputDataToType) and self.data_type == obj.data_type


class ModelInference(Step):
    """Runs inference with one or more models on the same input."""

    def __init__(self, models: List[ModelForStep]):
        self.models = models

    def to_json(self):
        jsonified_models = [m.to_json() for m in self.models]
        return {"ModelInference": {"models": jsonified_models}}

    def _repr_html_(self):
        return ",".join(
            [
                f"<tr><th>ModelInference</th><td>{m.name}</td><td>{m.version}</td></tr>"
                for m in self.models
            ]
        )

    @staticmethod
    def from_json(json_dict: Dict):
        return ModelInference([ModelForStep.from_json(m) for m in json_dict["models"]])

    def is_inference_step(self):
        return True

    def __eq__(self, obj):
        return isinstance(obj, ModelInference) and self.models == obj.models


class RowsToModels(Step):
    """Routes each input row to its configured model."""

    def __init__(self, rows_to_models: List[RowToModel]):
        self.rows_to_models = rows_to_models

    def to_json(self):
        jsonified_list = [m.to_json() for m in self.rows_to_models]
        return {"RowsToModels": {"rows_to_models": jsonified_list}}

    @staticmethod
    def from_json(json_dict: Dict):
        return RowsToModels(
            [RowToModel.from_json(r) for r in json_dict["rows_to_models"]]
        )

    def is_inference_step(self):
        return True

    def __eq__(self, obj):
        return (
            isinstance(obj, RowsToModels) and self.rows_to_models == obj.rows_to_models
        )


class Nth(Step):
    """Selects only the output with the given index from an array of outputs."""

    def __init__(self, index: int):
        self.index = index

    def to_json(self):
        return {"Nth": {"index": self.index}}

    @staticmethod
    def from_json(json_dict: Dict):
        return Nth(json_dict["index"])

    def __eq__(self, obj):
        return isinstance(obj, Nth) and self.index == obj.index


class MultiOut(Step):
    """Passes through multiple outputs (used by shadow deployments)."""

    def to_json(self):
        return {"MultiOut": {}}

    @staticmethod
    def from_json(json_dict: Dict):
        return MultiOut()

    def __eq__(self, obj):
        return isinstance(obj, MultiOut)


class MetaValueSplit(Step):
    """Routes to a model based on the input value at ``split_key``.

    Values found in ``routes`` go to the mapped model; anything else goes to
    ``control``.
    """

    def __init__(
        self, split_key: str, control: ModelForStep, routes: Dict[str, ModelForStep]
    ):
        self.split_key = split_key
        self.control = control
        self.routes = routes

    def to_json(self):
        jsonified_routes = {k: v.to_json() for k, v in self.routes.items()}
        return {
            "MetaValueSplit": {
                "split_key": self.split_key,
                "control": self.control.to_json(),
                "routes": jsonified_routes,
            }
        }

    @staticmethod
    def from_json(json_dict: Dict):
        json_routes = json_dict["routes"]
        routes = {k: ModelForStep.from_json(v) for k, v in json_routes.items()}
        return MetaValueSplit(
            json_dict["split_key"], ModelForStep.from_json(json_dict["control"]), routes
        )

    def is_inference_step(self):
        return True

    def __eq__(self, obj):
        # BUGFIX: split_key was previously ignored, so splits on different
        # keys with identical routes compared equal.
        return (
            isinstance(obj, MetaValueSplit)
            and self.split_key == obj.split_key
            and self.control == obj.control
            and self.routes == obj.routes
        )


class RandomSplit(Step):
    """Randomly routes each input to one of several weighted models."""

    def __init__(self, weights: List[ModelWeight], hash_key: Optional[str] = None):
        self.hash_key = hash_key
        self.weights = weights

    def to_json(self):
        # TODO This is wrong
        jsonified_model_weights = [w.to_json() for w in self.weights]
        return {
            "RandomSplit": {
                "hash_key": self.hash_key,
                "weights": jsonified_model_weights,
            }
        }

    @staticmethod
    def from_json(json_dict: Dict):
        weights = [ModelWeight.from_json(w) for w in json_dict["weights"]]
        return RandomSplit(weights, hash_key=json_dict.get("hash_key"))

    def is_inference_step(self):
        return True

    def __eq__(self, obj):
        return (
            isinstance(obj, RandomSplit)
            and self.weights == obj.weights
            and self.hash_key == obj.hash_key
        )


class PipelineConfig:
    """An immutable snapshot of a pipeline: its name, steps, and alerts."""

    def __init__(
        self,
        pipeline_name: str,
        steps: Iterable[Step],
        alert_configurations: Iterable[notify.AlertConfiguration],
    ):
        self.pipeline_name = pipeline_name
        self.steps = steps
        self.alert_configurations = alert_configurations

    def __eq__(self, other):
        # BUGFIX: previously raised AttributeError when compared against a
        # non-PipelineConfig object; now returns False instead.
        return (
            isinstance(other, PipelineConfig)
            and self.pipeline_name == other.pipeline_name
            and self.steps == other.steps
        )

    def __repr__(self):
        return f"PipelineConfig({repr(self.pipeline_name)}, {repr(self.steps)})"

    @classmethod
    def from_json(cls, json):
        """Deserialize a PipelineConfig from its JSON dict."""
        return cls(json["id"], [Step.from_json(v) for v in json["steps"]], [])

    def to_json(self):
        """Return a JSON-serializable dict of this configuration."""
        return {
            "id": self.pipeline_name,
            "steps": [s.to_json() for s in self.steps],
        }

    def to_yaml(self):
        """Return this configuration rendered as a YAML string."""
        return yaml.dump(
            {
                "id": self.pipeline_name,
                "steps": [s.to_json() for s in self.steps],
            }
        )


class PipelineConfigBuilder:
    """Mutable builder that assembles steps into a :class:`PipelineConfig`.

    ``self.steps`` and ``self.model_configs`` are parallel lists: entry *i*
    of ``model_configs`` holds the configs (or None) for step *i*.
    """

    # Pipeline names must be DNS-label-like: lowercase alphanumerics and
    # hyphens, not starting or ending with a hyphen.
    _NAME_REGEX = r"[a-z0-9]([-a-z0-9]*[a-z0-9])?"

    def __init__(
        self,
        client: Optional["Client"],
        pipeline_name: str,
        standalone=False,
    ):
        if not re.fullmatch(self._NAME_REGEX, pipeline_name):
            raise RuntimeError(
                f"Pipeline name `{pipeline_name}` must conform to {self._NAME_REGEX}"
            )

        self.client = client
        self.pipeline_name = pipeline_name
        self.steps: List[Step] = []
        self.alert_configurations: List[notify.AlertConfiguration] = []
        self.model_configs: List[Optional[ModelConfigsForStep]] = []
        self.visibility = None
        self._standalone = standalone

    @staticmethod
    def as_standalone(pipeline_name: str):
        """Create a builder with no client, for config generation only."""
        return PipelineConfigBuilder(None, pipeline_name, standalone=True)

    def config(self) -> "PipelineConfig":
        """Snapshot the current builder state into a PipelineConfig."""
        return PipelineConfig(self.pipeline_name, self.steps, self.alert_configurations)

    def upload(self) -> "Pipeline":
        """Upload this configuration as a new pipeline variant.

        :raises RuntimeError: if the builder is standalone or has no client.
        """
        if not self._standalone and self.client:
            return self.client._upload_pipeline_variant(
                self.pipeline_name, self.config()
            )
        raise RuntimeError(
            "Pipeline config was created for standalone and may only be used to generate configuration"
        )

    def _add_step(
        self, step: Step, configs: Optional[ModelConfigsForStep] = None
    ) -> "PipelineConfigBuilder":
        # Keeps steps and model_configs in lockstep (one entry each).
        self.model_configs.append(configs)
        self.steps.append(step)
        return self

    def _check_replacement_bounds(self, index: int):
        # BUGFIX: was `index > len(self.steps)`, which let index == len slip
        # through to a raw list IndexError with a less helpful message.
        if index >= len(self.steps):
            raise IndexError(f"Step index {index} out of bounds")

    def _model_configs(self) -> List[ModelConfig]:
        """returns a list of all model configs"""
        configs = []
        for maybe_config in self.model_configs:
            if maybe_config:
                configs.extend(maybe_config.model_configs)

        return configs

    def _insert_step(
        self, index: int, step: Step, configs: Optional[ModelConfigsForStep] = None
    ) -> "PipelineConfigBuilder":
        self.model_configs.insert(index, configs)
        self.steps.insert(index, step)
        return self

    def remove_step(self, index: int):
        """Remove a step at a given index"""
        self._check_replacement_bounds(index)
        del self.model_configs[index]
        del self.steps[index]

    def _replace_step_at_index(
        self, index: int, step: Step, configs: Optional[ModelConfigsForStep] = None
    ) -> "PipelineConfigBuilder":
        self._check_replacement_bounds(index)
        self.model_configs[index] = configs
        self.steps[index] = step
        return self

    def add_model_step(self, model: Model) -> "PipelineConfigBuilder":
        """Perform inference with a single model."""
        return self._add_step(
            ModelInference([ModelForStep.from_model(model)]),
            ModelConfigsForStep([model.config()]),
        )

    def replace_with_model_step(
        self, index: int, model: Model
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the given index with a model step"""
        config = ModelConfigsForStep([model.config()])
        step = ModelInference([ModelForStep.from_model(model)])
        return self._replace_step_at_index(index, step, config)

    def add_multi_model_step(self, models: Iterable[Model]) -> "PipelineConfigBuilder":
        """Perform inference on the same input data for any number of models."""
        model_configs = [m.config() for m in models]
        models_for_step = [ModelForStep.from_model(m) for m in models]
        return self._add_step(
            ModelInference(models_for_step), ModelConfigsForStep(model_configs)
        )

    def replace_with_multi_model_step(
        self, index: int, models: Iterable[Model]
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the index with a multi model step"""
        model_configs = [m.config() for m in models]
        models_for_step = [ModelForStep.from_model(m) for m in models]
        config = ModelConfigsForStep(model_configs)
        step = ModelInference(models_for_step)
        return self._replace_step_at_index(index, step, config)

    def _audit_from_slice_str(self, audit_slice: str) -> "AuditResults":
        """Parse a python-like slice string (``start:end``) into AuditResults."""
        slice_split = audit_slice.split(":")
        start = 0
        end = None
        if slice_split[0]:
            start = int(slice_split[0])
        if len(slice_split) > 1 and slice_split[1]:
            end = int(slice_split[1])
        return AuditResults(start, end)

    def add_audit(self, audit_slice: str) -> "PipelineConfigBuilder":
        """Run audit logging on a specified `slice` of model outputs.

        The slice must be in python-like format. `start:`, `start:end`, and
        `:end` are supported.
        """
        # BUGFIX: previously also did `self.model_configs.append(None)` here,
        # which combined with _add_step's own append desynchronized the
        # parallel steps/model_configs lists.
        return self._add_step(self._audit_from_slice_str(audit_slice))

    def replace_with_audit(
        self, index: int, audit_slice: str
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the index with an audit step"""
        return self._replace_step_at_index(
            index, self._audit_from_slice_str(audit_slice)
        )

    def add_select(self, index: int) -> "PipelineConfigBuilder":
        """Select only the model output with the given `index` from an array of
        outputs.
        """
        return self._add_step(Nth(index))

    def add_multi_out(self):
        """Pass through all model outputs (used after shadow deployments)."""
        return self._add_step(MultiOut())

    def replace_with_select(
        self, step_index: int, select_index: int
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the index with a select step"""
        return self._replace_step_at_index(step_index, Nth(select_index))

    def add_key_split(
        self, default: Model, meta_key: str, options: Dict[str, Model]
    ) -> "PipelineConfigBuilder":
        """Split traffic based on the value at a given `meta_key` in the input data,
        routing to the appropriate model.

        If the resulting value is a key in `options`, the corresponding model is used.
        Otherwise, the `default` model is used for inference.
        """

        control = ModelForStep.from_model(default)
        model_configs = [m.config() for m in options.values()]
        routes = {k: ModelForStep.from_model(m) for k, m in options.items()}
        configs = [default.config(), *model_configs]
        return self._add_step(
            MetaValueSplit(meta_key, control, routes), ModelConfigsForStep(configs)
        )

    def replace_with_key_split(
        self, index: int, default: Model, meta_key: str, options: Dict[str, Model]
    ) -> "PipelineConfigBuilder":
        """Replace the step at the index with a key split step"""
        control = ModelForStep.from_model(default)
        model_configs = [m.config() for m in options.values()]
        routes = {k: ModelForStep.from_model(m) for k, m in options.items()}
        configs = [default.config(), *model_configs]
        return self._replace_step_at_index(
            index,
            MetaValueSplit(meta_key, control, routes),
            ModelConfigsForStep(configs),
        )

    def add_random_split(
        self,
        weighted: Iterable[Tuple[float, Model]],
        hash_key: Optional[str] = None,
    ) -> "PipelineConfigBuilder":
        """Routes inputs to a single model, randomly chosen from the list of
        `weighted` options.

        Each model receives inputs that are approximately proportional to the
        weight it is assigned. For example, with two models having weights 1
        and 1, each will receive roughly equal amounts of inference inputs. If
        the weights were changed to 1 and 2, the models would receive roughly
        33% and 66% respectively instead.

        When choosing the model to use, a random number between 0.0 and 1.0 is
        generated. The weighted inputs are mapped to that range, and the random
        input is then used to select the model to use. For example, for the
        two-models equal-weight case, a random key of 0.4 would route to the
        first model. 0.6 would route to the second.

        To support consistent assignment to a model, a `hash_key` can be
        specified. This must be between 0.0 and 1.0. The value at this key, when
        present in the input data, will be used instead of a random number for
        model selection.
        """
        # Materialize once: `weighted` may be a one-shot iterable and is
        # consumed twice below.
        weighted = list(weighted)
        weights = [ModelWeight.from_tuple(t) for t in weighted]
        return self._add_step(
            RandomSplit(weights, hash_key),
            ModelConfigsForStep([m.config() for (_, m) in weighted]),
        )

    def replace_with_random_split(
        self,
        index: int,
        weighted: Iterable[Tuple[float, Model]],
        hash_key: Optional[str] = None,
    ) -> "PipelineConfigBuilder":
        """Replace the step at the index with a random split step"""
        weighted = list(weighted)
        weights = [ModelWeight.from_tuple(t) for t in weighted]
        return self._replace_step_at_index(
            index,
            RandomSplit(weights, hash_key),
            ModelConfigsForStep([m.config() for (_, m) in weighted]),
        )

    def add_shadow_deploy(
        self, champion: Model, challengers: Iterable[Model]
    ) -> "PipelineConfigBuilder":
        """Create a "shadow deployment" experiment pipeline. The `champion`
        model and all `challengers` are run for each input. The result data for
        all models is logged, but the output of the `champion` is the only
        result returned.

        This is particularly useful for "burn-in" testing a new model with real
        world data without displacing the currently proven model.

        This is currently implemented as three steps: A multi model step, an
        audit step, and a multi-out step. To remove or replace this step, you
        need to remove or replace all three. You can remove steps using
        pipeline.remove_step
        """
        # TODO This should be a single step and the backend can implement it as 3 steps
        return (
            self.add_multi_model_step([champion, *challengers])
            .add_audit("1:")
            .add_multi_out()
        )

    def replace_with_shadow_deploy(
        self, index: int, champion: Model, challengers: Iterable[Model]
    ) -> "PipelineConfigBuilder":
        """Replace the step at the index with a shadow deployment (three steps)."""
        return (
            self.replace_with_multi_model_step(index, [champion, *challengers])
            ._insert_step(index + 1, self._audit_from_slice_str("1:"))
            ._insert_step(index + 2, MultiOut())
        )

    def _add_instrument(self, step: Step) -> "PipelineConfigBuilder":
        """Insert `step` immediately after the last inference step.

        :raises RuntimeError: if the pipeline has no inference step yet.
        """
        last_inference = next(
            (
                pair
                for pair in reversed(list(enumerate(self.steps)))
                if pair[1].is_inference_step()
            ),
            None,
        )
        # BUGFIX: was a bare `assert`, which is stripped under `python -O`.
        if last_inference is None:
            raise RuntimeError(
                "Validations and alerts must come after a step that runs inference!"
            )
        ix, _ = last_inference
        self._insert_step(ix + 1, step)
        return self

    def add_validation(
        self, name: str, validation: Expression
    ) -> "PipelineConfigBuilder":
        """Add a `validation` with the given `name`. All validations are run on
        all outputs, and all failures are logged.
        """
        return self._add_instrument(Check.from_name_and_validation(name, validation))

    def _create_validation(self, name: str, validation: Expression) -> "Check":
        """Build a Check, verifying every referenced model is configured."""
        configured_models = set(m.model().name() for m in self._model_configs())
        for model_class in validation.model_names():
            # BUGFIX: was a bare `assert` (stripped under -O).
            if model_class not in configured_models:
                raise RuntimeError(
                    f"Model `{model_class}` is not configured in this pipeline"
                )
        return Check.from_name_and_validation(name, validation)

    def replace_with_validation(
        self, index: int, name: str, validation: Expression
    ) -> "PipelineConfigBuilder":
        """Replace the step at the given index with a validation step"""
        # TODO It sort of seems like since this is a replace operation, that this check is reasonable.
        # We may want to automatically check this like we do with add at some point
        if index - 1 < 0 or not self.steps[index - 1].is_inference_step():
            raise RuntimeError(
                "Validations must come after a step that runs inference!"
            )
        return self._replace_step_at_index(
            index, self._create_validation(name, validation)
        )

    def add_alert(
        self, name: str, alert: Alert, notifications: List[notify.Notification]
    ) -> "PipelineConfigBuilder":
        """Add a named alert check plus its notification configuration."""
        left_name = f"{name}:left"
        step = self._create_check_for_alert(left_name, alert)
        configured_models = set(m.model().name() for m in self._model_configs())
        for model_class in alert.left.expression().model_names():
            # BUGFIX: was a bare `assert` (stripped under -O).
            if model_class not in configured_models:
                raise RuntimeError(
                    f"Model `{model_class}` is not configured in this pipeline"
                )

        config = notify.AlertConfiguration(name, alert.promql(left_name), notifications)
        self.alert_configurations.append(config)
        return self._add_instrument(step)

    def _create_check_for_alert(self, left_name: str, alert: Alert) -> "Check":
        """Build the Check step backing an alert's left-hand expression."""
        expression = alert.left.expression()
        return Check.from_name_and_validation(left_name, expression, gauges=[left_name])

    def replace_with_alert(
        self, index, name: str, alert: Alert, notifications: List[notify.Notification]
    ) -> "PipelineConfigBuilder":
        """Replace the step at the given index with the specified alert"""
        if index - 1 < 0 or not self.steps[index - 1].is_inference_step():
            raise RuntimeError(
                "Validations must come after a step that runs inference!"
            )
        left_name = f"{name}:left"
        configured_models = set(m.model().name() for m in self._model_configs())
        for model_class in alert.left.expression().model_names():
            # BUGFIX: was a bare `assert` (stripped under -O).
            if model_class not in configured_models:
                raise RuntimeError(
                    f"Model `{model_class}` is not configured in this pipeline"
                )

        config = notify.AlertConfiguration(name, alert.promql(left_name), notifications)
        self.alert_configurations.append(config)
        step = self._create_check_for_alert(left_name, alert)
        return self._replace_step_at_index(index, step)

    def clear(self) -> "PipelineConfigBuilder":
        """
        Remove all steps from the pipeline. This might be desireable if replacing models, for example.
        """
        self.steps = []
        self.model_configs = []
        return self
21class ValidDataType(str, Enum): 22 f32 = "f32" 23 f64 = "f64" 24 i8 = "i8" 25 u8 = "u8" 26 i16 = "i16" 27 u16 = "u16" 28 i32 = "i32" 29 u32 = "u32" 30 i64 = "i64" 31 u64 = "u64"
An enumeration.
Inherited Members
- enum.Enum
- name
- value
- builtins.str
- encode
- replace
- split
- rsplit
- join
- capitalize
- casefold
- title
- center
- count
- expandtabs
- find
- partition
- index
- ljust
- lower
- lstrip
- rfind
- rindex
- rjust
- rstrip
- rpartition
- splitlines
- strip
- swapcase
- translate
- upper
- startswith
- endswith
- removeprefix
- removesuffix
- isascii
- islower
- isupper
- istitle
- isspace
- isdecimal
- isdigit
- isnumeric
- isalpha
- isalnum
- isidentifier
- isprintable
- zfill
- format
- format_map
- maketrans
34class ModelConfigsForStep: 35 def __init__(self, model_configs: List[ModelConfig]): 36 self.model_configs = model_configs
39class ModelForStep: 40 def __init__(self, name, version, sha): 41 self.name = name 42 self.version = version 43 self.sha = sha 44 45 def to_json(self): 46 return {"name": self.name, "version": self.version, "sha": self.sha} 47 48 @classmethod 49 def from_json(cls, json_dict: Dict[str, str]): 50 return cls(json_dict["name"], json_dict["version"], json_dict["sha"]) 51 52 @classmethod 53 def from_model(cls, model: Model): 54 return cls( 55 model.name(), 56 model.version(), 57 model.sha(), 58 ) 59 60 def __eq__(self, obj): 61 return ( 62 isinstance(obj, ModelForStep) 63 and self.name == obj.name 64 and self.version == obj.version 65 and self.sha == obj.sha 66 ) 67 68 def __repr__(self): 69 return str(self.to_json())
72class ModelWeight: 73 def __init__(self, weight: float, model: ModelForStep): 74 self.weight = weight 75 self.model = model 76 77 def to_json(self): 78 return {"model": self.model.to_json(), "weight": self.weight} 79 80 @classmethod 81 def from_json(cls, json_dict: Dict[str, Any]): 82 return cls(json_dict["weight"], ModelForStep.from_json(json_dict["model"])) 83 84 @classmethod 85 def from_tuple(cls, tup: Tuple[float, Model]): 86 (weight, model) = tup 87 return ModelWeight(weight, ModelForStep.from_model(model)) 88 89 def __eq__(self, obj): 90 return ( 91 isinstance(obj, ModelWeight) 92 and self.weight == obj.weight 93 and self.model == obj.model 94 ) 95 96 def __repr__(self): 97 return str(self.to_json())
100class RowToModel: 101 def __init__(self, row_index: int, model: ModelForStep): 102 self.row_index = row_index 103 self.model = model 104 105 def to_json(self): 106 return {"row_index": self.row_index, "model": self.model.to_json()} 107 108 @classmethod 109 def from_json(cls, json_dict: Dict[str, Any]): 110 return cls(json_dict["row_index"], ModelForStep.from_json(json_dict["model"])) 111 112 def __eq__(self, obj): 113 return ( 114 isinstance(obj, RowToModel) 115 and self.row_index == obj.row_index 116 and self.model == obj.model 117 ) 118 119 def __repr__(self): 120 return str(self.to_json())
123class Step: 124 def to_json(self): 125 pass 126 127 def is_inference_step(self): 128 return False 129 130 def __repr__(self): 131 return repr(self.to_json()) 132 133 def _repr_html_(self): 134 return repr(self.to_json()) 135 136 @staticmethod 137 def from_json(json_dict: Dict): 138 step_name = next(iter(json_dict)) 139 # TODO update this to use a switch statement in 3.10 140 from_json_dispatch = { 141 "Average": Average, 142 "AuditResults": AuditResults, 143 "Check": Check, 144 "ColumnsSelect": ColumnsSelect, 145 "ColumnsToRows": ColumnsToRows, 146 "InputDataToType": InputDataToType, 147 "ModelInference": ModelInference, 148 "RowsToModels": RowsToModels, 149 "Nth": Nth, 150 "MetaValueSplit": MetaValueSplit, 151 "RandomSplit": RandomSplit, 152 "MultiOut": MultiOut, 153 } 154 if step_name not in from_json_dispatch.keys(): 155 raise RuntimeError(f"An invalid step definition was given {step_name}") 156 return from_json_dispatch[step_name].from_json(json_dict[step_name]) # type: ignore
136 @staticmethod 137 def from_json(json_dict: Dict): 138 step_name = next(iter(json_dict)) 139 # TODO update this to use a switch statement in 3.10 140 from_json_dispatch = { 141 "Average": Average, 142 "AuditResults": AuditResults, 143 "Check": Check, 144 "ColumnsSelect": ColumnsSelect, 145 "ColumnsToRows": ColumnsToRows, 146 "InputDataToType": InputDataToType, 147 "ModelInference": ModelInference, 148 "RowsToModels": RowsToModels, 149 "Nth": Nth, 150 "MetaValueSplit": MetaValueSplit, 151 "RandomSplit": RandomSplit, 152 "MultiOut": MultiOut, 153 } 154 if step_name not in from_json_dispatch.keys(): 155 raise RuntimeError(f"An invalid step definition was given {step_name}") 156 return from_json_dispatch[step_name].from_json(json_dict[step_name]) # type: ignore
159class Average(Step): 160 def to_json(self): 161 return {"Average": {}} 162 163 @staticmethod 164 def from_json(json_dict: Dict): 165 return Average() 166 167 def __eq__(self, obj): 168 return isinstance(obj, Average)
Inherited Members
171class AuditResults(Step): 172 def __init__(self, start: int, end: Optional[int] = None): 173 self.start = start 174 self.end = end 175 176 def to_json(self): 177 return {"AuditResults": {"from": self.start, "to": self.end}} 178 179 @staticmethod 180 def from_json(json_dict: Dict): 181 return AuditResults(start=json_dict["from"], end=json_dict["to"]) 182 183 def __eq__(self, obj): 184 return ( 185 isinstance(obj, AuditResults) 186 and self.start == obj.start 187 and self.end == obj.end 188 )
Inherited Members
191class Check(Step): 192 def __init__(self, tree: str): 193 self.tree = tree 194 195 def to_json(self): 196 return {"Check": {"tree": [self.tree]}} 197 198 @classmethod 199 def from_name_and_validation( 200 cls, name: str, validation: Expression, gauges: List[str] = [] 201 ): 202 return cls(str(json.dumps(instrument({name: validation}, gauges, [name])))) 203 204 @staticmethod 205 def from_json(json_dict: Dict): 206 tree = json_dict["tree"] 207 return Check(tree[0]) 208 209 def __eq__(self, obj): 210 return isinstance(obj, Check) and self.tree == obj.tree
Inherited Members
213class ColumnsSelect(Step): 214 def __init__(self, columns: List[int]): 215 self.columns = columns 216 217 def to_json(self): 218 return {"ColumnsSelect": {"columns": self.columns}} 219 220 @staticmethod 221 def from_json(json_dict: Dict): 222 return ColumnsSelect(json_dict["columns"]) 223 224 def __eq__(self, obj): 225 return isinstance(obj, ColumnsSelect) and self.columns == obj.columns
Inherited Members
228class ColumnsToRows(Step): 229 def to_json(self): 230 return {"ColumnsToRows": {}} 231 232 @staticmethod 233 def from_json(json_dict: Dict): 234 return ColumnsToRows() 235 236 def __eq__(self, obj): 237 return isinstance(obj, ColumnsToRows)
Inherited Members
240class InputDataToType(Step): 241 def __init__(self, data_type: ValidDataType): 242 self.data_type = data_type 243 244 def to_json(self): 245 return {"InputDataToType": {"data_type": f"{self.data_type}"}} 246 247 @staticmethod 248 def from_json(json_dict: Dict): 249 return InputDataToType(ValidDataType[json_dict["data_type"]]) 250 251 def __eq__(self, obj): 252 return isinstance(obj, InputDataToType) and self.data_type == obj.data_type
Inherited Members
255class ModelInference(Step): 256 def __init__(self, models: List[ModelForStep]): 257 self.models = models 258 259 def to_json(self): 260 jsonified_models = list(map(lambda m: m.to_json(), self.models)) 261 return {"ModelInference": {"models": jsonified_models}} 262 263 def _repr_html_(self): 264 return ",".join( 265 [ 266 f"<tr><th>ModelInference</th><td>{m.name}</td><td>{m.version}</td></tr>" 267 for m in self.models 268 ] 269 ) 270 271 @staticmethod 272 def from_json(json_dict: Dict): 273 return ModelInference(list(map(ModelForStep.from_json, json_dict["models"]))) 274 275 def is_inference_step(self): 276 return True 277 278 def __eq__(self, obj): 279 return isinstance(obj, ModelInference) and self.models == obj.models
class RowsToModels(Step):
    """Pipeline step that dispatches individual rows to specific models."""

    def __init__(self, rows_to_models: List[RowToModel]):
        self.rows_to_models = rows_to_models

    def to_json(self):
        mappings = [m.to_json() for m in self.rows_to_models]
        return {"RowsToModels": {"rows_to_models": mappings}}

    @staticmethod
    def from_json(json_dict: Dict):
        mappings = [RowToModel.from_json(m) for m in json_dict["rows_to_models"]]
        return RowsToModels(mappings)

    def is_inference_step(self):
        # Marks this step as a valid anchor for validations/alerts.
        return True

    def __eq__(self, obj):
        if not isinstance(obj, RowsToModels):
            return False
        return self.rows_to_models == obj.rows_to_models
class Nth(Step):
    """Pipeline step that selects the output at a single position."""

    def __init__(self, index: int):
        self.index = index

    def to_json(self):
        return {"Nth": {"index": self.index}}

    @staticmethod
    def from_json(json_dict: Dict):
        return Nth(json_dict["index"])

    def __eq__(self, obj):
        if not isinstance(obj, Nth):
            return False
        return self.index == obj.index
Inherited Members
class MultiOut(Step):
    """Pipeline step that passes through multiple outputs; carries no state."""

    def to_json(self):
        # No configuration beyond the step tag itself.
        return {"MultiOut": {}}

    @staticmethod
    def from_json(json_dict: Dict):
        # Payload is empty, so the argument is intentionally unused.
        return MultiOut()

    def __eq__(self, obj):
        # All instances are interchangeable; equality is purely by type.
        return isinstance(obj, MultiOut)
Inherited Members
class MetaValueSplit(Step):
    """Pipeline step that routes each input to a model chosen by the value
    found at `split_key` in the input metadata.

    Inputs whose value matches a key in `routes` go to that model; all
    other inputs go to `control`.
    """

    def __init__(
        self, split_key: str, control: ModelForStep, routes: Dict[str, ModelForStep]
    ):
        self.split_key = split_key
        self.control = control
        self.routes = routes

    def to_json(self):
        return {
            "MetaValueSplit": {
                "split_key": self.split_key,
                "control": self.control.to_json(),
                "routes": {k: v.to_json() for k, v in self.routes.items()},
            }
        }

    @staticmethod
    def from_json(json_dict: Dict):
        routes = {
            k: ModelForStep.from_json(v) for k, v in json_dict["routes"].items()
        }
        return MetaValueSplit(
            json_dict["split_key"], ModelForStep.from_json(json_dict["control"]), routes
        )

    def is_inference_step(self):
        return True

    def __eq__(self, obj):
        # Bug fix: split_key is part of this step's identity (it is set in
        # __init__ and serialized in to_json) but was previously ignored by
        # equality, so steps splitting on different keys compared equal.
        return (
            isinstance(obj, MetaValueSplit)
            and self.split_key == obj.split_key
            and self.control == obj.control
            and self.routes == obj.routes
        )
@staticmethod
def from_json(json_dict: Dict):
    """Rebuild a MetaValueSplit from its serialized dictionary form."""
    routes = {
        key: ModelForStep.from_json(value)
        for key, value in json_dict["routes"].items()
    }
    return MetaValueSplit(
        json_dict["split_key"],
        ModelForStep.from_json(json_dict["control"]),
        routes,
    )
class RandomSplit(Step):
    """Pipeline step that randomly routes each input to one of several
    weighted models, optionally keyed by `hash_key` for stable assignment."""

    def __init__(self, weights: List[ModelWeight], hash_key: Optional[str] = None):
        self.hash_key = hash_key
        self.weights = weights

    def to_json(self):
        # TODO This is wrong
        return {
            "RandomSplit": {
                "hash_key": self.hash_key,
                "weights": [w.to_json() for w in self.weights],
            }
        }

    @staticmethod
    def from_json(json_dict: Dict):
        parsed = [ModelWeight.from_json(w) for w in json_dict["weights"]]
        return RandomSplit(parsed, hash_key=json_dict.get("hash_key"))

    def is_inference_step(self):
        # Marks this step as a valid anchor for validations/alerts.
        return True

    def __eq__(self, obj):
        if not isinstance(obj, RandomSplit):
            return False
        return self.weights == obj.weights and self.hash_key == obj.hash_key
class PipelineConfig:
    """Description of a pipeline: its name, its ordered steps, and any alert
    configurations attached to it."""

    def __init__(
        self,
        pipeline_name: str,
        steps: Iterable[Step],
        alert_configurations: Iterable[notify.AlertConfiguration],
    ):
        self.pipeline_name = pipeline_name
        self.steps = steps
        self.alert_configurations = alert_configurations

    def __eq__(self, other):
        # Bug fix: guard the type before attribute access, so comparing
        # against an unrelated object returns NotImplemented (and thus
        # False) instead of raising AttributeError. Alert configurations
        # are left out of equality, matching the previous behavior.
        if not isinstance(other, PipelineConfig):
            return NotImplemented
        return self.pipeline_name == other.pipeline_name and self.steps == other.steps

    def __repr__(self):
        return f"PipelineConfig({repr(self.pipeline_name)}, {repr(self.steps)})"

    @classmethod
    def from_json(Klass, json):
        """Rebuild a config from its JSON form.

        Alert configurations are not present in the JSON form and come
        back empty.
        """
        return Klass(json["id"], [Step.from_json(v) for v in json["steps"]], [])

    def to_json(self):
        return {
            "id": self.pipeline_name,
            "steps": [s.to_json() for s in self.steps],
        }

    def to_yaml(self):
        # Reuse to_json so the YAML and JSON forms cannot drift apart.
        return yaml.dump(self.to_json())
class PipelineConfigBuilder:
    """Mutable builder that assembles pipeline steps and produces a
    PipelineConfig.

    `steps` and `model_configs` are kept index-aligned: `model_configs[i]`
    holds the model configurations used by `steps[i]`, or None when the
    step involves no models.
    """

    def __init__(
        self,
        client: Optional["Client"],
        pipeline_name: str,
        standalone=False,
    ):
        import re

        # Pipeline names are used as external identifiers, so restrict them
        # to lowercase alphanumerics with interior dashes.
        regex = r"[a-z0-9]([-a-z0-9]*[a-z0-9])?"
        comp = re.compile(regex)
        if not comp.fullmatch(pipeline_name):
            raise RuntimeError(
                f"Pipeline name `{pipeline_name}` must conform to {regex}"
            )

        self.client = client
        self.pipeline_name = pipeline_name
        self.steps: List[Step] = []
        self.alert_configurations: List[notify.AlertConfiguration] = []
        self.model_configs: List[Optional[ModelConfigsForStep]] = []
        self.visibility = None
        self._standalone = standalone

    @staticmethod
    def as_standalone(pipeline_name: str):
        """Create a builder with no client; it can only generate configuration."""
        return PipelineConfigBuilder(None, pipeline_name, standalone=True)

    def config(self) -> "PipelineConfig":
        """Freeze the current builder state into a PipelineConfig."""
        return PipelineConfig(self.pipeline_name, self.steps, self.alert_configurations)

    def upload(self) -> "Pipeline":
        """Upload the current configuration as a new pipeline variant.

        :raises RuntimeError: if this builder is standalone or has no client.
        """
        if not self._standalone and self.client:
            return self.client._upload_pipeline_variant(
                self.pipeline_name, self.config()
            )
        raise RuntimeError(
            "Pipeline config was created for standalone and may only be used to generate configuration"
        )

    def _add_step(
        self, step: Step, configs: Optional[ModelConfigsForStep] = None
    ) -> "PipelineConfigBuilder":
        """Append a step and its (possibly absent) model configs in lockstep."""
        self.model_configs.append(configs)
        self.steps.append(step)
        return self

    def _check_replacement_bounds(self, index: int):
        """Raise IndexError unless `index` addresses an existing step."""
        # Bug fix: replacement/removal targets an existing step, so
        # index == len(self.steps) is also out of bounds; the previous `>`
        # check let it through only to fail later with a bare IndexError.
        if index >= len(self.steps):
            raise IndexError(f"Step index {index} out of bounds")

    def _model_configs(self) -> List[ModelConfig]:
        """returns a list of all model configs"""
        configs = []
        for maybe_config in self.model_configs:
            if maybe_config:
                configs.extend(maybe_config.model_configs)

        return configs

    def _insert_step(
        self, index: int, step: Step, configs: Optional[ModelConfigsForStep] = None
    ) -> "PipelineConfigBuilder":
        """Insert a step (and aligned configs) at the given position."""
        self.model_configs.insert(index, configs)
        self.steps.insert(index, step)
        return self

    def remove_step(self, index: int):
        """Remove a step at a given index"""
        self._check_replacement_bounds(index)
        del self.model_configs[index]
        del self.steps[index]

    def _replace_step_at_index(
        self, index: int, step: Step, configs: Optional[ModelConfigsForStep] = None
    ) -> "PipelineConfigBuilder":
        """Replace the step (and aligned configs) at the given position."""
        self._check_replacement_bounds(index)
        self.model_configs[index] = configs
        self.steps[index] = step
        return self

    def add_model_step(self, model: Model) -> "PipelineConfigBuilder":
        """Perform inference with a single model."""
        return self._add_step(
            ModelInference([ModelForStep.from_model(model)]),
            ModelConfigsForStep([model.config()]),
        )

    def replace_with_model_step(
        self, index: int, model: Model
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the given index with a model step"""
        config = ModelConfigsForStep([model.config()])
        step = ModelInference([ModelForStep.from_model(model)])
        return self._replace_step_at_index(index, step, config)

    def add_multi_model_step(self, models: Iterable[Model]) -> "PipelineConfigBuilder":
        """Perform inference on the same input data for any number of models."""
        # Bug fix: materialize first — `models` may be a one-shot iterator
        # and is traversed twice below; previously a generator argument
        # would leave the second traversal empty.
        models = list(models)
        model_configs = [m.config() for m in models]
        models_for_step = [ModelForStep.from_model(m) for m in models]
        return self._add_step(
            ModelInference(models_for_step), ModelConfigsForStep(model_configs)
        )

    def replace_with_multi_model_step(
        self, index: int, models: Iterable[Model]
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the index with a multi model step"""
        models = list(models)  # may be a one-shot iterator; traversed twice
        model_configs = [m.config() for m in models]
        models_for_step = [ModelForStep.from_model(m) for m in models]
        config = ModelConfigsForStep(model_configs)
        step = ModelInference(models_for_step)
        return self._replace_step_at_index(index, step, config)

    def _audit_from_slice_str(self, audit_slice: str) -> "AuditResults":
        """Parse a python-like slice string (`start:`, `start:end`, `:end`)."""
        slice_split = audit_slice.split(":")
        start = 0
        end = None
        if slice_split[0]:
            start = int(slice_split[0])
        if len(slice_split) > 1 and slice_split[1]:
            end = int(slice_split[1])
        return AuditResults(start, end)

    def add_audit(self, audit_slice: str) -> "PipelineConfigBuilder":
        """Run audit logging on a specified `slice` of model outputs.

        The slice must be in python-like format. `start:`, `start:end`, and
        `:end` are supported.
        """
        # Bug fix: _add_step already appends a (None) entry to
        # model_configs; the previous extra `self.model_configs.append(None)`
        # left model_configs one entry longer than steps, breaking the
        # index alignment that remove_step and _replace_step_at_index rely on.
        return self._add_step(self._audit_from_slice_str(audit_slice))

    def replace_with_audit(
        self, index: int, audit_slice: str
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the index with an audit step"""
        return self._replace_step_at_index(
            index, self._audit_from_slice_str(audit_slice)
        )

    def add_select(self, index: int) -> "PipelineConfigBuilder":
        """Select only the model output with the given `index` from an array of
        outputs.
        """
        return self._add_step(Nth(index))

    def add_multi_out(self):
        """Pass through multiple model outputs."""
        return self._add_step(MultiOut())

    def replace_with_select(
        self, step_index: int, select_index: int
    ) -> "PipelineConfigBuilder":
        """Replaces the step at the index with a select step"""
        return self._replace_step_at_index(step_index, Nth(select_index))

    def add_key_split(
        self, default: Model, meta_key: str, options: Dict[str, Model]
    ) -> "PipelineConfigBuilder":
        """Split traffic based on the value at a given `meta_key` in the input data,
        routing to the appropriate model.

        If the resulting value is a key in `options`, the corresponding model is used.
        Otherwise, the `default` model is used for inference.
        """

        control = ModelForStep.from_model(default)
        model_configs = [m.config() for m in options.values()]
        routes = dict(zip(options, map(ModelForStep.from_model, options.values())))
        configs = [default.config(), *model_configs]
        return self._add_step(
            MetaValueSplit(meta_key, control, routes), ModelConfigsForStep(configs)
        )

    def replace_with_key_split(
        self, index: int, default: Model, meta_key: str, options: Dict[str, Model]
    ) -> "PipelineConfigBuilder":
        """Replace the step at the index with a key split step"""
        control = ModelForStep.from_model(default)
        model_configs = [m.config() for m in options.values()]
        routes = dict(zip(options, map(ModelForStep.from_model, options.values())))
        configs = [default.config(), *model_configs]
        return self._replace_step_at_index(
            index,
            MetaValueSplit(meta_key, control, routes),
            ModelConfigsForStep(configs),
        )

    def add_random_split(
        self,
        weighted: Iterable[Tuple[float, Model]],
        hash_key: Optional[str] = None,
    ) -> "PipelineConfigBuilder":
        """Routes inputs to a single model, randomly chosen from the list of
        `weighted` options.

        Each model receives inputs that are approximately proportional to the
        weight it is assigned. For example, with two models having weights 1
        and 1, each will receive roughly equal amounts of inference inputs. If
        the weights were changed to 1 and 2, the models would receive roughly
        33% and 66% respectively instead.

        When choosing the model to use, a random number between 0.0 and 1.0 is
        generated. The weighted inputs are mapped to that range, and the random
        input is then used to select the model to use. For example, for the
        two-models equal-weight case, a random key of 0.4 would route to the
        first model. 0.6 would route to the second.

        To support consistent assignment to a model, a `hash_key` can be
        specified. This must be between 0.0 and 1.0. The value at this key, when
        present in the input data, will be used instead of a random number for
        model selection.
        """
        # Bug fix: materialize `weighted` (it may be a one-shot iterator and
        # is traversed twice), and hand the configs to _add_step instead of
        # appending them separately — the old code appended to model_configs
        # twice for a single step, breaking index alignment.
        weighted = list(weighted)
        weights = [ModelWeight.from_tuple(pair) for pair in weighted]
        return self._add_step(
            RandomSplit(weights, hash_key),
            ModelConfigsForStep([m.config() for (_, m) in weighted]),
        )

    def replace_with_random_split(
        self,
        index: int,
        weighted: Iterable[Tuple[float, Model]],
        hash_key: Optional[str] = None,
    ) -> "PipelineConfigBuilder":
        """Replace the step at the index with a random split step"""
        weighted = list(weighted)  # may be a one-shot iterator; traversed twice
        weights = [ModelWeight.from_tuple(pair) for pair in weighted]
        return self._replace_step_at_index(
            index,
            RandomSplit(weights, hash_key),
            ModelConfigsForStep([m.config() for (_, m) in weighted]),
        )

    def add_shadow_deploy(
        self, champion: Model, challengers: Iterable[Model]
    ) -> "PipelineConfigBuilder":
        """Create a "shadow deployment" experiment pipeline. The `champion`
        model and all `challengers` are run for each input. The result data for
        all models is logged, but the output of the `champion` is the only
        result returned.

        This is particularly useful for "burn-in" testing a new model with real
        world data without displacing the currently proven model.

        This is currently implemented as three steps: A multi model step, an
        audit step, and a multi-output step. To remove or replace this step,
        you need to remove or replace all three. You can remove steps using
        pipeline.remove_step
        """
        # TODO This should be a single step and the backend can implement it as 3 steps
        return (
            self.add_multi_model_step([champion, *challengers])
            .add_audit("1:")
            .add_multi_out()
        )

    def replace_with_shadow_deploy(
        self, index: int, champion: Model, challengers: Iterable[Model]
    ) -> "PipelineConfigBuilder":
        """Replace the step at `index` with the shadow-deploy trio of steps."""
        return (
            self.replace_with_multi_model_step(index, [champion, *challengers])
            ._insert_step(index + 1, self._audit_from_slice_str("1:"))
            ._insert_step(index + 2, MultiOut())
        )

    def _add_instrument(self, step: Step) -> "PipelineConfigBuilder":
        """Insert `step` immediately after the last inference step."""
        last_inference = next(
            (
                pair
                for pair in reversed(list(enumerate(self.steps)))
                if pair[1].is_inference_step()
            ),
            None,
        )
        # There must be at least one inference step to attach to.
        assert last_inference is not None
        ix, _ = last_inference
        self._insert_step(ix + 1, step)
        return self

    def add_validation(
        self, name: str, validation: Expression
    ) -> "PipelineConfigBuilder":
        """Add a `validation` with the given `name`. All validations are run on
        all outputs, and all failures are logged.
        """
        return self._add_instrument(Check.from_name_and_validation(name, validation))

    def _create_validation(self, name: str, validation: Expression) -> "Check":
        """Build a Check, asserting every model it references is configured."""
        configured_models = set(m.model().name() for m in self._model_configs())
        for model_class in validation.model_names():
            assert model_class in configured_models
        return Check.from_name_and_validation(name, validation)

    def replace_with_validation(
        self, index: int, name: str, validation: Expression
    ) -> "PipelineConfigBuilder":
        """Replace the step at the given index with a validation step"""
        # TODO It sort of seems like since this is a replace operation, that this check is reasonable.
        # We may want to automatically check this like we do with add at some point
        if index - 1 < 0 or not self.steps[index - 1].is_inference_step():
            raise RuntimeError(
                "Validations must come after a step that runs inference!"
            )
        return self._replace_step_at_index(
            index, self._create_validation(name, validation)
        )

    def add_alert(
        self, name: str, alert: Alert, notifications: List[notify.Notification]
    ) -> "PipelineConfigBuilder":
        """Instrument the pipeline with `alert` and register its notifications."""
        left_name = f"{name}:left"
        step = self._create_check_for_alert(left_name, alert)
        # Every model the alert expression references must already be configured.
        configured_models = set(m.model().name() for m in self._model_configs())
        for model_class in alert.left.expression().model_names():
            assert model_class in configured_models

        config = notify.AlertConfiguration(name, alert.promql(left_name), notifications)
        self.alert_configurations.append(config)
        return self._add_instrument(step)

    def _create_check_for_alert(self, left_name: str, alert: Alert) -> "Check":
        """Build the Check that gauges the alert's left-hand expression."""
        expression = alert.left.expression()
        return Check.from_name_and_validation(left_name, expression, gauges=[left_name])

    def replace_with_alert(
        self, index, name: str, alert: Alert, notifications: List[notify.Notification]
    ) -> "PipelineConfigBuilder":
        """Replace the step at the given index with the specified alert"""
        if index - 1 < 0 or not self.steps[index - 1].is_inference_step():
            # Bug fix: this guard concerns alerts; the old message said
            # "Validations", copied from replace_with_validation.
            raise RuntimeError(
                "Alerts must come after a step that runs inference!"
            )
        left_name = f"{name}:left"
        configured_models = set(m.model().name() for m in self._model_configs())
        for model_class in alert.left.expression().model_names():
            assert model_class in configured_models

        config = notify.AlertConfiguration(name, alert.promql(left_name), notifications)
        self.alert_configurations.append(config)
        step = self._create_check_for_alert(left_name, alert)
        return self._replace_step_at_index(index, step)

    def clear(self) -> "PipelineConfigBuilder":
        """
        Remove all steps from the pipeline. This might be desirable if replacing models, for example.
        """
        # NOTE(review): alert_configurations are left in place here even
        # though their checks are removed — confirm this is intended.
        self.steps = []
        self.model_configs = []
        return self
def __init__(
    self,
    client: Optional["Client"],
    pipeline_name: str,
    standalone=False,
):
    """Validate the pipeline name and initialize empty builder state."""
    import re

    # Names are restricted to lowercase alphanumerics with interior dashes.
    regex = r"[a-z0-9]([-a-z0-9]*[a-z0-9])?"
    if re.compile(regex).fullmatch(pipeline_name) is None:
        raise RuntimeError(
            f"Pipeline name `{pipeline_name}` must conform to {regex}"
        )

    self.client = client
    self.pipeline_name = pipeline_name
    self.steps: List[Step] = []
    self.alert_configurations: List[notify.AlertConfiguration] = []
    self.model_configs: List[Optional[ModelConfigsForStep]] = []
    self.visibility = None
    self._standalone = standalone
def remove_step(self, index: int):
    """Remove a step at a given index"""
    self._check_replacement_bounds(index)
    # steps and model_configs are index-aligned; delete from both.
    del self.steps[index]
    del self.model_configs[index]
Remove a step at a given index
def add_model_step(self, model: Model) -> "PipelineConfigBuilder":
    """Perform inference with a single model."""
    step = ModelInference([ModelForStep.from_model(model)])
    configs = ModelConfigsForStep([model.config()])
    return self._add_step(step, configs)
Perform inference with a single model.
def replace_with_model_step(
    self, index: int, model: Model
) -> "PipelineConfigBuilder":
    """Replaces the step at the given index with a model step"""
    return self._replace_step_at_index(
        index,
        ModelInference([ModelForStep.from_model(model)]),
        ModelConfigsForStep([model.config()]),
    )
Replaces the step at the given index with a model step
def add_multi_model_step(self, models: Iterable[Model]) -> "PipelineConfigBuilder":
    """Perform inference on the same input data for any number of models."""
    # Bug fix: materialize first — `models` may be a one-shot iterator and
    # is traversed twice below; a generator argument previously left the
    # second traversal empty.
    models = list(models)
    model_configs = [m.config() for m in models]
    models_for_step = [ModelForStep.from_model(m) for m in models]
    return self._add_step(
        ModelInference(models_for_step), ModelConfigsForStep(model_configs)
    )
Perform inference on the same input data for any number of models.
def replace_with_multi_model_step(
    self, index: int, models: Iterable[Model]
) -> "PipelineConfigBuilder":
    """Replaces the step at the index with a multi model step"""
    # Bug fix: materialize first — `models` may be a one-shot iterator and
    # is traversed twice below.
    models = list(models)
    model_configs = [m.config() for m in models]
    models_for_step = [ModelForStep.from_model(m) for m in models]
    config = ModelConfigsForStep(model_configs)
    step = ModelInference(models_for_step)
    return self._replace_step_at_index(index, step, config)
Replaces the step at the index with a multi model step
def add_audit(self, audit_slice: str) -> "PipelineConfigBuilder":
    """Run audit logging on a specified `slice` of model outputs.

    The slice must be in python-like format. `start:`, `start:end`, and
    `:end` are supported.
    """
    # Bug fix: _add_step already appends a (None) entry to model_configs;
    # the previous extra `self.model_configs.append(None)` left
    # model_configs one entry longer than steps, breaking index alignment.
    return self._add_step(self._audit_from_slice_str(audit_slice))
Run audit logging on a specified `slice` of model outputs. The slice must be in python-like format: `start:`, `start:end`, and `:end` are supported.
def replace_with_audit(
    self, index: int, audit_slice: str
) -> "PipelineConfigBuilder":
    """Replaces the step at the index with an audit step"""
    audit_step = self._audit_from_slice_str(audit_slice)
    return self._replace_step_at_index(index, audit_step)
Replaces the step at the index with an audit step
def add_select(self, index: int) -> "PipelineConfigBuilder":
    """Select only the model output with the given `index` from an array of
    outputs.
    """
    selection = Nth(index)
    return self._add_step(selection)
Select only the model output with the given `index` from an array of outputs.
def replace_with_select(
    self, step_index: int, select_index: int
) -> "PipelineConfigBuilder":
    """Replaces the step at the index with a select step"""
    selection = Nth(select_index)
    return self._replace_step_at_index(step_index, selection)
Replaces the step at the index with a select step
def add_key_split(
    self, default: Model, meta_key: str, options: Dict[str, Model]
) -> "PipelineConfigBuilder":
    """Split traffic based on the value at a given `meta_key` in the input data,
    routing to the appropriate model.

    If the resulting value is a key in `options`, the corresponding model is used.
    Otherwise, the `default` model is used for inference.
    """

    control = ModelForStep.from_model(default)
    option_configs = [m.config() for m in options.values()]
    routes = {key: ModelForStep.from_model(m) for key, m in options.items()}
    configs = [default.config(), *option_configs]
    return self._add_step(
        MetaValueSplit(meta_key, control, routes), ModelConfigsForStep(configs)
    )
Split traffic based on the value at a given `meta_key` in the input data, routing to the appropriate model. If the resulting value is a key in `options`, the corresponding model is used. Otherwise, the `default` model is used for inference.
def replace_with_key_split(
    self, index: int, default: Model, meta_key: str, options: Dict[str, Model]
) -> "PipelineConfigBuilder":
    """Replace the step at the index with a key split step"""
    control = ModelForStep.from_model(default)
    option_configs = [m.config() for m in options.values()]
    routes = {key: ModelForStep.from_model(m) for key, m in options.items()}
    configs = [default.config(), *option_configs]
    return self._replace_step_at_index(
        index,
        MetaValueSplit(meta_key, control, routes),
        ModelConfigsForStep(configs),
    )
Replace the step at the index with a key split step
def add_random_split(
    self,
    weighted: Iterable[Tuple[float, Model]],
    hash_key: Optional[str] = None,
) -> "PipelineConfigBuilder":
    """Routes inputs to a single model, randomly chosen from the list of
    `weighted` options.

    Each model receives inputs that are approximately proportional to the
    weight it is assigned. For example, with two models having weights 1
    and 1, each will receive roughly equal amounts of inference inputs. If
    the weights were changed to 1 and 2, the models would receive roughly
    33% and 66% respectively instead.

    When choosing the model to use, a random number between 0.0 and 1.0 is
    generated. The weighted inputs are mapped to that range, and the random
    input is then used to select the model to use. For example, for the
    two-models equal-weight case, a random key of 0.4 would route to the
    first model. 0.6 would route to the second.

    To support consistent assignment to a model, a `hash_key` can be
    specified. This must be between 0.0 and 1.0. The value at this key, when
    present in the input data, will be used instead of a random number for
    model selection.
    """
    # Bug fix: materialize `weighted` (it may be a one-shot iterator and is
    # traversed twice), and hand the configs to _add_step instead of
    # appending them separately — the old code appended to model_configs
    # twice for a single step, breaking index alignment with steps.
    weighted = list(weighted)
    weights = [ModelWeight.from_tuple(pair) for pair in weighted]
    return self._add_step(
        RandomSplit(weights, hash_key),
        ModelConfigsForStep([m.config() for (_, m) in weighted]),
    )
Routes inputs to a single model, randomly chosen from the list of `weighted` options.

Each model receives inputs that are approximately proportional to the weight it is assigned. For example, with two models having weights 1 and 1, each will receive roughly equal amounts of inference inputs. If the weights were changed to 1 and 2, the models would receive roughly 33% and 66% respectively instead.

When choosing the model to use, a random number between 0.0 and 1.0 is generated. The weighted inputs are mapped to that range, and the random input is then used to select the model to use. For example, for the two-models equal-weight case, a random key of 0.4 would route to the first model. 0.6 would route to the second.

To support consistent assignment to a model, a `hash_key` can be specified. This must be between 0.0 and 1.0. The value at this key, when present in the input data, will be used instead of a random number for model selection.
def replace_with_random_split(
    self,
    index: int,
    weighted: Iterable[Tuple[float, Model]],
    hash_key: Optional[str] = None,
) -> "PipelineConfigBuilder":
    """Replace the step at the index with a random split step"""
    # Bug fix: materialize first — `weighted` may be a one-shot iterator
    # and is traversed twice below.
    weighted = list(weighted)
    weights = [ModelWeight.from_tuple(pair) for pair in weighted]
    return self._replace_step_at_index(
        index,
        RandomSplit(weights, hash_key),
        ModelConfigsForStep([m.config() for (_, m) in weighted]),
    )
Replace the step at the index with a random split step
def add_shadow_deploy(
    self, champion: Model, challengers: Iterable[Model]
) -> "PipelineConfigBuilder":
    """Create a "shadow deployment" experiment pipeline. The `champion`
    model and all `challengers` are run for each input. The result data for
    all models is logged, but the output of the `champion` is the only
    result returned.

    This is particularly useful for "burn-in" testing a new model with real
    world data without displacing the currently proven model.

    This is currently implemented as three steps: A multi model step, an
    audit step, and a multi-output step. To remove or replace this step,
    you need to remove or replace all three. You can remove steps using
    pipeline.remove_step
    """
    # Doc fix: the code appends MultiOut, not a select (Nth) step, so the
    # docstring now says "multi-output step" instead of "select step".
    # TODO This should be a single step and the backend can implement it as 3 steps
    return (
        self.add_multi_model_step([champion, *challengers])
        .add_audit("1:")
        .add_multi_out()
    )
Create a "shadow deployment" experiment pipeline. The `champion` model and all `challengers` are run for each input. The result data for all models is logged, but the output of the `champion` is the only result returned.

This is particularly useful for "burn-in" testing a new model with real world data without displacing the currently proven model.

This is currently implemented as three steps: a multi model step, an audit step, and a multi-output step. To remove or replace this step, you need to remove or replace all three. You can remove steps using `pipeline.remove_step`.
def replace_with_shadow_deploy(
    self, index: int, champion: Model, challengers: Iterable[Model]
) -> "PipelineConfigBuilder":
    """Replace the step at `index` with the shadow-deploy trio of steps:
    a multi model step, an audit step, and a multi-output step."""
    builder = self.replace_with_multi_model_step(index, [champion, *challengers])
    builder._insert_step(index + 1, self._audit_from_slice_str("1:"))
    builder._insert_step(index + 2, MultiOut())
    return builder
def add_validation(
    self, name: str, validation: Expression
) -> "PipelineConfigBuilder":
    """Add a `validation` with the given `name`. All validations are run on
    all outputs, and all failures are logged.
    """
    check = Check.from_name_and_validation(name, validation)
    return self._add_instrument(check)
Add a `validation` with the given `name`. All validations are run on all outputs, and all failures are logged.
def replace_with_validation(
    self, index: int, name: str, validation: Expression
) -> "PipelineConfigBuilder":
    """Replace the step at the given index with a validation step"""
    # TODO It sort of seems like since this is a replace operation, that this check is reasonable.
    # We may want to automatically check this like we do with add at some point
    follows_inference = index - 1 >= 0 and self.steps[index - 1].is_inference_step()
    if not follows_inference:
        raise RuntimeError(
            "Validations must come after a step that runs inference!"
        )
    return self._replace_step_at_index(
        index, self._create_validation(name, validation)
    )
Replace the step at the given index with a validation step
def add_alert(
    self, name: str, alert: Alert, notifications: List[notify.Notification]
) -> "PipelineConfigBuilder":
    """Instrument the pipeline with `alert` and register its notifications."""
    left_name = f"{name}:left"
    step = self._create_check_for_alert(left_name, alert)
    # Every model the alert expression references must already be configured.
    configured_models = {m.model().name() for m in self._model_configs()}
    for model_class in alert.left.expression().model_names():
        assert model_class in configured_models

    self.alert_configurations.append(
        notify.AlertConfiguration(name, alert.promql(left_name), notifications)
    )
    return self._add_instrument(step)
def replace_with_alert(
    self, index, name: str, alert: Alert, notifications: List[notify.Notification]
) -> "PipelineConfigBuilder":
    """Replace the step at the given index with the specified alert"""
    if index - 1 < 0 or not self.steps[index - 1].is_inference_step():
        # Bug fix: this guard concerns alerts; the old message said
        # "Validations", copied from replace_with_validation.
        raise RuntimeError(
            "Alerts must come after a step that runs inference!"
        )
    left_name = f"{name}:left"
    # Every model the alert expression references must already be configured.
    configured_models = set(m.model().name() for m in self._model_configs())
    for model_class in alert.left.expression().model_names():
        assert model_class in configured_models

    config = notify.AlertConfiguration(name, alert.promql(left_name), notifications)
    self.alert_configurations.append(config)
    step = self._create_check_for_alert(left_name, alert)
    return self._replace_step_at_index(index, step)
Replace the step at the given index with the specified alert
def clear(self) -> "PipelineConfigBuilder":
    """
    Remove all steps from the pipeline. This might be desirable if replacing models, for example.
    """
    # NOTE(review): alert_configurations are left untouched even though the
    # checks backing them are removed — confirm this is intended.
    self.steps = []
    self.model_configs = []
    return self
Remove all steps from the pipeline. This might be desirable if replacing models, for example.