kiln_ai.datamodel
```python
from __future__ import annotations

import json
import math
import random
from enum import Enum, IntEnum
from typing import TYPE_CHECKING, Callable, Dict, List, Type, Union

import jsonschema
import jsonschema.exceptions
from pydantic import BaseModel, Field, model_validator
from typing_extensions import Self

from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str

from .basemodel import (
    ID_FIELD,
    ID_TYPE,
    NAME_FIELD,
    SHORT_NAME_FIELD,
    KilnBaseModel,
    KilnParentedModel,
    KilnParentModel,
)
from .json_schema import validate_schema

if TYPE_CHECKING:
    from . import Task


__all__ = [
    "basemodel",
    "json_schema",
    "Task",
    "Project",
    "TaskRun",
    "TaskOutput",
    "TaskOutputRating",
    "Priority",
    "DataSource",
    "DataSourceType",
    "DataSourceProperty",
    "TaskOutputRatingType",
    "TaskRequirement",
    "TaskDeterminism",
]


class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3


# Only one rating type for now, but this allows for extensibility if we want to add more in the future
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    custom = "custom"


class TaskOutputRating(KilnBaseModel):
    """
    A rating for a task output, including an overall rating and ratings for each requirement.

    Only supports five star ratings for now, but extensible for custom values.
    """

    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
    value: float | None = Field(
        description="The overall rating value (typically 1-5 stars).",
        default=None,
    )
    requirement_ratings: Dict[ID_TYPE, float] = Field(
        default={},
        description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
    )

    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
    def is_high_quality(self) -> bool:
        if self.type == TaskOutputRatingType.five_star:
            return self.value is not None and self.value >= 4
        return False

    @model_validator(mode="after")
    def validate_rating(self) -> Self:
        if self.type not in TaskOutputRatingType:
            raise ValueError(f"Invalid rating type: {self.type}")

        if self.type == TaskOutputRatingType.five_star:
            if self.value is not None:
                self._validate_five_star(self.value, "overall rating")
            for req_id, req_rating in self.requirement_ratings.items():
                self._validate_five_star(req_rating, f"requirement rating for {req_id}")

        return self

    def _validate_five_star(self, rating: float, rating_name: str) -> None:
        if not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
            )
        if rating < 1 or rating > 5:
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
            )


class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource = Field(
        description="The source of the output: human or synthetic."
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: Task) -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self


class Finetune(KilnParentedModel):
    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the fine-tune for you and your team. Not used in training.",
    )
    provider: str = Field(
        description="The provider to use for the fine-tune (e.g. 'openai')."
    )
    base_model_id: str = Field(
        description="The id of the base model to use for the fine-tune. This string relates to the provider's IDs for their own models, not Kiln IDs."
    )
    provider_id: str | None = Field(
        default=None,
        description="The ID of the fine-tuned model on the provider's side.",
    )
    dataset_split_id: str = Field(
        description="The ID of the dataset split to use for this fine-tune.",
    )
    train_split_name: str = Field(
        default="train",
        description="The name of the training split to use for this fine-tune.",
    )
    validation_split_name: str | None = Field(
        default=None,
        description="The name of the validation split to use for this fine-tune. Optional.",
    )
    parameters: dict[str, str | int | float | bool] = Field(
        default={},
        description="The parameters to use for this fine-tune. These are provider-specific.",
    )
    system_message: str = Field(
        description="The system message to use for this fine-tune.",
    )

    def parent_task(self) -> Task | None:
        if not isinstance(self.parent, Task):
            return None
        return self.parent


class DataSourceType(str, Enum):
    """
    The source type of a piece of data.

    Human: a human created the data
    Synthetic: a model created the data
    """

    human = "human"
    synthetic = "synthetic"


class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []


class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self


class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource = Field(
        description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )
    intermediate_outputs: Dict[str, str] | None = Field(
        default=None,
        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
    )

    def parent_task(self) -> Task | None:
        if not isinstance(self.parent, Task):
            return None
        return self.parent

    @model_validator(mode="after")
    def validate_input_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self


# Define the type alias for clarity
DatasetFilter = Callable[[TaskRun], bool]


def AllDatasetFilter(_: TaskRun) -> bool:
    return True


def HighRatingDatasetFilter(task_run: TaskRun) -> bool:
    if task_run.output is None or task_run.output.rating is None:
        return False
    return task_run.output.rating.is_high_quality()


class DatasetSplitDefinition(BaseModel):
    """
    A definition of a split in a dataset.

    Example: name="train", description="The training set", percentage=0.8 (80% of the dataset)
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    percentage: float = Field(
        ge=0.0,
        le=1.0,
        description="The percentage of the dataset that this split represents (between 0 and 1).",
    )


AllSplitDefinition: list[DatasetSplitDefinition] = [
    DatasetSplitDefinition(name="all", percentage=1.0)
]
Train80Test20SplitDefinition: list[DatasetSplitDefinition] = [
    DatasetSplitDefinition(name="train", percentage=0.8),
    DatasetSplitDefinition(name="test", percentage=0.2),
]
Train60Test20Val20SplitDefinition: list[DatasetSplitDefinition] = [
    DatasetSplitDefinition(name="train", percentage=0.6),
    DatasetSplitDefinition(name="test", percentage=0.2),
    DatasetSplitDefinition(name="val", percentage=0.2),
]


class DatasetSplit(KilnParentedModel):
    """
    A collection of task runs, with optional splits (train, test, validation)
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the dataset for you and your team. Not used in training.",
    )
    splits: list[DatasetSplitDefinition] = Field(
        default_factory=list,
        description="The splits in the dataset.",
    )
    split_contents: dict[str, list[str]] = Field(
        description="The contents of each split in the dataset. The key is the split name, and the value is a list of task run IDs.",
    )

    @model_validator(mode="after")
    def validate_split_percentages(self) -> "DatasetSplit":
        total = sum(split.percentage for split in self.splits)
        if not math.isclose(total, 1.0, rel_tol=1e-9):
            raise ValueError(f"The sum of split percentages must be 1.0 (got {total})")
        return self

    @classmethod
    def from_task(
        cls,
        name: str,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter: DatasetFilter = AllDatasetFilter,
        description: str | None = None,
    ):
        split_contents = cls.build_split_contents(task, splits, filter)
        return cls(
            parent=task,
            name=name,
            description=description,
            splits=splits,
            split_contents=split_contents,
        )

    @classmethod
    def build_split_contents(
        cls,
        task: "Task",
        splits: list[DatasetSplitDefinition],
        filter: DatasetFilter,
    ) -> dict[str, list[str]]:
        valid_ids = []
        for task_run in task.runs():
            if filter(task_run):
                valid_ids.append(task_run.id)

        # Shuffle and split by split percentage
        random.shuffle(valid_ids)
        split_contents = {}
        start_idx = 0
        remaining_items = len(valid_ids)

        # Handle all splits except the last one
        for split in splits[:-1]:
            split_size = round(len(valid_ids) * split.percentage)
            split_contents[split.name] = valid_ids[start_idx : start_idx + split_size]
            start_idx += split_size
            remaining_items -= split_size

        # Last split gets all remaining items (for rounding)
        if splits:
            split_contents[splits[-1].name] = valid_ids[start_idx:]

        return split_contents

    def parent_task(self) -> "Task | None":
        # inline import to avoid circular import
        from kiln_ai.datamodel import Task

        if not isinstance(self.parent, Task):
            return None
        return self.parent

    def missing_count(self) -> int:
        """
        Returns:
            int: the number of task runs that have an ID persisted in this dataset split, but no longer exist in the dataset
        """
        parent = self.parent_task()
        if parent is None:
            raise ValueError("DatasetSplit has no parent task")

        runs = parent.runs()
        all_ids = set(run.id for run in runs)
        all_ids_in_splits = set()
        for ids in self.split_contents.values():
            all_ids_in_splits.update(ids)
        missing = all_ids_in_splits - all_ids
        return len(missing)


class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    and priority level.
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)


class TaskDeterminism(str, Enum):
    """
    Defines how strictly task outputs should match expected results.

    - deterministic: Requires exact matches
    - semantic_match: Allows different wording with same meaning
    - flexible: Allows variation in both wording and meaning within requirements
    """

    deterministic = "deterministic"  # Expect exact match
    semantic_match = "semantic_match"  # Expect same meaning, but flexible on expression of the meaning
    flexible = "flexible"  # Flexible on semantic output. Eval should be custom based on parsing requirements.


class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={
        "runs": TaskRun,
        "dataset_splits": DatasetSplit,
        "finetunes": Finetune,
    },
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
    )
    instruction: str = Field(
        min_length=1,
        description="The instructions for the task. Will be used in prompts/training/validation.",
    )
    requirements: List[TaskRequirement] = Field(default=[])
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None
    thinking_instruction: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
    )

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self) -> list[TaskRun]:
        return super().runs()  # type: ignore

    def dataset_splits(self) -> list[DatasetSplit]:
        return super().dataset_splits()  # type: ignore

    def finetunes(self) -> list[Finetune]:
        return super().finetunes()  # type: ignore


class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore
```
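Taken together, the models compose as a tree: a `Project` owns `Task`s, and a `Task` owns `TaskRun`s, `DatasetSplit`s, and `Finetune`s. Below is a minimal in-memory sketch of recording a run against a task. Persistence helpers (save/load) live in `kiln_ai.datamodel.basemodel` and are not shown on this page; the creator name `jane` and the `example-*` property values are hypothetical placeholders.

```python
from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)

task = Task(name="Summarize", instruction="Summarize the input text.")

run = TaskRun(
    parent=task,  # parent wiring mirrors DatasetSplit.from_task above
    input="A long article ...",
    input_source=DataSource(
        type=DataSourceType.human,
        properties={"created_by": "jane"},  # required for human sources
    ),
    output=TaskOutput(
        output="A short summary.",
        source=DataSource(
            type=DataSourceType.synthetic,
            # model_name, model_provider, and adapter_name are all
            # required for synthetic sources (see DataSource below)
            properties={
                "model_name": "example-model",
                "model_provider": "example",
                "adapter_name": "example-adapter",
            },
        ),
    ),
)
```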
```python
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={
        "runs": TaskRun,
        "dataset_splits": DatasetSplit,
        "finetunes": Finetune,
    },
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the task for you and your team. Will not be used in prompts/training/validation.",
    )
    instruction: str = Field(
        min_length=1,
        description="The instructions for the task. Will be used in prompts/training/validation.",
    )
    requirements: List[TaskRequirement] = Field(default=[])
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None
    thinking_instruction: str | None = Field(
        default=None,
        description="Instructions for the model 'thinking' about the requirement prior to answering. Used for chain of thought style prompting.",
    )

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self) -> list[TaskRun]:
        return super().runs()  # type: ignore

    def dataset_splits(self) -> list[DatasetSplit]:
        return super().dataset_splits()  # type: ignore

    def finetunes(self) -> list[Finetune]:
        return super().finetunes()  # type: ignore
```
Represents a specific task to be performed, with associated requirements and validation rules.
Contains the task definition, requirements, input/output schemas, and maintains a collection of task runs.
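A short sketch of defining a task with a structured output schema and one requirement. The schema is passed as a JSON string (`JsonObjectSchema` is a validated string type); the task and requirement names here are illustrative.

```python
import json

from kiln_ai.datamodel import Priority, Task, TaskRequirement

# JSON Schema for the structured output, serialized to a string.
output_schema = json.dumps(
    {
        "type": "object",
        "properties": {"summary": {"type": "string"}},
        "required": ["summary"],
    }
)

task = Task(
    name="Summarize",
    instruction="Summarize the input text as JSON.",
    requirements=[
        TaskRequirement(
            name="Concise",
            instruction="Keep the summary under 50 words.",
            priority=Priority.p1,
        )
    ],
    output_json_schema=output_schema,
)

# output_schema() parses the stored schema string back into a dict.
print(task.output_schema())
```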
```python
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore
```
A collection of related tasks.
Projects organize tasks into logical groups and provide high-level descriptions of the overall goals.
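A minimal sketch of the parent/child relationship, assuming child models accept a `parent` argument as `DatasetSplit.from_task` does. The `tasks()` accessor is generated by `parent_of={"tasks": Task}`.

```python
from kiln_ai.datamodel import Project, Task

project = Project(name="Demo Project", description="Example grouping of tasks.")

# Tasks attach to a project via their parent relationship.
task = Task(parent=project, name="Summarize", instruction="Summarize the input.")
```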
```python
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource = Field(
        description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )
    intermediate_outputs: Dict[str, str] | None = Field(
        default=None,
        description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
    )

    def parent_task(self) -> Task | None:
        if not isinstance(self.parent, Task):
            return None
        return self.parent

    @model_validator(mode="after")
    def validate_input_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self
```
Represents a single execution of a Task.
Contains the input used, its source, the output produced, and optional repair information if the output needed correction.
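A sketch of how input validation behaves when the parent task declares an input schema. Pydantic wraps the validator's `ValueError` in a `ValidationError` at construction time; the creator name is hypothetical.

```python
import json

import pydantic

from kiln_ai.datamodel import DataSource, DataSourceType, Task, TaskOutput, TaskRun

task = Task(
    name="Echo",
    instruction="Echo the name field.",
    input_json_schema=json.dumps(
        {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        }
    ),
)

human = DataSource(type=DataSourceType.human, properties={"created_by": "jane"})
output = TaskOutput(output="Ada", source=human)

# Valid: the input parses as JSON and matches the task's input schema.
TaskRun(parent=task, input=json.dumps({"name": "Ada"}), input_source=human, output=output)

# Invalid: plaintext input against a structured task fails validation.
try:
    TaskRun(parent=task, input="not json", input_source=human, output=output)
except pydantic.ValidationError as e:
    print(e)  # "Input is not a valid JSON object"
```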
```python
@model_validator(mode="after")
def validate_input_format(self) -> Self:
    task = self.parent_task()
    if task is None:
        # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
        return self

    # validate input
    if task.input_json_schema is not None:
        try:
            validate_schema(json.loads(self.input), task.input_json_schema)
        except json.JSONDecodeError:
            raise ValueError("Input is not a valid JSON object")
        except jsonschema.exceptions.ValidationError as e:
            raise ValueError(f"Input does not match task input schema: {e}")
    return self
```
```python
@model_validator(mode="after")
def validate_repaired_output(self) -> Self:
    if self.repaired_output is not None:
        if self.repaired_output.rating is not None:
            raise ValueError(
                "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
            )
    if self.repair_instructions is None and self.repaired_output is not None:
        raise ValueError(
            "Repair instructions are required if providing a repaired output."
        )
    if self.repair_instructions is not None and self.repaired_output is None:
        raise ValueError(
            "A repaired output is required if providing repair instructions."
        )
    return self
```
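The validator enforces that repairs come as a pair: both `repair_instructions` and `repaired_output`, or neither, and the repaired output may not carry a rating. A minimal sketch (task, input, and creator name are illustrative):

```python
from kiln_ai.datamodel import DataSource, DataSourceType, Task, TaskOutput, TaskRun

human = DataSource(type=DataSourceType.human, properties={"created_by": "jane"})
task = Task(name="Greet", instruction="Greet the user by name.")

run = TaskRun(
    parent=task,
    input="Ada",
    input_source=human,
    output=TaskOutput(output="Hello.", source=human),
    # Both repair fields must be provided together, and the repaired
    # output may not carry a rating (it is assumed to be perfect).
    repair_instructions="The greeting must include the user's name.",
    repaired_output=TaskOutput(output="Hello, Ada.", source=human),
)
```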
```python
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource = Field(
        description="The source of the output: human or synthetic."
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: Task) -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self
```
An output for a specific task run.
Contains the actual output content, its source (human or synthetic), and optional rating information.
```python
def validate_output_format(self, task: Task) -> Self:
    # validate output
    if task.output_json_schema is not None:
        try:
            validate_schema(json.loads(self.output), task.output_json_schema)
        except json.JSONDecodeError:
            raise ValueError("Output is not a valid JSON object")
        except jsonschema.exceptions.ValidationError as e:
            raise ValueError(f"Output does not match task output schema: {e}")
    return self
```
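A sketch of checking a structured output against its task's schema. The `example-*` property values are hypothetical; synthetic sources require `model_name`, `model_provider`, and `adapter_name` (see `DataSource` below).

```python
import json

from kiln_ai.datamodel import DataSource, DataSourceType, Task, TaskOutput

task = Task(
    name="Extract",
    instruction="Extract the city as JSON.",
    output_json_schema=json.dumps(
        {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        }
    ),
)

output = TaskOutput(
    output=json.dumps({"city": "Paris"}),
    source=DataSource(
        type=DataSourceType.synthetic,
        properties={
            "model_name": "example-model",
            "model_provider": "example",
            "adapter_name": "example-adapter",
        },
    ),
)

# Raises ValueError if the output is not JSON or does not match the schema.
output.validate_output_format(task)
```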
```python
class TaskOutputRating(KilnBaseModel):
    """
    A rating for a task output, including an overall rating and ratings for each requirement.

    Only supports five star ratings for now, but extensible for custom values.
    """

    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
    value: float | None = Field(
        description="The overall rating value (typically 1-5 stars).",
        default=None,
    )
    requirement_ratings: Dict[ID_TYPE, float] = Field(
        default={},
        description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
    )

    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
    def is_high_quality(self) -> bool:
        if self.type == TaskOutputRatingType.five_star:
            return self.value is not None and self.value >= 4
        return False

    @model_validator(mode="after")
    def validate_rating(self) -> Self:
        if self.type not in TaskOutputRatingType:
            raise ValueError(f"Invalid rating type: {self.type}")

        if self.type == TaskOutputRatingType.five_star:
            if self.value is not None:
                self._validate_five_star(self.value, "overall rating")
            for req_id, req_rating in self.requirement_ratings.items():
                self._validate_five_star(req_rating, f"requirement rating for {req_id}")

        return self

    def _validate_five_star(self, rating: float, rating_name: str) -> None:
        if not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
            )
        if rating < 1 or rating > 5:
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
            )
```
A rating for a task output, including an overall rating and ratings for each requirement.
Only supports five star ratings for now, but extensible for custom values.
```python
@model_validator(mode="after")
def validate_rating(self) -> Self:
    if self.type not in TaskOutputRatingType:
        raise ValueError(f"Invalid rating type: {self.type}")

    if self.type == TaskOutputRatingType.five_star:
        if self.value is not None:
            self._validate_five_star(self.value, "overall rating")
        for req_id, req_rating in self.requirement_ratings.items():
            self._validate_five_star(req_rating, f"requirement rating for {req_id}")

    return self
```
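A short sketch of the rating rules: five-star values must be whole numbers from 1.0 to 5.0, and `is_high_quality()` treats 4 and above as high quality. The requirement id `"req-1"` is a hypothetical placeholder for a real `TaskRequirement` id.

```python
import pydantic

from kiln_ai.datamodel import TaskOutputRating

rating = TaskOutputRating(
    value=5.0,  # whole-number star value
    requirement_ratings={"req-1": 4.0},  # keyed by TaskRequirement id
)
print(rating.is_high_quality())  # True

try:
    TaskOutputRating(value=4.5)  # half stars are rejected by validate_rating
except pydantic.ValidationError as e:
    print(e)
```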
```python
class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3
```
Defines priority levels for tasks and requirements, where P0 is highest priority.
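Because `Priority` is an `IntEnum`, members compare as integers, so requirements can be sorted or compared by priority directly. A small illustration:

```python
from kiln_ai.datamodel import Priority

# Lower value means higher priority: p0 outranks p2.
assert Priority.p0 < Priority.p2
assert min(Priority.p2, Priority.p0) == Priority.p0
```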
```python
class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic things like model. For human, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self
```
Represents the origin of data, either human or synthetic, with associated properties.
Properties vary based on the source type - for synthetic sources this includes model information, for human sources this includes creator information.
```python
@model_validator(mode="after")
def validate_properties(self) -> "DataSource":
    for prop in self._data_source_properties:
        # Check the property type is correct
        if prop.name in self.properties:
            if not isinstance(self.properties[prop.name], prop.type):
                raise ValueError(
                    f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                )
        # Check the property is required for the data source type
        if self.type in prop.required_for:
            if prop.name not in self.properties:
                raise ValueError(
                    f"'{prop.name}' is required for {self.type} data source"
                )
        # Check the property is not allowed for the data source type
        elif self.type in prop.not_allowed_for and prop.name in self.properties:
            raise ValueError(
                f"'{prop.name}' is not allowed for {self.type} data source"
            )
    return self
```
```python
@model_validator(mode="after")
def validate_no_empty_properties(self) -> Self:
    for prop, value in self.properties.items():
        if isinstance(value, str) and value == "":
            raise ValueError(
                f"Property '{prop}' must be a non-empty string for {self.type} data source"
            )
    return self
```
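A sketch of the per-type property rules in action: human sources require `created_by`, while synthetic sources require `model_name`, `model_provider`, and `adapter_name`. The values shown are hypothetical.

```python
import pydantic

from kiln_ai.datamodel import DataSource, DataSourceType

# Human sources require `created_by` and reject model properties.
DataSource(type=DataSourceType.human, properties={"created_by": "jane"})

# Synthetic sources missing a required property fail validation.
try:
    DataSource(
        type=DataSourceType.synthetic,
        properties={"model_name": "example-model"},
    )
except pydantic.ValidationError as e:
    print(e)  # "'model_provider' is required for ... data source"
```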
```python
class DataSourceType(str, Enum):
    """
    The source type of a piece of data.

    Human: a human created the data
    Synthetic: a model created the data
    """

    human = "human"
    synthetic = "synthetic"
```
The source type of a piece of data.
Human: a human created the data
Synthetic: a model created the data
```python
class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []
```
Defines a property that can be associated with a data source.
Includes validation rules for when properties are required or not allowed based on the data source type.
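For illustration, this is how the built-in `created_by` rule is expressed (mirroring `DataSource._data_source_properties` above): a string that human sources must provide and synthetic sources may not.

```python
from kiln_ai.datamodel import DataSourceProperty, DataSourceType

created_by_rule = DataSourceProperty(
    name="created_by",
    type=str,
    required_for=[DataSourceType.human],
    not_allowed_for=[DataSourceType.synthetic],
)
```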
```python
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    custom = "custom"
```
Defines the types of rating systems available for task outputs.
```python
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    and priority level.
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)
```
Defines a specific requirement that should be met by task outputs.
Includes an identifier, name, description, instruction for meeting the requirement, and priority level.
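A minimal sketch of declaring a requirement (the id defaults via `ID_FIELD`; the wording here is illustrative):

```python
from kiln_ai.datamodel import Priority, TaskRequirement

requirement = TaskRequirement(
    name="No speculation",
    description="Answers must stick to the provided context.",
    instruction="Only use facts from the input; never invent details.",
    priority=Priority.p1,
)
```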
```python
class TaskDeterminism(str, Enum):
    """
    Defines how strictly task outputs should match expected results.

    - deterministic: Requires exact matches
    - semantic_match: Allows different wording with same meaning
    - flexible: Allows variation in both wording and meaning within requirements
    """

    deterministic = "deterministic"  # Expect exact match
    semantic_match = "semantic_match"  # Expect same meaning, but flexible on expression of the meaning
    flexible = "flexible"  # Flexible on semantic output. Eval should be custom based on parsing requirements.
```
Defines how strictly task outputs should match expected results.
- deterministic: Requires exact matches
- semantic_match: Allows different wording with same meaning
- flexible: Allows variation in both wording and meaning within requirements
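A hypothetical sketch of how an evaluator might branch on determinism; the library does not ship this helper, and only the exact-match case is implementable without task-specific logic:

```python
from kiln_ai.datamodel import TaskDeterminism


def compare_outputs(expected: str, actual: str, determinism: TaskDeterminism) -> bool:
    # Hypothetical helper, not part of kiln_ai.
    if determinism == TaskDeterminism.deterministic:
        return expected == actual  # exact match only
    # semantic_match and flexible need task-specific evaluation
    # (e.g. an LLM judge or custom parsing); not implemented here.
    raise NotImplementedError(f"custom evaluation needed for {determinism}")
```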