kiln_ai.datamodel
from __future__ import annotations

import json
from enum import Enum, IntEnum
from typing import TYPE_CHECKING, Dict, List, Self, Type, Union

import jsonschema
import jsonschema.exceptions
from pydantic import BaseModel, Field, model_validator

from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str

from .basemodel import (
    ID_FIELD,
    ID_TYPE,
    KilnBaseModel,
    KilnParentedModel,
    KilnParentModel,
)
from .json_schema import validate_schema

if TYPE_CHECKING:
    from . import Task


__all__ = [
    "basemodel",
    "json_schema",
    "Task",
    "Project",
    "TaskRun",
    "TaskOutput",
    "TaskOutputRating",
    "Priority",
    "DataSource",
    "DataSourceType",
    "DataSourceProperty",
    "TaskOutputRatingType",
    "TaskRequirement",
    "TaskDeterminism",
]


# Conventions:
# 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
# 2) Descriptions are for Kiln users to describe/understand the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.

# Filename compatible names
NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
NAME_FIELD = Field(min_length=1, max_length=120, pattern=NAME_REGEX)
SHORT_NAME_FIELD = Field(min_length=1, max_length=20, pattern=NAME_REGEX)
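The name fields above enforce filename-safe names. As a minimal sketch of what the pattern accepts and rejects (the helper function below is illustrative, not part of the module):

import re

NAME_REGEX = r"^[A-Za-z0-9 _-]+$"

def is_filename_safe(name: str) -> bool:
    # Letters, digits, spaces, underscores, and hyphens only; at least 1 character.
    return re.match(NAME_REGEX, name) is not None

assert is_filename_safe("Summarize Support Tickets")
assert not is_filename_safe("tickets/summary")  # path separators are rejected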
class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={"runs": TaskRun},
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str = Field(default="")
    priority: Priority = Field(default=Priority.p2)
    determinism: TaskDeterminism = Field(default=TaskDeterminism.flexible)
    instruction: str = Field(min_length=1)
    requirements: List[TaskRequirement] = Field(default=[])
    # TODO: make this required, or formalize the default message output schema
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self) -> list[TaskRun]:
        return super().runs()  # type: ignore
Represents a specific task to be performed, with associated requirements and validation rules.
Contains the task definition, requirements, input/output schemas, and maintains a collection of task runs.
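A minimal sketch of constructing a Task, assuming it can be built standalone before being attached to a Project. The name, instruction, requirement, and schema values are illustrative only:

from kiln_ai.datamodel import Priority, Task, TaskRequirement

task = Task(
    name="Summarize Ticket",
    instruction="Summarize the support ticket in two sentences.",
    requirements=[
        TaskRequirement(
            name="Concise",
            instruction="Keep the summary under 50 words.",
            priority=Priority.p1,
        ),
    ],
    # Optional: require structured JSON output by providing a JSON schema string.
    output_json_schema='{"type": "object", "properties": {"summary": {"type": "string"}}, "required": ["summary"]}',
)

print(task.output_schema())  # parsed schema dict, or None when no schema is set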
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """We need to both initialize private attributes and call the user-defined model_post_init
    method.
    """
    init_private_attributes(self, context)
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore
A collection of related tasks.
Projects organize tasks into logical groups and provide high-level descriptions of the overall goals.
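A short sketch of creating a Project. How tasks are attached and persisted is handled by the KilnParentModel/KilnParentedModel machinery, which is outside this module, so only construction is shown:

from kiln_ai.datamodel import Project

# Hypothetical project; the description is informational only and is never
# used in prompts, training, or validation.
project = Project(
    name="Customer Support",
    description="Tasks for the support-automation pipeline.",
)

# project.tasks() returns the child Task objects once they have been attached
# via the parent_of={"tasks": Task} relationship.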
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource = Field(
        description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curatorial output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )

    def parent_task(self) -> Task | None:
        if not isinstance(self.parent, Task):
            return None
        return self.parent

    @model_validator(mode="after")
    def validate_input_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            # don't validate this relationship until we have a path or parent. Give them time to build it (but will catch it before saving)
            return self

        # validate the input against the task's input schema, if one is set
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        return self

    @model_validator(mode="after")
    def validate_requirement_ratings(self) -> Self:
        task = self.parent_task()
        if task is None:
            return self

        if self.output.rating is not None:
            self.output.rating.validate_requirement_rating_keys(task)
        if self.repaired_output is not None and self.repaired_output.rating is not None:
            self.repaired_output.rating.validate_requirement_rating_keys(task)

        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self
Represents a single execution of a Task.
Contains the input used, its source, the output produced, and optional repair information if the output needed correction.
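A compact sketch of recording a run with an optional repair. All values are illustrative, and the run is shown standalone (schema validators that need the parent Task are skipped until it has one). Note that repairs must come in pairs: instructions plus the fixed output, and the repaired output may not carry a rating:

from kiln_ai.datamodel import DataSource, DataSourceType, TaskOutput, TaskRun

run = TaskRun(
    input="Customer reports the app crashes on login.",
    input_source=DataSource(
        type=DataSourceType.human,
        properties={"created_by": "jane"},
    ),
    output=TaskOutput(
        output="The app crashes when the customer logs in.",
        source=DataSource(
            type=DataSourceType.synthetic,
            properties={
                "model_name": "example-model",        # illustrative values
                "model_provider": "example-provider",
                "adapter_name": "example-adapter",
            },
        ),
    ),
    repair_instructions="Mention that the crash only happens on login.",
    repaired_output=TaskOutput(
        output="The app crashes only during login for this customer.",
        source=DataSource(
            type=DataSourceType.human,
            properties={"created_by": "jane"},
        ),
    ),
)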
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource = Field(
        description="The source of the output: human or synthetic."
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: Task) -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self
An output for a specific task run.
Contains the actual output content, its source (human or synthetic), and optional rating information.
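A small sketch of a structured output being checked against a task's output schema. The schema, names, and values are made up, and the Task is assumed to be constructible standalone:

import json

from kiln_ai.datamodel import DataSource, DataSourceType, Task, TaskOutput

task = Task(
    name="Extract Sentiment",
    instruction="Return the sentiment of the message as JSON.",
    output_json_schema='{"type": "object", "properties": {"sentiment": {"type": "string"}}, "required": ["sentiment"]}',
)

output = TaskOutput(
    output=json.dumps({"sentiment": "positive"}),
    source=DataSource(
        type=DataSourceType.synthetic,
        properties={
            "model_name": "example-model",
            "model_provider": "example-provider",
            "adapter_name": "example-adapter",
        },
    ),
)

# Raises ValueError if the output is not valid JSON or does not match the schema.
output.validate_output_format(task)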
class TaskOutputRating(KilnBaseModel):
    """
    A rating for a task output, including an overall rating and ratings for each requirement.

    Only supports five star ratings for now, but extensible for custom values.
    """

    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
    value: float | None = Field(
        description="The overall rating value (typically 1-5 stars).",
        default=None,
    )
    requirement_ratings: Dict[ID_TYPE, float] = Field(
        default={},
        description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
    )

    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
    def is_high_quality(self) -> bool:
        if self.type == TaskOutputRatingType.five_star:
            return self.value is not None and self.value >= 4
        return False

    @model_validator(mode="after")
    def validate_rating(self) -> Self:
        if self.type not in TaskOutputRatingType:
            raise ValueError(f"Invalid rating type: {self.type}")

        if self.type == TaskOutputRatingType.five_star:
            if self.value is not None:
                self._validate_five_star(self.value, "overall rating")
            for req_id, req_rating in self.requirement_ratings.items():
                self._validate_five_star(req_rating, f"requirement rating for {req_id}")

        return self

    def _validate_five_star(self, rating: float, rating_name: str) -> None:
        if not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
            )
        if rating < 1 or rating > 5:
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
            )

    def validate_requirement_rating_keys(self, task: Task) -> Self:
        if len(self.requirement_ratings) == 0:
            return self

        valid_requirement_ids = {req.id for req in task.requirements}
        for key in self.requirement_ratings.keys():
            if key not in valid_requirement_ids:
                raise ValueError(
                    f"Requirement ID '{key}' is not a valid requirement ID for this task"
                )
        return self
A rating for a task output, including an overall rating and ratings for each requirement.
Only supports five star ratings for now, but extensible for custom values.
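A brief sketch of a five-star rating. Values must be whole numbers expressed as floats, and requirement-rating keys must match requirement IDs on the parent task (the ID below is a made-up placeholder):

from kiln_ai.datamodel import TaskOutputRating

rating = TaskOutputRating(
    value=5.0,
    requirement_ratings={"example-requirement-id": 4.0},
)

print(rating.is_high_quality())  # True: five_star ratings of 4.0 or higher count as high quality

# Non-integer star values are rejected by the validator:
# TaskOutputRating(value=4.5)  -> ValueError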
class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3
Defines priority levels for tasks and requirements, where P0 is highest priority.
class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic sources, things like the model name. For human sources, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self
Represents the origin of data, either human or synthetic, with associated properties.
Properties vary based on the source type - for synthetic sources this includes model information, for human sources this includes creator information.
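A short sketch contrasting the two source types and the properties each requires; the property values are placeholders:

from kiln_ai.datamodel import DataSource, DataSourceType

# Human sources must name their creator and may not carry model properties.
human = DataSource(
    type=DataSourceType.human,
    properties={"created_by": "jane"},
)

# Synthetic sources must identify the model, provider, and adapter that produced them.
synthetic = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "model_name": "example-model",
        "model_provider": "example-provider",
        "adapter_name": "example-adapter",
        "prompt_builder_name": "example-prompt-builder",  # optional; never allowed for human sources
    },
)

# This would raise: 'model_name' is not allowed for a human data source.
# DataSource(type=DataSourceType.human, properties={"created_by": "jane", "model_name": "x"})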
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """This function is meant to behave like a BaseModel method to initialise private attributes.

    It takes context as an argument since that's what pydantic-core passes when calling it.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    if getattr(self, '__pydantic_private__', None) is None:
        pydantic_private = {}
        for name, private_attr in self.__private_attributes__.items():
            default = private_attr.get_default()
            if default is not PydanticUndefined:
                pydantic_private[name] = default
        object_setattr(self, '__pydantic_private__', pydantic_private)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args:
    self: The BaseModel instance.
    context: The context.
class DataSourceType(str, Enum):
    """
    The source of a piece of data.
    """

    human = "human"
    synthetic = "synthetic"
The source of a piece of data.
class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []
Defines a property that can be associated with a data source.
Includes validation rules for when properties are required or not allowed based on the data source type.
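As a sketch, this is the shape used by DataSource's built-in property rules. The property below is hypothetical and would only take effect if it were added to DataSource._data_source_properties:

from kiln_ai.datamodel import DataSourceProperty, DataSourceType

# Hypothetical property: a numeric value that human sources are not allowed to set.
temperature_property = DataSourceProperty(
    name="sampling_temperature",
    type=float,
    not_allowed_for=[DataSourceType.human],
)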
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    custom = "custom"
Defines the types of rating systems available for task outputs.
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    and priority level.
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)
Defines a specific requirement that should be met by task outputs.
Includes an identifier, name, description, instruction for meeting the requirement, and priority level.
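A minimal sketch with made-up values. The name must satisfy the short (20-character) filename-safe constraint, and per the module conventions only the instruction (not the description) may be used in prompts, training, or validation:

from kiln_ai.datamodel import Priority, TaskRequirement

requirement = TaskRequirement(
    name="No PII",                          # short, filename-safe name
    description="Protects customer data.",  # informational only
    instruction="Never include personally identifiable information in the output.",
    priority=Priority.p0,
)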
class TaskDeterminism(str, Enum):
    """
    Defines how strictly task outputs should match expected results.

    - deterministic: Requires exact matches
    - semantic_match: Allows different wording with same meaning
    - flexible: Allows variation in both wording and meaning within requirements
    """

    deterministic = "deterministic"  # Expect exact match
    semantic_match = "semantic_match"  # Expect same meaning, but flexible on expression of the meaning
    flexible = "flexible"  # Flexible on semantic output. Eval should be custom based on parsing requirements.
Defines how strictly task outputs should match expected results.
- deterministic: Requires exact matches
- semantic_match: Allows different wording with same meaning
- flexible: Allows variation in both wording and meaning within requirements
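As an illustration only (this module ships no evaluator), code consuming a Task might branch on determinism roughly like this; the helper is hypothetical:

from kiln_ai.datamodel import Task, TaskDeterminism

def naive_exact_check(task: Task, expected: str, actual: str) -> bool | None:
    # Only deterministic tasks can be checked by plain string equality;
    # semantic_match and flexible tasks need a custom or model-based evaluation.
    if task.determinism == TaskDeterminism.deterministic:
        return expected == actual
    return None  # defer to a task-specific evaluator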