kiln_ai.datamodel

from __future__ import annotations

import json
from enum import Enum, IntEnum
from typing import TYPE_CHECKING, Dict, List, Self, Type, Union

import jsonschema
import jsonschema.exceptions
from pydantic import BaseModel, Field, model_validator

from kiln_ai.datamodel.json_schema import JsonObjectSchema, schema_from_json_str

from .basemodel import (
    ID_FIELD,
    ID_TYPE,
    KilnBaseModel,
    KilnParentedModel,
    KilnParentModel,
)
from .json_schema import validate_schema

if TYPE_CHECKING:
    from . import Task


__all__ = [
    "basemodel",
    "json_schema",
    "Task",
    "Project",
    "TaskRun",
    "TaskOutput",
    "TaskOutputRating",
    "Priority",
    "DataSource",
    "DataSourceType",
    "DataSourceProperty",
    "TaskOutputRatingType",
    "TaskRequirement",
    "TaskDeterminism",
]


# Conventions:
# 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
# 2) Descriptions are for Kiln users to describe/understand the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.

# Filename compatible names
NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
NAME_FIELD = Field(min_length=1, max_length=120, pattern=NAME_REGEX)
SHORT_NAME_FIELD = Field(min_length=1, max_length=20, pattern=NAME_REGEX)


class Priority(IntEnum):
    """Defines priority levels for tasks and requirements, where P0 is highest priority."""

    p0 = 0
    p1 = 1
    p2 = 2
    p3 = 3


# Only one rating type for now, but this allows for extensibility if we want to add more in the future
class TaskOutputRatingType(str, Enum):
    """Defines the types of rating systems available for task outputs."""

    five_star = "five_star"
    custom = "custom"


class TaskOutputRating(KilnBaseModel):
    """
    A rating for a task output, including an overall rating and ratings for each requirement.

    Only supports five star ratings for now, but extensible for custom values.
    """

    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
    value: float | None = Field(
        description="The overall rating value (typically 1-5 stars).",
        default=None,
    )
    requirement_ratings: Dict[ID_TYPE, float] = Field(
        default={},
        description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
    )

    # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
    def is_high_quality(self) -> bool:
        if self.type == TaskOutputRatingType.five_star:
            return self.value is not None and self.value >= 4
        return False

    @model_validator(mode="after")
    def validate_rating(self) -> Self:
        if self.type not in TaskOutputRatingType:
            raise ValueError(f"Invalid rating type: {self.type}")

        if self.type == TaskOutputRatingType.five_star:
            if self.value is not None:
                self._validate_five_star(self.value, "overall rating")
            for req_id, req_rating in self.requirement_ratings.items():
                self._validate_five_star(req_rating, f"requirement rating for {req_id}")

        return self

    def _validate_five_star(self, rating: float, rating_name: str) -> None:
        if not isinstance(rating, float) or not rating.is_integer():
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
            )
        if rating < 1 or rating > 5:
            raise ValueError(
                f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
            )

    def validate_requirement_rating_keys(self, task: Task) -> Self:
        if len(self.requirement_ratings) == 0:
            return self

        valid_requirement_ids = {req.id for req in task.requirements}
        for key in self.requirement_ratings.keys():
            if key not in valid_requirement_ids:
                raise ValueError(
                    f"Requirement ID '{key}' is not a valid requirement ID for this task"
                )
        return self


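A minimal usage sketch for the rating validators above, assuming construction through pydantic (value is coerced to float, and the ValueError raised inside validate_rating surfaces as a pydantic ValidationError, which subclasses ValueError). The requirement ID below is hypothetical:

from kiln_ai.datamodel import TaskOutputRating

rating = TaskOutputRating(value=4.0, requirement_ratings={"req_123": 5.0})
print(rating.is_high_quality())  # True: five_star type and value >= 4

# Non-integer star values are rejected by _validate_five_star at construction:
try:
    TaskOutputRating(value=4.5)
except ValueError as e:
    print("rejected:", e)

Note that validate_requirement_rating_keys is not run here; it only checks keys against a concrete Task, and is invoked from TaskRun's validators once a parent task is attached.
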
class TaskOutput(KilnBaseModel):
    """
    An output for a specific task run.

    Contains the actual output content, its source (human or synthetic),
    and optional rating information.
    """

    output: str = Field(
        description="The output of the task. JSON formatted for structured output, plaintext for unstructured output."
    )
    source: DataSource = Field(
        description="The source of the output: human or synthetic."
    )
    rating: TaskOutputRating | None = Field(
        default=None, description="The rating of the output"
    )

    def validate_output_format(self, task: Task) -> Self:
        # validate output
        if task.output_json_schema is not None:
            try:
                validate_schema(json.loads(self.output), task.output_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Output is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Output does not match task output schema: {e}")
        return self


class DataSourceType(str, Enum):
    """
    The source of a piece of data.
    """

    human = "human"
    synthetic = "synthetic"


class DataSourceProperty(BaseModel):
    """
    Defines a property that can be associated with a data source.

    Includes validation rules for when properties are required or not allowed
    based on the data source type.
    """

    name: str
    type: Type[Union[str, int, float]]
    required_for: List[DataSourceType] = []
    not_allowed_for: List[DataSourceType] = []


class DataSource(BaseModel):
    """
    Represents the origin of data, either human or synthetic, with associated properties.

    Properties vary based on the source type - for synthetic sources this includes
    model information, for human sources this includes creator information.
    """

    type: DataSourceType
    properties: Dict[str, str | int | float] = Field(
        default={},
        description="Properties describing the data source. For synthetic sources, properties like the model name. For human sources, the human's name.",
    )

    _data_source_properties = [
        DataSourceProperty(
            name="created_by",
            type=str,
            required_for=[DataSourceType.human],
            not_allowed_for=[DataSourceType.synthetic],
        ),
        DataSourceProperty(
            name="model_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="model_provider",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="adapter_name",
            type=str,
            required_for=[DataSourceType.synthetic],
            not_allowed_for=[DataSourceType.human],
        ),
        DataSourceProperty(
            name="prompt_builder_name",
            type=str,
            not_allowed_for=[DataSourceType.human],
        ),
    ]

    @model_validator(mode="after")
    def validate_type(self) -> "DataSource":
        if self.type not in DataSourceType:
            raise ValueError(f"Invalid data source type: {self.type}")
        return self

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        for prop in self._data_source_properties:
            # Check the property type is correct
            if prop.name in self.properties:
                if not isinstance(self.properties[prop.name], prop.type):
                    raise ValueError(
                        f"'{prop.name}' must be of type {prop.type.__name__} for {self.type} data source"
                    )
            # Check the property is required for the data source type
            if self.type in prop.required_for:
                if prop.name not in self.properties:
                    raise ValueError(
                        f"'{prop.name}' is required for {self.type} data source"
                    )
            # Check the property is not allowed for the data source type
            elif self.type in prop.not_allowed_for and prop.name in self.properties:
                raise ValueError(
                    f"'{prop.name}' is not allowed for {self.type} data source"
                )
        return self

    @model_validator(mode="after")
    def validate_no_empty_properties(self) -> Self:
        for prop, value in self.properties.items():
            if isinstance(value, str) and value == "":
                raise ValueError(
                    f"Property '{prop}' must be a non-empty string for {self.type} data source"
                )
        return self


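A short sketch of the property rules above: a human source requires created_by, while a synthetic source requires model_name, model_provider, and adapter_name and may not carry created_by. All property values here are hypothetical:

from kiln_ai.datamodel import DataSource, DataSourceType, TaskOutput

human = DataSource(type=DataSourceType.human, properties={"created_by": "jane"})
synthetic = DataSource(
    type=DataSourceType.synthetic,
    properties={
        "model_name": "example-model",
        "model_provider": "example-provider",
        "adapter_name": "example-adapter",
    },
)

# TaskOutput pairs the raw output string with its source:
output = TaskOutput(output="The answer is 42.", source=synthetic)

# Rejected by validate_properties: created_by is not allowed for synthetic sources.
try:
    DataSource(
        type=DataSourceType.synthetic,
        properties={
            "created_by": "jane",
            "model_name": "example-model",
            "model_provider": "example-provider",
            "adapter_name": "example-adapter",
        },
    )
except ValueError as e:
    print("rejected:", e)
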
class TaskRun(KilnParentedModel):
    """
    Represents a single execution of a Task.

    Contains the input used, its source, the output produced, and optional
    repair information if the output needed correction.
    """

    input: str = Field(
        description="The inputs to the task. JSON formatted for structured input, plaintext for unstructured input."
    )
    input_source: DataSource = Field(
        description="The source of the input: human or synthetic."
    )

    output: TaskOutput = Field(description="The output of the task run.")
    repair_instructions: str | None = Field(
        default=None,
        description="Instructions for fixing the output. Should define what is wrong, and how to fix it. Will be used by models for both generating a fixed output, and evaluating future models.",
    )
    repaired_output: TaskOutput | None = Field(
        default=None,
        description="A version of the output with issues fixed. This must be a 'fixed' version of the existing output, and not an entirely new output. If you wish to generate an ideal curated output for this task unrelated to this output, generate a new TaskOutput with type 'human' instead of using this field.",
    )

    def parent_task(self) -> Task | None:
        if not isinstance(self.parent, Task):
            return None
        return self.parent

    @model_validator(mode="after")
    def validate_input_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            # Don't validate this relationship until we have a path or parent. Give the caller time to build it (it will be caught before saving).
            return self

        # validate input
        if task.input_json_schema is not None:
            try:
                validate_schema(json.loads(self.input), task.input_json_schema)
            except json.JSONDecodeError:
                raise ValueError("Input is not a valid JSON object")
            except jsonschema.exceptions.ValidationError as e:
                raise ValueError(f"Input does not match task input schema: {e}")
        return self

    @model_validator(mode="after")
    def validate_output_format(self) -> Self:
        task = self.parent_task()
        if task is None:
            return self

        self.output.validate_output_format(task)
        return self

    @model_validator(mode="after")
    def validate_requirement_ratings(self) -> Self:
        task = self.parent_task()
        if task is None:
            return self

        if self.output.rating is not None:
            self.output.rating.validate_requirement_rating_keys(task)
        if self.repaired_output is not None and self.repaired_output.rating is not None:
            self.repaired_output.rating.validate_requirement_rating_keys(task)

        return self

    @model_validator(mode="after")
    def validate_repaired_output(self) -> Self:
        if self.repaired_output is not None:
            if self.repaired_output.rating is not None:
                raise ValueError(
                    "Repaired output rating must be None. Repaired outputs are assumed to have a perfect rating, as they have been fixed."
                )
        if self.repair_instructions is None and self.repaired_output is not None:
            raise ValueError(
                "Repair instructions are required if providing a repaired output."
            )
        if self.repair_instructions is not None and self.repaired_output is None:
            raise ValueError(
                "A repaired output is required if providing repair instructions."
            )
        return self


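A sketch of the repair invariants, assuming a TaskRun built without a parent Task (the schema validators above return early when parent_task() is None, while the repair pairing rules always apply). All field values are illustrative:

from kiln_ai.datamodel import DataSource, DataSourceType, TaskOutput, TaskRun

source = DataSource(
    type=DataSourceType.synthetic,
    properties={"model_name": "m", "model_provider": "p", "adapter_name": "a"},
)
run = TaskRun(
    input="What is 2 + 2?",
    input_source=DataSource(type=DataSourceType.human, properties={"created_by": "jane"}),
    output=TaskOutput(output="5", source=source),
    # Repair fields must come as a pair, and the repaired output may not be rated:
    repair_instructions="The sum is wrong; 2 + 2 = 4.",
    repaired_output=TaskOutput(output="4", source=source),
)

# Rejected by validate_repaired_output: instructions without a repaired output.
try:
    TaskRun(
        input="What is 2 + 2?",
        input_source=run.input_source,
        output=run.output,
        repair_instructions="Fix the sum.",
    )
except ValueError as e:
    print("rejected:", e)

Because model_config sets validate_assignment, assigning repair_instructions on its own after construction would re-run these validators and fail for the same reason; set both repair fields together.
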
class TaskRequirement(BaseModel):
    """
    Defines a specific requirement that should be met by task outputs.

    Includes an identifier, name, description, instruction for meeting the requirement,
    and priority level.
    """

    id: ID_TYPE = ID_FIELD
    name: str = SHORT_NAME_FIELD
    description: str | None = Field(default=None)
    instruction: str = Field(min_length=1)
    priority: Priority = Field(default=Priority.p2)


class TaskDeterminism(str, Enum):
    """
    Defines how strictly task outputs should match expected results.

    - deterministic: Requires exact matches
    - semantic_match: Allows different wording with same meaning
    - flexible: Allows variation in both wording and meaning within requirements
    """

    deterministic = "deterministic"  # Expect exact match
    semantic_match = "semantic_match"  # Expect same meaning, but flexible on expression of the meaning
    flexible = "flexible"  # Flexible on semantic output. Eval should be custom based on parsing requirements.


class Task(
    KilnParentedModel,
    KilnParentModel,
    parent_of={"runs": TaskRun},
):
    """
    Represents a specific task to be performed, with associated requirements and validation rules.

    Contains the task definition, requirements, input/output schemas, and maintains
    a collection of task runs.
    """

    name: str = NAME_FIELD
    description: str = Field(default="")
    priority: Priority = Field(default=Priority.p2)
    determinism: TaskDeterminism = Field(default=TaskDeterminism.flexible)
    instruction: str = Field(min_length=1)
    requirements: List[TaskRequirement] = Field(default=[])
    # TODO: make this required, or formalize the default message output schema
    output_json_schema: JsonObjectSchema | None = None
    input_json_schema: JsonObjectSchema | None = None

    def output_schema(self) -> Dict | None:
        if self.output_json_schema is None:
            return None
        return schema_from_json_str(self.output_json_schema)

    def input_schema(self) -> Dict | None:
        if self.input_json_schema is None:
            return None
        return schema_from_json_str(self.input_json_schema)

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def runs(self) -> list[TaskRun]:
        return super().runs()  # type: ignore


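A sketch tying Task, TaskRequirement, and the JSON schemas together. JsonObjectSchema is a validated string holding a JSON object schema, so the schema below is passed as a serialized string; all names and schema contents are illustrative:

import json

from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Priority,
    Task,
    TaskOutput,
    TaskRequirement,
)

task = Task(
    name="Summarize Article",
    instruction="Summarize the input article in one paragraph.",
    requirements=[
        TaskRequirement(
            name="Stay factual",
            instruction="State only facts from the article.",
            priority=Priority.p1,
        ),
    ],
    output_json_schema=json.dumps(
        {
            "type": "object",
            "properties": {"summary": {"type": "string"}},
            "required": ["summary"],
        }
    ),
)

print(task.output_schema())  # the parsed dict form of the schema string
print(task.input_schema())   # None: no input schema was set

# validate_output_format checks a TaskOutput against the task's output schema:
output = TaskOutput(
    output=json.dumps({"summary": "A short factual summary."}),
    source=DataSource(type=DataSourceType.human, properties={"created_by": "jane"}),
)
output.validate_output_format(task)  # passes; raises ValueError on a mismatch
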
class Project(KilnParentModel, parent_of={"tasks": Task}):
    """
    A collection of related tasks.

    Projects organize tasks into logical groups and provide high-level descriptions
    of the overall goals.
    """

    name: str = NAME_FIELD
    description: str | None = Field(
        default=None,
        description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
    )

    # Needed for typechecking. TODO P2: fix this in KilnParentModel
    def tasks(self) -> list[Task]:
        return super().tasks()  # type: ignore
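Finally, a sketch of the parent/child hierarchy. Passing parent= at construction is an assumption about KilnParentedModel's API (the class does read self.parent, as parent_task() shows, but the constructor keyword is not confirmed by this listing):

from kiln_ai.datamodel import Project, Task

project = Project(name="Demo Project", description="A scratch project for illustration.")

# parent= here is an assumption about KilnParentedModel's constructor;
# the parent link is what parent_task() and the TaskRun validators read.
task = Task(
    name="Summarize Article",
    instruction="Summarize the input article in one paragraph.",
    parent=project,
)

print(task.parent is project)  # True, assuming parent= is accepted
# Project.tasks() and Task.runs() enumerate children persisted under the
# parent's path, so they return results once the models have been saved to disk.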