kiln_ai.adapters.base_adapter

  1import json
  2from abc import ABCMeta, abstractmethod
  3from dataclasses import dataclass
  4from typing import Dict
  5
  6from kiln_ai.datamodel import (
  7    DataSource,
  8    DataSourceType,
  9    Task,
 10    TaskOutput,
 11    TaskRun,
 12)
 13from kiln_ai.datamodel.json_schema import validate_schema
 14from kiln_ai.utils.config import Config
 15
 16from .prompt_builders import BasePromptBuilder, SimplePromptBuilder
 17
 18
@dataclass
class AdapterInfo:
    """Identifying metadata that an adapter reports about itself.

    BaseAdapter reads these fields (via ``adapter_info()``) and copies them
    into the properties of the synthetic data source attached to each
    generated task output.
    """

    # Recorded under the "adapter_name" property key.
    adapter_name: str
    # Recorded under the "model_name" property key.
    model_name: str
    # Recorded under the "model_provider" property key.
    model_provider: str
    # Recorded under the "prompt_builder_name" property key.
    prompt_builder_name: str
    # Optional; only recorded (under "prompt_id") when it is not None.
    prompt_id: str | None = None
 26
 27
@dataclass
class RunOutput:
    """Result returned by an adapter's ``_run`` implementation.

    ``BaseAdapter.invoke`` checks the type of ``output`` against the task
    configuration before a run is recorded.
    """

    # Must be a dict when the task has an output schema, and a str otherwise
    # (invoke raises RuntimeError on a mismatch).
    output: Dict | str
    # Passed through unchanged to the generated TaskRun; may be None.
    intermediate_outputs: Dict[str, str] | None
 32
 33
 34class BaseAdapter(metaclass=ABCMeta):
 35    """Base class for AI model adapters that handle task execution.
 36
 37    This abstract class provides the foundation for implementing model-specific adapters
 38    that can process tasks with structured or unstructured inputs/outputs. It handles
 39    input/output validation, prompt building, and run tracking.
 40
 41    Attributes:
 42        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 43        kiln_task (Task): The task configuration and metadata
 44        output_schema (dict | None): JSON schema for validating structured outputs
 45        input_schema (dict | None): JSON schema for validating structured inputs
 46    """
 47
 48    def __init__(
 49        self,
 50        kiln_task: Task,
 51        prompt_builder: BasePromptBuilder | None = None,
 52        tags: list[str] | None = None,
 53    ):
 54        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 55        self.kiln_task = kiln_task
 56        self.output_schema = self.kiln_task.output_json_schema
 57        self.input_schema = self.kiln_task.input_json_schema
 58        self.default_tags = tags
 59
 60    async def invoke_returning_raw(
 61        self,
 62        input: Dict | str,
 63        input_source: DataSource | None = None,
 64    ) -> Dict | str:
 65        result = await self.invoke(input, input_source)
 66        if self.kiln_task.output_json_schema is None:
 67            return result.output.output
 68        else:
 69            return json.loads(result.output.output)
 70
 71    async def invoke(
 72        self,
 73        input: Dict | str,
 74        input_source: DataSource | None = None,
 75    ) -> TaskRun:
 76        # validate input
 77        if self.input_schema is not None:
 78            if not isinstance(input, dict):
 79                raise ValueError(f"structured input is not a dict: {input}")
 80            validate_schema(input, self.input_schema)
 81
 82        # Run
 83        run_output = await self._run(input)
 84
 85        # validate output
 86        if self.output_schema is not None:
 87            if not isinstance(run_output.output, dict):
 88                raise RuntimeError(
 89                    f"structured response is not a dict: {run_output.output}"
 90                )
 91            validate_schema(run_output.output, self.output_schema)
 92        else:
 93            if not isinstance(run_output.output, str):
 94                raise RuntimeError(
 95                    f"response is not a string for non-structured task: {run_output.output}"
 96                )
 97
 98        # Generate the run and output
 99        run = self.generate_run(input, input_source, run_output)
100
101        # Save the run if configured to do so, and we have a path to save to
102        if Config.shared().autosave_runs and self.kiln_task.path is not None:
103            run.save_to_file()
104        else:
105            # Clear the ID to indicate it's not persisted
106            run.id = None
107
108        return run
109
110    def has_structured_output(self) -> bool:
111        return self.output_schema is not None
112
113    @abstractmethod
114    def adapter_info(self) -> AdapterInfo:
115        pass
116
117    @abstractmethod
118    async def _run(self, input: Dict | str) -> RunOutput:
119        pass
120
121    def build_prompt(self) -> str:
122        return self.prompt_builder.build_prompt()
123
124    # create a run and task output
125    def generate_run(
126        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
127    ) -> TaskRun:
128        # Convert input and output to JSON strings if they are dictionaries
129        input_str = json.dumps(input) if isinstance(input, dict) else input
130        output_str = (
131            json.dumps(run_output.output)
132            if isinstance(run_output.output, dict)
133            else run_output.output
134        )
135
136        # If no input source is provided, use the human data source
137        if input_source is None:
138            input_source = DataSource(
139                type=DataSourceType.human,
140                properties={"created_by": Config.shared().user_id},
141            )
142
143        new_task_run = TaskRun(
144            parent=self.kiln_task,
145            input=input_str,
146            input_source=input_source,
147            output=TaskOutput(
148                output=output_str,
149                # Synthetic since an adapter, not a human, is creating this
150                source=DataSource(
151                    type=DataSourceType.synthetic,
152                    properties=self._properties_for_task_output(),
153                ),
154            ),
155            intermediate_outputs=run_output.intermediate_outputs,
156            tags=self.default_tags or [],
157        )
158
159        return new_task_run
160
161    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
162        props = {}
163
164        # adapter info
165        adapter_info = self.adapter_info()
166        props["adapter_name"] = adapter_info.adapter_name
167        props["model_name"] = adapter_info.model_name
168        props["model_provider"] = adapter_info.model_provider
169        props["prompt_builder_name"] = adapter_info.prompt_builder_name
170        if adapter_info.prompt_id is not None:
171            props["prompt_id"] = adapter_info.prompt_id
172
173        return props
@dataclass
class AdapterInfo:
20@dataclass
21class AdapterInfo:
22    adapter_name: str
23    model_name: str
24    model_provider: str
25    prompt_builder_name: str
26    prompt_id: str | None = None
AdapterInfo( adapter_name: str, model_name: str, model_provider: str, prompt_builder_name: str, prompt_id: str | None = None)
adapter_name: str
model_name: str
model_provider: str
prompt_builder_name: str
prompt_id: str | None = None
@dataclass
class RunOutput:
29@dataclass
30class RunOutput:
31    output: Dict | str
32    intermediate_outputs: Dict[str, str] | None
RunOutput( output: Union[Dict, str], intermediate_outputs: Optional[Dict[str, str]])
output: Union[Dict, str]
intermediate_outputs: Optional[Dict[str, str]]
class BaseAdapter:
 35class BaseAdapter(metaclass=ABCMeta):
 36    """Base class for AI model adapters that handle task execution.
 37
 38    This abstract class provides the foundation for implementing model-specific adapters
 39    that can process tasks with structured or unstructured inputs/outputs. It handles
 40    input/output validation, prompt building, and run tracking.
 41
 42    Attributes:
 43        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 44        kiln_task (Task): The task configuration and metadata
 45        output_schema (dict | None): JSON schema for validating structured outputs
 46        input_schema (dict | None): JSON schema for validating structured inputs
 47    """
 48
 49    def __init__(
 50        self,
 51        kiln_task: Task,
 52        prompt_builder: BasePromptBuilder | None = None,
 53        tags: list[str] | None = None,
 54    ):
 55        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 56        self.kiln_task = kiln_task
 57        self.output_schema = self.kiln_task.output_json_schema
 58        self.input_schema = self.kiln_task.input_json_schema
 59        self.default_tags = tags
 60
 61    async def invoke_returning_raw(
 62        self,
 63        input: Dict | str,
 64        input_source: DataSource | None = None,
 65    ) -> Dict | str:
 66        result = await self.invoke(input, input_source)
 67        if self.kiln_task.output_json_schema is None:
 68            return result.output.output
 69        else:
 70            return json.loads(result.output.output)
 71
 72    async def invoke(
 73        self,
 74        input: Dict | str,
 75        input_source: DataSource | None = None,
 76    ) -> TaskRun:
 77        # validate input
 78        if self.input_schema is not None:
 79            if not isinstance(input, dict):
 80                raise ValueError(f"structured input is not a dict: {input}")
 81            validate_schema(input, self.input_schema)
 82
 83        # Run
 84        run_output = await self._run(input)
 85
 86        # validate output
 87        if self.output_schema is not None:
 88            if not isinstance(run_output.output, dict):
 89                raise RuntimeError(
 90                    f"structured response is not a dict: {run_output.output}"
 91                )
 92            validate_schema(run_output.output, self.output_schema)
 93        else:
 94            if not isinstance(run_output.output, str):
 95                raise RuntimeError(
 96                    f"response is not a string for non-structured task: {run_output.output}"
 97                )
 98
 99        # Generate the run and output
100        run = self.generate_run(input, input_source, run_output)
101
102        # Save the run if configured to do so, and we have a path to save to
103        if Config.shared().autosave_runs and self.kiln_task.path is not None:
104            run.save_to_file()
105        else:
106            # Clear the ID to indicate it's not persisted
107            run.id = None
108
109        return run
110
111    def has_structured_output(self) -> bool:
112        return self.output_schema is not None
113
114    @abstractmethod
115    def adapter_info(self) -> AdapterInfo:
116        pass
117
118    @abstractmethod
119    async def _run(self, input: Dict | str) -> RunOutput:
120        pass
121
122    def build_prompt(self) -> str:
123        return self.prompt_builder.build_prompt()
124
125    # create a run and task output
126    def generate_run(
127        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
128    ) -> TaskRun:
129        # Convert input and output to JSON strings if they are dictionaries
130        input_str = json.dumps(input) if isinstance(input, dict) else input
131        output_str = (
132            json.dumps(run_output.output)
133            if isinstance(run_output.output, dict)
134            else run_output.output
135        )
136
137        # If no input source is provided, use the human data source
138        if input_source is None:
139            input_source = DataSource(
140                type=DataSourceType.human,
141                properties={"created_by": Config.shared().user_id},
142            )
143
144        new_task_run = TaskRun(
145            parent=self.kiln_task,
146            input=input_str,
147            input_source=input_source,
148            output=TaskOutput(
149                output=output_str,
150                # Synthetic since an adapter, not a human, is creating this
151                source=DataSource(
152                    type=DataSourceType.synthetic,
153                    properties=self._properties_for_task_output(),
154                ),
155            ),
156            intermediate_outputs=run_output.intermediate_outputs,
157            tags=self.default_tags or [],
158        )
159
160        return new_task_run
161
162    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
163        props = {}
164
165        # adapter info
166        adapter_info = self.adapter_info()
167        props["adapter_name"] = adapter_info.adapter_name
168        props["model_name"] = adapter_info.model_name
169        props["model_provider"] = adapter_info.model_provider
170        props["prompt_builder_name"] = adapter_info.prompt_builder_name
171        if adapter_info.prompt_id is not None:
172            props["prompt_id"] = adapter_info.prompt_id
173
174        return props

Base class for AI model adapters that handle task execution.

This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.

Attributes:

- prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
- kiln_task (Task): The task configuration and metadata
- output_schema (dict | None): JSON schema for validating structured outputs
- input_schema (dict | None): JSON schema for validating structured inputs

prompt_builder
kiln_task
output_schema
input_schema
default_tags
async def invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
61    async def invoke_returning_raw(
62        self,
63        input: Dict | str,
64        input_source: DataSource | None = None,
65    ) -> Dict | str:
66        result = await self.invoke(input, input_source)
67        if self.kiln_task.output_json_schema is None:
68            return result.output.output
69        else:
70            return json.loads(result.output.output)
async def invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
 72    async def invoke(
 73        self,
 74        input: Dict | str,
 75        input_source: DataSource | None = None,
 76    ) -> TaskRun:
 77        # validate input
 78        if self.input_schema is not None:
 79            if not isinstance(input, dict):
 80                raise ValueError(f"structured input is not a dict: {input}")
 81            validate_schema(input, self.input_schema)
 82
 83        # Run
 84        run_output = await self._run(input)
 85
 86        # validate output
 87        if self.output_schema is not None:
 88            if not isinstance(run_output.output, dict):
 89                raise RuntimeError(
 90                    f"structured response is not a dict: {run_output.output}"
 91                )
 92            validate_schema(run_output.output, self.output_schema)
 93        else:
 94            if not isinstance(run_output.output, str):
 95                raise RuntimeError(
 96                    f"response is not a string for non-structured task: {run_output.output}"
 97                )
 98
 99        # Generate the run and output
100        run = self.generate_run(input, input_source, run_output)
101
102        # Save the run if configured to do so, and we have a path to save to
103        if Config.shared().autosave_runs and self.kiln_task.path is not None:
104            run.save_to_file()
105        else:
106            # Clear the ID to indicate it's not persisted
107            run.id = None
108
109        return run
def has_structured_output(self) -> bool:
111    def has_structured_output(self) -> bool:
112        return self.output_schema is not None
@abstractmethod
def adapter_info(self) -> AdapterInfo:
114    @abstractmethod
115    def adapter_info(self) -> AdapterInfo:
116        pass
def build_prompt(self) -> str:
122    def build_prompt(self) -> str:
123        return self.prompt_builder.build_prompt()
def generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, run_output: RunOutput) -> kiln_ai.datamodel.TaskRun:
126    def generate_run(
127        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
128    ) -> TaskRun:
129        # Convert input and output to JSON strings if they are dictionaries
130        input_str = json.dumps(input) if isinstance(input, dict) else input
131        output_str = (
132            json.dumps(run_output.output)
133            if isinstance(run_output.output, dict)
134            else run_output.output
135        )
136
137        # If no input source is provided, use the human data source
138        if input_source is None:
139            input_source = DataSource(
140                type=DataSourceType.human,
141                properties={"created_by": Config.shared().user_id},
142            )
143
144        new_task_run = TaskRun(
145            parent=self.kiln_task,
146            input=input_str,
147            input_source=input_source,
148            output=TaskOutput(
149                output=output_str,
150                # Synthetic since an adapter, not a human, is creating this
151                source=DataSource(
152                    type=DataSourceType.synthetic,
153                    properties=self._properties_for_task_output(),
154                ),
155            ),
156            intermediate_outputs=run_output.intermediate_outputs,
157            tags=self.default_tags or [],
158        )
159
160        return new_task_run