kiln_ai.adapters.base_adapter

  1import json
  2from abc import ABCMeta, abstractmethod
  3from dataclasses import dataclass
  4from typing import Dict
  5
  6from kiln_ai.datamodel import (
  7    DataSource,
  8    DataSourceType,
  9    Task,
 10    TaskOutput,
 11    TaskRun,
 12)
 13from kiln_ai.datamodel.json_schema import validate_schema
 14from kiln_ai.utils.config import Config
 15
 16from .prompt_builders import BasePromptBuilder, SimplePromptBuilder
 17
 18
@dataclass
class AdapterInfo:
    """Identifying metadata an adapter reports through ``adapter_info()``.

    ``BaseAdapter._properties_for_task_output`` copies each of these four
    string values into the properties of the data source attached to the
    task outputs it generates.
    """

    # Name of the adapter implementation (e.g. "custom")
    adapter_name: str
    # Name of the model the adapter runs (e.g. "model-1")
    model_name: str
    # Provider of that model (e.g. "provider")
    model_provider: str
    # Name of the prompt builder in use (e.g. "simple")
    prompt_builder_name: str
 25
 26
 27class BaseAdapter(metaclass=ABCMeta):
 28    """Base class for AI model adapters that handle task execution.
 29
 30    This abstract class provides the foundation for implementing model-specific adapters
 31    that can process tasks with structured or unstructured inputs/outputs. It handles
 32    input/output validation, prompt building, and run tracking.
 33
 34    Attributes:
 35        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 36        kiln_task (Task): The task configuration and metadata
 37        output_schema (dict | None): JSON schema for validating structured outputs
 38        input_schema (dict | None): JSON schema for validating structured inputs
 39
 40    Example:
 41        ```python
 42        class CustomAdapter(BaseAdapter):
 43            async def _run(self, input: Dict | str) -> Dict | str:
 44                # Implementation for specific model
 45                pass
 46
 47            def adapter_info(self) -> AdapterInfo:
 48                return AdapterInfo(
 49                    adapter_name="custom",
 50                    model_name="model-1",
 51                    model_provider="provider",
 52                    prompt_builder_name="simple"
 53                )
 54        ```
 55    """
 56
 57    def __init__(
 58        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
 59    ):
 60        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 61        self.kiln_task = kiln_task
 62        self.output_schema = self.kiln_task.output_json_schema
 63        self.input_schema = self.kiln_task.input_json_schema
 64
 65    async def invoke_returning_raw(
 66        self,
 67        input: Dict | str,
 68        input_source: DataSource | None = None,
 69    ) -> Dict | str:
 70        result = await self.invoke(input, input_source)
 71        if self.kiln_task.output_json_schema is None:
 72            return result.output.output
 73        else:
 74            return json.loads(result.output.output)
 75
 76    async def invoke(
 77        self,
 78        input: Dict | str,
 79        input_source: DataSource | None = None,
 80    ) -> TaskRun:
 81        # validate input
 82        if self.input_schema is not None:
 83            if not isinstance(input, dict):
 84                raise ValueError(f"structured input is not a dict: {input}")
 85            validate_schema(input, self.input_schema)
 86
 87        # Run
 88        result = await self._run(input)
 89
 90        # validate output
 91        if self.output_schema is not None:
 92            if not isinstance(result, dict):
 93                raise RuntimeError(f"structured response is not a dict: {result}")
 94            validate_schema(result, self.output_schema)
 95        else:
 96            if not isinstance(result, str):
 97                raise RuntimeError(
 98                    f"response is not a string for non-structured task: {result}"
 99                )
100
101        # Generate the run and output
102        run = self.generate_run(input, input_source, result)
103
104        # Save the run if configured to do so, and we have a path to save to
105        if Config.shared().autosave_runs and self.kiln_task.path is not None:
106            run.save_to_file()
107        else:
108            # Clear the ID to indicate it's not persisted
109            run.id = None
110
111        return run
112
113    def has_structured_output(self) -> bool:
114        return self.output_schema is not None
115
116    @abstractmethod
117    def adapter_info(self) -> AdapterInfo:
118        pass
119
120    @abstractmethod
121    async def _run(self, input: Dict | str) -> Dict | str:
122        pass
123
124    def build_prompt(self) -> str:
125        prompt = self.prompt_builder.build_prompt()
126        adapter_instructions = self.adapter_specific_instructions()
127        if adapter_instructions is not None:
128            prompt += f"# Format Instructions\n\n{adapter_instructions}\n\n"
129        return prompt
130
131    # override for adapter specific instructions (e.g. tool calling, json format, etc)
132    def adapter_specific_instructions(self) -> str | None:
133        return None
134
135    # create a run and task output
136    def generate_run(
137        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
138    ) -> TaskRun:
139        # Convert input and output to JSON strings if they are dictionaries
140        input_str = json.dumps(input) if isinstance(input, dict) else input
141        output_str = json.dumps(output) if isinstance(output, dict) else output
142
143        # If no input source is provided, use the human data source
144        if input_source is None:
145            input_source = DataSource(
146                type=DataSourceType.human,
147                properties={"created_by": Config.shared().user_id},
148            )
149
150        new_task_run = TaskRun(
151            parent=self.kiln_task,
152            input=input_str,
153            input_source=input_source,
154            output=TaskOutput(
155                output=output_str,
156                # Synthetic since an adapter, not a human, is creating this
157                source=DataSource(
158                    type=DataSourceType.synthetic,
159                    properties=self._properties_for_task_output(),
160                ),
161            ),
162        )
163
164        exclude_fields = {
165            "id": True,
166            "created_at": True,
167            "updated_at": True,
168            "path": True,
169            "output": {"id": True, "created_at": True, "updated_at": True},
170        }
171        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
172
173        # Check if the same run already exists
174        existing_task_run = next(
175            (
176                task_run
177                for task_run in self.kiln_task.runs()
178                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
179            ),
180            None,
181        )
182        if existing_task_run:
183            return existing_task_run
184
185        return new_task_run
186
187    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
188        props = {}
189
190        # adapter info
191        adapter_info = self.adapter_info()
192        props["adapter_name"] = adapter_info.adapter_name
193        props["model_name"] = adapter_info.model_name
194        props["model_provider"] = adapter_info.model_provider
195        props["prompt_builder_name"] = adapter_info.prompt_builder_name
196
197        return props
@dataclass
class AdapterInfo:
@dataclass
class AdapterInfo:
    """Identifying metadata an adapter reports through ``adapter_info()``.

    ``BaseAdapter._properties_for_task_output`` copies each of these four
    string values into the properties of the data source attached to the
    task outputs it generates.
    """

    # Name of the adapter implementation (e.g. "custom")
    adapter_name: str
    # Name of the model the adapter runs (e.g. "model-1")
    model_name: str
    # Provider of that model (e.g. "provider")
    model_provider: str
    # Name of the prompt builder in use (e.g. "simple")
    prompt_builder_name: str
AdapterInfo( adapter_name: str, model_name: str, model_provider: str, prompt_builder_name: str)
adapter_name: str
model_name: str
model_provider: str
prompt_builder_name: str
class BaseAdapter:
 28class BaseAdapter(metaclass=ABCMeta):
 29    """Base class for AI model adapters that handle task execution.
 30
 31    This abstract class provides the foundation for implementing model-specific adapters
 32    that can process tasks with structured or unstructured inputs/outputs. It handles
 33    input/output validation, prompt building, and run tracking.
 34
 35    Attributes:
 36        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
 37        kiln_task (Task): The task configuration and metadata
 38        output_schema (dict | None): JSON schema for validating structured outputs
 39        input_schema (dict | None): JSON schema for validating structured inputs
 40
 41    Example:
 42        ```python
 43        class CustomAdapter(BaseAdapter):
 44            async def _run(self, input: Dict | str) -> Dict | str:
 45                # Implementation for specific model
 46                pass
 47
 48            def adapter_info(self) -> AdapterInfo:
 49                return AdapterInfo(
 50                    adapter_name="custom",
 51                    model_name="model-1",
 52                    model_provider="provider",
 53                    prompt_builder_name="simple"
 54                )
 55        ```
 56    """
 57
 58    def __init__(
 59        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
 60    ):
 61        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
 62        self.kiln_task = kiln_task
 63        self.output_schema = self.kiln_task.output_json_schema
 64        self.input_schema = self.kiln_task.input_json_schema
 65
 66    async def invoke_returning_raw(
 67        self,
 68        input: Dict | str,
 69        input_source: DataSource | None = None,
 70    ) -> Dict | str:
 71        result = await self.invoke(input, input_source)
 72        if self.kiln_task.output_json_schema is None:
 73            return result.output.output
 74        else:
 75            return json.loads(result.output.output)
 76
 77    async def invoke(
 78        self,
 79        input: Dict | str,
 80        input_source: DataSource | None = None,
 81    ) -> TaskRun:
 82        # validate input
 83        if self.input_schema is not None:
 84            if not isinstance(input, dict):
 85                raise ValueError(f"structured input is not a dict: {input}")
 86            validate_schema(input, self.input_schema)
 87
 88        # Run
 89        result = await self._run(input)
 90
 91        # validate output
 92        if self.output_schema is not None:
 93            if not isinstance(result, dict):
 94                raise RuntimeError(f"structured response is not a dict: {result}")
 95            validate_schema(result, self.output_schema)
 96        else:
 97            if not isinstance(result, str):
 98                raise RuntimeError(
 99                    f"response is not a string for non-structured task: {result}"
100                )
101
102        # Generate the run and output
103        run = self.generate_run(input, input_source, result)
104
105        # Save the run if configured to do so, and we have a path to save to
106        if Config.shared().autosave_runs and self.kiln_task.path is not None:
107            run.save_to_file()
108        else:
109            # Clear the ID to indicate it's not persisted
110            run.id = None
111
112        return run
113
114    def has_structured_output(self) -> bool:
115        return self.output_schema is not None
116
117    @abstractmethod
118    def adapter_info(self) -> AdapterInfo:
119        pass
120
121    @abstractmethod
122    async def _run(self, input: Dict | str) -> Dict | str:
123        pass
124
125    def build_prompt(self) -> str:
126        prompt = self.prompt_builder.build_prompt()
127        adapter_instructions = self.adapter_specific_instructions()
128        if adapter_instructions is not None:
129            prompt += f"# Format Instructions\n\n{adapter_instructions}\n\n"
130        return prompt
131
132    # override for adapter specific instructions (e.g. tool calling, json format, etc)
133    def adapter_specific_instructions(self) -> str | None:
134        return None
135
136    # create a run and task output
137    def generate_run(
138        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
139    ) -> TaskRun:
140        # Convert input and output to JSON strings if they are dictionaries
141        input_str = json.dumps(input) if isinstance(input, dict) else input
142        output_str = json.dumps(output) if isinstance(output, dict) else output
143
144        # If no input source is provided, use the human data source
145        if input_source is None:
146            input_source = DataSource(
147                type=DataSourceType.human,
148                properties={"created_by": Config.shared().user_id},
149            )
150
151        new_task_run = TaskRun(
152            parent=self.kiln_task,
153            input=input_str,
154            input_source=input_source,
155            output=TaskOutput(
156                output=output_str,
157                # Synthetic since an adapter, not a human, is creating this
158                source=DataSource(
159                    type=DataSourceType.synthetic,
160                    properties=self._properties_for_task_output(),
161                ),
162            ),
163        )
164
165        exclude_fields = {
166            "id": True,
167            "created_at": True,
168            "updated_at": True,
169            "path": True,
170            "output": {"id": True, "created_at": True, "updated_at": True},
171        }
172        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
173
174        # Check if the same run already exists
175        existing_task_run = next(
176            (
177                task_run
178                for task_run in self.kiln_task.runs()
179                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
180            ),
181            None,
182        )
183        if existing_task_run:
184            return existing_task_run
185
186        return new_task_run
187
188    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
189        props = {}
190
191        # adapter info
192        adapter_info = self.adapter_info()
193        props["adapter_name"] = adapter_info.adapter_name
194        props["model_name"] = adapter_info.model_name
195        props["model_provider"] = adapter_info.model_provider
196        props["prompt_builder_name"] = adapter_info.prompt_builder_name
197
198        return props

Base class for AI model adapters that handle task execution.

This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.

Attributes:
- prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
- kiln_task (Task): The task configuration and metadata
- output_schema (dict | None): JSON schema for validating structured outputs
- input_schema (dict | None): JSON schema for validating structured inputs

Example:

class CustomAdapter(BaseAdapter):
    async def _run(self, input: Dict | str) -> Dict | str:
        # Implementation for specific model
        pass

    def adapter_info(self) -> AdapterInfo:
        return AdapterInfo(
            adapter_name="custom",
            model_name="model-1",
            model_provider="provider",
            prompt_builder_name="simple"
        )
prompt_builder
kiln_task
output_schema
input_schema
async def invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
66    async def invoke_returning_raw(
67        self,
68        input: Dict | str,
69        input_source: DataSource | None = None,
70    ) -> Dict | str:
71        result = await self.invoke(input, input_source)
72        if self.kiln_task.output_json_schema is None:
73            return result.output.output
74        else:
75            return json.loads(result.output.output)
async def invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
 77    async def invoke(
 78        self,
 79        input: Dict | str,
 80        input_source: DataSource | None = None,
 81    ) -> TaskRun:
 82        # validate input
 83        if self.input_schema is not None:
 84            if not isinstance(input, dict):
 85                raise ValueError(f"structured input is not a dict: {input}")
 86            validate_schema(input, self.input_schema)
 87
 88        # Run
 89        result = await self._run(input)
 90
 91        # validate output
 92        if self.output_schema is not None:
 93            if not isinstance(result, dict):
 94                raise RuntimeError(f"structured response is not a dict: {result}")
 95            validate_schema(result, self.output_schema)
 96        else:
 97            if not isinstance(result, str):
 98                raise RuntimeError(
 99                    f"response is not a string for non-structured task: {result}"
100                )
101
102        # Generate the run and output
103        run = self.generate_run(input, input_source, result)
104
105        # Save the run if configured to do so, and we have a path to save to
106        if Config.shared().autosave_runs and self.kiln_task.path is not None:
107            run.save_to_file()
108        else:
109            # Clear the ID to indicate it's not persisted
110            run.id = None
111
112        return run
def has_structured_output(self) -> bool:
114    def has_structured_output(self) -> bool:
115        return self.output_schema is not None
@abstractmethod
def adapter_info(self) -> AdapterInfo:
117    @abstractmethod
118    def adapter_info(self) -> AdapterInfo:
119        pass
def build_prompt(self) -> str:
125    def build_prompt(self) -> str:
126        prompt = self.prompt_builder.build_prompt()
127        adapter_instructions = self.adapter_specific_instructions()
128        if adapter_instructions is not None:
129            prompt += f"# Format Instructions\n\n{adapter_instructions}\n\n"
130        return prompt
def adapter_specific_instructions(self) -> str | None:
133    def adapter_specific_instructions(self) -> str | None:
134        return None
def generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, output: Union[Dict, str]) -> kiln_ai.datamodel.TaskRun:
137    def generate_run(
138        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
139    ) -> TaskRun:
140        # Convert input and output to JSON strings if they are dictionaries
141        input_str = json.dumps(input) if isinstance(input, dict) else input
142        output_str = json.dumps(output) if isinstance(output, dict) else output
143
144        # If no input source is provided, use the human data source
145        if input_source is None:
146            input_source = DataSource(
147                type=DataSourceType.human,
148                properties={"created_by": Config.shared().user_id},
149            )
150
151        new_task_run = TaskRun(
152            parent=self.kiln_task,
153            input=input_str,
154            input_source=input_source,
155            output=TaskOutput(
156                output=output_str,
157                # Synthetic since an adapter, not a human, is creating this
158                source=DataSource(
159                    type=DataSourceType.synthetic,
160                    properties=self._properties_for_task_output(),
161                ),
162            ),
163        )
164
165        exclude_fields = {
166            "id": True,
167            "created_at": True,
168            "updated_at": True,
169            "path": True,
170            "output": {"id": True, "created_at": True, "updated_at": True},
171        }
172        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)
173
174        # Check if the same run already exists
175        existing_task_run = next(
176            (
177                task_run
178                for task_run in self.kiln_task.runs()
179                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
180            ),
181            None,
182        )
183        if existing_task_run:
184            return existing_task_run
185
186        return new_task_run