kiln_ai.adapters.model_adapters.base_adapter

import json
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Dict

from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
from kiln_ai.adapters.prompt_builders import BasePromptBuilder, SimplePromptBuilder
from kiln_ai.adapters.provider_tools import kiln_model_provider_from
from kiln_ai.adapters.run_output import RunOutput
from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)
from kiln_ai.datamodel.json_schema import validate_schema
from kiln_ai.utils.config import Config


@dataclass
class AdapterInfo:
    adapter_name: str
    model_name: str
    model_provider: str
    prompt_builder_name: str
    prompt_id: str | None = None


class BaseAdapter(metaclass=ABCMeta):
    """Base class for AI model adapters that handle task execution.

    This abstract class provides the foundation for implementing model-specific adapters
    that can process tasks with structured or unstructured inputs/outputs. It handles
    input/output validation, prompt building, and run tracking.

    Attributes:
        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
        kiln_task (Task): The task configuration and metadata
        output_schema (dict | None): JSON schema for validating structured outputs
        input_schema (dict | None): JSON schema for validating structured inputs
    """

    def __init__(
        self,
        kiln_task: Task,
        model_name: str,
        model_provider_name: str,
        prompt_builder: BasePromptBuilder | None = None,
        tags: list[str] | None = None,
    ):
        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
        self.kiln_task = kiln_task
        self.output_schema = self.kiln_task.output_json_schema
        self.input_schema = self.kiln_task.input_json_schema
        self.default_tags = tags
        self.model_name = model_name
        self.model_provider_name = model_provider_name
        self._model_provider: KilnModelProvider | None = None

    async def model_provider(self) -> KilnModelProvider:
        """
        Lazy load the model provider for this adapter.
        """
        if self._model_provider is not None:
            return self._model_provider
        if not self.model_name or not self.model_provider_name:
            raise ValueError("model_name and model_provider_name must be provided")
        self._model_provider = await kiln_model_provider_from(
            self.model_name, self.model_provider_name
        )
        if not self._model_provider:
            raise ValueError(
                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
            )
        return self._model_provider

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        result = await self.invoke(input, input_source)
        if self.kiln_task.output_json_schema is None:
            return result.output.output
        else:
            return json.loads(result.output.output)

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        # validate input
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        # Run
        run_output = await self._run(input)

        # Parse
        provider = await self.model_provider()
        parser = model_parser_from_id(provider.parser)(
            structured_output=self.has_structured_output()
        )
        parsed_output = parser.parse_output(original_output=run_output)

        # validate output
        if self.output_schema is not None:
            if not isinstance(parsed_output.output, dict):
                raise RuntimeError(
                    f"structured response is not a dict: {parsed_output.output}"
                )
            validate_schema(parsed_output.output, self.output_schema)
        else:
            if not isinstance(parsed_output.output, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {parsed_output.output}"
                )

        # Generate the run and output
        run = self.generate_run(input, input_source, parsed_output)

        # Save the run if configured to do so, and we have a path to save to
        if Config.shared().autosave_runs and self.kiln_task.path is not None:
            run.save_to_file()
        else:
            # Clear the ID to indicate it's not persisted
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        pass

    @abstractmethod
    async def _run(self, input: Dict | str) -> RunOutput:
        pass

    async def build_prompt(self) -> str:
        # The prompt builder needs to know if we want to inject formatting instructions
        provider = await self.model_provider()
        add_json_instructions = self.has_structured_output() and (
            provider.structured_output_mode == StructuredOutputMode.json_instructions
            or provider.structured_output_mode
            == StructuredOutputMode.json_instruction_and_object
        )

        return self.prompt_builder.build_prompt(
            include_json_instructions=add_json_instructions
        )

    # create a run and task output
    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
    ) -> TaskRun:
        # Convert input and output to JSON strings if they are dictionaries
        input_str = (
            json.dumps(input, ensure_ascii=False) if isinstance(input, dict) else input
        )
        output_str = (
            json.dumps(run_output.output, ensure_ascii=False)
            if isinstance(run_output.output, dict)
            else run_output.output
        )

        # If no input source is provided, use the human data source
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        new_task_run = TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=TaskOutput(
                output=output_str,
                # Synthetic since an adapter, not a human, is creating this
                source=DataSource(
                    type=DataSourceType.synthetic,
                    properties=self._properties_for_task_output(),
                ),
            ),
            intermediate_outputs=run_output.intermediate_outputs,
            tags=self.default_tags or [],
        )

        return new_task_run

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        props = {}

        # adapter info
        adapter_info = self.adapter_info()
        props["adapter_name"] = adapter_info.adapter_name
        props["model_name"] = adapter_info.model_name
        props["model_provider"] = adapter_info.model_provider
        props["prompt_builder_name"] = adapter_info.prompt_builder_name
        if adapter_info.prompt_id is not None:
            props["prompt_id"] = adapter_info.prompt_id

        return props
@dataclass
class AdapterInfo:
AdapterInfo( adapter_name: str, model_name: str, model_provider: str, prompt_builder_name: str, prompt_id: str | None = None)
adapter_name: str
model_name: str
model_provider: str
prompt_builder_name: str
prompt_id: str | None = None
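
These fields mirror the properties that BaseAdapter._properties_for_task_output records on each saved run's output source. A minimal construction sketch, using hypothetical values:

    from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo

    info = AdapterInfo(
        adapter_name="my_adapter",        # hypothetical adapter identifier
        model_name="my_model",            # hypothetical model identifier
        model_provider="my_provider",     # hypothetical provider identifier
        prompt_builder_name="simple_prompt_builder",  # hypothetical builder name
    )
    # prompt_id defaults to None and is only recorded when set.
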
class BaseAdapter:

Base class for AI model adapters that handle task execution.

This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.

Attributes:
    prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
    kiln_task (Task): The task configuration and metadata
    output_schema (dict | None): JSON schema for validating structured outputs
    input_schema (dict | None): JSON schema for validating structured inputs

prompt_builder
kiln_task
output_schema
input_schema
default_tags
model_name
model_provider_name
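
A minimal sketch of a concrete adapter. It assumes RunOutput can be constructed with output and intermediate_outputs keyword arguments (check kiln_ai.adapters.run_output for the actual constructor); the adapter name is hypothetical.

    from typing import Dict

    from kiln_ai.adapters.model_adapters.base_adapter import AdapterInfo, BaseAdapter
    from kiln_ai.adapters.run_output import RunOutput


    class EchoAdapter(BaseAdapter):
        """Toy adapter that echoes its input back, suitable for a plain-text task."""

        def adapter_info(self) -> AdapterInfo:
            return AdapterInfo(
                adapter_name="echo_adapter",  # hypothetical adapter name
                model_name=self.model_name,
                model_provider=self.model_provider_name,
                # Illustrative placeholder; substitute your prompt builder's real name.
                prompt_builder_name=type(self.prompt_builder).__name__,
            )

        async def _run(self, input: Dict | str) -> RunOutput:
            # A real adapter would build the prompt and call the model here.
            # Assumption: RunOutput accepts these keyword arguments.
            return RunOutput(output=str(input), intermediate_outputs=None)

Subclasses only implement _run and adapter_info; input/output validation, parsing, prompt building, and run tracking are inherited from BaseAdapter.invoke.
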
async def model_provider(self) -> kiln_ai.adapters.ml_model_list.KilnModelProvider:

Lazy load the model provider for this adapter.
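
The resolved provider is cached on the adapter, so repeated awaits are cheap. A usage sketch, assuming adapter is an instance of a concrete BaseAdapter subclass:

    provider = await adapter.model_provider()  # first call resolves via kiln_model_provider_from
    cached = await adapter.model_provider()    # later calls return the cached KilnModelProvider
    assert provider is cached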

async def invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
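
Runs the task and returns only the output value: the raw string for unstructured tasks, or the JSON-decoded dict when the task defines an output schema. A usage sketch, assuming adapter wraps a task with a structured output schema and the input keys are hypothetical:

    raw = await adapter.invoke_returning_raw({"topic": "sea creatures"})
    assert isinstance(raw, dict)  # decoded from the JSON output string
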
async def invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
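
The full run pipeline: validate structured input against the task's input schema, call _run, parse the model output with the provider's parser, validate the output, and build a TaskRun. The run is saved to file only when autosave is enabled and the task has a path; otherwise its id is cleared. A usage sketch with a hypothetical structured input:

    run = await adapter.invoke({"topic": "sea creatures"})
    print(run.output.output)  # stored as a string; JSON-encoded for structured tasks
    print(run.id)             # None when the run was not persisted
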
def has_structured_output(self) -> bool:
@abstractmethod
def adapter_info(self) -> AdapterInfo:
async def build_prompt(self) -> str:
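
JSON formatting instructions are appended only when the task is structured and the provider's structured_output_mode is json_instructions or json_instruction_and_object. A short sketch, assuming adapter wraps a structured task:

    provider = await adapter.model_provider()
    prompt = await adapter.build_prompt()
    # For providers in json_instructions / json_instruction_and_object mode, the
    # prompt ends with JSON formatting instructions from the prompt builder.
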
def generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, run_output: kiln_ai.adapters.run_output.RunOutput) -> kiln_ai.datamodel.TaskRun:
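
Builds the TaskRun from an input and a RunOutput: dict inputs and outputs are JSON-encoded, a missing input_source defaults to a human DataSource tagged with the configured user_id, and the output carries a synthetic DataSource with the adapter properties. A sketch, assuming RunOutput accepts output and intermediate_outputs keyword arguments and the dict matches the task's output schema:

    run_output = RunOutput(output={"answer": "42"}, intermediate_outputs=None)
    task_run = adapter.generate_run({"question": "what is 6 x 7?"}, None, run_output)
    # task_run.output.source.type is DataSourceType.synthetic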