kiln_ai.adapters.model_adapters.base_adapter
import json
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Dict

from kiln_ai.adapters.ml_model_list import KilnModelProvider, StructuredOutputMode
from kiln_ai.adapters.parsers.parser_registry import model_parser_from_id
from kiln_ai.adapters.prompt_builders import BasePromptBuilder, SimplePromptBuilder
from kiln_ai.adapters.provider_tools import kiln_model_provider_from
from kiln_ai.adapters.run_output import RunOutput
from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)
from kiln_ai.datamodel.json_schema import validate_schema
from kiln_ai.utils.config import Config


@dataclass
class AdapterInfo:
    """Identifying metadata an adapter reports about itself.

    ``BaseAdapter._properties_for_task_output`` copies these values into the
    properties of the data source attached to each generated output.
    """

    adapter_name: str
    model_name: str
    model_provider: str
    prompt_builder_name: str
    # Optional; only copied into the output properties when it is not None.
    prompt_id: str | None = None


class BaseAdapter(metaclass=ABCMeta):
    """Abstract foundation for adapters that execute a Kiln task on a model.

    Subclasses supply the model call (``_run``) and identifying metadata
    (``adapter_info``). This base class validates structured input and output
    against the task's JSON schemas, builds the prompt, parses the model
    response and wraps the result in a ``TaskRun``.

    Attributes:
        prompt_builder (BasePromptBuilder): Builds the prompt sent to the model.
        kiln_task (Task): The task configuration and metadata.
        output_schema (dict | None): JSON schema for structured outputs, if any.
        input_schema (dict | None): JSON schema for structured inputs, if any.
    """

    def __init__(
        self,
        kiln_task: Task,
        model_name: str,
        model_provider_name: str,
        prompt_builder: BasePromptBuilder | None = None,
        tags: list[str] | None = None,
    ):
        """Bind the adapter to a task and a model/provider pair.

        Args:
            kiln_task: Task whose schemas and metadata drive each run.
            model_name: Model identifier used for the provider lookup.
            model_provider_name: Provider identifier used for the provider lookup.
            prompt_builder: Prompt builder to use; a ``SimplePromptBuilder`` for
                ``kiln_task`` is created when none is given.
            tags: Tags attached to every run this adapter generates.
        """
        if not prompt_builder:
            prompt_builder = SimplePromptBuilder(kiln_task)

        self.kiln_task = kiln_task
        self.prompt_builder = prompt_builder
        self.input_schema = kiln_task.input_json_schema
        self.output_schema = kiln_task.output_json_schema
        self.default_tags = tags
        self.model_name = model_name
        self.model_provider_name = model_provider_name
        # Filled in by model_provider() the first time it is awaited.
        self._model_provider: KilnModelProvider | None = None

    async def model_provider(self) -> KilnModelProvider:
        """Return the provider record for this adapter, resolving it on first use.

        The lookup result is stored on ``self._model_provider`` so later calls
        return it without awaiting ``kiln_model_provider_from`` again.

        Raises:
            ValueError: If ``model_name`` or ``model_provider_name`` is empty, or
                if the lookup yields no provider for that pair.
        """
        cached = self._model_provider
        if cached is not None:
            return cached

        if not (self.model_name and self.model_provider_name):
            raise ValueError("model_name and model_provider_name must be provided")

        resolved = await kiln_model_provider_from(
            self.model_name, self.model_provider_name
        )
        # Stored before the check so the attribute always mirrors the last lookup.
        self._model_provider = resolved
        if not resolved:
            raise ValueError(
                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
            )
        return resolved

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Run the task and return only the output payload, not the ``TaskRun``.

        When the task has no output JSON schema the stored output string is
        returned unchanged; otherwise it is decoded with ``json.loads``.
        """
        task_run = await self.invoke(input, input_source)
        stored_output = task_run.output.output
        if self.kiln_task.output_json_schema is not None:
            return json.loads(stored_output)
        return stored_output

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Validate the input, run the model, validate the output, build a run.

        Args:
            input: Task input; must be a dict when the task has an input schema.
            input_source: Origin of the input, forwarded to ``generate_run``.

        Returns:
            The generated ``TaskRun``. Its ``id`` is set to ``None`` when the run
            was not saved to disk.

        Raises:
            ValueError: If the task has an input schema and ``input`` is not a dict.
            RuntimeError: If the parsed output is not a dict for a structured
                task, or not a string for an unstructured one.
        """
        # Structured tasks only accept dict input that satisfies the schema.
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        raw_result = await self._run(input)

        # The provider record names the parser to apply to the raw result.
        provider = await self.model_provider()
        parser_factory = model_parser_from_id(provider.parser)
        output_parser = parser_factory(structured_output=self.has_structured_output())
        parsed = output_parser.parse_output(original_output=raw_result)

        payload = parsed.output
        if self.output_schema is None:
            if not isinstance(payload, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {payload}"
                )
        else:
            if not isinstance(payload, dict):
                raise RuntimeError(f"structured response is not a dict: {payload}")
            validate_schema(payload, self.output_schema)

        run = self.generate_run(input, input_source, parsed)

        # Persist only when autosave is enabled and the task has a path.
        should_save = Config.shared().autosave_runs and self.kiln_task.path is not None
        if should_save:
            run.save_to_file()
        else:
            # A cleared ID signals that the run was not persisted.
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        """Report whether the task defines an output JSON schema."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        """Return the metadata describing this adapter (implemented by subclasses)."""

    @abstractmethod
    async def _run(self, input: Dict | str) -> RunOutput:
        """Execute the model call for ``input`` (implemented by subclasses)."""

    async def build_prompt(self) -> str:
        """Build the prompt, adding JSON formatting instructions when required.

        Instructions are requested only for structured-output tasks whose
        provider mode is ``json_instructions`` or ``json_instruction_and_object``.
        """
        provider = await self.model_provider()

        wants_json_instructions = self.has_structured_output()
        if wants_json_instructions:
            mode = provider.structured_output_mode
            wants_json_instructions = (
                mode == StructuredOutputMode.json_instructions
                or mode == StructuredOutputMode.json_instruction_and_object
            )

        return self.prompt_builder.build_prompt(
            include_json_instructions=wants_json_instructions
        )

    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
    ) -> TaskRun:
        """Create the ``TaskRun`` (with its ``TaskOutput``) for one execution.

        Dict inputs and outputs are stored as JSON strings; other values are
        stored as given. When ``input_source`` is ``None`` a human data source
        carrying the configured user ID is used.
        """

        def _to_text(value):
            # Only dicts are serialised; anything else is passed through.
            if isinstance(value, dict):
                return json.dumps(value, ensure_ascii=False)
            return value

        input_str = _to_text(input)
        output_str = _to_text(run_output.output)

        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        # Synthetic: the adapter, not a human, produced this output.
        output_source = DataSource(
            type=DataSourceType.synthetic,
            properties=self._properties_for_task_output(),
        )
        task_output = TaskOutput(output=output_str, source=output_source)

        return TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=task_output,
            intermediate_outputs=run_output.intermediate_outputs,
            tags=self.default_tags or [],
        )

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Collect the adapter metadata recorded on each output's data source."""
        info = self.adapter_info()
        props: Dict[str, str | int | float] = {
            "adapter_name": info.adapter_name,
            "model_name": info.model_name,
            "model_provider": info.model_provider,
            "prompt_builder_name": info.prompt_builder_name,
        }
        if info.prompt_id is not None:
            props["prompt_id"] = info.prompt_id
        return props
@dataclass
class
AdapterInfo:
23@dataclass 24class AdapterInfo: 25 adapter_name: str 26 model_name: str 27 model_provider: str 28 prompt_builder_name: str 29 prompt_id: str | None = None
class
BaseAdapter:
class BaseAdapter(metaclass=ABCMeta):
    """Abstract foundation for adapters that execute a Kiln task on a model.

    Subclasses supply the model call (``_run``) and identifying metadata
    (``adapter_info``). This base class validates structured input and output
    against the task's JSON schemas, builds the prompt, parses the model
    response and wraps the result in a ``TaskRun``.

    Attributes:
        prompt_builder (BasePromptBuilder): Builds the prompt sent to the model.
        kiln_task (Task): The task configuration and metadata.
        output_schema (dict | None): JSON schema for structured outputs, if any.
        input_schema (dict | None): JSON schema for structured inputs, if any.
    """

    def __init__(
        self,
        kiln_task: Task,
        model_name: str,
        model_provider_name: str,
        prompt_builder: BasePromptBuilder | None = None,
        tags: list[str] | None = None,
    ):
        """Bind the adapter to a task and a model/provider pair.

        Args:
            kiln_task: Task whose schemas and metadata drive each run.
            model_name: Model identifier used for the provider lookup.
            model_provider_name: Provider identifier used for the provider lookup.
            prompt_builder: Prompt builder to use; a ``SimplePromptBuilder`` for
                ``kiln_task`` is created when none is given.
            tags: Tags attached to every run this adapter generates.
        """
        if not prompt_builder:
            prompt_builder = SimplePromptBuilder(kiln_task)

        self.kiln_task = kiln_task
        self.prompt_builder = prompt_builder
        self.input_schema = kiln_task.input_json_schema
        self.output_schema = kiln_task.output_json_schema
        self.default_tags = tags
        self.model_name = model_name
        self.model_provider_name = model_provider_name
        # Filled in by model_provider() the first time it is awaited.
        self._model_provider: KilnModelProvider | None = None

    async def model_provider(self) -> KilnModelProvider:
        """Return the provider record for this adapter, resolving it on first use.

        The lookup result is stored on ``self._model_provider`` so later calls
        return it without awaiting ``kiln_model_provider_from`` again.

        Raises:
            ValueError: If ``model_name`` or ``model_provider_name`` is empty, or
                if the lookup yields no provider for that pair.
        """
        cached = self._model_provider
        if cached is not None:
            return cached

        if not (self.model_name and self.model_provider_name):
            raise ValueError("model_name and model_provider_name must be provided")

        resolved = await kiln_model_provider_from(
            self.model_name, self.model_provider_name
        )
        # Stored before the check so the attribute always mirrors the last lookup.
        self._model_provider = resolved
        if not resolved:
            raise ValueError(
                f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
            )
        return resolved

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Run the task and return only the output payload, not the ``TaskRun``.

        When the task has no output JSON schema the stored output string is
        returned unchanged; otherwise it is decoded with ``json.loads``.
        """
        task_run = await self.invoke(input, input_source)
        stored_output = task_run.output.output
        if self.kiln_task.output_json_schema is not None:
            return json.loads(stored_output)
        return stored_output

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Validate the input, run the model, validate the output, build a run.

        Args:
            input: Task input; must be a dict when the task has an input schema.
            input_source: Origin of the input, forwarded to ``generate_run``.

        Returns:
            The generated ``TaskRun``. Its ``id`` is set to ``None`` when the run
            was not saved to disk.

        Raises:
            ValueError: If the task has an input schema and ``input`` is not a dict.
            RuntimeError: If the parsed output is not a dict for a structured
                task, or not a string for an unstructured one.
        """
        # Structured tasks only accept dict input that satisfies the schema.
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        raw_result = await self._run(input)

        # The provider record names the parser to apply to the raw result.
        provider = await self.model_provider()
        parser_factory = model_parser_from_id(provider.parser)
        output_parser = parser_factory(structured_output=self.has_structured_output())
        parsed = output_parser.parse_output(original_output=raw_result)

        payload = parsed.output
        if self.output_schema is None:
            if not isinstance(payload, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {payload}"
                )
        else:
            if not isinstance(payload, dict):
                raise RuntimeError(f"structured response is not a dict: {payload}")
            validate_schema(payload, self.output_schema)

        run = self.generate_run(input, input_source, parsed)

        # Persist only when autosave is enabled and the task has a path.
        should_save = Config.shared().autosave_runs and self.kiln_task.path is not None
        if should_save:
            run.save_to_file()
        else:
            # A cleared ID signals that the run was not persisted.
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        """Report whether the task defines an output JSON schema."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        """Return the metadata describing this adapter (implemented by subclasses)."""

    @abstractmethod
    async def _run(self, input: Dict | str) -> RunOutput:
        """Execute the model call for ``input`` (implemented by subclasses)."""

    async def build_prompt(self) -> str:
        """Build the prompt, adding JSON formatting instructions when required.

        Instructions are requested only for structured-output tasks whose
        provider mode is ``json_instructions`` or ``json_instruction_and_object``.
        """
        provider = await self.model_provider()

        wants_json_instructions = self.has_structured_output()
        if wants_json_instructions:
            mode = provider.structured_output_mode
            wants_json_instructions = (
                mode == StructuredOutputMode.json_instructions
                or mode == StructuredOutputMode.json_instruction_and_object
            )

        return self.prompt_builder.build_prompt(
            include_json_instructions=wants_json_instructions
        )

    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
    ) -> TaskRun:
        """Create the ``TaskRun`` (with its ``TaskOutput``) for one execution.

        Dict inputs and outputs are stored as JSON strings; other values are
        stored as given. When ``input_source`` is ``None`` a human data source
        carrying the configured user ID is used.
        """

        def _to_text(value):
            # Only dicts are serialised; anything else is passed through.
            if isinstance(value, dict):
                return json.dumps(value, ensure_ascii=False)
            return value

        input_str = _to_text(input)
        output_str = _to_text(run_output.output)

        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        # Synthetic: the adapter, not a human, produced this output.
        output_source = DataSource(
            type=DataSourceType.synthetic,
            properties=self._properties_for_task_output(),
        )
        task_output = TaskOutput(output=output_str, source=output_source)

        return TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=task_output,
            intermediate_outputs=run_output.intermediate_outputs,
            tags=self.default_tags or [],
        )

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Collect the adapter metadata recorded on each output's data source."""
        info = self.adapter_info()
        props: Dict[str, str | int | float] = {
            "adapter_name": info.adapter_name,
            "model_name": info.model_name,
            "model_provider": info.model_provider,
            "prompt_builder_name": info.prompt_builder_name,
        }
        if info.prompt_id is not None:
            props["prompt_id"] = info.prompt_id
        return props
Base class for AI model adapters that handle task execution.
This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.
Attributes:
- prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
- kiln_task (Task): The task configuration and metadata
- output_schema (dict | None): JSON schema for validating structured outputs
- input_schema (dict | None): JSON schema for validating structured inputs
async def model_provider(self) -> KilnModelProvider:
    """Return the provider record for this adapter, resolving it on first use.

    The lookup result is stored on ``self._model_provider`` so later calls
    return it without awaiting ``kiln_model_provider_from`` again.

    Raises:
        ValueError: If ``model_name`` or ``model_provider_name`` is empty, or
            if the lookup yields no provider for that pair.
    """
    cached = self._model_provider
    if cached is not None:
        return cached

    if not (self.model_name and self.model_provider_name):
        raise ValueError("model_name and model_provider_name must be provided")

    resolved = await kiln_model_provider_from(
        self.model_name, self.model_provider_name
    )
    # Stored before the check so the attribute always mirrors the last lookup.
    self._model_provider = resolved
    if not resolved:
        raise ValueError(
            f"model_provider_name {self.model_provider_name} not found for model {self.model_name}"
        )
    return resolved
Lazy load the model provider for this adapter.
async def
invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
async def invoke_returning_raw(
    self,
    input: Dict | str,
    input_source: DataSource | None = None,
) -> Dict | str:
    """Run the task and return only the output payload, not the ``TaskRun``.

    When the task has no output JSON schema the stored output string is
    returned unchanged; otherwise it is decoded with ``json.loads``.

    Args:
        input: Task input, forwarded to ``invoke``.
        input_source: Origin of the input, forwarded to ``invoke``.
    """
    task_run = await self.invoke(input, input_source)
    stored_output = task_run.output.output
    if self.kiln_task.output_json_schema is not None:
        return json.loads(stored_output)
    return stored_output
async def
invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
async def invoke(
    self,
    input: Dict | str,
    input_source: DataSource | None = None,
) -> TaskRun:
    """Validate the input, run the model, validate the output, build a run.

    Args:
        input: Task input; must be a dict when the task has an input schema.
        input_source: Origin of the input, forwarded to ``generate_run``.

    Returns:
        The generated ``TaskRun``. Its ``id`` is set to ``None`` when the run
        was not saved to disk.

    Raises:
        ValueError: If the task has an input schema and ``input`` is not a dict.
        RuntimeError: If the parsed output is not a dict for a structured
            task, or not a string for an unstructured one.
    """
    # Structured tasks only accept dict input that satisfies the schema.
    if self.input_schema is not None:
        if not isinstance(input, dict):
            raise ValueError(f"structured input is not a dict: {input}")
        validate_schema(input, self.input_schema)

    raw_result = await self._run(input)

    # The provider record names the parser to apply to the raw result.
    provider = await self.model_provider()
    parser_factory = model_parser_from_id(provider.parser)
    output_parser = parser_factory(structured_output=self.has_structured_output())
    parsed = output_parser.parse_output(original_output=raw_result)

    payload = parsed.output
    if self.output_schema is None:
        if not isinstance(payload, str):
            raise RuntimeError(
                f"response is not a string for non-structured task: {payload}"
            )
    else:
        if not isinstance(payload, dict):
            raise RuntimeError(f"structured response is not a dict: {payload}")
        validate_schema(payload, self.output_schema)

    run = self.generate_run(input, input_source, parsed)

    # Persist only when autosave is enabled and the task has a path.
    should_save = Config.shared().autosave_runs and self.kiln_task.path is not None
    if should_save:
        run.save_to_file()
    else:
        # A cleared ID signals that the run was not persisted.
        run.id = None

    return run
async def
build_prompt(self) -> str:
async def build_prompt(self) -> str:
    """Build the prompt, adding JSON formatting instructions when required.

    Instructions are requested only for structured-output tasks whose
    provider mode is ``json_instructions`` or ``json_instruction_and_object``.

    Returns:
        Whatever ``self.prompt_builder.build_prompt`` produces for that flag.
    """
    provider = await self.model_provider()

    wants_json_instructions = self.has_structured_output()
    if wants_json_instructions:
        mode = provider.structured_output_mode
        wants_json_instructions = (
            mode == StructuredOutputMode.json_instructions
            or mode == StructuredOutputMode.json_instruction_and_object
        )

    return self.prompt_builder.build_prompt(
        include_json_instructions=wants_json_instructions
    )
def
generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, run_output: kiln_ai.adapters.run_output.RunOutput) -> kiln_ai.datamodel.TaskRun:
def generate_run(
    self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
) -> TaskRun:
    """Create the ``TaskRun`` (with its ``TaskOutput``) for one execution.

    Dict inputs and outputs are stored as JSON strings; other values are
    stored as given. When ``input_source`` is ``None`` a human data source
    carrying the configured user ID is used.

    Args:
        input: The task input that was sent to the model.
        input_source: Origin of the input, or ``None`` for the human default.
        run_output: Parsed model output and its intermediate outputs.

    Returns:
        A new ``TaskRun`` parented to ``self.kiln_task`` and tagged with the
        adapter's default tags (an empty list when none were set).
    """

    def _to_text(value):
        # Only dicts are serialised; anything else is passed through.
        if isinstance(value, dict):
            return json.dumps(value, ensure_ascii=False)
        return value

    input_str = _to_text(input)
    output_str = _to_text(run_output.output)

    if input_source is None:
        input_source = DataSource(
            type=DataSourceType.human,
            properties={"created_by": Config.shared().user_id},
        )

    # Synthetic: the adapter, not a human, produced this output.
    output_source = DataSource(
        type=DataSourceType.synthetic,
        properties=self._properties_for_task_output(),
    )
    task_output = TaskOutput(output=output_str, source=output_source)

    return TaskRun(
        parent=self.kiln_task,
        input=input_str,
        input_source=input_source,
        output=task_output,
        intermediate_outputs=run_output.intermediate_outputs,
        tags=self.default_tags or [],
    )