kiln_ai.adapters.base_adapter
import json
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Dict

from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)
from kiln_ai.datamodel.json_schema import validate_schema
from kiln_ai.utils.config import Config

from .prompt_builders import BasePromptBuilder, SimplePromptBuilder


@dataclass
class AdapterInfo:
    """Identifying metadata an adapter reports about itself."""

    # Name of the adapter implementation
    adapter_name: str
    # Name of the model the adapter talks to
    model_name: str
    # Provider hosting the model
    model_provider: str
    # Name of the prompt builder used to create the prompt
    prompt_builder_name: str


class BaseAdapter(metaclass=ABCMeta):
    """Abstract foundation for adapters that execute a Kiln task against an AI model.

    Subclasses supply the model-specific pieces (`_run` and `adapter_info`);
    this class validates inputs and outputs against the task's JSON schemas,
    assembles the prompt, and produces a `TaskRun` record for each invocation.

    Attributes:
        prompt_builder (BasePromptBuilder): Builds the prompt text for the model
        kiln_task (Task): The task being executed
        output_schema (dict | None): JSON schema for structured output, if any
        input_schema (dict | None): JSON schema for structured input, if any

    Example:
        ```python
        class CustomAdapter(BaseAdapter):
            async def _run(self, input: Dict | str) -> Dict | str:
                # Implementation for specific model
                pass

            def adapter_info(self) -> AdapterInfo:
                return AdapterInfo(
                    adapter_name="custom",
                    model_name="model-1",
                    model_provider="provider",
                    prompt_builder_name="simple"
                )
        ```
    """

    def __init__(
        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
    ):
        """Bind the adapter to a task.

        Args:
            kiln_task: The task this adapter will run.
            prompt_builder: Optional prompt builder; when falsy, a
                `SimplePromptBuilder` for `kiln_task` is created instead.
        """
        if not prompt_builder:
            prompt_builder = SimplePromptBuilder(kiln_task)
        self.prompt_builder = prompt_builder
        self.kiln_task = kiln_task
        # Cache the task's schemas so validation does not re-read the task.
        self.output_schema = kiln_task.output_json_schema
        self.input_schema = kiln_task.input_json_schema

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Run the task and return only the output payload.

        Returns:
            The stored output string as-is when the task has no output schema,
            otherwise that string parsed from JSON.
        """
        task_run = await self.invoke(input, input_source)
        if self.kiln_task.output_json_schema is not None:
            # Structured output is stored as a JSON string; decode it.
            return json.loads(task_run.output.output)
        return task_run.output.output

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Validate the input, run the model, validate the output, and record the run.

        Raises:
            ValueError: The task has an input schema but `input` is not a dict.
            RuntimeError: The model response has the wrong type for the task
                (not a dict for a structured task, not a str otherwise).
        """
        # Structured tasks require a dict that satisfies the input schema.
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        response = await self._run(input)

        # The response type must match the task's output mode.
        if self.output_schema is None:
            if not isinstance(response, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {response}"
                )
        elif not isinstance(response, dict):
            raise RuntimeError(f"structured response is not a dict: {response}")
        else:
            validate_schema(response, self.output_schema)

        task_run = self.generate_run(input, input_source, response)

        # Persist only when autosave is enabled and the task has a path.
        should_save = (
            Config.shared().autosave_runs and self.kiln_task.path is not None
        )
        if should_save:
            task_run.save_to_file()
        else:
            # A missing ID signals that the run was not persisted.
            task_run.id = None

        return task_run

    def has_structured_output(self) -> bool:
        """Return True when the task defines an output JSON schema."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        """Return the metadata describing this adapter."""

    @abstractmethod
    async def _run(self, input: Dict | str) -> Dict | str:
        """Execute the model on `input` and return its response."""

    def build_prompt(self) -> str:
        """Return the builder's prompt, plus any adapter format instructions."""
        base_prompt = self.prompt_builder.build_prompt()
        extra = self.adapter_specific_instructions()
        if extra is None:
            return base_prompt
        return base_prompt + f"# Format Instructions\n\n{extra}\n\n"

    def adapter_specific_instructions(self) -> str | None:
        """Hook for subclasses to add instructions (e.g. tool calling, json format).

        The base implementation adds nothing.
        """
        return None

    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
    ) -> TaskRun:
        """Create the `TaskRun` for an input/output pair, reusing an equal existing run.

        Returns:
            The first run of the task whose dump (ignoring IDs, timestamps and
            path) equals the new run's dump, or the newly built run otherwise.
        """

        def as_text(value):
            # Dicts are stored as JSON strings; anything else passes through.
            return json.dumps(value) if isinstance(value, dict) else value

        input_text = as_text(input)
        output_text = as_text(output)

        # Without an explicit source, attribute the input to the current user.
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        # Synthetic since an adapter, not a human, is creating this output.
        output_source = DataSource(
            type=DataSourceType.synthetic,
            properties=self._properties_for_task_output(),
        )
        candidate = TaskRun(
            parent=self.kiln_task,
            input=input_text,
            input_source=input_source,
            output=TaskOutput(output=output_text, source=output_source),
        )

        # Fields that differ between otherwise identical runs.
        exclude_fields = {
            "id": True,
            "created_at": True,
            "updated_at": True,
            "path": True,
            "output": {"id": True, "created_at": True, "updated_at": True},
        }
        candidate_dump = candidate.model_dump(exclude=exclude_fields)

        # Look for the first existing run that matches the candidate.
        match = None
        for prior in self.kiln_task.runs():
            if prior.model_dump(exclude=exclude_fields) == candidate_dump:
                match = prior
                break

        return match or candidate

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Return the adapter info fields as data source properties."""
        info = self.adapter_info()
        return {
            "adapter_name": info.adapter_name,
            "model_name": info.model_name,
            "model_provider": info.model_provider,
            "prompt_builder_name": info.prompt_builder_name,
        }
@dataclass
class
AdapterInfo:
class
BaseAdapter:
class BaseAdapter(metaclass=ABCMeta):
    """Abstract foundation for adapters that execute a Kiln task against an AI model.

    Subclasses supply the model-specific pieces (`_run` and `adapter_info`);
    this class validates inputs and outputs against the task's JSON schemas,
    assembles the prompt, and produces a `TaskRun` record for each invocation.

    Attributes:
        prompt_builder (BasePromptBuilder): Builds the prompt text for the model
        kiln_task (Task): The task being executed
        output_schema (dict | None): JSON schema for structured output, if any
        input_schema (dict | None): JSON schema for structured input, if any

    Example:
        ```python
        class CustomAdapter(BaseAdapter):
            async def _run(self, input: Dict | str) -> Dict | str:
                # Implementation for specific model
                pass

            def adapter_info(self) -> AdapterInfo:
                return AdapterInfo(
                    adapter_name="custom",
                    model_name="model-1",
                    model_provider="provider",
                    prompt_builder_name="simple"
                )
        ```
    """

    def __init__(
        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
    ):
        """Bind the adapter to a task.

        Args:
            kiln_task: The task this adapter will run.
            prompt_builder: Optional prompt builder; when falsy, a
                `SimplePromptBuilder` for `kiln_task` is created instead.
        """
        if not prompt_builder:
            prompt_builder = SimplePromptBuilder(kiln_task)
        self.prompt_builder = prompt_builder
        self.kiln_task = kiln_task
        # Cache the task's schemas so validation does not re-read the task.
        self.output_schema = kiln_task.output_json_schema
        self.input_schema = kiln_task.input_json_schema

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        """Run the task and return only the output payload.

        Returns:
            The stored output string as-is when the task has no output schema,
            otherwise that string parsed from JSON.
        """
        task_run = await self.invoke(input, input_source)
        if self.kiln_task.output_json_schema is not None:
            # Structured output is stored as a JSON string; decode it.
            return json.loads(task_run.output.output)
        return task_run.output.output

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        """Validate the input, run the model, validate the output, and record the run.

        Raises:
            ValueError: The task has an input schema but `input` is not a dict.
            RuntimeError: The model response has the wrong type for the task
                (not a dict for a structured task, not a str otherwise).
        """
        # Structured tasks require a dict that satisfies the input schema.
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        response = await self._run(input)

        # The response type must match the task's output mode.
        if self.output_schema is None:
            if not isinstance(response, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {response}"
                )
        elif not isinstance(response, dict):
            raise RuntimeError(f"structured response is not a dict: {response}")
        else:
            validate_schema(response, self.output_schema)

        task_run = self.generate_run(input, input_source, response)

        # Persist only when autosave is enabled and the task has a path.
        should_save = (
            Config.shared().autosave_runs and self.kiln_task.path is not None
        )
        if should_save:
            task_run.save_to_file()
        else:
            # A missing ID signals that the run was not persisted.
            task_run.id = None

        return task_run

    def has_structured_output(self) -> bool:
        """Return True when the task defines an output JSON schema."""
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        """Return the metadata describing this adapter."""

    @abstractmethod
    async def _run(self, input: Dict | str) -> Dict | str:
        """Execute the model on `input` and return its response."""

    def build_prompt(self) -> str:
        """Return the builder's prompt, plus any adapter format instructions."""
        base_prompt = self.prompt_builder.build_prompt()
        extra = self.adapter_specific_instructions()
        if extra is None:
            return base_prompt
        return base_prompt + f"# Format Instructions\n\n{extra}\n\n"

    def adapter_specific_instructions(self) -> str | None:
        """Hook for subclasses to add instructions (e.g. tool calling, json format).

        The base implementation adds nothing.
        """
        return None

    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, output: Dict | str
    ) -> TaskRun:
        """Create the `TaskRun` for an input/output pair, reusing an equal existing run.

        Returns:
            The first run of the task whose dump (ignoring IDs, timestamps and
            path) equals the new run's dump, or the newly built run otherwise.
        """

        def as_text(value):
            # Dicts are stored as JSON strings; anything else passes through.
            return json.dumps(value) if isinstance(value, dict) else value

        input_text = as_text(input)
        output_text = as_text(output)

        # Without an explicit source, attribute the input to the current user.
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        # Synthetic since an adapter, not a human, is creating this output.
        output_source = DataSource(
            type=DataSourceType.synthetic,
            properties=self._properties_for_task_output(),
        )
        candidate = TaskRun(
            parent=self.kiln_task,
            input=input_text,
            input_source=input_source,
            output=TaskOutput(output=output_text, source=output_source),
        )

        # Fields that differ between otherwise identical runs.
        exclude_fields = {
            "id": True,
            "created_at": True,
            "updated_at": True,
            "path": True,
            "output": {"id": True, "created_at": True, "updated_at": True},
        }
        candidate_dump = candidate.model_dump(exclude=exclude_fields)

        # Look for the first existing run that matches the candidate.
        match = None
        for prior in self.kiln_task.runs():
            if prior.model_dump(exclude=exclude_fields) == candidate_dump:
                match = prior
                break

        return match or candidate

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        """Return the adapter info fields as data source properties."""
        info = self.adapter_info()
        return {
            "adapter_name": info.adapter_name,
            "model_name": info.model_name,
            "model_provider": info.model_provider,
            "prompt_builder_name": info.prompt_builder_name,
        }
Base class for AI model adapters that handle task execution.
This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.
Attributes:
- prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
- kiln_task (Task): The task configuration and metadata
- output_schema (dict | None): JSON schema for validating structured outputs
- input_schema (dict | None): JSON schema for validating structured inputs
Example:
class CustomAdapter(BaseAdapter):
    """Minimal example adapter implementing BaseAdapter's two abstract methods."""

    async def _run(self, input: Dict | str) -> Dict | str:
        """Call the model with `input` and return its response."""
        # Implementation for specific model
        pass

    def adapter_info(self) -> AdapterInfo:
        """Describe this adapter, its model, provider, and prompt builder."""
        return AdapterInfo(
            adapter_name="custom",
            model_name="model-1",
            model_provider="provider",
            prompt_builder_name="simple"
        )
async def
invoke_returning_raw( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> Union[Dict, str]:
async def invoke_returning_raw(
    self,
    input: Dict | str,
    input_source: DataSource | None = None,
) -> Dict | str:
    """Run the task and return only the output payload.

    Returns:
        The stored output string as-is when the task has no output schema,
        otherwise that string parsed from JSON.
    """
    task_run = await self.invoke(input, input_source)
    if self.kiln_task.output_json_schema is not None:
        # Structured output is stored as a JSON string; decode it.
        return json.loads(task_run.output.output)
    return task_run.output.output
async def
invoke( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None = None) -> kiln_ai.datamodel.TaskRun:
async def invoke(
    self,
    input: Dict | str,
    input_source: DataSource | None = None,
) -> TaskRun:
    """Validate the input, run the model, validate the output, and record the run.

    Raises:
        ValueError: The task has an input schema but `input` is not a dict.
        RuntimeError: The model response has the wrong type for the task
            (not a dict for a structured task, not a str otherwise).
    """
    # Structured tasks require a dict that satisfies the input schema.
    if self.input_schema is not None:
        if not isinstance(input, dict):
            raise ValueError(f"structured input is not a dict: {input}")
        validate_schema(input, self.input_schema)

    response = await self._run(input)

    # The response type must match the task's output mode.
    if self.output_schema is None:
        if not isinstance(response, str):
            raise RuntimeError(
                f"response is not a string for non-structured task: {response}"
            )
    elif not isinstance(response, dict):
        raise RuntimeError(f"structured response is not a dict: {response}")
    else:
        validate_schema(response, self.output_schema)

    task_run = self.generate_run(input, input_source, response)

    # Persist only when autosave is enabled and the task has a path.
    should_save = (
        Config.shared().autosave_runs and self.kiln_task.path is not None
    )
    if should_save:
        task_run.save_to_file()
    else:
        # A missing ID signals that the run was not persisted.
        task_run.id = None

    return task_run
def
generate_run( self, input: Union[Dict, str], input_source: kiln_ai.datamodel.DataSource | None, output: Union[Dict, str]) -> kiln_ai.datamodel.TaskRun:
def generate_run(
    self, input: Dict | str, input_source: DataSource | None, output: Dict | str
) -> TaskRun:
    """Create the `TaskRun` for an input/output pair, reusing an equal existing run.

    Returns:
        The first run of the task whose dump (ignoring IDs, timestamps and
        path) equals the new run's dump, or the newly built run otherwise.
    """

    def as_text(value):
        # Dicts are stored as JSON strings; anything else passes through.
        return json.dumps(value) if isinstance(value, dict) else value

    input_text = as_text(input)
    output_text = as_text(output)

    # Without an explicit source, attribute the input to the current user.
    if input_source is None:
        input_source = DataSource(
            type=DataSourceType.human,
            properties={"created_by": Config.shared().user_id},
        )

    # Synthetic since an adapter, not a human, is creating this output.
    output_source = DataSource(
        type=DataSourceType.synthetic,
        properties=self._properties_for_task_output(),
    )
    candidate = TaskRun(
        parent=self.kiln_task,
        input=input_text,
        input_source=input_source,
        output=TaskOutput(output=output_text, source=output_source),
    )

    # Fields that differ between otherwise identical runs.
    exclude_fields = {
        "id": True,
        "created_at": True,
        "updated_at": True,
        "path": True,
        "output": {"id": True, "created_at": True, "updated_at": True},
    }
    candidate_dump = candidate.model_dump(exclude=exclude_fields)

    # Look for the first existing run that matches the candidate.
    match = None
    for prior in self.kiln_task.runs():
        if prior.model_dump(exclude=exclude_fields) == candidate_dump:
            match = prior
            break

    return match or candidate