kiln_ai.adapters.base_adapter
import json
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Dict

from kiln_ai.datamodel import (
    DataSource,
    DataSourceType,
    Task,
    TaskOutput,
    TaskRun,
)
from kiln_ai.datamodel.json_schema import validate_schema
from kiln_ai.utils.config import Config

from .prompt_builders import BasePromptBuilder, SimplePromptBuilder


@dataclass
class AdapterInfo:
    adapter_name: str
    model_name: str
    model_provider: str
    prompt_builder_name: str


@dataclass
class RunOutput:
    output: Dict | str
    intermediate_outputs: Dict[str, str] | None


class BaseAdapter(metaclass=ABCMeta):
    """Base class for AI model adapters that handle task execution.

    This abstract class provides the foundation for implementing model-specific adapters
    that can process tasks with structured or unstructured inputs/outputs. It handles
    input/output validation, prompt building, and run tracking.

    Attributes:
        prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
        kiln_task (Task): The task configuration and metadata
        output_schema (dict | None): JSON schema for validating structured outputs
        input_schema (dict | None): JSON schema for validating structured inputs
    """

    def __init__(
        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
    ):
        self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
        self.kiln_task = kiln_task
        self.output_schema = self.kiln_task.output_json_schema
        self.input_schema = self.kiln_task.input_json_schema

    async def invoke_returning_raw(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> Dict | str:
        result = await self.invoke(input, input_source)
        if self.kiln_task.output_json_schema is None:
            return result.output.output
        else:
            return json.loads(result.output.output)

    async def invoke(
        self,
        input: Dict | str,
        input_source: DataSource | None = None,
    ) -> TaskRun:
        # validate input
        if self.input_schema is not None:
            if not isinstance(input, dict):
                raise ValueError(f"structured input is not a dict: {input}")
            validate_schema(input, self.input_schema)

        # Run
        run_output = await self._run(input)

        # validate output
        if self.output_schema is not None:
            if not isinstance(run_output.output, dict):
                raise RuntimeError(
                    f"structured response is not a dict: {run_output.output}"
                )
            validate_schema(run_output.output, self.output_schema)
        else:
            if not isinstance(run_output.output, str):
                raise RuntimeError(
                    f"response is not a string for non-structured task: {run_output.output}"
                )

        # Generate the run and output
        run = self.generate_run(input, input_source, run_output)

        # Save the run if configured to do so, and we have a path to save to
        if Config.shared().autosave_runs and self.kiln_task.path is not None:
            run.save_to_file()
        else:
            # Clear the ID to indicate it's not persisted
            run.id = None

        return run

    def has_structured_output(self) -> bool:
        return self.output_schema is not None

    @abstractmethod
    def adapter_info(self) -> AdapterInfo:
        pass

    @abstractmethod
    async def _run(self, input: Dict | str) -> RunOutput:
        pass

    def build_prompt(self) -> str:
        return self.prompt_builder.build_prompt()

    # create a run and task output
    def generate_run(
        self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput
    ) -> TaskRun:
        # Convert input and output to JSON strings if they are dictionaries
        input_str = json.dumps(input) if isinstance(input, dict) else input
        output_str = (
            json.dumps(run_output.output)
            if isinstance(run_output.output, dict)
            else run_output.output
        )

        # If no input source is provided, use the human data source
        if input_source is None:
            input_source = DataSource(
                type=DataSourceType.human,
                properties={"created_by": Config.shared().user_id},
            )

        new_task_run = TaskRun(
            parent=self.kiln_task,
            input=input_str,
            input_source=input_source,
            output=TaskOutput(
                output=output_str,
                # Synthetic since an adapter, not a human, is creating this
                source=DataSource(
                    type=DataSourceType.synthetic,
                    properties=self._properties_for_task_output(),
                ),
            ),
            intermediate_outputs=run_output.intermediate_outputs,
        )

        exclude_fields = {
            "id": True,
            "created_at": True,
            "updated_at": True,
            "path": True,
            "output": {"id": True, "created_at": True, "updated_at": True},
        }
        new_run_dump = new_task_run.model_dump(exclude=exclude_fields)

        # Check if the same run already exists
        existing_task_run = next(
            (
                task_run
                for task_run in self.kiln_task.runs()
                if task_run.model_dump(exclude=exclude_fields) == new_run_dump
            ),
            None,
        )
        if existing_task_run:
            return existing_task_run

        return new_task_run

    def _properties_for_task_output(self) -> Dict[str, str | int | float]:
        props = {}

        # adapter info
        adapter_info = self.adapter_info()
        props["adapter_name"] = adapter_info.adapter_name
        props["model_name"] = adapter_info.model_name
        props["model_provider"] = adapter_info.model_provider
        props["prompt_builder_name"] = adapter_info.prompt_builder_name

        return props
@dataclass
class AdapterInfo:
    adapter_name: str
    model_name: str
    model_provider: str
    prompt_builder_name: str
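AdapterInfo is a small provenance record describing which adapter, model, provider, and prompt builder produced an output; it is written into the synthetic DataSource of every generated TaskOutput. A minimal construction sketch (the specific values below are illustrative, not names taken from the source):

from kiln_ai.adapters.base_adapter import AdapterInfo

info = AdapterInfo(
    adapter_name="example_adapter",      # illustrative value
    model_name="example-model",          # illustrative value
    model_provider="example-provider",   # illustrative value
    prompt_builder_name="simple_prompt_builder",  # illustrative value
)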
@dataclass
class RunOutput:
    output: Dict | str
    intermediate_outputs: Dict[str, str] | None
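RunOutput is what a concrete adapter's _run must return: the raw model output (a dict for structured tasks, a str otherwise) plus any named intermediate outputs. Illustrative instances, under the assumption of one structured and one plain-text task:

from kiln_ai.adapters.base_adapter import RunOutput

structured = RunOutput(
    output={"answer": "4"},                           # dict for tasks with an output_json_schema
    intermediate_outputs={"reasoning": "2 + 2 = 4"},  # optional named intermediate strings
)

plain = RunOutput(output="4", intermediate_outputs=None)  # str for non-structured tasks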
class BaseAdapter(metaclass=ABCMeta):
Base class for AI model adapters that handle task execution.
This abstract class provides the foundation for implementing model-specific adapters that can process tasks with structured or unstructured inputs/outputs. It handles input/output validation, prompt building, and run tracking.
Attributes:
    prompt_builder (BasePromptBuilder): Builder for constructing prompts for the model
    kiln_task (Task): The task configuration and metadata
    output_schema (dict | None): JSON schema for validating structured outputs
    input_schema (dict | None): JSON schema for validating structured inputs
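To implement a concrete adapter, subclass BaseAdapter and provide _run and adapter_info. The sketch below is a minimal, illustrative echo adapter; the EchoAdapter name and its behavior are assumptions for demonstration, and a real adapter would call a model API inside _run, typically starting from self.build_prompt():

from typing import Dict

from kiln_ai.adapters.base_adapter import AdapterInfo, BaseAdapter, RunOutput


class EchoAdapter(BaseAdapter):
    """Illustrative adapter that returns the input text unchanged."""

    async def _run(self, input: Dict | str) -> RunOutput:
        # A real adapter would send self.build_prompt() plus the input to a model here.
        text = str(input)
        return RunOutput(output=text, intermediate_outputs=None)

    def adapter_info(self) -> AdapterInfo:
        return AdapterInfo(
            adapter_name="echo_adapter",  # illustrative name
            model_name="none",
            model_provider="none",
            prompt_builder_name=type(self.prompt_builder).__name__,
        )

Note that this echo adapter only satisfies non-structured tasks; for a task with an output_json_schema, _run must return a dict that validates against that schema, or invoke will raise.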
async def invoke_returning_raw(self, input: Dict | str, input_source: DataSource | None = None) -> Dict | str
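invoke_returning_raw runs the task and returns only the raw output: a parsed dict when the task defines an output_json_schema, otherwise the plain string. A hedged usage sketch, assuming my_task is an existing kiln_ai.datamodel.Task and EchoAdapter is the illustrative subclass sketched above:

import asyncio

async def main() -> None:
    adapter = EchoAdapter(my_task)  # my_task: an existing Task, assumed to be defined elsewhere
    raw = await adapter.invoke_returning_raw("What is 2 + 2?")
    # raw is a parsed dict when the task defines output_json_schema, otherwise a plain str
    print(raw)

asyncio.run(main())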
async def invoke(self, input: Dict | str, input_source: DataSource | None = None) -> TaskRun
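Unlike invoke_returning_raw, invoke validates the input and output against the task's schemas and returns the full TaskRun record. A sketch, reusing the assumed my_task and EchoAdapter from above and a plain-string input for a non-structured task:

import asyncio

async def main() -> None:
    adapter = EchoAdapter(my_task)  # my_task: assumed existing Task, as above
    run = await adapter.invoke("Summarize the release notes.")
    print(run.output.output)  # the validated output (a JSON string for structured tasks)
    print(run.id)  # None unless Config.shared().autosave_runs is enabled and the task has a path

asyncio.run(main())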
def generate_run(self, input: Dict | str, input_source: DataSource | None, run_output: RunOutput) -> TaskRun
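generate_run is normally called by invoke, but it can also be exercised directly, for example in tests. A sketch with the same assumed adapter and illustrative values:

run_output = RunOutput(output="4", intermediate_outputs={"reasoning": "2 + 2 = 4"})
run = adapter.generate_run("What is 2 + 2?", None, run_output)
# With input_source=None the run is attributed to a human DataSource tagged with
# Config.shared().user_id, while the output's DataSource is synthetic and carries
# the values from adapter_info(). If an identical run (ignoring ids and timestamps)
# already exists on the task, that existing run is returned instead of the new one.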