kiln_ai.datamodel.basemodel
import json
import os
import re
import shutil
import uuid
from abc import ABCMeta
from builtins import classmethod
from datetime import datetime
from pathlib import Path
from typing import (
    Any,
    Dict,
    List,
    Optional,
    Type,
    TypeVar,
)

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    ValidationError,
    ValidationInfo,
    computed_field,
    model_validator,
)
from pydantic_core import ErrorDetails
from typing_extensions import Self

from kiln_ai.datamodel.model_cache import ModelCache
from kiln_ai.utils.config import Config
from kiln_ai.utils.formatting import snake_case

# ID is a 12 digit random integer string.
# Should be unique per item, at least inside the context of a parent/child relationship.
# Use integers to make it easier to type for a search function.
# Allow none, even though we generate it, because we clear it in the REST API if the object is ephemeral (not persisted to disk)
ID_FIELD = Field(default_factory=lambda: str(uuid.uuid4().int)[:12])
ID_TYPE = Optional[str]
T = TypeVar("T", bound="KilnBaseModel")
PT = TypeVar("PT", bound="KilnParentedModel")


# Naming conventions:
# 1) Names are filename safe as they may be used as file names. They are informational and not to be used in prompts/training/validation.
# 2) Descriptions are for Kiln users to describe/understand the purpose of this object. They must never be used in prompts/training/validation. Use "instruction/requirements" instead.

# Filename compatible names
NAME_REGEX = r"^[A-Za-z0-9 _-]+$"
NAME_FIELD = Field(
    min_length=1,
    max_length=120,
    pattern=NAME_REGEX,
    description="A name for this entity.",
)
SHORT_NAME_FIELD = Field(
    min_length=1,
    max_length=32,
    pattern=NAME_REGEX,
    description="A name for this entity",
)


def string_to_valid_name(name: str) -> str:
    """Convert an arbitrary string into one that only uses NAME_REGEX characters.

    Args:
        name: The raw name to sanitize.

    Returns:
        The sanitized name. May be an empty string if `name` contains no
        allowed characters, so callers that need a non-empty name must check.
    """
    # Replace any character not allowed by NAME_REGEX with an underscore
    valid_name = re.sub(r"[^A-Za-z0-9 _-]", "_", name)
    # Replace consecutive underscores with a single underscore
    valid_name = re.sub(r"_+", "_", valid_name)
    # Remove leading and trailing underscores and spaces in a single pass.
    # Stripping them separately left underscores behind for inputs like " _name_ ".
    # After the substitution above, a plain space is the only whitespace that can remain.
    return valid_name.strip("_ ")


class KilnBaseModel(BaseModel):
    """Base model for all Kiln data models with common functionality for persistence and versioning.

    Attributes:
        v (int): Schema version number for migration support
        id (str): Unique identifier for the model instance
        path (Path): File system path where the model is stored
        created_at (datetime): Timestamp when the model was created
        created_by (str): User ID of the creator
    """

    model_config = ConfigDict(validate_assignment=True)

    v: int = Field(default=1)  # schema_version
    id: ID_TYPE = ID_FIELD
    path: Optional[Path] = Field(default=None)
    created_at: datetime = Field(default_factory=datetime.now)
    created_by: str = Field(default_factory=lambda: Config.shared().user_id)

    _loaded_from_file: bool = False

    @computed_field()
    def model_type(self) -> str:
        """Type name written into the serialized model; checked again on load."""
        return self.type_name()

    # if changing the model name, should keep the original name here for parsing old files
    @classmethod
    def type_name(cls) -> str:
        """Return the snake_case form of the class name."""
        return snake_case(cls.__name__)

    # used as /obj_folder/base_filename.kiln
    @classmethod
    def base_filename(cls) -> str:
        """Return the default file name for this model type (`<type_name>.kiln`)."""
        return cls.type_name() + ".kiln"

    @classmethod
    def load_from_folder(cls: Type[T], folderPath: Path) -> T:
        """Load a model instance from a folder using the default filename.

        Args:
            folderPath (Path): Directory path containing the model file

        Returns:
            T: Instance of the model
        """
        path = folderPath / cls.base_filename()
        return cls.load_from_file(path)

    @classmethod
    def load_from_file(cls: Type[T], path: Path | str) -> T:
        """Load a model instance from a specific file path.

        Args:
            path (Path | str): Path to the model file. Strings are converted to `Path`.

        Returns:
            T: Instance of the model

        Raises:
            ValueError: If the loaded model is not of the expected type or version
            FileNotFoundError: If the file does not exist
        """
        if isinstance(path, str):
            path = Path(path)
        cached_model = ModelCache.shared().get_model(path, cls)
        if cached_model is not None:
            return cached_model
        # Explicit utf-8: JSON files must not depend on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as file:
            # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
            mtime_ns = os.fstat(file.fileno()).st_mtime_ns
            file_data = file.read()
        # TODO P2 perf: parsing the JSON twice here.
        # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
        parsed_json = json.loads(file_data)
        m = cls.model_validate_json(
            file_data,
            strict=True,
            context={"loading_from_file": True},
        )
        if not isinstance(m, cls):
            raise ValueError(f"Loaded model is not of type {cls.__name__}")
        m._loaded_from_file = True
        m.path = path
        if m.v > m.max_schema_version():
            raise ValueError(
                f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        # .get() so a file without a model_type key raises the documented ValueError, not KeyError
        loaded_type = parsed_json.get("model_type")
        if loaded_type != cls.type_name():
            raise ValueError(
                f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {loaded_type}. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        ModelCache.shared().set_model(path, m, mtime_ns)
        return m

    def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
        """Report whether this instance is being, or has been, loaded from a file.

        Args:
            info: Optional validation info; its context is checked for the
                "loading_from_file" flag.

        Returns:
            True if the context flag is set, otherwise the value of `_loaded_from_file`.
        """
        # Two ways of indicating it's loaded from file:
        # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
        # 2) self._loaded_from_file -> After loading, set by the loader
        if (
            info is not None
            and info.context is not None
            and info.context.get("loading_from_file", False)
        ):
            return True
        return self._loaded_from_file

    def save_to_file(self) -> None:
        """Save the model instance to a file.

        Raises:
            ValueError: If the path is not set
        """
        path = self.build_path()
        if path is None:
            raise ValueError(
                f"Cannot save to file because 'path' is not set. Class: {self.__class__.__name__}, "
                f"id: {getattr(self, 'id', None)}, path: {path}"
            )
        path.parent.mkdir(parents=True, exist_ok=True)
        json_data = self.model_dump_json(indent=2, exclude={"path"})
        # Explicit utf-8 so the file reads back identically regardless of the platform locale.
        with open(path, "w", encoding="utf-8") as file:
            file.write(json_data)
        # save the path so even if something like name changes, the file doesn't move
        self.path = path
        # We could save, but invalidating will trigger load on next use.
        # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk
        ModelCache.shared().invalidate(path)

    def delete(self) -> None:
        """Remove this model's directory from disk and clear its path.

        If `path` is a file its parent directory is removed, otherwise `path`
        itself is removed. Everything inside that directory is deleted.

        Raises:
            ValueError: If the path is not set
        """
        if self.path is None:
            raise ValueError("Cannot delete model because path is not set")
        dir_path = self.path.parent if self.path.is_file() else self.path
        shutil.rmtree(dir_path)
        ModelCache.shared().invalidate(self.path)
        self.path = None

    def build_path(self) -> Path | None:
        """Return the path this model is saved to, or None if no path is set."""
        return self.path

    # increment for breaking changes
    def max_schema_version(self) -> int:
        """Return the highest schema version this code can load."""
        return 1


class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
    """Base model for Kiln models that have a parent-child relationship. This base class is for child models.

    This class provides functionality for managing hierarchical relationships between models,
    including parent reference handling and file system organization.

    Attributes:
        parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
    """

    # Parent is an in memory only reference to parent. If it's set we use that. If not we'll try to load it from disk based on the path.
    # We don't persist the parent reference to disk. See the accessors below for how we make it a clean api (parent accessor will lazy load from disk)
    parent: Optional[KilnBaseModel] = Field(default=None, exclude=True)

    def __getattribute__(self, name: str) -> Any:
        # Intercept `parent` so reading it lazily loads the parent from disk.
        if name == "parent":
            return self.load_parent()
        return super().__getattribute__(name)

    def cached_parent(self) -> Optional[KilnBaseModel]:
        """Return the in-memory parent without triggering a lazy load."""
        return object.__getattribute__(self, "parent")

    def load_parent(self) -> Optional[KilnBaseModel]:
        """Get the parent model instance, loading it from disk if necessary.

        Returns:
            Optional[KilnBaseModel]: The parent model instance or None if not set
        """
        cached_parent = self.cached_parent()
        if cached_parent is not None:
            return cached_parent

        # lazy load parent from path
        if self.path is None:
            return None
        # Note: this only works with base_filename. If we ever support custom names, we need to change this.
        parent_path = (
            self.path.parent.parent.parent
            / self.__class__.parent_type().base_filename()
        )
        loaded_parent = self.__class__.parent_type().load_from_file(parent_path)
        self.parent = loaded_parent
        return loaded_parent

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def relationship_name(cls) -> str:
        raise NotImplementedError("Relationship name must be implemented")

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def parent_type(cls) -> Type[KilnBaseModel]:
        raise NotImplementedError("Parent type must be implemented")

    @model_validator(mode="after")
    def check_parent_type(self) -> Self:
        """Validate that an in-memory parent, if set, is of the expected parent type."""
        cached_parent = self.cached_parent()
        if cached_parent is not None:
            expected_parent_type = self.__class__.parent_type()
            if not isinstance(cached_parent, expected_parent_type):
                raise ValueError(
                    f"Parent must be of type {expected_parent_type}, but was {type(cached_parent)}"
                )
        return self

    def build_child_dirname(self) -> Path:
        """Build the folder name for this child: `{id}` or `{id} - {name[:32]}`.

        Raises:
            ValueError: If the ID is not set
        """
        # Default implementation for readable folder names.
        # {id} - {name}/{type}.kiln
        if self.id is None:
            # consider generating an ID here. But if it's been cleared, we've already used this without one so raise for now.
            raise ValueError("ID is not set - can not save or build path")
        path = self.id
        name = getattr(self, "name", None)
        if name is not None:
            path = f"{path} - {name[:32]}"
        return Path(path)

    def build_path(self) -> Path | None:
        """Return the existing path, or derive one from the parent's path.

        Returns:
            The path to save to, or None if neither this model nor its parent has a path.
        """
        # if specifically loaded from an existing path, keep that no matter what
        # this ensures the file structure is easy to use with git/version control
        # and that changes to things like name (which impacts default path) don't leave dangling files
        if self.path is not None:
            return self.path
        # Build a path under parent_folder/relationship/file.kiln
        parent = self.parent
        if parent is None:
            return None
        parent_path = parent.build_path()
        if parent_path is None:
            return None
        return (
            parent_path.parent
            / self.__class__.relationship_name()
            / self.build_child_dirname()
            / self.__class__.base_filename()
        )

    @classmethod
    def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
        """Yield the path of every child file of this type stored under the parent.

        This is a generator: nothing runs until it is iterated.

        Args:
            parent_path: Path of the parent's file or folder, or None.

        Raises:
            ValueError: If the parent could not be loaded
        """
        if parent_path is None:
            # children are disk based. If not saved, they don't exist
            return

        # Determine the parent folder
        if parent_path.is_file():
            parent_folder = parent_path.parent
        else:
            parent_folder = parent_path

        parent = cls.parent_type().load_from_file(parent_path)
        if parent is None:
            raise ValueError("Parent must be set to load children")

        # Ignore type error: this is abstract base class, but children must implement relationship_name
        relationship_folder = parent_folder / Path(cls.relationship_name())  # type: ignore

        if not relationship_folder.exists() or not relationship_folder.is_dir():
            return

        # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
        yield from relationship_folder.glob(f"**/{cls.base_filename()}")

    @classmethod
    def all_children_of_parent_path(
        cls: Type[PT], parent_path: Path | None
    ) -> list[PT]:
        """Load every child of this type found under the parent path."""
        return [
            cls.load_from_file(child_path)
            for child_path in cls.iterate_children_paths_of_parent_path(parent_path)
        ]

    @classmethod
    def from_id_and_parent_path(
        cls: Type[PT], id: str, parent_path: Path | None
    ) -> PT | None:
        """
        Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.

        Uses cache so still slow on first load.
        """
        if parent_path is None:
            return None

        # Note: we're using the in-file ID. We could make this faster using the path-ID if this becomes perf bottleneck, but it's better to have 1 source of truth.
        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
            child_id = ModelCache.shared().get_model_id(child_path, cls)
            if child_id == id:
                return cls.load_from_file(child_path)
            if child_id is None:
                # No ID from the cache for this path: load the file and compare its ID
                child = cls.load_from_file(child_path)
                if child.id == id:
                    return child
        return None


# Parent create methods for all child relationships
# You must pass in parent_of in the subclass definition, defining the child relationships
class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
    """Base model for Kiln models that can have child models.

    This class provides functionality for managing collections of child models and their persistence.
    Child relationships must be defined using the parent_of parameter in the class definition.

    Args:
        parent_of (Dict[str, Type[KilnParentedModel]]): Mapping of relationship names to child model types
    """

    @classmethod
    def _create_child_method(
        cls, relationship_name: str, child_class: Type[KilnParentedModel]
    ):
        # Adds an accessor named after the relationship that loads all children from disk.
        def child_method(self) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path)

        child_method.__name__ = relationship_name
        child_method.__annotations__ = {"return": List[child_class]}
        setattr(cls, relationship_name, child_method)

    @classmethod
    def _create_parent_methods(
        cls, targetCls: Type[KilnParentedModel], relationship_name: str
    ):
        # Replaces the child's parent_type/relationship_name stubs with plain functions.
        # They take no arguments, so they must be called on the class, not on an instance.
        def parent_class_method() -> Type[KilnParentModel]:
            return cls

        parent_class_method.__name__ = "parent_type"
        parent_class_method.__annotations__ = {"return": Type[KilnParentModel]}
        setattr(targetCls, "parent_type", parent_class_method)

        def relationship_name_method() -> str:
            return relationship_name

        relationship_name_method.__name__ = "relationship_name"
        relationship_name_method.__annotations__ = {"return": str}
        setattr(targetCls, "relationship_name", relationship_name_method)

    @classmethod
    def __init_subclass__(cls, parent_of: Dict[str, Type[KilnParentedModel]], **kwargs):
        super().__init_subclass__(**kwargs)
        cls._parent_of = parent_of
        for relationship_name, child_class in parent_of.items():
            cls._create_child_method(relationship_name, child_class)
            cls._create_parent_methods(child_class, relationship_name)

    @classmethod
    def validate_and_save_with_subrelations(
        cls,
        data: Dict[str, Any],
        path: Path | None = None,
        parent: KilnBaseModel | None = None,
    ):
        """Validate and save a model instance along with all its nested child relationships.

        Args:
            data (Dict[str, Any]): Model data including child relationships
            path (Path, optional): Path where the model should be saved
            parent (KilnBaseModel, optional): Parent model instance for parented models

        Returns:
            KilnParentModel: The validated and saved model instance

        Raises:
            ValidationError: If validation fails for the model or any of its children
        """
        # Validate first, then save. Don't want error half way through, and partly persisted
        # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
        cls._validate_nested(data, save=False, path=path, parent=parent)
        instance = cls._validate_nested(data, save=True, path=path, parent=parent)
        return instance

    @classmethod
    def _validate_nested(
        cls,
        data: Dict[str, Any],
        save: bool = False,
        parent: KilnBaseModel | None = None,
        path: Path | None = None,
    ):
        # Validates this model and, recursively, every child list in `data` whose key is a
        # declared relationship. Saves each valid instance when `save` is True.
        # Collect all validation errors so we can report them all at once
        validation_errors = []

        try:
            instance = cls.model_validate(data, strict=True)
            if path is not None:
                instance.path = path
            if parent is not None and isinstance(instance, KilnParentedModel):
                instance.parent = parent
            if save:
                instance.save_to_file()
        except ValidationError as e:
            instance = None
            validation_errors.extend(e.errors())

        for key, value_list in data.items():
            if key in cls._parent_of:
                parent_type = cls._parent_of[key]
                if not isinstance(value_list, list):
                    raise ValueError(
                        f"Expected a list for {key}, but got {type(value_list)}"
                    )
                for value_index, value in enumerate(value_list):
                    try:
                        if issubclass(parent_type, KilnParentModel):
                            kwargs = {"data": value, "save": save}
                            if instance is not None:
                                kwargs["parent"] = instance
                            parent_type._validate_nested(**kwargs)
                        elif issubclass(parent_type, KilnParentedModel):
                            # Root node
                            subinstance = parent_type.model_validate(value, strict=True)
                            if instance is not None:
                                subinstance.parent = instance
                            if save:
                                subinstance.save_to_file()
                        else:
                            raise ValueError(
                                f"Invalid type {parent_type}. Should be KilnBaseModel based."
                            )
                    except ValidationError as e:
                        for suberror in e.errors():
                            # Prefix the error location with the relationship key and list index
                            cls._append_loc(suberror, key, value_index)
                            validation_errors.append(suberror)

        if len(validation_errors) > 0:
            raise ValidationError.from_exception_data(
                title=f"Validation failed for {cls.__name__}",
                line_errors=validation_errors,
                input_type="json",
            )

        return instance

    @classmethod
    def _append_loc(
        cls, error: ErrorDetails, current_loc: str, value_index: int | None = None
    ):
        # Rewrites error["loc"] in place to (current_loc, [value_index], *original loc).
        orig_loc = error["loc"] if "loc" in error else None
        new_loc: list[str | int] = [current_loc]
        if value_index is not None:
            new_loc.append(value_index)
        if isinstance(orig_loc, tuple):
            new_loc.extend(list(orig_loc))
        elif isinstance(orig_loc, list):
            new_loc.extend(orig_loc)
        error["loc"] = tuple(new_loc)
def string_to_valid_name(name: str) -> str:
    """Convert an arbitrary string into one that only uses filename-safe characters.

    Allowed characters are ASCII letters, digits, space, underscore and hyphen.

    Args:
        name: The raw name to sanitize.

    Returns:
        The sanitized name. May be an empty string if `name` contains no
        allowed characters, so callers that need a non-empty name must check.
    """
    # Replace any character outside the allowed set with an underscore
    valid_name = re.sub(r"[^A-Za-z0-9 _-]", "_", name)
    # Replace consecutive underscores with a single underscore
    valid_name = re.sub(r"_+", "_", valid_name)
    # Remove leading and trailing underscores and spaces in a single pass.
    # Stripping them separately left underscores behind for inputs like " _name_ ".
    # After the substitution above, a plain space is the only whitespace that can remain.
    return valid_name.strip("_ ")
class KilnBaseModel(BaseModel):
    """Base model for all Kiln data models with common functionality for persistence and versioning.

    Attributes:
        v (int): Schema version number for migration support
        id (str): Unique identifier for the model instance
        path (Path): File system path where the model is stored
        created_at (datetime): Timestamp when the model was created
        created_by (str): User ID of the creator
    """

    model_config = ConfigDict(validate_assignment=True)

    v: int = Field(default=1)  # schema_version
    id: ID_TYPE = ID_FIELD
    path: Optional[Path] = Field(default=None)
    created_at: datetime = Field(default_factory=datetime.now)
    created_by: str = Field(default_factory=lambda: Config.shared().user_id)

    _loaded_from_file: bool = False

    @computed_field()
    def model_type(self) -> str:
        """Type name written into the serialized model; checked again on load."""
        return self.type_name()

    # if changing the model name, should keep the original name here for parsing old files
    @classmethod
    def type_name(cls) -> str:
        """Return the snake_case form of the class name."""
        return snake_case(cls.__name__)

    # used as /obj_folder/base_filename.kiln
    @classmethod
    def base_filename(cls) -> str:
        """Return the default file name for this model type (`<type_name>.kiln`)."""
        return cls.type_name() + ".kiln"

    @classmethod
    def load_from_folder(cls: Type[T], folderPath: Path) -> T:
        """Load a model instance from a folder using the default filename.

        Args:
            folderPath (Path): Directory path containing the model file

        Returns:
            T: Instance of the model
        """
        path = folderPath / cls.base_filename()
        return cls.load_from_file(path)

    @classmethod
    def load_from_file(cls: Type[T], path: Path | str) -> T:
        """Load a model instance from a specific file path.

        Args:
            path (Path | str): Path to the model file. Strings are converted to `Path`.

        Returns:
            T: Instance of the model

        Raises:
            ValueError: If the loaded model is not of the expected type or version
            FileNotFoundError: If the file does not exist
        """
        if isinstance(path, str):
            path = Path(path)
        cached_model = ModelCache.shared().get_model(path, cls)
        if cached_model is not None:
            return cached_model
        # Explicit utf-8: JSON files must not depend on the platform's locale encoding.
        with open(path, "r", encoding="utf-8") as file:
            # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
            mtime_ns = os.fstat(file.fileno()).st_mtime_ns
            file_data = file.read()
        # TODO P2 perf: parsing the JSON twice here.
        # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
        parsed_json = json.loads(file_data)
        m = cls.model_validate_json(
            file_data,
            strict=True,
            context={"loading_from_file": True},
        )
        if not isinstance(m, cls):
            raise ValueError(f"Loaded model is not of type {cls.__name__}")
        m._loaded_from_file = True
        m.path = path
        if m.v > m.max_schema_version():
            raise ValueError(
                f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        # .get() so a file without a model_type key raises the documented ValueError, not KeyError
        loaded_type = parsed_json.get("model_type")
        if loaded_type != cls.type_name():
            raise ValueError(
                f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {loaded_type}. "
                f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
                f"version: {m.v}, max version: {m.max_schema_version()}"
            )
        ModelCache.shared().set_model(path, m, mtime_ns)
        return m

    def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
        """Report whether this instance is being, or has been, loaded from a file.

        Args:
            info: Optional validation info; its context is checked for the
                "loading_from_file" flag.

        Returns:
            True if the context flag is set, otherwise the value of `_loaded_from_file`.
        """
        # Two ways of indicating it's loaded from file:
        # 1) info.context.get("loading_from_file") -> During actual loading, before we can set _loaded_from_file
        # 2) self._loaded_from_file -> After loading, set by the loader
        if (
            info is not None
            and info.context is not None
            and info.context.get("loading_from_file", False)
        ):
            return True
        return self._loaded_from_file

    def save_to_file(self) -> None:
        """Save the model instance to a file.

        Raises:
            ValueError: If the path is not set
        """
        path = self.build_path()
        if path is None:
            raise ValueError(
                f"Cannot save to file because 'path' is not set. Class: {self.__class__.__name__}, "
                f"id: {getattr(self, 'id', None)}, path: {path}"
            )
        path.parent.mkdir(parents=True, exist_ok=True)
        json_data = self.model_dump_json(indent=2, exclude={"path"})
        # Explicit utf-8 so the file reads back identically regardless of the platform locale.
        with open(path, "w", encoding="utf-8") as file:
            file.write(json_data)
        # save the path so even if something like name changes, the file doesn't move
        self.path = path
        # We could save, but invalidating will trigger load on next use.
        # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk
        ModelCache.shared().invalidate(path)

    def delete(self) -> None:
        """Remove this model's directory from disk and clear its path.

        If `path` is a file its parent directory is removed, otherwise `path`
        itself is removed. Everything inside that directory is deleted.

        Raises:
            ValueError: If the path is not set
        """
        if self.path is None:
            raise ValueError("Cannot delete model because path is not set")
        dir_path = self.path.parent if self.path.is_file() else self.path
        shutil.rmtree(dir_path)
        ModelCache.shared().invalidate(self.path)
        self.path = None

    def build_path(self) -> Path | None:
        """Return the path this model is saved to, or None if no path is set."""
        return self.path

    # increment for breaking changes
    def max_schema_version(self) -> int:
        """Return the highest schema version this code can load."""
        return 1
Base model for all Kiln data models with common functionality for persistence and versioning.
Attributes: v (int): Schema version number for migration support id (str): Unique identifier for the model instance path (Path): File system path where the model is stored created_at (datetime): Timestamp when the model was created created_by (str): User ID of the creator
Configuration for the model, should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
@classmethod
def load_from_folder(cls: Type[T], folderPath: Path) -> T:
    """Load the model stored under its default filename inside a folder.

    Args:
        folderPath (Path): Directory that contains the model file

    Returns:
        T: The model loaded from `folderPath / base_filename()`
    """
    return cls.load_from_file(folderPath / cls.base_filename())
Load a model instance from a folder using the default filename.
Args: folderPath (Path): Directory path containing the model file
Returns: T: Instance of the model
@classmethod
def load_from_file(cls: Type[T], path: Path | str) -> T:
    """Load a model instance from a specific file path.

    Args:
        path (Path | str): Path to the model file. Strings are converted to `Path`.

    Returns:
        T: Instance of the model

    Raises:
        ValueError: If the loaded model is not of the expected type or version
        FileNotFoundError: If the file does not exist
    """
    if isinstance(path, str):
        path = Path(path)
    cached_model = ModelCache.shared().get_model(path, cls)
    if cached_model is not None:
        return cached_model
    # Explicit utf-8: JSON files must not depend on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as file:
        # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
        mtime_ns = os.fstat(file.fileno()).st_mtime_ns
        file_data = file.read()
    # TODO P2 perf: parsing the JSON twice here.
    # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
    parsed_json = json.loads(file_data)
    m = cls.model_validate_json(
        file_data,
        strict=True,
        context={"loading_from_file": True},
    )
    if not isinstance(m, cls):
        raise ValueError(f"Loaded model is not of type {cls.__name__}")
    m._loaded_from_file = True
    m.path = path
    if m.v > m.max_schema_version():
        raise ValueError(
            f"Cannot load from file because the schema version is higher than the current version. Upgrade kiln to the latest version. "
            f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
            f"version: {m.v}, max version: {m.max_schema_version()}"
        )
    # .get() so a file without a model_type key raises the documented ValueError, not KeyError
    loaded_type = parsed_json.get("model_type")
    if loaded_type != cls.type_name():
        raise ValueError(
            f"Cannot load from file because the model type is incorrect. Expected {cls.type_name()}, got {loaded_type}. "
            f"Class: {m.__class__.__name__}, id: {getattr(m, 'id', None)}, path: {path}, "
            f"version: {m.v}, max version: {m.max_schema_version()}"
        )
    ModelCache.shared().set_model(path, m, mtime_ns)
    return m
Load a model instance from a specific file path.
Args: path (Path | str): Path to the model file; a string is converted to a Path
Returns: T: Instance of the model
Raises: ValueError: If the loaded model is not of the expected type or version FileNotFoundError: If the file does not exist
def loaded_from_file(self, info: ValidationInfo | None = None) -> bool:
    """Report whether this instance is being, or has been, loaded from a file.

    Args:
        info: Optional validation info; its context is checked for the
            "loading_from_file" flag.

    Returns:
        True if the context flag is set, otherwise the value of `_loaded_from_file`.
    """
    # Signal 1: the context flag, available during loading before _loaded_from_file can be set
    context = info.context if info is not None else None
    if context is not None and context.get("loading_from_file", False):
        return True
    # Signal 2: the private flag set by the loader once loading has finished
    return self._loaded_from_file
def save_to_file(self) -> None:
    """Save the model instance to a file.

    The model is written as indented JSON, encoded as UTF-8, with the `path`
    field excluded.

    Raises:
        ValueError: If the path is not set
    """
    path = self.build_path()
    if path is None:
        raise ValueError(
            f"Cannot save to file because 'path' is not set. Class: {self.__class__.__name__}, "
            f"id: {getattr(self, 'id', None)}, path: {path}"
        )
    path.parent.mkdir(parents=True, exist_ok=True)
    json_data = self.model_dump_json(indent=2, exclude={"path"})
    # Explicit utf-8 so the file reads back identically regardless of the platform locale.
    with open(path, "w", encoding="utf-8") as file:
        file.write(json_data)
    # save the path so even if something like name changes, the file doesn't move
    self.path = path
    # We could save, but invalidating will trigger load on next use.
    # This ensures everything in cache is loaded from disk, and the cache perfectly reflects what's on disk
    ModelCache.shared().invalidate(path)
Save the model instance to a file.
Raises: ValueError: If the path is not set
def delete(self) -> None:
    """Remove this model's directory from disk and clear its path.

    If `path` is a file its parent directory is removed, otherwise `path`
    itself is removed. Everything inside that directory is deleted.

    Raises:
        ValueError: If the path is not set
    """
    if self.path is None:
        raise ValueError("Cannot delete model because path is not set")
    # Path.parent is always a Path, so dir_path needs no None check.
    dir_path = self.path.parent if self.path.is_file() else self.path
    shutil.rmtree(dir_path)
    ModelCache.shared().invalidate(self.path)
    self.path = None
def init_private_attributes(self: BaseModel, context: Any, /) -> None:
    """Initialise the private attributes of a model instance, if not already done.

    Written to behave like a BaseModel method. `context` is accepted only
    because pydantic-core passes it when calling this function; it is not used.

    Args:
        self: The BaseModel instance.
        context: The context.
    """
    # Already initialised: leave the existing private attribute storage alone
    if getattr(self, '__pydantic_private__', None) is not None:
        return
    # Keep only the private attributes that declare a default
    defaults = {
        attr_name: value
        for attr_name, attr in self.__private_attributes__.items()
        if (value := attr.get_default()) is not PydanticUndefined
    }
    object_setattr(self, '__pydantic_private__', defaults)
This function is meant to behave like a BaseModel method to initialise private attributes.
It takes context as an argument since that's what pydantic-core passes when calling it.
Args: self: The BaseModel instance. context: The context.
class KilnParentedModel(KilnBaseModel, metaclass=ABCMeta):
    """Base model for Kiln models that have a parent-child relationship. This base class is for child models.

    This class provides functionality for managing hierarchical relationships between models,
    including parent reference handling and file system organization.

    Attributes:
        parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
    """

    # Parent is an in memory only reference to parent. If it's set we use that. If not we'll try to load it from disk based on the path.
    # We don't persist the parent reference to disk. See the accessors below for how we make it a clean api (parent accessor will lazy load from disk)
    parent: Optional[KilnBaseModel] = Field(default=None, exclude=True)

    def __getattribute__(self, name: str) -> Any:
        """Route reads of ``parent`` through :meth:`load_parent`; all other names use normal lookup."""
        if name == "parent":
            return self.load_parent()
        return super().__getattribute__(name)

    def cached_parent(self) -> Optional[KilnBaseModel]:
        """Return the in-memory parent reference without triggering a lazy load."""
        # Bypass our own __getattribute__ override, which would call load_parent().
        return object.__getattribute__(self, "parent")

    def load_parent(self) -> Optional[KilnBaseModel]:
        """Get the parent model instance, loading it from disk if necessary.

        Returns:
            Optional[KilnBaseModel]: The cached parent if one is set; otherwise the
            parent loaded from disk (and then cached on ``self.parent``), or None
            when this model has no path to derive the parent location from.
        """
        cached_parent = self.cached_parent()
        if cached_parent is not None:
            return cached_parent

        # lazy load parent from path
        if self.path is None:
            return None
        # Note: this only works with base_filename. If we ever support custom names, we need to change this.
        parent_type = self.__class__.parent_type()
        # Three levels up from this file, then the parent type's base filename.
        # The result of pathlib's `/` is always a Path, so no None check is needed.
        parent_path = self.path.parent.parent.parent / parent_type.base_filename()
        loaded_parent = parent_type.load_from_file(parent_path)
        self.parent = loaded_parent
        return loaded_parent

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def relationship_name(cls) -> str:
        """Name of the relationship folder for this child type. Placeholder until injected."""
        raise NotImplementedError("Relationship name must be implemented")

    # Dynamically implemented by KilnParentModel method injection
    @classmethod
    def parent_type(cls) -> Type[KilnBaseModel]:
        """Model class of this child's parent. Placeholder until injected."""
        raise NotImplementedError("Parent type must be implemented")

    @model_validator(mode="after")
    def check_parent_type(self) -> Self:
        """Validate that an in-memory parent, if set, is an instance of ``parent_type()``.

        Raises:
            ValueError: If the cached parent is not an instance of the expected type.
        """
        cached_parent = self.cached_parent()
        if cached_parent is not None:
            expected_parent_type = self.__class__.parent_type()
            if not isinstance(cached_parent, expected_parent_type):
                raise ValueError(
                    f"Parent must be of type {expected_parent_type}, but was {type(cached_parent)}"
                )
        return self

    def build_child_dirname(self) -> Path:
        """Build the folder name for this child: ``{id}`` or ``{id} - {name[:32]}``.

        Raises:
            ValueError: If ``id`` is None.
        """
        # Default implementation for readable folder names.
        # {id} - {name}/{type}.kiln
        if self.id is None:
            # consider generating an ID here. But if it's been cleared, we've already used this without one so raise for now.
            raise ValueError("ID is not set - can not save or build path")
        path = self.id
        name = getattr(self, "name", None)
        if name is not None:
            path = f"{path} - {name[:32]}"
        return Path(path)

    def build_path(self) -> Path | None:
        """Return the file path for this model, or None if it cannot be determined.

        An existing ``path`` always wins. Otherwise the path is
        ``parent_folder/relationship_name/child_dirname/base_filename``, derived
        from the parent's own ``build_path()``.
        """
        # if specifically loaded from an existing path, keep that no matter what
        # this ensures the file structure is easy to use with git/version control
        # and that changes to things like name (which impacts default path) don't leave dangling files
        if self.path is not None:
            return self.path
        # Build a path under parent_folder/relationship/file.kiln
        # Read `parent` once: each read goes through the lazy-loading accessor.
        parent = self.parent
        if parent is None:
            return None
        parent_path = parent.build_path()
        if parent_path is None:
            return None
        # Path.parent is always a Path, so there is nothing to None-check here.
        return (
            parent_path.parent
            / self.__class__.relationship_name()
            / self.build_child_dirname()
            / self.__class__.base_filename()
        )

    @classmethod
    def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
        """Yield the file paths of all children of this type stored under a parent.

        This is a generator, so the parent load and any error it raises happen
        on first iteration, not at call time.

        Args:
            parent_path: Path of the parent's file or folder. None yields nothing.

        Raises:
            ValueError: If loading the parent returns None.
        """
        if parent_path is None:
            # children are disk based. If not saved, they don't exist
            return

        # Determine the parent folder
        if parent_path.is_file():
            parent_folder = parent_path.parent
        else:
            parent_folder = parent_path

        parent = cls.parent_type().load_from_file(parent_path)
        if parent is None:
            raise ValueError("Parent must be set to load children")

        # Ignore type error: this is abstract base class, but children must implement relationship_name
        relationship_folder = parent_folder / Path(cls.relationship_name())  # type: ignore

        if not relationship_folder.exists() or not relationship_folder.is_dir():
            return

        # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
        yield from relationship_folder.glob(f"**/{cls.base_filename()}")

    @classmethod
    def all_children_of_parent_path(
        cls: Type[PT], parent_path: Path | None
    ) -> list[PT]:
        """Load every child of this type found under ``parent_path`` into a list."""
        return [
            cls.load_from_file(child_path)
            for child_path in cls.iterate_children_paths_of_parent_path(parent_path)
        ]

    @classmethod
    def from_id_and_parent_path(
        cls: Type[PT], id: str, parent_path: Path | None
    ) -> PT | None:
        """
        Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.

        Uses cache so still slow on first load.

        Returns:
            The child whose ID equals ``id``, or None if ``parent_path`` is None or no child matches.
        """
        if parent_path is None:
            return None

        # Note: we're using the in-file ID. We could make this faster using the path-ID if this becomes perf bottleneck, but it's better to have 1 source of truth.
        for child_path in cls.iterate_children_paths_of_parent_path(parent_path):
            child_id = ModelCache.shared().get_model_id(child_path, cls)
            if child_id == id:
                return cls.load_from_file(child_path)
            if child_id is None:
                # No ID from the cache: load the model and compare its ID directly.
                child = cls.load_from_file(child_path)
                if child.id == id:
                    return child
        return None
Base model for Kiln models that have a parent-child relationship. This base class is for child models.
This class provides functionality for managing hierarchical relationships between models, including parent reference handling and file system organization.
Attributes: parent (KilnBaseModel): Reference to the parent model instance. Not persisted, just in memory.
def load_parent(self) -> Optional[KilnBaseModel]:
    """Get the parent model instance, loading it from disk if necessary.

    Returns:
        Optional[KilnBaseModel]: The cached parent if one is set; otherwise the
        parent loaded from disk (and then cached on ``self.parent``), or None
        when this model has no path to derive the parent location from.
    """
    cached_parent = self.cached_parent()
    if cached_parent is not None:
        return cached_parent

    # lazy load parent from path
    if self.path is None:
        return None
    # Note: this only works with base_filename. If we ever support custom names, we need to change this.
    parent_type = self.__class__.parent_type()
    # Three levels up from this file, then the parent type's base filename.
    # The result of pathlib's `/` is always a Path, so no None check is needed.
    parent_path = self.path.parent.parent.parent / parent_type.base_filename()
    loaded_parent = parent_type.load_from_file(parent_path)
    self.parent = loaded_parent
    return loaded_parent
Get the parent model instance, loading it from disk if necessary.
Returns: Optional[KilnBaseModel]: The parent model instance or None if not set
@model_validator(mode="after")
def check_parent_type(self) -> Self:
    """Validate that an in-memory parent, if set, is an instance of ``parent_type()``.

    Only the cached reference is inspected, so validation does not trigger a
    lazy load of the parent.

    Raises:
        ValueError: If the cached parent is not an instance of the expected type.
    """
    parent = self.cached_parent()
    if parent is None:
        # Nothing set in memory: nothing to check.
        return self

    expected_parent_type = self.__class__.parent_type()
    if isinstance(parent, expected_parent_type):
        return self

    raise ValueError(
        f"Parent must be of type {expected_parent_type}, but was {type(parent)}"
    )
def build_child_dirname(self) -> Path:
    """Build the readable folder name for this child model.

    The layout on disk is ``{id} - {name}/{type}.kiln``. This returns the
    folder part: the ID alone, or the ID followed by the first 32 characters
    of ``name`` when the model has a non-None ``name`` attribute.

    Raises:
        ValueError: If ``id`` is None.
    """
    model_id = self.id
    if model_id is None:
        # consider generating an ID here. But if it's been cleared, we've already used this without one so raise for now.
        raise ValueError("ID is not set - can not save or build path")

    display_name = getattr(self, "name", None)
    if display_name is None:
        return Path(model_id)
    return Path(f"{model_id} - {display_name[:32]}")
307 def build_path(self) -> Path | None: 308 # if specifically loaded from an existing path, keep that no matter what 309 # this ensures the file structure is easy to use with git/version control 310 # and that changes to things like name (which impacts default path) don't leave dangling files 311 if self.path is not None: 312 return self.path 313 # Build a path under parent_folder/relationship/file.kiln 314 if self.parent is None: 315 return None 316 parent_path = self.parent.build_path() 317 if parent_path is None: 318 return None 319 parent_folder = parent_path.parent 320 if parent_folder is None: 321 return None 322 return ( 323 parent_folder 324 / self.__class__.relationship_name() 325 / self.build_child_dirname() 326 / self.__class__.base_filename() 327 )
@classmethod
def iterate_children_paths_of_parent_path(cls: Type[PT], parent_path: Path | None):
    """Yield the file paths of all children of this type stored under a parent.

    This is a generator, so the parent load and any error it raises happen
    on first iteration, not at call time.

    Args:
        parent_path: Path of the parent's file or folder. None yields nothing.

    Yields:
        Path: Each file named ``cls.base_filename()`` found at any depth under
        the relationship folder.

    Raises:
        ValueError: If loading the parent returns None.
    """
    if parent_path is None:
        # children are disk based. If not saved, they don't exist
        # Bare return: in a generator this simply ends iteration.
        return

    # Determine the parent folder
    if parent_path.is_file():
        parent_folder = parent_path.parent
    else:
        parent_folder = parent_path

    parent = cls.parent_type().load_from_file(parent_path)
    if parent is None:
        raise ValueError("Parent must be set to load children")

    # Ignore type error: this is abstract base class, but children must implement relationship_name
    relationship_folder = parent_folder / Path(cls.relationship_name())  # type: ignore

    if not relationship_folder.exists() or not relationship_folder.is_dir():
        return

    # Collect all /relationship/{id}/{base_filename.kiln} files in the relationship folder
    yield from relationship_folder.glob(f"**/{cls.base_filename()}")
@classmethod
def from_id_and_parent_path(
    cls: Type[PT], id: str, parent_path: Path | None
) -> PT | None:
    """
    Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.

    Uses cache so still slow on first load.

    Returns:
        The child whose ID equals ``id``, or None if ``parent_path`` is None or no child matches.
    """
    if parent_path is None:
        return None

    # Note: we're using the in-file ID. We could make this faster using the path-ID if this becomes perf bottleneck, but it's better to have 1 source of truth.
    for candidate_path in cls.iterate_children_paths_of_parent_path(parent_path):
        cached_id = ModelCache.shared().get_model_id(candidate_path, cls)
        if cached_id == id:
            return cls.load_from_file(candidate_path)
        if cached_id is not None:
            # The cache gave a different ID: skip without loading the model.
            continue
        # No ID from the cache: load the model and compare its ID directly.
        candidate = cls.load_from_file(candidate_path)
        if candidate.id == id:
            return candidate
    return None
Fast search by ID using the cache. Avoids the model_copy overhead on all but the exact match.
Uses cache so still slow on first load.
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """We need to both initialize private attributes and call the user-defined model_post_init
    method.

    Args:
        self: The BaseModel instance.
        context: Passed through unchanged to both calls.
    """
    # Private attributes are set up first, then the original hook runs
    # (presumably so the user hook can rely on them; confirm against the enclosing scope).
    init_private_attributes(self, context)
    # NOTE(review): `original_model_post_init` is not defined in this snippet; it appears to be
    # captured from the enclosing scope.
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.
class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
    """Base model for Kiln models that can have child models.

    This class provides functionality for managing collections of child models and their persistence.
    Child relationships must be defined using the parent_of parameter in the class definition.

    Args:
        parent_of (Dict[str, Type[KilnParentedModel]]): Mapping of relationship names to child model types
    """

    @classmethod
    def _create_child_method(
        cls, relationship_name: str, child_class: Type[KilnParentedModel]
    ):
        """Attach an instance method named ``relationship_name`` to ``cls`` that loads its children.

        The generated method returns
        ``child_class.all_children_of_parent_path(self.path)``.
        """

        def child_method(self) -> list[child_class]:
            return child_class.all_children_of_parent_path(self.path)

        child_method.__name__ = relationship_name
        child_method.__annotations__ = {"return": List[child_class]}
        setattr(cls, relationship_name, child_method)

    @classmethod
    def _create_parent_methods(
        cls, targetCls: Type[KilnParentedModel], relationship_name: str
    ):
        """Inject ``parent_type`` and ``relationship_name`` accessors into a child class.

        Both accessors take no arguments. They are installed as static methods
        so they work through the class and through an instance; a plain
        function would receive the instance as an unexpected argument.
        """

        def parent_class_method() -> Type[KilnParentModel]:
            return cls

        parent_class_method.__name__ = "parent_type"
        parent_class_method.__annotations__ = {"return": Type[KilnParentModel]}
        setattr(targetCls, "parent_type", staticmethod(parent_class_method))

        def relationship_name_method() -> str:
            return relationship_name

        relationship_name_method.__name__ = "relationship_name"
        relationship_name_method.__annotations__ = {"return": str}
        setattr(targetCls, "relationship_name", staticmethod(relationship_name_method))

    @classmethod
    def __init_subclass__(cls, parent_of: Dict[str, Type[KilnParentedModel]], **kwargs):
        """Record ``parent_of`` on the subclass and wire each relationship in both directions."""
        super().__init_subclass__(**kwargs)
        cls._parent_of = parent_of
        for relationship_name, child_class in parent_of.items():
            cls._create_child_method(relationship_name, child_class)
            cls._create_parent_methods(child_class, relationship_name)

    @classmethod
    def validate_and_save_with_subrelations(
        cls,
        data: Dict[str, Any],
        path: Path | None = None,
        parent: KilnBaseModel | None = None,
    ):
        """Validate and save a model instance along with all its nested child relationships.

        Args:
            data (Dict[str, Any]): Model data including child relationships
            path (Path, optional): Path where the model should be saved
            parent (KilnBaseModel, optional): Parent model instance for parented models

        Returns:
            KilnParentModel: The validated and saved model instance

        Raises:
            ValidationError: If validation fails for the model or any of its children
        """
        # Validate first, then save. Don't want error half way through, and partly persisted
        # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
        cls._validate_nested(data, save=False, path=path, parent=parent)
        instance = cls._validate_nested(data, save=True, path=path, parent=parent)
        return instance

    @classmethod
    def _validate_nested(
        cls,
        data: Dict[str, Any],
        save: bool = False,
        parent: KilnBaseModel | None = None,
        path: Path | None = None,
    ):
        """Validate ``data`` as ``cls`` and recurse into each child relationship it contains.

        Validation errors from this model and its children are collected and
        raised together, with child errors prefixed by relationship key and
        list index.

        Args:
            data: Model data; keys that appear in ``cls._parent_of`` hold lists of child data.
            save: When True, each successfully validated instance is saved to file.
            parent: Parent to attach when the instance is a KilnParentedModel.
            path: Path to assign to the instance when given.

        Returns:
            The validated instance.

        Raises:
            ValidationError: If this model or any child failed validation.
            ValueError: If a relationship value is not a list, or a registered
                child type is neither a KilnParentModel nor a KilnParentedModel.
        """
        # Collect all validation errors so we can report them all at once
        validation_errors = []

        try:
            instance = cls.model_validate(data, strict=True)
            if path is not None:
                instance.path = path
            if parent is not None and isinstance(instance, KilnParentedModel):
                instance.parent = parent
            if save:
                instance.save_to_file()
        except ValidationError as e:
            # Keep going without an instance so the children's errors are still reported.
            instance = None
            validation_errors.extend(e.errors())

        for key, value_list in data.items():
            if key not in cls._parent_of:
                continue
            child_type = cls._parent_of[key]
            if not isinstance(value_list, list):
                raise ValueError(
                    f"Expected a list for {key}, but got {type(value_list)}"
                )
            for value_index, value in enumerate(value_list):
                try:
                    if issubclass(child_type, KilnParentModel):
                        # Child has children of its own: recurse.
                        kwargs = {"data": value, "save": save}
                        if instance is not None:
                            kwargs["parent"] = instance
                        child_type._validate_nested(**kwargs)
                    elif issubclass(child_type, KilnParentedModel):
                        # Leaf node
                        subinstance = child_type.model_validate(value, strict=True)
                        if instance is not None:
                            subinstance.parent = instance
                        if save:
                            subinstance.save_to_file()
                    else:
                        raise ValueError(
                            f"Invalid type {child_type}. Should be KilnBaseModel based."
                        )
                except ValidationError as e:
                    for suberror in e.errors():
                        cls._append_loc(suberror, key, value_index)
                        validation_errors.append(suberror)

        if validation_errors:
            raise ValidationError.from_exception_data(
                title=f"Validation failed for {cls.__name__}",
                line_errors=validation_errors,
                input_type="json",
            )

        return instance

    @classmethod
    def _append_loc(
        cls, error: ErrorDetails, current_loc: str, value_index: int | None = None
    ):
        """Prefix ``error["loc"]`` in place with ``current_loc`` and, if given, ``value_index``.

        An existing tuple or list location is appended after the prefix; any
        other existing value is dropped.
        """
        orig_loc = error["loc"] if "loc" in error else None
        new_loc: list[str | int] = [current_loc]
        if value_index is not None:
            new_loc.append(value_index)
        if isinstance(orig_loc, (tuple, list)):
            new_loc.extend(orig_loc)
        error["loc"] = tuple(new_loc)
Base model for Kiln models that can have child models.
This class provides functionality for managing collections of child models and their persistence. Child relationships must be defined using the parent_of parameter in the class definition.
Args: parent_of (Dict[str, Type[KilnParentedModel]]): Mapping of relationship names to child model types
@classmethod
def validate_and_save_with_subrelations(
    cls,
    data: Dict[str, Any],
    path: Path | None = None,
    parent: KilnBaseModel | None = None,
):
    """Validate a model and all of its nested children, then save them.

    Runs in two passes over the same data: a validation-only pass, then a
    second pass that also saves.

    Args:
        data (Dict[str, Any]): Model data including child relationships
        path (Path, optional): Path where the model should be saved
        parent (KilnBaseModel, optional): Parent model instance for parented models

    Returns:
        KilnParentModel: The validated and saved model instance

    Raises:
        ValidationError: If validation fails for the model or any of its children
    """
    # Validate first, then save. Don't want error half way through, and partly persisted
    # TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
    cls._validate_nested(data, save=False, path=path, parent=parent)
    return cls._validate_nested(data, save=True, path=path, parent=parent)
Validate and save a model instance along with all its nested child relationships.
Args: data (Dict[str, Any]): Model data including child relationships path (Path, optional): Path where the model should be saved parent (KilnBaseModel, optional): Parent model instance for parented models
Returns: KilnParentModel: The validated and saved model instance
Raises: ValidationError: If validation fails for the model or any of its children
Configuration for the model; should be a dictionary conforming to [`ConfigDict`][pydantic.config.ConfigDict].
def wrapped_model_post_init(self: BaseModel, context: Any, /) -> None:
    """We need to both initialize private attributes and call the user-defined model_post_init
    method.

    Args:
        self: The BaseModel instance.
        context: Passed through unchanged to both calls.
    """
    # Private attributes are set up first, then the original hook runs
    # (presumably so the user hook can rely on them; confirm against the enclosing scope).
    init_private_attributes(self, context)
    # NOTE(review): `original_model_post_init` is not defined in this snippet; it appears to be
    # captured from the enclosing scope.
    original_model_post_init(self, context)
We need to both initialize private attributes and call the user-defined model_post_init method.