# Source code for belso.translator.utils.detecting

from typing import Any
import xml.etree.ElementTree as ET

from pydantic import BaseModel
from google.ai.generativelanguage_v1beta.types import content

from belso.schemas import Schema
from belso.utils.logging import get_logger

# Get a module-specific logger
logger = get_logger(__name__)

def _detect_dict_schema_format(schema: dict) -> str:
    """
    Classify a dictionary-based schema from its structural markers.\n
    ---
    ### Args
    - `schema`: the dictionary to inspect.\n
    ---
    ### Returns
    - `str`: one of `"langchain"`, `"anthropic"`, `"huggingface"`, `"ollama"`,
      `"json"`, or `"unknown"` when no known marker matches.
    """
    has_title = "title" in schema

    # JSON Schema drafts up to draft-07 use an `http://` meta-schema URI while
    # 2019-09 and 2020-12 use `https://`, so both spellings are accepted.
    # The isinstance guard keeps a non-string `$schema` (None, int, ...) from
    # raising on the substring test and aborting detection altogether.
    schema_uri = schema.get("$schema")
    if isinstance(schema_uri, str) and any(
        marker in schema_uri
        for marker in ("http://json-schema.org", "https://json-schema.org")
    ):
        if has_title and isinstance(schema["title"], str):
            logger.debug("Detected LangChain schema format.")
            return "langchain"
        # Anthropic and Mistral both use plain JSON Schema and cannot be told
        # apart from the dictionary alone, so Anthropic is the default.
        logger.debug("Detected JSON Schema format (Anthropic/Mistral).")
        return "anthropic"

    # Object-shaped schemas without a JSON Schema identifier.
    if schema.get("type") == "object" and "properties" in schema:
        if has_title:
            logger.debug("Detected LangChain schema format.")
            return "langchain"
        if schema.get("format") == "huggingface":
            logger.debug("Detected Hugging Face schema format.")
            return "huggingface"
        logger.debug("Detected Ollama schema format.")
        return "ollama"

    # belso's own JSON serialisation: a name plus a list of fields.
    if "name" in schema and isinstance(schema.get("fields"), list):
        logger.debug("Detected belso JSON schema format.")
        return "json"

    logger.debug("Dictionary input detected, but not recognized as a known schema format.")
    return "unknown"


def detect_schema_format(schema: Any) -> str:
    """
    Detect the format of the input schema.\n
    ---
    ### Args
    - `schema`: the schema to detect.\n
    ---
    ### Returns
    - `str`: the detected format as a string. One of `"belso"`, `"google"`,
      `"openai"`, `"xml"`, `"langchain"`, `"anthropic"`, `"huggingface"`,
      `"ollama"`, `"json"`, or `"unknown"` when the input is not recognized
      or an error occurs during detection.
    """
    logger.debug("Detecting schema format...")

    try:
        is_class = isinstance(schema, type)

        # belso's own Schema classes are tested before the generic Pydantic
        # check, mirroring the original precedence.
        if is_class and issubclass(schema, Schema):
            logger.debug("Detected belso schema format.")
            return "belso"

        # Google Gemini schema instances.
        if isinstance(schema, content.Schema):
            logger.debug("Detected Google Gemini schema format.")
            return "google"

        # Pydantic model classes are what the OpenAI integration uses.
        if is_class and issubclass(schema, BaseModel):
            logger.debug("Detected OpenAI (Pydantic) schema format.")
            return "openai"

        # Already-parsed XML.
        if isinstance(schema, ET.Element):
            logger.debug("Detected XML Element schema format.")
            return "xml"

        # Strings are either inline XML or something else (e.g. a file path).
        if isinstance(schema, str):
            trimmed = schema.strip()
            if trimmed.startswith("<") and trimmed.endswith(">"):
                logger.debug("Detected XML string schema format.")
                return "xml"
            logger.debug("String input detected, but not recognized as XML. Might be a file path.")

        # JSON Schema-based formats (Anthropic, Ollama, Mistral, etc.).
        if isinstance(schema, dict):
            detected = _detect_dict_schema_format(schema)
            if detected != "unknown":
                return detected

        logger.warning("Unable to detect schema format. Returning 'unknown'.")
        return "unknown"

    except Exception as e:
        # Detection is best-effort: any failure is logged and reported as
        # "unknown" instead of propagating to the caller.
        logger.error(f"Error during schema format detection: {e}")
        logger.debug("Detection error details", exc_info=True)
        return "unknown"