Examples

This page contains precision-engineered examples that demonstrate the Ollama Toolkit client’s capabilities with Eidosian elegance.

Basic Usage {#basic-usage}

  • Demonstrates checking the version, listing models, and generating completions.

  • Run:

    python -m ollama_toolkit.examples.basic_usage
    

Here’s a simple example of getting the version and listing available models:

from ollama_toolkit import OllamaClient

# Initialize the client - foundation of all operations
client = OllamaClient()

# Get the version - basic system awareness
version = client.get_version()
print(f"Connected to Ollama version: {version['version']}")

# List available models - structural knowledge acquisition
models = client.list_models()
print("\nAvailable models:")
for model in models.get("models", []):
    print(f"- {model.get('name')}")  # Precise formatting

Run with mathematical simplicity:

python -m ollama_toolkit.examples.basic_usage

Text Generation {#text-generation}

  • Non-streaming and streaming examples:

    response = client.generate(model="deepseek-r1:1.5b", prompt="Explain quantum mechanics.")
    # ...
    

Non-streaming Generation

from ollama_toolkit import OllamaClient

client = OllamaClient()
# Single operation, complete result - maximum efficiency
response = client.generate(
    model="deepseek-r1:1.5b",  # Precision model selection
    prompt="Explain quantum computing in simple terms",
    options={
        "temperature": 0.7,  # Calibrated randomness
        "top_p": 0.9,        # Optimized diversity
        "max_tokens": 500    # Bounded output
    },
    stream=False
)

print(response["response"])

Streaming Generation

from ollama_toolkit import OllamaClient

client = OllamaClient()
# Flowing river of tokens - immediate feedback loop
for chunk in client.generate(
    model="llama2",
    prompt="Write a short story about AI",
    options={"temperature": 0.9},
    stream=True
):
    if "response" in chunk:
        print(chunk["response"], end="", flush=True)  # Seamless display
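
If the complete text is needed once streaming finishes, the chunks can be accumulated as they arrive. A minimal sketch using the same call:

from ollama_toolkit import OllamaClient

client = OllamaClient()

# Collect streamed fragments into one string while still printing them live
parts = []
for chunk in client.generate(
    model="llama2",
    prompt="Write a short story about AI",
    options={"temperature": 0.9},
    stream=True
):
    if "response" in chunk:
        parts.append(chunk["response"])
        print(chunk["response"], end="", flush=True)

full_text = "".join(parts)
print(f"\n\nTotal characters generated: {len(full_text)}")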

Async Examples

Async Generation

import asyncio
from ollama_toolkit import OllamaClient

async def main():
    client = OllamaClient()
    
    # Async generation
    response = await client.agenerate(
        model="llama2",
        prompt="Explain how neural networks work"
    )
    print(response["response"])

asyncio.run(main())

Async Streaming

import asyncio
from ollama_toolkit import OllamaClient

async def main():
    client = OllamaClient()
    
    # Async streaming
    async for chunk in client.agenerate(
        model="llama2",
        prompt="Write a haiku about programming",
        stream=True
    ):
        if "response" in chunk:
            print(chunk["response"], end="", flush=True)

asyncio.run(main())
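
Because agenerate is a coroutine, several prompts can also be issued concurrently with asyncio.gather. A minimal sketch; the prompts are illustrative, and how much actually runs in parallel depends on the Ollama server:

import asyncio
from ollama_toolkit import OllamaClient

async def main():
    client = OllamaClient()

    prompts = [
        "Summarize the theory of relativity in one sentence.",
        "Name three uses of graph databases.",
    ]

    # Launch both generations concurrently and wait for all results
    responses = await asyncio.gather(
        *(client.agenerate(model="llama2", prompt=p) for p in prompts)
    )

    for prompt, response in zip(prompts, responses):
        print(f"Prompt: {prompt}")
        print(f"Response: {response['response']}\n")

asyncio.run(main())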

Generate a Completion

Here’s a complete example of generating text using the synchronous API:

from ollama_toolkit import OllamaClient

client = OllamaClient(timeout=300)  # Increased timeout for larger responses

# Non-streaming example (get complete response at once)
response = client.generate(
    model="llama2",
    prompt="Write a short poem about artificial intelligence.",
    options={
        "temperature": 0.7,
        "top_p": 0.9,
        "max_tokens": 200
    },
    stream=False
)

print(f"Complete response: {response['response']}")

# Streaming example (get tokens as they're generated)
print("\nStreaming response:")
for chunk in client.generate(
    model="llama2",
    prompt="Explain the concept of machine learning to a 10-year old.",
    stream=True
):
    if "response" in chunk:
        print(chunk["response"], end="", flush=True)
    if chunk.get("done", False):
        print("\n\nGeneration complete!")

Chat Completion {#chat-completion}

  • Use message roles (system, user, assistant).

    python -m ollama_toolkit.examples.chat_example
    

The chat interface supports system, user, and assistant message roles, with both non-streaming and streaming responses:

from ollama_toolkit import OllamaClient

client = OllamaClient(timeout=300)

# Prepare chat messages
messages = [
    {"role": "system", "content": "You are a helpful assistant who speaks like a pirate."},
    {"role": "user", "content": "Tell me about the solar system."}
]

# Non-streaming example
response = client.chat(
    model="llama2",
    messages=messages,
    stream=False,
    options={"temperature": 0.8}
)

print(f"Assistant: {response['message']['content']}")

# Streaming example
messages.append({"role": "user", "content": "What's the largest planet?"})

print("\nStreaming response:")
print("Assistant: ", end="", flush=True)

for chunk in client.chat(
    model="llama2",
    messages=messages,
    stream=True
):
    if "message" in chunk and "content" in chunk["message"]:
        content = chunk["message"]["content"]
        print(content, end="", flush=True)
    
    if chunk.get("done", False):
        print("\n\nChat complete!")

Embeddings {#embeddings}

  • Demonstrates embedding creation and cosine similarity:

    python -m ollama_toolkit.examples.embedding_example --text "Hello world"
    

Generate embeddings for semantic search and similarity:

from ollama_toolkit import OllamaClient
import numpy as np

client = OllamaClient()

# Create embedding with semantic precision
embedding1 = client.create_embedding(
    model="nomic-embed-text",  # Purpose-built model selection
    prompt="Artificial intelligence is transforming industries worldwide."
)

embedding2 = client.create_embedding(
    model="nomic-embed-text",
    prompt="AI technologies are changing how businesses operate globally."
)

# Calculate cosine similarity with mathematical elegance
def cosine_similarity(a, b):
    # Vector mathematics distilled to its essence
    dot_product = np.dot(a, b)
    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    return dot_product / (norm_a * norm_b)

# Extract vectors with contextual awareness
vec1 = embedding1["embedding"]
vec2 = embedding2["embedding"]

# Calculate similarity—higher value means more similar concepts
similarity = cosine_similarity(vec1, vec2)
print(f"Similarity score: {similarity:.4f}")  # Precise formatting

Run with one simple command:

python -m ollama_toolkit.examples.embedding_example --text "Hello world"
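
The same embedding call scales naturally to ranking a handful of documents against a query. A minimal sketch reusing create_embedding and the cosine_similarity helper above; the documents and query are purely illustrative:

from ollama_toolkit import OllamaClient
import numpy as np

client = OllamaClient()

def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def embed(text):
    # Use the same model for every text so the vectors are comparable
    return client.create_embedding(model="nomic-embed-text", prompt=text)["embedding"]

documents = [
    "The stock market rallied after the earnings report.",
    "A new species of frog was discovered in the rainforest.",
    "Central banks adjusted interest rates this quarter.",
]

query = "financial news"
query_vec = embed(query)

# Score every document against the query and sort from most to least similar
scored = sorted(
    ((cosine_similarity(query_vec, embed(doc)), doc) for doc in documents),
    reverse=True,
)
for score, doc in scored:
    print(f"{score:.4f}  {doc}")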

Working with Models

  • Pull, list, or delete models:

    client.pull_model("deepseek-r1:1.5b", stream=True)
    # ...
    

Manage models with the toolkit:

from ollama_toolkit import OllamaClient

client = OllamaClient()

# List all available models
models = client.list_models()
print("Available models:")
for model in models.get("models", []):
    name = model.get("name", "Unknown")
    size_bytes = model.get("size", 0)
    if size_bytes:
        print(f"- {name} ({size_bytes / (1024**3):.2f} GB)")
    else:
        print(f"- {name} (size: Unknown)")

# Pull a new model with progress updates
print("\nPulling tinyllama model...")
for update in client.pull_model("tinyllama", stream=True):
    status = update.get("status", "")
    if status == "downloading":
        progress = update.get("completed", 0) / (update.get("total") or 1) * 100  # guard against a zero total
        print(f"\rDownloading: {progress:.1f}%", end="", flush=True)
    elif status == "success":
        print("\nDownload complete!")
        
# Delete a model (if needed)
# Uncomment to test deletion:
# result = client.delete_model("tinyllama")
# print(f"Model deleted: {result}")

Error Handling

The toolkit provides specific exception classes, making it straightforward to catch individual failure modes and fall back gracefully:

from ollama_toolkit import OllamaClient
from ollama_toolkit.exceptions import (
    ModelNotFoundError,  # Specific error classification
    ConnectionError, 
    TimeoutError,
    OllamaAPIError
)

client = OllamaClient()

def safe_generate():
    """Recursively refined error handling approach"""
    try:
        # Primary attempt - optimistic path
        return client.generate(
            model="nonexistent-model-123",
            prompt="This won't work",
            stream=False
        )
    except ModelNotFoundError as e:
        print(f"Model not found: {e}")  # Precise error communication
        # Fallback with graceful recovery - structural control
        return client.generate(
            model="llama2",
            prompt="This is a fallback prompt",
            stream=False
        )
    except ConnectionError as e:
        # System awareness
        print(f"Connection error: {e}")
        print("Please ensure Ollama server is running")
        return None
    except TimeoutError as e:
        # Velocity awareness
        print(f"Request timed out: {e}")
        return None
    except OllamaAPIError as e:
        # Generic error handling as final safety net
        print(f"API error: {e}")
        return None

response = safe_generate()
if response:
    print(f"Response: {response.get('response', '')}")

Automatic Ollama Installation

from ollama_toolkit.utils.common import ensure_ollama_running, check_ollama_installed

# Check if Ollama is installed
is_installed, install_message = check_ollama_installed()
if is_installed:
    print(f"Ollama is installed: {install_message}")
else:
    print(f"Ollama is not installed: {install_message}")

# Ensure Ollama is running
is_running, message = ensure_ollama_running()
if is_running:
    print(f"Ollama is running: {message}")
else:
    print(f"Ollama setup failed: {message}")

For more examples that embody Eidosian principles, explore the example scripts in the /examples directory.