# Auriko Python SDK
The auriko Python package provides an OpenAI-compatible client for the Auriko API.

Full SDK Reference

Complete API reference with all types, parameters, and examples

Installation

pip install auriko
Requires Python 3.10 or later.

Get started

from auriko import Client

client = Client()  # reads AURIKO_API_KEY from environment

response = client.chat.completions.create(
    model="gpt-5.4",
    messages=[{"role": "user", "content": "Hello!"}]
)

print(response.choices[0].message.content)

Configure

API Key

import os

# Option 1: Auto-detect from AURIKO_API_KEY env var (recommended)
client = Client()

# Option 2: Pass explicitly
client = Client(api_key=os.environ["AURIKO_API_KEY"])

Base URL

# Default: https://api.auriko.ai/v1
# Override for self-hosted or proxy setups:
client = Client(base_url="https://your-proxy.example.com/v1")

Timeout

client = Client(timeout=60.0)  # seconds

Retries

client = Client(max_retries=3)  # default is 2

Create chat completions

Basic request

Send a chat completion request:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is 2+2?"}
    ]
)

print(response.choices[0].message.content)

With routing options

response = client.chat.completions.create(
    model="gpt-5.4",
    messages=[{"role": "user", "content": "Hello!"}],
    routing={
        "optimize": "cost",
        "max_ttft_ms": 200,
    }
)

# Access routing metadata
print(f"Provider: {response.routing_metadata.provider}")
if response.routing_metadata.cost:
    print(f"Cost: ${response.routing_metadata.cost.billable_cost_usd:.6f}")
You can also pass a RoutingOptions object for IDE autocomplete and validation:
from auriko.route_types import RoutingOptions, Optimize

response = client.chat.completions.create(
    model="gpt-5.4",
    messages=[{"role": "user", "content": "Hello!"}],
    routing=RoutingOptions(optimize=Optimize.COST, max_ttft_ms=200)
)
All routing fields:

| Field | Type | Description |
| --- | --- | --- |
| `optimize` | `Optimize` | Strategy: `"cost"`, `"speed"`, `"ttft"`, `"throughput"`, `"balanced"`, `"cheapest"` |
| `weights` | `dict[str, float]` | Custom scoring weights: `cost`, `ttft`, `throughput`, `reliability`. Overrides preset. |
| `max_cost_per_1m` | `float` | Max cost per 1M tokens |
| `max_ttft_ms` | `int` | Max time to first token (ms) |
| `min_throughput_tps` | `float` | Min tokens per second |
| `min_success_rate` | `float` | Min provider success rate (0.0–1.0) |
| `providers` | `list[str]` | Allowlist of providers |
| `exclude_providers` | `list[str]` | Blocklist of providers |
| `prefer` | `str` | Preferred provider (soft preference) |
| `mode` | `Mode` | `"pool"` (default) or `"fallback"` |
| `allow_fallbacks` | `bool` | Enable fallback on failure |
| `max_fallback_attempts` | `int` | Max fallback retries |
| `data_policy` | `DataPolicy` | `"none"`, `"no_training"`, `"zdr"` |
| `only_byok` | `bool` | Only use BYOK providers |
| `only_platform` | `bool` | Only use platform providers |
See Advanced Routing for detailed strategy guides.

Multi-model routing

Route a request across multiple models. The router picks the best option based on your routing strategy:
response = client.chat.completions.create(
    models=["gpt-4o", "claude-sonnet-4-20250514", "gemini-2.5-flash"],
    messages=[{"role": "user", "content": "Explain quantum computing briefly."}],
    routing={"optimize": "cost"}
)

print(f"Model used: {response.model}")
print(f"Provider: {response.routing_metadata.provider}")
print(response.choices[0].message.content)
model and models are mutually exclusive. Specify exactly one. Passing both raises InvalidRequestError.

Extended thinking

Enable extended reasoning for complex tasks using the extensions parameter:
response = client.chat.completions.create(
    model="claude-sonnet-4-20250514",
    messages=[{"role": "user", "content": "Solve step by step: what is 23! / 20!?"}],
    extensions={"thinking": {"enabled": True, "budget_tokens": 10000}}
)

# Access the reasoning output (if the model returns it)
if response.choices[0].message.reasoning_content:
    print(f"Reasoning: {response.choices[0].message.reasoning_content}")
print(f"Answer: {response.choices[0].message.content}")
You can also pass provider-specific parameters through extensions:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
    extensions={"openai": {"logit_bias": {"1234": -100}}}
)
See Extensions and Thinking for provider details and streaming thinking output.

Request metadata

Attach metadata to requests for tracking and analytics:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
    auriko_metadata={"session_id": "abc-123", "user_tier": "premium"}
)
The Auriko dashboard logs and displays your metadata.

Stream responses

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Count to 10"}],
    stream=True
)

for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
After consuming all chunks, access stream-level metadata:
print(f"\nProvider: {stream.routing_metadata.provider}")
print(f"Tokens: {stream.usage.total_tokens}")
print(f"Request ID: {stream.response_headers.request_id}")
Use a context manager for automatic cleanup:
with client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Count to 10"}],
    stream=True
) as stream:
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
# stream is automatically closed
Or close manually with stream.close().
Routing metadata, usage, and response headers are available only after consuming all chunks.
See Streaming Guide for full patterns including tool call streaming.

Tool calling

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"}
                },
                "required": ["city"]
            }
        }
    }
]

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools
)

if response.choices[0].message.tool_calls:
    tool_call = response.choices[0].message.tool_calls[0]
    print(f"Function: {tool_call.function.name}")
    print(f"Arguments: {tool_call.function.arguments}")
See Tool Calling Guide for multi-turn tool conversations.

Read response headers

Every response and error includes a response_headers object with typed accessors:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)

response.response_headers.request_id                  # str | None
response.response_headers.rate_limit_remaining         # int | None
response.response_headers.rate_limit_limit             # int | None
response.response_headers.rate_limit_reset             # str | None
response.response_headers.credits_balance_microdollars # int | None
response.response_headers.provider_used                # str | None
response.response_headers.routing_strategy             # str | None
response.response_headers.get("x-custom-header")       # generic lookup
| Property | Header | Type |
| --- | --- | --- |
| `request_id` | `x-request-id` | `str \| None` |
| `rate_limit_remaining` | `x-ratelimit-remaining-requests` | `int \| None` |
| `rate_limit_limit` | `x-ratelimit-limit-requests` | `int \| None` |
| `rate_limit_reset` | `x-ratelimit-reset-requests` | `str \| None` |
| `credits_balance_microdollars` | `x-credits-balance-microdollars` | `int \| None` |
| `provider_used` | `x-provider-used` | `str \| None` |
| `routing_strategy` | `x-routing-strategy` | `str \| None` |
Error objects also carry response_headers. Use e.response_headers.request_id when filing support tickets to correlate with server logs. See the Python SDK Reference for the complete ResponseHeaders API.

Read token usage

The Usage object on every response carries optional detail breakdowns:
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)

usage = response.usage

# Prompt token breakdown
if usage.prompt_tokens_details:
    print(f"Cached: {usage.prompt_tokens_details.cached_tokens}")
    print(f"Text: {usage.prompt_tokens_details.text_tokens}")
    print(f"Image: {usage.prompt_tokens_details.image_tokens}")
    print(f"Audio: {usage.prompt_tokens_details.audio_tokens}")

# Completion token breakdown
if usage.completion_tokens_details:
    print(f"Reasoning: {usage.completion_tokens_details.reasoning_tokens}")
    print(f"Text: {usage.completion_tokens_details.text_tokens}")
| Field | Sub-fields | Type |
| --- | --- | --- |
| `prompt_tokens_details` | `cached_tokens`, `text_tokens`, `image_tokens`, `audio_tokens` | `Optional[int]` each |
| `completion_tokens_details` | `reasoning_tokens`, `text_tokens`, `image_tokens`, `audio_tokens` | `Optional[int]` each |
Availability depends on the provider. completion_tokens_details.reasoning_tokens is present for OpenAI o-series, DeepSeek, xAI, and Google Gemini. It’s None for providers that don’t report reasoning token counts (Anthropic, Moonshot, Fireworks). See Check reasoning token availability for the full breakdown.

Handle errors

Catch typed exceptions:
from auriko import (
    Client,
    AurikoAPIError,
    AuthenticationError,
    RateLimitError,
    BudgetExceededError,
    ModelNotFoundError,
    ProviderError,
    # Also available: InvalidRequestError, InsufficientCreditsError,
    # InternalError, ProviderAuthError, ServiceUnavailableError
)

client = Client()

try:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}]
    )
except AuthenticationError as e:
    print(f"Check your API key: {e}")
except RateLimitError as e:
    print(f"Rate limited: {e}")
except BudgetExceededError as e:
    print(f"Budget exceeded: {e}")
except ModelNotFoundError as e:
    print(f"Model not found: {e}")
except ProviderError as e:
    print(f"Provider error: {e}")
except AurikoAPIError as e:
    print(f"API error ({e.status_code}): {e}")
See Error Handling Guide for retry patterns and map_openai_error().

Use management APIs

Query workspace, budget, and model information:
# Identity (discover your workspace)
identity = client.me.get()
print(f"Workspace: {identity.workspace_id}")

# Workspaces
workspaces = client.workspaces.list()
workspace = client.workspaces.get("ws-123")

# Budgets
budgets = client.budgets.list("ws-123")
budget = client.budgets.get("ws-123", "budget-456")

# Models
registry = client.models.list_registry()
directory = client.models.list_directory()
providers = client.models.list_providers()

Model listing choices

| Method | Returns | Use when |
| --- | --- | --- |
| `list_registry()` | Flat list: `id`, `family`, `display_name` | You need a quick model ID lookup |
| `list_directory()` | Rich detail: provider entries, context windows, capabilities, pricing tiers | You need to compare providers or check capabilities |
| `list_providers()` | Provider catalog: display name, description, data policy | You need to see available providers |
See the Python SDK Reference for the complete API.

Use async client

Use the async client for non-blocking requests:
from auriko import AsyncClient

async def main():
    client = AsyncClient()

    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}]
    )

    print(response.choices[0].message.content)

import asyncio
asyncio.run(main())

Async streaming

Stream responses asynchronously:
from auriko import AsyncClient

async def stream_response():
    client = AsyncClient()

    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Count to 10"}],
        stream=True
    )

    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)

Async context manager

Use async with for automatic connection cleanup:
from auriko import AsyncClient

async def main():
    async with AsyncClient() as client:
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello!"}]
        )
        print(response.choices[0].message.content)
    # client.close() called automatically
Or close explicitly: await client.close()

Use context managers

Use a context manager for automatic cleanup:
with Client() as client:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)

SDK scope

The Auriko SDK covers: inference (chat completions with routing), read-only management (workspaces, budgets, identity), and model discovery. For full platform operations (workspace creation, budget management, API key rotation), use the REST API directly.

Use type hints

The SDK provides typed responses, errors, and routing configuration. Use your IDE’s autocomplete for the best experience:
from auriko import Client
from auriko.models.chat import ChatCompletion, ChatCompletionChunk

client = Client()

response: ChatCompletion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)