Skip to main content

Documentation Index

Fetch the complete documentation index at: https://docs.auriko.ai/llms.txt

Use this file to discover all available pages before exploring further.

Auriko streams chat completions over Server-Sent Events (SSE). Set stream: true and iterate over chunks as they arrive.

Prerequisites

  • An Auriko API key
  • Python 3.10+ with the OpenAI SDK (pip install openai) or the auriko SDK (pip install auriko)
    • OR Node.js 18+ with the OpenAI SDK (npm install openai) or @auriko/sdk (npm install @auriko/sdk)

Stream responses

Stream a chat completion response:
import os
from openai import OpenAI

# Point the OpenAI SDK at the Auriko gateway; the key is read from the environment.
client = OpenAI(
    base_url="https://api.auriko.ai/v1",
    api_key=os.environ["AURIKO_API_KEY"],
)

# stream=True makes the call return an iterator of chunks.
stream = client.chat.completions.create(
    stream=True,
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a short story"}],
)

for chunk in stream:
    # Skip chunks with no choices before indexing [0].
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    if text:
        print(text, end="", flush=True)

Stream asynchronously

Stream with the async client:
import os
from openai import AsyncOpenAI
import asyncio

async def stream_response():
    """Stream one chat completion and print each content token as it arrives."""
    async_client = AsyncOpenAI(
        base_url="https://api.auriko.ai/v1",
        api_key=os.environ["AURIKO_API_KEY"],
    )

    # Awaiting create() yields the stream; the chunks are then read with `async for`.
    stream = await async_client.chat.completions.create(
        stream=True,
        model="gpt-4o",
        messages=[{"role": "user", "content": "Write a short story"}],
    )

    async for chunk in stream:
        # Skip chunks with no choices before indexing [0].
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            print(token, end="", flush=True)


# Entry point: drive the coroutine with asyncio.run.
asyncio.run(stream_response())

Stream events

Each chunk contains:
# ChatCompletionChunk
# Fields readable on each streamed chunk; the trailing comment shows an example value.
# NOTE: trailing usage / routing_metadata chunks carry an empty `choices` list,
# so check `chunk.choices` before indexing `[0]`.
chunk.id           # "chatcmpl-abc123"
chunk.model        # "gpt-4o"
chunk.created      # 1234567890
chunk.choices[0].delta.content                  # Token content (may be None)
chunk.choices[0].delta.role                     # "assistant" (first chunk only)
chunk.choices[0].delta.reasoning_content        # Reasoning fragment (if model supports it)
chunk.choices[0].delta.reasoning_signature      # Signature for current thinking block
chunk.choices[0].delta.reasoning_redacted_data  # Encrypted redacted thinking data
chunk.choices[0].finish_reason                  # None until last chunk ("stop")
chunk.choices[0].native_finish_reason           # Provider's original value (e.g. "end_turn")

Handle final chunks

The last content chunk carries finish_reason. A trailing chunk carries usage on every stream. You don’t need to set stream_options.
stream = client.chat.completions.create(
    stream=True,
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
)

# Content fragments are collected here and joined once after the loop.
pieces = []
usage = None

for chunk in stream:
    # Trailing chunks have an empty choices list, so guard before indexing.
    if chunk.choices:
        fragment = chunk.choices[0].delta.content
        if fragment:
            pieces.append(fragment)
        if chunk.choices[0].finish_reason:
            print(f"\n\nFinished: {chunk.choices[0].finish_reason}")
    # Keep the most recent non-empty usage payload.
    usage = chunk.usage or usage

full_content = "".join(pieces)

if usage:
    print(f"Tokens used: {usage.total_tokens}")
Every stream ends with a trailing chunk that contains token usage, followed by a separate trailing chunk that carries routing_metadata. Setting stream_options.include_usage explicitly is harmless but unnecessary.

Stream properties

The stream object exposes response headers immediately, and usage and routing metadata after iteration completes.
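| Property         | Python                  | TypeScript              | Available       |
|------------------|-------------------------|-------------------------|-----------------|
| Token usage      | stream.usage            | stream.usage            | After iteration |
| Routing info     | stream.routing_metadata | stream.routing_metadata | After iteration |
| Response headers | stream.response_headers | stream.responseHeaders  | Immediately     |
| Close connection | stream.close()          | stream.close()          | Any time        |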
Use the stream as a context manager to ensure the connection is released:
# Leaving the `with` block releases the connection, including on error.
with client.chat.completions.create(
    stream=True,
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
) as stream:
    for chunk in stream:
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            print(text, end="", flush=True)

# Usage and routing metadata are populated only once the stream has been consumed.
if stream.usage:
    print(f"Tokens: {stream.usage.total_tokens}")
if stream.routing_metadata:
    print(f"Provider: {stream.routing_metadata.provider}")
With the async client, close the stream in a finally block so the connection is always released:
stream = await client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True
)

try:
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
finally:
    await stream.close()
routing_metadata and usage arrive on separate trailing chunks after all content chunks. Consume the stream to completion to access them.
In TypeScript, you can only iterate a stream once. A second attempt throws an error.

Stream with tools

Reassemble streamed tool call chunks into complete function calls:
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}}
        }
    }],
    stream=True
)

# Tool calls arrive as fragments tagged with an `index`. Accumulate them in a
# dict keyed by that index rather than a positional list, so a fragment whose
# index is out of order or skips a value cannot raise IndexError.
calls_by_index = {}
for chunk in stream:
    # Skip chunks with no choices before indexing [0].
    if not chunk.choices:
        continue
    delta = chunk.choices[0].delta
    if not delta.tool_calls:
        continue

    for tc in delta.tool_calls:
        call = calls_by_index.setdefault(
            tc.index,
            {"id": None, "function": {"name": "", "arguments": ""}},
        )
        # Record the id whenever a fragment carries one, not just the first.
        if tc.id:
            call["id"] = tc.id
        if tc.function:
            # Name and arguments are streamed as string pieces; concatenate them.
            if tc.function.name:
                call["function"]["name"] += tc.function.name
            if tc.function.arguments:
                call["function"]["arguments"] += tc.function.arguments

# Same shape as before: a list of {"id", "function": {"name", "arguments"}},
# ordered by tool-call index.
tool_calls = [calls_by_index[i] for i in sorted(calls_by_index)]

print(tool_calls)
See Tool Calling Guide for function definitions and multi-turn tool conversations.

Stream with routing options

Pass routing options to a streaming request:
# Routing options are sent under the "gateway" -> "routing" keys of the request
# body via the SDK's extra_body argument.
routing_options = {
    "optimize": "ttft-focus",
    "max_ttft_ms": 1000,
}

stream = client.chat.completions.create(
    stream=True,
    model="gpt-5.4",
    messages=[{"role": "user", "content": "Hello!"}],
    extra_body={"gateway": {"routing": routing_options}},
)

for chunk in stream:
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    if text:
        print(text, end="", flush=True)

Handle stream errors

Catch errors during streaming:
import os
from openai import OpenAI, APIStatusError

client = OpenAI(
    base_url="https://api.auriko.ai/v1",
    api_key=os.environ["AURIKO_API_KEY"],
)

# The try block covers both the request and the iteration, so an
# APIStatusError raised at either point is handled below.
try:
    stream = client.chat.completions.create(
        stream=True,
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}],
    )

    for chunk in stream:
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            print(text, end="", flush=True)

except APIStatusError as e:
    if e.status_code != 429:
        print(f"API error ({e.status_code}): {e.message}")
    else:
        # 429: report the retry-after response header (None if absent).
        retry_after = e.response.headers.get("retry-after")
        print(f"Rate limited: retry after {retry_after}s")
See Error Handling Guide for retry strategies and circuit breakers.

SSE format

Raw SSE events look like this. The stream ends with usage and routing_metadata events before [DONE].
data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1234567890,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":8,"completion_tokens":2,"total_tokens":10}}

data: {"id":"chatcmpl-a1b2c3d4","object":"chat.completion.chunk","created":1234567890,"model":"gpt-4o","choices":[],"routing_metadata":{"provider":"openai","routing_strategy":"balanced","cost":{"usd":0.00015}}}

data: [DONE]
The trailing events before [DONE] carry usage and routing_metadata with choices: []. SDKs expose these as stream.usage and stream.routing_metadata after iteration.