Official Python SDK for Octen API - Web Search, Text Embeddings, and LLM Chat
Project description
Octen Python SDK
Official Python SDK for the Octen API — web search, text embeddings, and multi-model LLM chat in one package.
✨ Features
- 🔍 Web Search — search and retrieve ranked web results with filtering, highlighting, and full content
- 💬 Multi-model Chat — access 10+ LLMs (GPT, Claude, Gemini, Kimi, MiniMax) through a single unified API
- 🧮 Text Embeddings — convert text into high-quality vector representations
- ⚡ Streaming (SSE) — real-time token streaming with typed event objects
- 🔄 Auto Retry — exponential backoff for transient errors
- 🛡️ Type Safe — full Pydantic models with IDE auto-completion
- 🔀 Async Support — native `asyncio` client for concurrent workloads
- 📦 HTTP/2 — connection pooling and keep-alive out of the box
📦 Installation
pip install octen
Requires Python 3.8 or higher.
Development Version
pip install octen[dev]
Async Support
pip install octen[async]
🚀 Quick Start
Search
from octen import Octen
with Octen(api_key="your-api-key") as client:
response = client.search.search(query="Python programming", count=5)
for result in response.results:
print(f"Title: {result['title']}")
print(f"URL: {result['url']}")
print(f"Highlight: {result.get('highlight', '')}")
Chat
from octen import Octen, ChatMessage
with Octen(api_key="your-api-key") as client:
response = client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="Hello!")],
web_search="on"
)
print(response.text)
Embeddings
from octen import Octen
with Octen(api_key="your-api-key") as client:
embedding = client.embedding.create(
input=["Hello, world!"],
model="octen-embedding-4b"
)
vector = embedding.get_first_embedding()
print(f"Vector dimension: {len(vector)}")
🔍 Search API
Advanced Search
from octen import Octen, HighlightOptions, FullContentOptions
with Octen(api_key="your-api-key") as client:
response = client.search.search(
query="machine learning best practices",
count=10,
search_type="semantic", # Semantic search
include_domains=["github.com", "arxiv.org"], # Search only these domains
start_time="2024-01-01T00:00:00Z", # Time filtering
highlight=HighlightOptions(
enable=True,
max_tokens=500
),
full_content=FullContentOptions(
enable=True,
max_tokens=2000
),
timeout=60.0 # Custom timeout
)
print(f"Found {len(response.results)} results")
print(f"Actual search type: {response.search_type}")
print(f"Token usage: {response.usage}")
💬 Chat API
Non-streaming
from octen import Octen, ChatMessage, WebSearchOptions
with Octen(api_key="your-api-key") as client:
response = client.chat.create(
model="openai/gpt-5.4",
messages=[
ChatMessage(role="system", content="You are a helpful assistant."),
ChatMessage(role="user", content="What happened in tech today?"),
],
web_search="on",
web_search_options=WebSearchOptions(safesearch="off", count=5),
max_tokens=500,
temperature=0.7
)
print(response.text)
print(f"Tokens used: {response.usage.total_tokens}")
# Access search results
if response.search_results:
for group in response.search_results:
for item in group.results:
print(f" - {item.title}: {item.url}")
Streaming
from octen import Octen, ChatMessage
with Octen(api_key="your-api-key") as client:
for event in client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="Tell me a story")],
stream=True,
web_search="on"
):
if event.type == "search_done":
print(f"[{len(event.search_results or [])} search groups]")
elif event.type == "content" and event.choices:
print(event.choices[0].delta.content or "", end="", flush=True)
elif event.type == "finish":
print() # newline
elif event.type == "usage" and event.usage:
print(f"[total tokens: {event.usage.total_tokens}]")
Tool Calling
from octen import Octen
from octen.models import ChatMessage, Tool, ToolFunction
weather_tool = Tool(
function=ToolFunction(
name="get_weather",
description="Get current weather for a city",
parameters={
"type": "object",
"properties": {
"city": {"type": "string", "description": "City name"},
},
"required": ["city"],
}
)
)
with Octen(api_key="your-api-key") as client:
response = client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="What's the weather in London?")],
tools=[weather_tool],
tool_choice="auto"
)
if response.choices[0].finish_reason == "tool_calls":
tc = response.choices[0].message.tool_calls[0]
print(f"Tool: {tc.function.name}, Args: {tc.function.arguments}")
JSON Output Mode
from octen import Octen, ChatMessage
from octen.models import ResponseFormat
with Octen(api_key="your-api-key") as client:
response = client.chat.create(
model="google/gemini-3-flash-preview",
messages=[ChatMessage(role="user", content="Return a JSON list of 3 programming languages")],
response_format=ResponseFormat(type="json_object"),
web_search="off"
)
print(response.text)
Web Search with Full Page Content
from octen import Octen, ChatMessage, WebSearchOptions
from octen.models.chat import ChatFullContentOptions
with Octen(api_key="your-api-key") as client:
response = client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="Latest Python 3.13 features?")],
web_search="on",
web_search_options=WebSearchOptions(
safesearch="off",
full_content=ChatFullContentOptions(enable=True, max_tokens=1000)
)
)
print(f"Full content tokens: {response.usage.full_content_tokens}")
🤖 Supported Chat Models
For the full and up-to-date list of supported models, visit the Octen official website.
🧮 Embeddings API
Batch Embeddings
from octen import Octen
with Octen(api_key="your-api-key") as client:
# Process multiple texts
texts = [
"Artificial intelligence is transforming the world",
"Applications of deep learning",
"Natural language processing technology"
]
response = client.embedding.create(
input=texts,
model="octen-embedding-8b",
input_type="document"
)
vectors = response.get_embeddings()
print(f"Generated {len(vectors)} vectors")
# Or use convenience methods
query_vector = client.embedding.embed_query("search query")
doc_vectors = client.embedding.embed_documents(["document 1", "document 2"])
Custom Configuration
from octen import Octen
client = Octen(
api_key="your-api-key",
base_url="https://api.octen.ai", # Custom API endpoint
timeout=10.0, # Global default timeout (seconds)
max_retries=3, # Maximum retry attempts
http2=True # Enable HTTP/2
)
try:
# This request uses global timeout (10 seconds)
response1 = client.search.search("query 1")
# This request overrides timeout to 30 seconds
response2 = client.search.search("complex query", timeout=30.0)
finally:
client.close() # Release connection pool resources
📚 API Documentation
Search API
client.search.search()
Perform a web search query.
Parameters:
- `query` (str, required): Search query string, max 500 characters
- `count` (int, optional): Number of results to return, range 1-100, default 5
- `search_type` (str, optional): Search type, options:
  - `"auto"` - Automatically select (default)
  - `"keyword"` - Keyword search
  - `"semantic"` - Semantic search
- `include_domains` (List[str], optional): Include only results from these domains
- `exclude_domains` (List[str], optional): Exclude results from these domains
- `include_text` (List[str], optional): Results must contain these texts
- `exclude_text` (List[str], optional): Results must exclude these texts
- `time_basis` (str, optional): Time basis, options: `"auto"`, `"published"`, `"crawled"`
- `start_time` (str, optional): Start time in ISO 8601 format
- `end_time` (str, optional): End time in ISO 8601 format
- `highlight` (HighlightOptions, optional): Highlight options configuration
- `format` (str, optional): Content format, options: `"text"`, `"markdown"`
- `safesearch` (str, optional): Safe search, options: `"off"`, `"strict"` (default)
- `full_content` (FullContentOptions, optional): Full content options configuration
- `timeout` (float, optional): Request timeout in seconds
Returns: SearchResponse object
Response Properties:
- `results` - List of search results
- `query` - The actual query used
- `search_type` - The actual search type used
- `usage` - Token usage information
- `latency` - Latency information
Chat API
client.chat.create()
Create a chat completion (non-streaming or streaming).
Parameters:
- `messages` (List[ChatMessage | dict], required): Conversation history. Each item can be a `ChatMessage` object or a plain dict `{"role": ..., "content": ...}`
- `model` (str, required): Model ID (e.g. `"openai/gpt-5.4"`). See Supported Chat Models for the full list
- `stream` (bool, optional): If `True`, return a `Stream` iterator of `StreamEvent` objects. Default `False`
- `web_search` (str, optional): `"on"` to augment with live web search, `"off"` to disable
- `web_search_options` (WebSearchOptions, optional): Fine-grained search configuration
  - `safesearch` (str): `"off"` or `"strict"` (default `"off"`)
  - `count` (int): Number of search results, range 1-100
  - `country` (str): Country code for localised results (e.g. `"CN"`)
  - `include_domains` / `exclude_domains` (List[str]): Domain filtering
  - `include_text` / `exclude_text` (List[str]): Text filtering
  - `time_basis` (str): `"auto"`, `"published"`, or `"crawled"`
  - `start_time` / `end_time` (str): ISO 8601 time range
  - `format` (str): `"text"` or `"markdown"`
  - `full_content` (ChatFullContentOptions): Full page content options
  - `highlight` (ChatHighlightOptions): Highlight snippet options
- `max_tokens` (int, optional): Maximum number of output tokens
- `max_completion_tokens` (int, optional): Alternative max-token parameter
- `temperature` (float, optional): Sampling temperature, range `[0, 2]`
- `top_p` (float, optional): Nucleus sampling probability, range `(0, 1]`
- `frequency_penalty` (float, optional): Frequency penalty, range `[-2, 2]`
- `presence_penalty` (float, optional): Presence penalty, range `[-2, 2]`
- `response_format` (ResponseFormat, optional): Output format — `ResponseFormat(type="text")`, `ResponseFormat(type="json_object")`, or `ResponseFormat(type="json_schema", json_schema=...)`
- `stop` (List[str], optional): Up to 4 stop sequences
- `seed` (int, optional): Integer seed for deterministic sampling
- `reasoning_effort` (str, optional): Chain-of-thought effort: `"low"`, `"medium"`, or `"high"`
- `logprobs` (bool, optional): Whether to return log probabilities
- `top_logprobs` (int, optional): Number of most-likely tokens, range `[0, 20]`. Requires `logprobs=True`
- `logit_bias` (Dict[str, float], optional): Token ID to bias value mapping
- `tools` (List[Tool | dict], optional): Tool/function definitions available to the model
- `tool_choice` (str | dict, optional): `"none"`, `"auto"`, `"required"`, or a dict specifying a particular tool
- `user` (str, optional): Opaque end-user identifier
- `timeout` (float, optional): Per-request timeout in seconds (default 60s for chat)
Returns:
- `ChatCompletion` when `stream=False`
- `Stream` (iterable of `StreamEvent`) when `stream=True`
ChatCompletion Properties:
- `id` - Unique completion ID
- `model` - Model used for generation
- `choices` - List of `Choice` objects
- `text` - Convenience accessor for the first choice's content
- `usage` - `Usage` object (prompt_tokens, completion_tokens, total_tokens, num_search_queries, reasoning_tokens)
- `search_results` - List of `ChatSearchResult` (when `web_search="on"`)
- `citations` - Citation string referencing search results
- `warning` - Optional warning message
StreamEvent Properties:
- `type` - Event type: `"search_done"`, `"content"`, `"finish"`, `"usage"`, `"error"`
- `choices` - List of `StreamChoice` (with `delta.content` for incremental text)
- `search_results` - Web search results (on `search_done` event)
- `usage` - Token usage (on `usage` event)
- `citations` - Citation string (on `search_done` event)
- `error` - `StreamError` with `message` and `code` (on `error` event)
Embedding API
client.embedding.create()
Create text embedding vectors.
Parameters:
- `input` (str | List[str], required): Input text or list of texts
- `model` (str, optional): Model name, options:
  - `"octen-embedding-0.6b"` - Lightweight model
  - `"octen-embedding-4b"` - Balanced performance
  - `"octen-embedding-8b"` - Highest quality
- `dimension` (int, optional): Vector dimension
- `input_type` (str, optional): Input type, options: `"query"` or `"document"`
- `truncation` (bool, optional): Whether to truncate long inputs, default True
- `timeout` (float, optional): Request timeout in seconds
Returns: EmbeddingResponse object
Response Methods:
- `get_embeddings()` - Get all vectors
- `get_first_embedding()` - Get first vector (for single input)
Convenience Methods:
- `embed_query(text)` - Embed a single query text
- `embed_documents(texts)` - Batch embed document texts
🔧 Async Support
import asyncio
from octen import AsyncOcten, ChatMessage
async def main():
async with AsyncOcten(api_key="your-api-key") as client:
# Concurrent chat requests
task1 = client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="Explain deep learning")],
web_search="off"
)
task2 = client.chat.create(
model="anthropic/claude-sonnet-4.6",
messages=[ChatMessage(role="user", content="Explain reinforcement learning")],
web_search="off"
)
r1, r2 = await asyncio.gather(task1, task2)
print(r1.text)
print(r2.text)
# Async streaming
stream = await client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="Hello!")],
stream=True
)
async for event in stream:
if event.type == "content" and event.choices:
print(event.choices[0].delta.content or "", end="", flush=True)
# Search and embeddings also work async
results = await client.search.search(query="AI")
embedding = await client.embedding.create(input=["Hello"], model="octen-embedding-4b")
asyncio.run(main())
⚠️ Error Handling
from octen import (
Octen,
ChatMessage,
OctenAPIError,
OctenTimeoutError,
OctenConnectionError,
OctenRateLimitError,
OctenAuthenticationError,
OctenStreamError,
)
with Octen(api_key="your-api-key") as client:
try:
response = client.chat.create(
model="openai/gpt-5.4",
messages=[ChatMessage(role="user", content="Hello")]
)
except OctenAuthenticationError:
print("Invalid or missing API key")
except OctenRateLimitError as e:
print(f"Rate limited — retry after {e.retry_after}s")
except OctenStreamError as e:
print(f"Stream error: {e.message} (code {e.code})")
except OctenTimeoutError as e:
print(f"Request timed out after {e.timeout}s")
except OctenAPIError as e:
print(f"API error {e.status_code}: {e.message}")
🧪 Development
Install Development Dependencies
# Install development version from source
pip install -e ".[dev]"
Run Tests
pytest tests/
Code Formatting
black octen/
ruff check octen/ --fix
Type Checking
mypy octen/
📝 License
MIT License - See LICENSE file for details
🔗 Links
📧 Support
For questions or help, please:
- Check the Documentation
- Email us at support@octen.ai
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file octen-0.2.1.tar.gz.
File metadata
- Download URL: octen-0.2.1.tar.gz
- Upload date:
- Size: 42.9 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.9.18
File hashes
| Algorithm | Hash digest | |
|---|---|---|
| SHA256 |
aafdf6665e720517881d5caa7409571ecbe6b59d62d917b50029d8ba3002149e
|
|
| MD5 |
178174e5fbe509bf440ade6c763abd98
|
|
| BLAKE2b-256 |
7075d98f5f17d08bd893fb3e40413a1a3568db3aedaa8801648995e64c8c6ee5
|
File details
Details for the file octen-0.2.1-py3-none-any.whl.
File metadata
- Download URL: octen-0.2.1-py3-none-any.whl
- Upload date:
- Size: 38.6 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.2.0 CPython/3.9.18
File hashes
| Algorithm | Hash digest | |
|---|---|---|
| SHA256 |
b728f05801a070e3f4cb3372c0b29018f89c52b98ed8adaf82a7fc8cf648fed3
|
|
| MD5 |
e0ce16b6a1af5cbd0189794661af80e0
|
|
| BLAKE2b-256 |
6d10de4d945569f51116e874d7d7dc2bf04a3aa2e18de8016122433477245fd9
|