Skip to main content

Python SDK for SauTech Services

Project description

SauTech Python SDK

The official Python SDK for SauTech ASR Services. It supports two modes:

  • Fast Transcription (file-based, async and sync)
  • Realtime (streaming audio over websockets)

Installation

pip install humain-voice

Requires Python >= 3.9. Note that the distribution is installed as humain-voice, while the package you import is sautech.

Configuration

Set the following environment variables provided by SauTech:

  • API_URL: Base URL of the ASR service (e.g. https://api.example.com)
  • API_PATH: API path for the specific service
  • API_KEY: Your API key

You can export these in your shell:

export API_URL="https://api.example.com"
export API_PATH="/realtime/socket.io"
export API_KEY="<your_api_key>"

Fast Transcription (sync)

Context manager

import os
from pathlib import Path
from sautech import stt

# Audio sample to transcribe.
wav_path = Path("samples/king_fahad_test.wav")

# Connection settings are read from the environment variables described
# in the Configuration section.
client_settings = {
    "api_url": os.getenv("API_URL"),
    "api_key": os.getenv("API_KEY"),
    "api_path": os.getenv("API_PATH"),
}

with stt.FastTranscriptionClient(**client_settings) as client:
    with wav_path.open("rb") as audio_file:
        # The first argument may be a file-like object, or the raw bytes
        # obtained with audio_file.read().
        transcript = client.transcribe_sync(
            audio_file,
            stt.Language.ArEn,
            stt.ASRModel.BayanArEnV1,
        )
        print(transcript)

Manual (no context manager)

import os
from pathlib import Path
from sautech import stt

audio_path = Path("samples/king_fahad_test.wav")

client = stt.FastTranscriptionClient(
    api_url=os.getenv("API_URL"),
    api_key=os.getenv("API_KEY"),
    api_path=os.getenv("API_PATH"),
)

# Without a context manager the client has to be closed explicitly. The
# try/finally makes sure close_sync() runs even if a transcription raises.
try:
    # Variant 1: pass an open binary file object.
    with audio_path.open("rb") as f:
        result1 = client.transcribe_sync(
            f,
            stt.Language.ArEn,
            stt.ASRModel.BayanArEnV1,
        )
        print(result1)

    # Variant 2: pass the raw bytes of the file.
    result2 = client.transcribe_sync(
        audio_path.read_bytes(),
        stt.Language.ArEn,
        stt.ASRModel.BayanArEnV1,
    )
    print(result2)
finally:
    client.close_sync()

Fast Transcription (async)

Context manager

import os
import asyncio
from pathlib import Path
from sautech import stt

async def main():
    """Transcribe the sample file once with the async client as a context manager."""
    sample = Path("samples/king_fahad_test.wav")
    payload = sample.read_bytes()

    transcriber = stt.FastTranscriptionClient(
        api_url=os.getenv("API_URL"),
        api_key=os.getenv("API_KEY"),
        api_path=os.getenv("API_PATH"),
    )
    async with transcriber as client:
        transcript = await client.transcribe(
            payload,
            stt.Language.ArEn,
            stt.ASRModel.BayanArEnV1,
        )
        print(transcript)


asyncio.run(main())

Manual (no context manager)

import os
import asyncio
from pathlib import Path
from sautech import stt

async def main():
    """Run two async transcriptions on a manually managed client.

    The client is not used as a context manager here, so it is closed in a
    ``finally`` clause: ``close()`` is awaited even when a transcription
    raises.
    """
    audio_bytes = Path("samples/king_fahad_test.wav").read_bytes()

    client = stt.FastTranscriptionClient(
        api_url=os.getenv("API_URL"),
        api_key=os.getenv("API_KEY"),
        api_path=os.getenv("API_PATH"),
    )

    try:
        # The same client instance is reused for both requests.
        result1 = await client.transcribe(
            audio_bytes,
            stt.Language.ArEn,
            stt.ASRModel.BayanArEnV1,
        )
        print(result1)

        result2 = await client.transcribe(
            audio_bytes,
            stt.Language.ArEn,
            stt.ASRModel.BayanArEnV1,
        )
        print(result2)
    finally:
        await client.close()


asyncio.run(main())

You can also pass on_response, on_file_upload, and on_error callbacks to receive intermediate updates and handle lifecycle events during processing.

Realtime Streaming

import asyncio
import os
import wave
from sautech import stt

async def run():
    """Stream the sample WAV file to the realtime service in paced chunks.

    The callbacks passed to ``start_stream`` print connection events,
    responses, and errors. Audio is sent in ``chunk_duration_s`` slices with
    a matching sleep between sends, and the stream is closed in a ``finally``
    clause so it is not left open if sending fails.
    """
    client = stt.RealtimeClient(
        api_url=os.getenv("API_URL"),
        api_key=os.getenv("API_KEY"),
        api_path=os.getenv("API_PATH"),
    )

    # Read the audio before opening the stream, so a missing or unreadable
    # file cannot leave an open stream behind.
    with wave.open("samples/king_fahad_test.wav", "rb") as f:
        audio_bytes = f.readframes(f.getnframes())

    # NOTE: the chunk size assumes 16 kHz, 16-bit, single-channel PCM;
    # adjust these constants if the file uses a different format.
    chunk_duration_s = 0.1
    sample_rate = 16000
    bytes_per_sample = 2  # 16-bit PCM
    chunk_size = int(sample_rate * bytes_per_sample * chunk_duration_s)

    stream = await client.start_stream(
        language=stt.Language.ArEn,
        on_connect=lambda: print("connected"),
        on_disconnect=lambda: print("disconnected"),
        on_response=lambda t: print("response:", t),
        on_error=lambda e: print("error:", e),
    )

    try:
        for start in range(0, len(audio_bytes), chunk_size):
            # Slicing clamps at the end of the buffer, so the final chunk
            # is simply shorter.
            await stream.send(audio_bytes[start:start + chunk_size])
            await asyncio.sleep(chunk_duration_s)
    finally:
        await stream.close(timeout=1)


asyncio.run(run())

Parallel streams (single client)

import asyncio
import os
import wave
from sautech import stt

async def stream_audio(stream, audio_bytes: bytes):
    """Send ``audio_bytes`` over ``stream`` in paced chunks, then close it.

    Args:
        stream: Object exposing awaitable ``send(chunk)`` and
            ``close(timeout=...)`` methods.
        audio_bytes: Raw audio payload. The chunk size assumes 16 kHz,
            16-bit, single-channel PCM.

    The stream is closed in a ``finally`` clause, so it is closed even if a
    ``send`` call raises; the exception still propagates to the caller.
    """
    chunk_duration_s = 0.1
    sample_rate = 16000
    bytes_per_sample = 2  # 16-bit PCM
    chunk_size = int(sample_rate * bytes_per_sample * chunk_duration_s)

    try:
        for start in range(0, len(audio_bytes), chunk_size):
            # Slicing clamps at the end of the buffer, so the final chunk
            # is simply shorter.
            await stream.send(audio_bytes[start:start + chunk_size])
            await asyncio.sleep(chunk_duration_s)
    finally:
        await stream.close(timeout=1)

async def run():
    """Open two streams on a single client and feed both the same audio concurrently."""

    def report_a(t):
        print("stream A:", t)

    def report_b(t):
        print("stream B:", t)

    client = stt.RealtimeClient(
        api_url=os.getenv("API_URL"),
        api_key=os.getenv("API_KEY"),
        api_path=os.getenv("API_PATH"),
    )

    # Both streams share the one client instance.
    first = await client.start_stream(
        language=stt.Language.ArEn,
        on_response=report_a,
    )
    second = await client.start_stream(
        language=stt.Language.ArEn,
        on_response=report_b,
    )

    with wave.open("samples/king_fahad_test.wav", "rb") as wav:
        pcm = wav.readframes(wav.getnframes())

    # Run both senders concurrently and wait for both to finish.
    jobs = [stream_audio(first, pcm), stream_audio(second, pcm)]
    await asyncio.gather(*jobs)


asyncio.run(run())

Error handling

The SDK exposes a structured error contract aligned with the platform's ErrorResponse. The platform fields id, message, code, retryable, and timestamp are all optional — some server code paths omit one or more of them. The error callback always fires; whether the in-flight context is also terminated depends on which component owns the error code (see ADR-0003).

Error code constants

from sautech.errors import (
    ASR_TRANSCRIPTION_FAILED,
    ASR_MODEL_UNAVAILABLE,
    RATE_LIMIT_EXCEEDED,
    TTS_VOICE_LIST_FAILED,
    SERVER_INTERNAL,
    is_asr_code,
    is_tts_code,
    is_request_scoped_code,
    is_realtime_owned,
    is_tts_owned,
)

Unknown future codes pass through as strings. Legacy aliases such as RATE_LIMITED, VALIDATION_FAILED, and INTERNAL_ERROR remain exported for older deployments.

Socket.IO errors (TTS / realtime STT / fast STT)

The on_error callback receives an ErrorResponse model. Branch on code to decide your policy:

from sautech.errors import ASR_TRANSCRIPTION_FAILED, ASR_MODEL_UNAVAILABLE

def on_error(err):
    """Print the error's fields, then branch on its code.

    ``err`` is always non-None, but any of its fields can be None.
    """
    code = err.code
    print(f"code={code} retryable={err.retryable} msg={err.message}")

    if code == ASR_MODEL_UNAVAILABLE:
        # The model is down: tell the user; it is safe to retry later.
        ...
        return

    if code == ASR_TRANSCRIPTION_FAILED:
        # Final terminal error for this stream — no retry.
        ...

The realtime adapter only terminates an ASR stream context for ASR-shaped codes. A TTS_VOICE_LIST_FAILED arriving on the realtime socket fires the global on_error but does not kill the stream.

Batch transcription HTTP errors

BatchTranscribeError (and its subclasses) carry a structured payload plus per-field accessors:

from sautech.stt.batchtranscription import (
    BatchTranscribeClient,
    BatchTranscribeError,
    BatchTranscribeRateLimitError,
)

async def submit_with_error_report(client, audio):
    """Submit ``audio`` for batch transcription and print structured errors.

    ``await`` is only valid inside a coroutine, so the call is wrapped in an
    ``async def``; run it with ``asyncio.run(...)`` or await it from other
    async code.

    Args:
        client: Object with an awaitable ``submit(audio, language)`` method
            (presumably a ``BatchTranscribeClient`` — TODO confirm).
        audio: Audio payload forwarded to ``client.submit``.

    Returns:
        The value returned by ``client.submit``, or ``None`` when a
        ``BatchTranscribeError`` was caught and reported.
    """
    try:
        return await client.submit(audio, "ar")
    except BatchTranscribeRateLimitError as err:
        # Listed before the base class so the rate-limit case is matched first.
        # Caller decides retry policy; SDK never retries internally.
        print(f"rate-limited; retry_after={err.retry_after}s capacity={err.capacity}")
    except BatchTranscribeError as err:
        print(f"status={err.status_code}")
        print(f"code={err.code} retryable={err.retryable}")
        print(f"detail={err.detail} job_id={err.job_id}")
        print(f"raw_body={err.raw_body!r}")  # always preserved
    return None

User-facing message preference: detail → message → error → raw text. err.message already follows that order.

Note. The max_retries constructor argument is deprecated and is a no-op. The SDK never retries — callers decide policy based on err.retryable, err.code, and err.capacity.

Types

Common enums are available under sautech.stt, for example Language and ASRModel.

Examples

See complete examples in python/examples/ft_client.py and python/examples/rt_client.py. The batch transcription example (python/examples/batch_transcribe_client.py) shows structured-error handling end-to-end.

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

humain_voice-0.16.0a3.tar.gz (31.1 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

humain_voice-0.16.0a3-py3-none-any.whl (48.1 kB view details)

Uploaded Python 3

File details

Details for the file humain_voice-0.16.0a3.tar.gz.

File metadata

  • Download URL: humain_voice-0.16.0a3.tar.gz
  • Upload date:
  • Size: 31.1 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: uv/0.11.15 {"installer":{"name":"uv","version":"0.11.15","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"macOS","version":null,"id":null,"libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":null}

File hashes

Hashes for humain_voice-0.16.0a3.tar.gz
Algorithm Hash digest
SHA256 062695caea9c7c3f8ec1fc729e18089ace7f9a71f99195b076b3459597b3f806
MD5 c2d872f1fcedb55eb4810fbf9a268c15
BLAKE2b-256 63a203747e87695f9061d35dd30aaa6ca2c17bae2c012299af7b107136d20d2f

See more details on using hashes here.

File details

Details for the file humain_voice-0.16.0a3-py3-none-any.whl.

File metadata

  • Download URL: humain_voice-0.16.0a3-py3-none-any.whl
  • Upload date:
  • Size: 48.1 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: uv/0.11.15 {"installer":{"name":"uv","version":"0.11.15","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"macOS","version":null,"id":null,"libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":null}

File hashes

Hashes for humain_voice-0.16.0a3-py3-none-any.whl
Algorithm Hash digest
SHA256 779f2729a8423a7d64a667afaa0c3569cb99e17ee3eaa58ff74a4b89f67899e6
MD5 931c4921ec593d5ec72606b4261cc9fa
BLAKE2b-256 0a7a18afc5ddf230f3bb337ee19a859439d80275d2c15adaa50b291c50fcc9d3

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page