Skip to main content

Speechly Public Protobuf Stubs

Project description

Python Speechly API

See the generic Speechly gRPC stubs documentation for more information about using the API.

A complete example on how to stream audio from a file to the Speechly API can be found in speechly_grpc_example.py.

Install

Install the latest package using pip:

pip install speechly-api

Note that the minimum supported Python version is 3.6. The login example below uses datetime.fromisoformat, which requires Python 3.7 or newer.

Using Python Stubs

The stubs are generated for the default grpcio python package, and the examples are using asyncio.

Creating a Channel

In python, the default authority of the channel needs to be overridden, as it defaults to a string containing the port number. This will not work with the API, so we set the DNS name manually:

# TLS credentials for the secure channel.
ssl_credentials = grpc.ssl_channel_credentials()
# Override the authority with the bare DNS name (no port number).
channel_options = [('grpc.default_authority', 'api.speechly.com')]
channel = grpc.aio.secure_channel(
    target='api.speechly.com:443',
    credentials=ssl_credentials,
    options=channel_options,
)

IdentityAPI

Log in with speechly.identity.v2.IdentityAPI using either an app_id or a project_id:

async def login(channel, device_id, app_id=None, project_id=None):
    """Log in through the Speechly IdentityAPI and return an access token.

    Args:
        channel: gRPC channel handed to ``IdentityAPIStub``.
        device_id: device identifier sent in the ``LoginRequest``
            (expected to be a UUID string).
        app_id: when given, request a token for this single application.
        project_id: used only when ``app_id`` is not given; request a token
            usable for all applications in the project.

    Returns:
        A ``(token, expires)`` tuple, where ``expires`` is the ``datetime``
        parsed from the response's ``expires_at`` field.

    Raises:
        ValueError: if ``device_id`` is empty, or if neither ``app_id`` nor
            ``project_id`` is given.
    """
    # Validate with explicit raises: ``assert`` statements are removed when
    # Python runs with -O, which would silently skip these checks.
    if not device_id:
        raise ValueError('UUID device_id is required')
    if not (app_id or project_id):
        raise ValueError('app_id or project_id is required')

    identity_api = IdentityAPIStub(channel)
    req = LoginRequest(device_id=device_id)
    if app_id:
        # if a token with a single app_id is required:
        req.application.app_id = app_id
    else:
        # get a token that is usable for all apps in project:
        req.project.project_id = project_id
    response = await identity_api.Login(req)

    expires_at = response.expires_at
    # datetime.fromisoformat() rejects a trailing 'Z' before Python 3.11, so
    # normalise it to an explicit UTC offset.
    # NOTE(review): whether the server uses the 'Z' form is not shown here.
    if expires_at.endswith('Z'):
        expires_at = expires_at[:-1] + '+00:00'
    return response.token, datetime.fromisoformat(expires_at)

SLU

Open a bidirectional stream to speechly.slu.v1.SLU/Stream and send audio from a source generator to the API. The following example assumes that audio_stream is an iterator that yields single-channel audio with a 16 kHz sample rate, as chunks of bytes:

async def stream_speech(channel, token, audio_stream, app_id=None):
    """Stream audio to the SLU API and print the recognised result.

    Args:
        channel: gRPC channel; it is entered as an async context manager
            here, so it is presumably closed when this coroutine finishes.
        token: access token, sent as a ``Bearer`` authorization header.
        audio_stream: iterable yielding audio chunks (the stream is
            configured for 1 channel at 16000 Hz).
        app_id: optional application id attached to the START event.

    Prints the intent, entities and transcript on success, or the gRPC
    status code and details when an ``AioRpcError`` is raised.
    """
    metadata = [('authorization', f'Bearer {token}')]

    async def collect_results(call):
        # Drain the response side of the stream until EOF, accumulating
        # the pieces of the recognition result as they arrive.
        words, names, detected = [], [], ''
        while True:
            message = await call.read()
            if message == grpc.aio.EOF:
                break
            if message.HasField('started'):
                print(f'audioContext {message.audio_context} started')
            elif message.HasField('transcript'):
                words.append(message.transcript.word)
            elif message.HasField('entity'):
                names.append(message.entity.entity)
            elif message.HasField('intent'):
                detected = message.intent.intent
            elif message.HasField('finished'):
                print(f'audioContext {message.audio_context} finished')
        return detected, names, words

    async def pump_audio(call, chunks):
        # START event, then every audio chunk, then STOP, then half-close.
        start = SLUEvent(event='START', app_id=app_id)
        await call.write(SLURequest(event=start))
        for piece in chunks:
            await call.write(SLURequest(audio=piece))
        stop = SLUEvent(event='STOP')
        await call.write(SLURequest(event=stop))
        await call.done_writing()

    async with channel:
        client = SLUStub(channel)
        try:
            call = client.Stream(metadata=metadata)
            # The audio configuration must be the first message on the stream.
            await call.write(
                SLURequest(config=SLUConfig(channels=1, sample_rate_hertz=16000))
            )
            # Read and write concurrently; only the reader returns a result.
            (intent, entities, transcript), _ = await asyncio.gather(
                collect_results(call),
                pump_audio(call, audio_stream),
            )
            print('Intent:', intent)
            print('Entities:', ', '.join(entities))
            print('Transcript:', ' '.join(transcript))
        except grpc.aio.AioRpcError as err:
            print('Error in SLU', str(err.code()), err.details())

Using the HTTP REST API

The gRPC API is also available as a JSON-based HTTP API. The following is an example of calling the BatchAPI with the Python requests library:

import requests
import uuid
import base64
import time

# read an audio file into memory (note that it should be 16 kHz, 1-channel PCM to get good results)
with open('test1_en.wav', 'rb') as f:
    audio_data = f.read()

# create a device ID (uuid)
deviceId = uuid.uuid4()

# get a Speechly access token to use the correct Speechly app
r = requests.post(
    'https://api.speechly.com/speechly.identity.v2.IdentityAPI/Login',
    json={'deviceId': str(deviceId), 'application': {'appId': 'YOUR_APP_ID'}}
)
# surface HTTP failures as requests.HTTPError here, rather than as a
# confusing KeyError on a missing 'token' key
r.raise_for_status()
token = r.json()['token']

# the same authorization header is sent with every BatchAPI request
auth_headers = {'authorization': f'Bearer {token}'}

# send the file to the BatchAPI to create a batch transcribe operation
batch_req = [{
    'config': {
        'encoding': 1,
        'channels': 1,
        'sampleRateHertz': 16000
    },
    'audio': base64.b64encode(audio_data).decode('ascii')
}]
r = requests.post(
    'https://api.speechly.com/speechly.slu.v1.BatchAPI/ProcessAudio',
    headers=auth_headers,
    json=batch_req
)
r.raise_for_status()
op = r.json()['operation']

# poll the BatchAPI once per second, waiting for the batch operation to be done
while op['status'] != 'STATUS_DONE':
    time.sleep(1)
    r = requests.post(
        'https://api.speechly.com/speechly.slu.v1.BatchAPI/QueryStatus',
        headers=auth_headers,
        json={'id': op['id']}
    )
    r.raise_for_status()
    op = r.json()['operation']
    # use .get() so a response without an 'error' key counts as "no error"
    # NOTE(review): confirm whether the API omits empty fields from the JSON
    if op.get('error'):
        raise Exception('error in transcribe: ' + op['error'])

# collect the words from the transcripts (none if the key is absent)
transcript = [w['word'] for w in op.get('transcripts', [])]
print(' '.join(transcript))

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page