Python interface to CMU Sphinxbase and Pocketsphinx libraries
Project description
Pocketsphinx Python
Pocketsphinx is a part of the CMU Sphinx Open Source Toolkit For Speech Recognition.
This package provides a Python interface to the CMU Sphinxbase and Pocketsphinx libraries, created with SWIG and Setuptools.
Supported platforms
- Windows
- Linux
- Mac OS X
Installation
# Make sure we have up-to-date versions of pip, setuptools and wheel
python -m pip install --upgrade pip setuptools wheel
pip install --upgrade pocketsphinx
More binary distributions for manual installation are available here.
Usage
LiveSpeech
It's an iterator class for continuous recognition or keyword search from a microphone.
from pocketsphinx import LiveSpeech
for phrase in LiveSpeech(): print(phrase)
An example of a keyword search:
from pocketsphinx import LiveSpeech
speech = LiveSpeech(lm=False, keyphrase='forward', kws_threshold=1e-20)
for phrase in speech:
print(phrase.segments(detailed=True))
With your model and dictionary:
import os
from pocketsphinx import LiveSpeech, get_model_path
model_path = get_model_path()
speech = LiveSpeech(
verbose=False,
sampling_rate=16000,
buffer_size=2048,
no_search=False,
full_utt=False,
hmm=os.path.join(model_path, 'en-us'),
lm=os.path.join(model_path, 'en-us.lm.bin'),
dic=os.path.join(model_path, 'cmudict-en-us.dict')
)
for phrase in speech:
print(phrase)
AudioFile
It's an iterator class for continuous recognition or keyword search from a file.
from pocketsphinx import AudioFile
for phrase in AudioFile(): print(phrase) # => "go forward ten meters"
An example of a keyword search:
from pocketsphinx import AudioFile
audio = AudioFile(lm=False, keyphrase='forward', kws_threshold=1e-20)
for phrase in audio:
print(phrase.segments(detailed=True)) # => "[('forward', -617, 63, 121)]"
With your model and dictionary:
import os
from pocketsphinx import AudioFile, get_model_path, get_data_path
model_path = get_model_path()
data_path = get_data_path()
config = {
'verbose': False,
'audio_file': os.path.join(data_path, 'goforward.raw'),
'buffer_size': 2048,
'no_search': False,
'full_utt': False,
'hmm': os.path.join(model_path, 'en-us'),
'lm': os.path.join(model_path, 'en-us.lm.bin'),
'dict': os.path.join(model_path, 'cmudict-en-us.dict')
}
audio = AudioFile(**config)
for phrase in audio:
print(phrase)
Convert frames into time coordinates:
from pocketsphinx import AudioFile
# Frames per Second
fps = 100
for phrase in AudioFile(frate=fps): # frate (default=100)
print('-' * 28)
print('| %5s | %3s | %4s |' % ('start', 'end', 'word'))
print('-' * 28)
for s in phrase.seg():
print('| %4ss | %4ss | %8s |' % (s.start_frame / fps, s.end_frame / fps, s.word))
print('-' * 28)
# ----------------------------
# | start | end | word |
# ----------------------------
# | 0.0s | 0.24s | <s> |
# | 0.25s | 0.45s | <sil> |
# | 0.46s | 0.63s | go |
# | 0.64s | 1.16s | forward |
# | 1.17s | 1.52s | ten |
# | 1.53s | 2.11s | meters |
# | 2.12s | 2.6s | </s> |
# ----------------------------
Pocketsphinx
It's a simple and flexible proxy class to pocketsphinx.Decoder.
from pocketsphinx import Pocketsphinx
print(Pocketsphinx().decode()) # => "go forward ten meters"
A more comprehensive example:
from __future__ import print_function
import os
from pocketsphinx import Pocketsphinx, get_model_path, get_data_path
model_path = get_model_path()
data_path = get_data_path()
config = {
'hmm': os.path.join(model_path, 'en-us'),
'lm': os.path.join(model_path, 'en-us.lm.bin'),
'dict': os.path.join(model_path, 'cmudict-en-us.dict')
}
ps = Pocketsphinx(**config)
ps.decode(
audio_file=os.path.join(data_path, 'goforward.raw'),
buffer_size=2048,
no_search=False,
full_utt=False
)
print(ps.segments()) # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
print('Detailed segments:', *ps.segments(detailed=True), sep='\n') # => [
# word, prob, start_frame, end_frame
# ('<s>', 0, 0, 24)
# ('<sil>', -3778, 25, 45)
# ('go', -27, 46, 63)
# ('forward', -38, 64, 116)
# ('ten', -14105, 117, 152)
# ('meters', -2152, 153, 211)
# ('</s>', 0, 212, 260)
# ]
print(ps.hypothesis()) # => go forward ten meters
print(ps.probability()) # => -32079
print(ps.score()) # => -7066
print(ps.confidence()) # => 0.04042641466841839
print(*ps.best(count=10), sep='\n') # => [
# ('go forward ten meters', -28034)
# ('go for word ten meters', -28570)
# ('go forward and majors', -28670)
# ('go forward and meters', -28681)
# ('go forward and readers', -28685)
# ('go forward ten readers', -28688)
# ('go forward ten leaders', -28695)
# ('go forward can meters', -28695)
# ('go forward and leaders', -28706)
# ('go for work ten meters', -28722)
# ]
Default config
If you don't pass any arguments when creating an instance of the Pocketsphinx, AudioFile or LiveSpeech class, the following default values are used:
verbose = False
logfn = /dev/null or nul
audio_file = site-packages/pocketsphinx/data/goforward.raw
audio_device = None
sampling_rate = 16000
buffer_size = 2048
no_search = False
full_utt = False
hmm = site-packages/pocketsphinx/model/en-us
lm = site-packages/pocketsphinx/model/en-us.lm.bin
dict = site-packages/pocketsphinx/model/cmudict-en-us.dict
Any other option must be passed into the config as is, without the leading - symbol (for example, use hmm rather than -hmm).
If you want to disable the default language model or dictionary, set the corresponding option to False:
lm = False
dict = False
Verbose
Send output to stdout:
from pocketsphinx import Pocketsphinx
ps = Pocketsphinx(verbose=True)
ps.decode()
print(ps.hypothesis())
Send output to file:
from pocketsphinx import Pocketsphinx
ps = Pocketsphinx(verbose=True, logfn='pocketsphinx.log')
ps.decode()
print(ps.hypothesis())
Compatibility
Parent classes are still available:
import os
from pocketsphinx import DefaultConfig, Decoder, get_model_path, get_data_path
model_path = get_model_path()
data_path = get_data_path()
# Create a decoder with a certain model
config = DefaultConfig()
config.set_string('-hmm', os.path.join(model_path, 'en-us'))
config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
decoder = Decoder(config)
# Decode streaming data
buf = bytearray(1024)
with open(os.path.join(data_path, 'goforward.raw'), 'rb') as f:
decoder.start_utt()
while f.readinto(buf):
decoder.process_raw(buf, False, False)
decoder.end_utt()
print('Best hypothesis segments:', [seg.word for seg in decoder.seg()])
Install development version
Install requirements
Windows requirements:
Ubuntu requirements:
sudo apt-get install -qq python python-dev python-pip build-essential swig git libpulse-dev libasound2-dev
Mac OS X requirements:
brew reinstall swig python
Install with pip
pip install https://github.com/bambocher/pocketsphinx-python/archive/master.zip
Install with distutils
git clone --recursive https://github.com/bambocher/pocketsphinx-python
cd pocketsphinx-python
python setup.py install
Projects using pocketsphinx-python
- SpeechRecognition - Library for performing speech recognition, with support for several engines and APIs, online and offline.
License
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distributions
Hashes for pocketsphinx-0.1.15.win-amd64-py3.6.exe
Algorithm | Hash digest | |
---|---|---|
SHA256 | 7f48fcfd16c913a3475b1b58c132bd06c9ad5cd9cf1464c3d71fc4ad8a5ccc6f |
|
MD5 | 0bddfadb11654bdd59fa4d61a9afc318 |
|
BLAKE2b-256 | 741cf693cb5ffc1d0b36c1eb7d8e60eb79474a1004df6d1c90a408bc2792cb4a |
Hashes for pocketsphinx-0.1.15.win-amd64-py3.5.exe
Algorithm | Hash digest | |
---|---|---|
SHA256 | de4e4ae2c1ef412ad5519ca5fc44744a4374970a1f49d61417072808bf426c63 |
|
MD5 | 08b96f85d40607f2be90800868d6ce04 |
|
BLAKE2b-256 | bb7fc9169fb85ac3100e0e0a8d40cfbb6a975929c19fee05552139c2ff8dcae1 |
Hashes for pocketsphinx-0.1.15.win-amd64-py2.7.exe
Algorithm | Hash digest | |
---|---|---|
SHA256 | cbbb96e5ff22cf06f144987aa209c77edf99e76764e4d671cbd4dda28825d192 |
|
MD5 | 8d6808e3f5f13599b3badc052800b411 |
|
BLAKE2b-256 | 2f2b3373b3b046fe2039a0cf6dac9517d5fe2cea6ef9c60e6681be6347bded64 |
Hashes for pocketsphinx-0.1.15.win32-py3.6.exe
Algorithm | Hash digest | |
---|---|---|
SHA256 | 9f4068789d426c14052bcc184368aece4d6d9ecf998359e958194faffb9c3698 |
|
MD5 | 86f8d5fa8acdf8ecc4db81b379df9672 |
|
BLAKE2b-256 | 1344c3cefa9c1181c0871d05b2b329fca7dfc946b4010cb1949cff189a4cbabd |
Hashes for pocketsphinx-0.1.15.win32-py3.5.exe
Algorithm | Hash digest | |
---|---|---|
SHA256 | 1fa4acdd2350d483a75ea2dc51d5698fdc240922f17069889cad70700710db2f |
|
MD5 | d5536e49cd661e42d9bc5b5db37fd856 |
|
BLAKE2b-256 | c86aad2e37fb5a770769b498568652d5e1257f35d39fffcd16b912e0f6d73805 |
Hashes for pocketsphinx-0.1.15.win32-py2.7.exe
Algorithm | Hash digest | |
---|---|---|
SHA256 | fd54d61f226c8f3906ef948579249db999a8119069303cec5de48e770e0e2361 |
|
MD5 | 9a635cc83eeaa5e41db5219e5b19db76 |
|
BLAKE2b-256 | 34abdf27560f4f956f5dd90d3d49844d3f5b34a91603eceef0ef240f4dffb395 |
Hashes for pocketsphinx-0.1.15-cp36-cp36m-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 71e7cf04433615e21ad3c28a2cc2daa43e96e570f2611f142eb3468f4f7789a6 |
|
MD5 | 0c9ec34dd42cb57a7a789091bbe243d3 |
|
BLAKE2b-256 | 525330b12c3e4de918e32e73e9d635b4c9e1765512acc94ad0b51bfe960b54c9 |
Hashes for pocketsphinx-0.1.15-cp36-cp36m-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | d20b497369bd108dd135b3d4eed1d3edd2c8b51b1b696e67600adbfff2fe3316 |
|
MD5 | 5c7bd15dcea5ae63bb5fde5211de6059 |
|
BLAKE2b-256 | 41b70816e16b0be54662ccd0761b1286a9498478e6da6b18accb42eeacfd52c6 |
Hashes for pocketsphinx-0.1.15-cp36-cp36m-macosx_10_13_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 1c2a8cc7f2032f9e8214f5e4e864511fe37bfd27f2045fbf8aed9c54569dc881 |
|
MD5 | e0418e5bded8921aaf3aeb2359609601 |
|
BLAKE2b-256 | 54d536eedee7bd8f5ba4297cb6326fe08c5ba0dc42ff3efd0bae4a1e7ea7ffb7 |
Hashes for pocketsphinx-0.1.15-cp36-cp36m-macosx_10_12_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 72286c3ecaa6641d426ca80bd4d1b3241c26b1703af419429ef779719e007443 |
|
MD5 | 8431f8a051624b3154dc2fde556a2092 |
|
BLAKE2b-256 | 62a6117a32156e1e9bdc7dac97112d5cde8849e2d1a0244db3ca1b1ebf36a32e |
Hashes for pocketsphinx-0.1.15-cp36-cp36m-macosx_10_11_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | bfbbeddb20196abbb04b8f7b5dc03628c7df0dc87e760326d76d37b27fb9d6f9 |
|
MD5 | 1d32ad5c591f6d4c683c6a19bdcaca67 |
|
BLAKE2b-256 | db21326206b7e470adad8088b1efd33a1e19a057c1d551cacedb7837fd942695 |
Hashes for pocketsphinx-0.1.15-cp36-cp36m-macosx_10_10_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 8ca14694d7cc111796e63c13c61420553a43a29c63c386c975bfc213d8da3e55 |
|
MD5 | cb1c38c51647ef98011d09c6940489bc |
|
BLAKE2b-256 | 22715c1bef04886900ba28f93f0531dba3fc1652164015e63f1aaa5b271ab5ad |
Hashes for pocketsphinx-0.1.15-cp35-cp35m-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 667152b4139ba46de172fa627404fe284b2ecd5d6bfcfe905b068801324ba7fb |
|
MD5 | 544c1998158e7e314e48734a37f96808 |
|
BLAKE2b-256 | b2b733ea7440fe7aa0d423210bd418e11d6c29f125fd34e8809bf07cb4aa640d |
Hashes for pocketsphinx-0.1.15-cp35-cp35m-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 3f438f9f5729a7629c122784697884cff5f2a2e022aef270a4081985d15eae88 |
|
MD5 | 6517c78c24a0143ac85c2f0f75904073 |
|
BLAKE2b-256 | 8815112741e06a1a55b14dc8baefd94954d39d76457ccbd5988d04fa3ca59a79 |
Hashes for pocketsphinx-0.1.15-cp27-cp27m-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 41d2107b74597cf8444f9e36b91363d60097b4fa88a9c750760a85209deef439 |
|
MD5 | 2de95aea627e31b317e2c08cb5535c47 |
|
BLAKE2b-256 | 38d3192476022e989377ab00cb84fb0b18790e400bbd58e464155c58cb4622f8 |
Hashes for pocketsphinx-0.1.15-cp27-cp27m-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 1f3eaca1b49d579e89e909bb101ad033493a1c9dddd8c72afd6b755c70365d56 |
|
MD5 | 7d9e1e149ba00f394f9a8d5e8f53abd1 |
|
BLAKE2b-256 | 3273fc063c87c5bf7ae8bef74d227902fb892c0693ed80cb50fbfc69c300bbf4 |
Hashes for pocketsphinx-0.1.15-cp27-cp27m-macosx_10_13_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 9b477fcf8829f53ab470fdcf3fa77db00d71eee321353d10b306f0e5b95e8591 |
|
MD5 | d6c858a56de9780d12878440183ce159 |
|
BLAKE2b-256 | c23c40d4ff6b2517ac880a6831cfb694316cfd3fb8edfbeab8db81cbab0591aa |
Hashes for pocketsphinx-0.1.15-cp27-cp27m-macosx_10_12_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | e2a4f22cddec68e75e796068ae82095dd8dba1187aa3a8abe305bdc6c565e8b4 |
|
MD5 | 673fd75e0b2fb50b04f6b3e1823e4c53 |
|
BLAKE2b-256 | 2b09d837705a6bb87e3e6ae51e5b1d503bc18d25fb4dee7fe48f2de462dcf78f |
Hashes for pocketsphinx-0.1.15-cp27-cp27m-macosx_10_11_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 855c6761d008cdb4fc2d9aded5c1a0f163ce901f8b64075d36a88ae7814af755 |
|
MD5 | 8f38456cfc1322e6aa934eb5e5fe2768 |
|
BLAKE2b-256 | c54e22206e4ba9d48177a435ebdde0189e58c82d4e1bac04b03987fea16bb62b |
Hashes for pocketsphinx-0.1.15-cp27-cp27m-macosx_10_10_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | f0fdfe162bc99590e8666251a55d6311ac4cb137482a871f93d38b1834251253 |
|
MD5 | 9a6dc0f0ad56a274dd3d2d2238e3ab89 |
|
BLAKE2b-256 | 1351fca1696011825d7c296ad4234aa9f28c6fbb53c2a36d78c20ad1ff4cfae9 |