Skip to main content

Mycroft's multilingual text parsing and formatting library

Project description

License CLA Team Status

Build Status Coverage Status PRs Welcome Join chat

Lingua Franca

Mycroft's multilingual text parsing and formatting library

Lingua Franca (noun)

a framework that is adopted as the common language between speakers with different native tongues

Formatting

Convert data into spoken equivalents

Pronounce numbers

spoken versions of numbers

from lingua_franca.format import nice_number, pronounce_number

# nice_number returns a display string: in these examples whole numbers stay
# as digits and fractional parts are rendered as a spoken fraction.
assert nice_number(25/6) == "4 and a sixth"
assert nice_number(201) == "201"
assert nice_number(3.14159269) == "3 and a seventh"

# pronounce_number spells the value out in words; in the first example only
# two digits after the decimal point are spoken.
assert pronounce_number(3.14159269) == "three point one four"
assert pronounce_number(0) == "zero"
assert pronounce_number(10) == "ten"
assert pronounce_number(201) == "two hundred and one"
assert pronounce_number(102.3) == "one hundred and two point three"
assert pronounce_number(
    4092949192) == "four billion, ninety two million, nine hundred and forty nine thousand, one hundred and ninety two"

# The same integer spoken with short_scale=True ...
assert pronounce_number(100034000000299792458, short_scale=True) == \
       "one hundred quintillion, thirty four quadrillion, " \
       "two hundred and ninety nine million, seven hundred and ninety " \
       "two thousand, four hundred and fifty eight"

# ... and with short_scale=False, which names the large magnitudes differently.
assert pronounce_number(100034000000299792458, short_scale=False) == \
       "one hundred trillion, thirty four thousand billion, " \
       "two hundred and ninety nine million, seven hundred and ninety " \
       "two thousand, four hundred and fifty eight"

Pronounce datetime objects

spoken date for datetime.datetime objects

from lingua_franca.format import nice_date, nice_date_time, nice_time
import datetime

# Reference moment used by every example below: 31 January 2017, 13:22:03.
dt = datetime.datetime(2017, 1, 31,  13, 22, 3)

assert nice_date(dt) == "tuesday, january thirty-first, twenty seventeen"

# nice_time defaults to a spoken 12-hour form; speech=False switches to a
# digit-based string instead.
assert nice_time(dt) == "one twenty two"
assert nice_time(dt, use_ampm=True) ==  "one twenty two p.m."
assert nice_time(dt, speech=False) == "1:22"
assert nice_time(dt, speech=False, use_ampm=True) == "1:22 PM"
# With use_24hour=True the use_ampm flag makes no visible difference in
# these examples.
assert nice_time(dt, speech=False, use_24hour=True) == "13:22"
assert nice_time(dt, speech=False, use_24hour=True, use_ampm=True) == "13:22"
assert nice_time(dt, use_24hour=True, use_ampm=True) == "thirteen twenty two"
assert nice_time(dt, use_24hour=True, use_ampm=False) == "thirteen twenty two"

# nice_date_time joins the spoken date and the spoken time with " at ".
assert nice_date_time(dt) == "tuesday, january thirty-first, twenty seventeen at one twenty two"

Pronounce durations

spoken number of seconds or datetime.timedelta objects

from lingua_franca.format import nice_duration


# nice_duration speaks the duration by default; speech=False yields a compact
# clock-style string instead.
assert nice_duration(1) == "one second"
assert nice_duration(3) == "three seconds"
assert nice_duration(1, speech=False) == "0:01"
# Every check compares with `==`. Writing `assert call(...), "text"` would
# treat the string as the failure message and only test that the result is
# truthy, so a wrong value would never be caught.
assert nice_duration(61) == "one minute one second"
assert nice_duration(61, speech=False) == "1:01"
assert nice_duration(5000) == "one hour twenty three minutes twenty seconds"
assert nice_duration(5000, speech=False) == "1:23:20"
assert nice_duration(50000) == "thirteen hours fifty three minutes twenty seconds"
assert nice_duration(50000, speech=False) == "13:53:20"
# NOTE(review): the double space after "days" is kept from the original
# example; confirm it matches the library's actual output.
assert nice_duration(500000) == "five days  eighteen hours fifty three minutes twenty seconds"
assert nice_duration(500000, speech=False) == "5d 18:53:20"

from datetime import timedelta

# A datetime.timedelta is accepted in place of a number of seconds.
assert nice_duration(timedelta(seconds=500000), speech=False) == "5d 18:53:20"

Parsing

Extract data from natural language text

Extract numbers

from lingua_franca.parse import extract_number, extract_numbers

# extract_number pulls a single numeric value out of free text; as the first
# example shows, it yields False when the text contains no number.
assert extract_number("nothing") is False
assert extract_number("two million five hundred thousand tons of spinning "
                      "metal") == 2500000
# short_scale changes the magnitude assigned to "trillion": 6e+12 by default,
# 6e+18 with short_scale=False.
assert extract_number("six trillion") == 6000000000000.0
assert extract_number("six trillion", short_scale=False) == 6e+18

# Mixed digits, fractions and words are combined into one value.
assert extract_number("1 and 3/4 cups") == 1.75
assert extract_number("1 cup and a half") == 1.5

# extract_numbers returns every number found, as a list (empty when the text
# contains none).
assert extract_numbers("nothing") == []
assert extract_numbers("this is a one twenty one  test") == [1.0, 21.0]
assert extract_numbers("1 dog, seven pigs, macdonald had a farm, "
                       "3 times 5 macarena") == [1, 7, 3, 5]

Extract durations

extract datetime.timedelta objects

## extract durations
from lingua_franca.parse import extract_duration
from datetime import timedelta

# extract_duration returns a (duration, remaining_text) pair. When no duration
# is found the first element is None and the text comes back unchanged.
assert extract_duration("nothing") == (None, 'nothing')

# In the examples below the duration words are removed from the remaining
# text, while surrounding punctuation and filler words are left in place.
assert extract_duration("Nineteen minutes past the hour") == (
    timedelta(minutes=19),
    "past the hour")
# Several units in one sentence are summed into a single timedelta.
assert extract_duration("wake me up in three weeks, four hundred ninety seven"
                        " days, and three hundred 91.6 seconds") == (
           timedelta(weeks=3, days=497, seconds=391.6),
           "wake me up in , , and")
assert extract_duration(
    "The movie is one hour, fifty seven and a half minutes long") == (
           timedelta(hours=1, minutes=57.5),
           "the movie is ,  long")

Extract dates

extract datetime.datetime objects

## extract date times
from datetime import datetime
from lingua_franca.parse import extract_datetime, normalize

def extractWithFormat(text):
    """Run extract_datetime on *text* against a fixed anchor date.

    The anchor is Tuesday 27 June 2017 at 13:04, so the examples give the
    same results whenever they are run.

    Returns a two-item list: the extracted datetime formatted as
    "YYYY-MM-DD HH:MM:SS", followed by the leftover text.
    """
    anchor = datetime(2017, 6, 27, 13, 4)
    parsed, remainder = extract_datetime(text, anchor)
    return [parsed.strftime("%Y-%m-%d %H:%M:%S"), remainder]


def testExtract(text, expected_date, expected_leftover):
    """Assert that *text* yields the expected date string and leftover text.

    The text is passed through normalize() before extraction; the date is
    compared in the string format produced by extractWithFormat.
    """
    formatted_date, leftover = extractWithFormat(normalize(text))
    assert formatted_date == expected_date
    assert leftover == expected_leftover


# Each call passes: input text, expected "YYYY-MM-DD HH:MM:SS" string, and the
# expected leftover text. Relative phrases are resolved against the fixed
# anchor date set in extractWithFormat (Tuesday 2017-06-27 13:04).
testExtract("now is the time",
            "2017-06-27 13:04:00", "is time")
testExtract("in a couple minutes",
            "2017-06-27 13:06:00", "")
testExtract("What is the day after tomorrow's weather?",
            "2017-06-29 00:00:00", "what is weather")
testExtract("Remind me at 10:45 pm",
            "2017-06-27 22:45:00", "remind me")
testExtract("what is the weather on friday morning",
            "2017-06-30 08:00:00", "what is weather")
testExtract("what is tomorrow's weather",
            "2017-06-28 00:00:00", "what is weather")
testExtract("remind me to call mom next tuesday",
            "2017-07-04 00:00:00", "remind me to call mom")
testExtract("remind me to call mom in 3 weeks",
            "2017-07-18 00:00:00", "remind me to call mom")
testExtract("set an alarm for tonight 9:30",
            "2017-06-27 21:30:00", "set alarm")
testExtract("on the evening of june 5th 2017 remind me to call my mother",
            "2017-06-05 19:00:00", "remind me to call my mother")

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

lingua_franca-0.1.0.tar.gz (138.7 kB view details)

Uploaded Source

Built Distribution

lingua_franca-0.1.0-py3-none-any.whl (218.1 kB view details)

Uploaded Python 3

File details

Details for the file lingua_franca-0.1.0.tar.gz.

File metadata

  • Download URL: lingua_franca-0.1.0.tar.gz
  • Upload date:
  • Size: 138.7 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/2.0.0 pkginfo/1.5.0.1 requests/2.22.0 setuptools/40.8.0 requests-toolbelt/0.9.1 tqdm/4.36.1 CPython/3.7.4

File hashes

Hashes for lingua_franca-0.1.0.tar.gz
Algorithm Hash digest
SHA256 ba1d806b7a213710489fb5be237ebc67f1b8da4b28c9e06e9ac2e62325a98be9
MD5 ecce116a21178ce148b857913f0e9af3
BLAKE2b-256 5737963225c210ff86f2243fc837582693041a14a1c3519b940559d70afa37dd

See more details on using hashes here.

Provenance

File details

Details for the file lingua_franca-0.1.0-py3-none-any.whl.

File metadata

  • Download URL: lingua_franca-0.1.0-py3-none-any.whl
  • Upload date:
  • Size: 218.1 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/2.0.0 pkginfo/1.5.0.1 requests/2.22.0 setuptools/40.8.0 requests-toolbelt/0.9.1 tqdm/4.36.1 CPython/3.7.4

File hashes

Hashes for lingua_franca-0.1.0-py3-none-any.whl
Algorithm Hash digest
SHA256 b69c74b34e9b36bdf2254e8843f61b78adcb62eadf1ba744d6bc9dabc25be2e1
MD5 07d92e2b8075aead2284b4f039a6b641
BLAKE2b-256 05e7b52b57f4374fd9ecf624771c8a289260f19462b57a8e8913784bbd63ac8f

See more details on using hashes here.

Provenance

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page