Skip to main content

ipapy is a Python module to work with IPA strings

Project description

ipapy is a Python module to work with International Phonetic Alphabet (IPA) strings.

Installation

$ pip install ipapy

or

$ git clone https://github.com/pettarin/ipapy.git
$ cd ipapy

Usage

As A Python Module

###########
# IMPORTS #
###########
from ipapy import UNICODE_TO_IPA
from ipapy import is_valid_ipa
from ipapy.ipachar import IPAConsonant
from ipapy.ipachar import IPAVowel
from ipapy.ipastring import IPAString


###########
# IPAChar #
###########

# Def.: an IPAChar is an IPA letter or diacritic/suprasegmental/tone mark

# create IPAChar from its Unicode representation
c1 = UNICODE_TO_IPA[u"a"]                   # vowel open front unrounded
c2 = UNICODE_TO_IPA[u"e"]                   # vowel close-mid front unrounded
c3 = UNICODE_TO_IPA[u"\u03B2"]              # consonant voiced bilabial non-sibilant-fricative
tS1 = UNICODE_TO_IPA[u"t͡ʃ"]                 # consonant voiceless palato-alveolar sibilant-affricate
tS2 = UNICODE_TO_IPA[u"t͜ʃ"]                 # consonant voiceless palato-alveolar sibilant-affricate
tS3 = UNICODE_TO_IPA[u"tʃ"]                 # consonant voiceless palato-alveolar sibilant-affricate
tS4 = UNICODE_TO_IPA[u"ʧ"]                  # consonant voiceless palato-alveolar sibilant-affricate
tS5 = UNICODE_TO_IPA[u"\u0074\u0361\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
tS6 = UNICODE_TO_IPA[u"\u0074\u035C\u0283"] # consonant voiceless palato-alveolar sibilant-affricate
tS7 = UNICODE_TO_IPA[u"\u0074\u0283"]       # consonant voiceless palato-alveolar sibilant-affricate
tS8 = UNICODE_TO_IPA[u"\u02A7"]             # consonant voiceless palato-alveolar sibilant-affricate
c1 == c2    # False
c1 == c3    # False
c1 == tS1   # False
tS1 == tS2  # True (they both point to the same IPAChar object)
tS1 == tS3  # True (idem)
tS1 == tS4  # True (idem)
tS1 == tS5  # True (idem)
tS1 == tS6  # True (idem)
tS1 == tS7  # True (idem)
tS1 == tS8  # True (idem)

# create custom IPAChars
my_a1 = IPAVowel(name="my_a_1", descriptors=u"open front unrounded", unicode_repr=u"a")
my_a2 = IPAVowel(name="my_a_2", descriptors=[u"open", "front", "unrounded"], unicode_repr=u"a")
my_a3 = IPAVowel(name="my_a_3", height=u"open", backness=u"front", roundness=u"unrounded", unicode_repr=u"a")
my_a4 = IPAVowel(name="my_a_4", descriptors=[u"low", u"fnt", "unr"], unicode_repr=u"a")
my_ee = IPAVowel(name="my_e_1", descriptors=u"close-mid front unrounded", unicode_repr=u"e")
my_b1 = IPAConsonant(name="bilabial fricative", descriptors=u"voiced bilabial non-sibilant-fricative", unicode_repr=u"\u03B2")
my_b2 = IPAConsonant(name="bf", voicing=u"voiced", place=u"bilabial", manner=u"non-sibilant-fricative", unicode_repr=u"\u03B2")
my_tS = IPAConsonant(name="tS", voicing=u"voiceless", place=u"palato-alveolar", manner=u"sibilant-affricate", unicode_repr=u"t͡ʃ")
my_a1 == my_a2                  # False (two different objects)
my_a1 == c1                     # False (two different objects)
my_a1 == UNICODE_TO_IPA["a"]    # False (two different objects)

# associate non-standard Unicode representation
my_aa = IPAVowel(name="a special", descriptors=[u"low", u"fnt", u"unr"], unicode_repr=u"a{*}")
print(my_aa)    # "a{*}"

# equality vs. equivalence
my_tS == tS1                # False (my_tS is a different object than tS1)
my_tS.is_equivalent(tS1)    # True  (my_tS is equivalent to tS1...)
tS1.is_equivalent(my_tS)    # True  (... and vice versa)

# compare IPAChar objects
my_a1.is_equivalent(my_a2)  # True
my_a1.is_equivalent(my_a3)  # True
my_a1.is_equivalent(my_a4)  # True
my_a1.is_equivalent(my_ee)  # False
my_a1.is_equivalent(my_b1)  # False
my_b1.is_equivalent(my_b2)  # True
my_b1.is_equivalent(my_tS)  # False

# compare IPAChar and a Unicode string
my_b1.is_equivalent(u"\u03B2")  # True
my_b1.is_equivalent(u"β")       # True
my_b1.is_equivalent(u"b")       # False
my_tS.is_equivalent(u"tS")      # False
my_tS.is_equivalent(u"tʃ")      # False (missing the combining diacritic)
my_tS.is_equivalent(u"t͡ʃ")      # True (has combining diacritic)

# compare IPAChar and a string listing descriptors
my_a1.is_equivalent(u"open front unrounded")                                # False (missing 'vowel')
my_a1.is_equivalent(u"open front unrounded vowel")                          # True
my_a1.is_equivalent(u"low fnt unr vwl")                                     # True (known abbreviations are good as well)
my_ee.is_equivalent(u"open front unrounded vowel")                          # False
my_b1.is_equivalent(u"voiced bilabial non-sibilant-fricative")              # False (missing 'consonant')
my_b1.is_equivalent(u"voiced bilabial non-sibilant-fricative consonant")    # True
my_b1.is_equivalent(u"consonant non-sibilant-fricative bilabial voiced")    # True (the order does not matter)
my_b1.is_equivalent(u"consonant non-sibilant-fricative bilabial voiceless") # False

# compare IPAChar and list of descriptors
my_a1.is_equivalent([u"open", u"front", u"unrounded"])              # False
my_a1.is_equivalent([u"vowel", u"open", u"front", u"unrounded"])    # True
my_a1.is_equivalent([u"open", u"unrounded", u"vowel", u"front"])    # True
my_a1.is_equivalent([u"low", u"fnt", u"unr", u"vwl"])               # True


#############
# IPAString #
#############

# Def.: an IPAString is a list of IPAChar objects

# check if Unicode string contains only IPA valid characters
s_uni = u"əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"   # Unicode string of the IPA pronunciation for "achene acanthology"
is_valid_ipa(s_uni)                 # True
is_valid_ipa(u"LoL")                # False (uppercase letter L is not IPA valid)

# create IPAString from list of IPAChar objects
new_s_ipa = IPAString(ipa_chars=[c3, c2, tS1, c1])

# create IPAString from Unicode string
s_ipa = IPAString(unicode_string=s_uni)

# IPAString is similar to regular Python string object
print(s_ipa)                            # "əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"
len(s_ipa)                              # 21
s_ipa[0]                                # (first IPA char)
s_ipa[5:8]                              # (6th, 7th, 8th IPA chars)
s_ipa[19:]                              # (IPA chars from the 20th)
s_ipa[-1]                               # (last IPA char)
len(new_s_ipa)                          # 4
new_s_ipa.append(UNICODE_TO_IPA[u"a"])  # (append IPA char "a")
len(new_s_ipa)                          # 5
new_s_ipa.append(UNICODE_TO_IPA[u"t͡ʃ"]) # (append IPA char "t͡ʃ")
len(new_s_ipa)                          # 6
new_s_ipa.extend(s_ipa)                 # (append s_ipa to new_s_ipa)
len(new_s_ipa)                          # 27
double = s_ipa + new_s_ipa              # (concatenate s_ipa and new_s_ipa)
len(double)                             # 48

# new IPAString objects containing only...
print(s_ipa.consonants)                 # "knknθld͡ʒ"                (consonants)
print(s_ipa.vowels)                     # "əiææɑəi"                 (vowels)
print(s_ipa.letters)                    # "əkinækænθɑləd͡ʒi"         (vowels and consonants)
print(s_ipa.cns_vwl)                    # "əkinækænθɑləd͡ʒi"         (vowels and consonants)
print(s_ipa.cns_vwl_pstr)               # "əˈkinækænˈθɑləd͡ʒi"       (  + primary stress marks)
print(s_ipa.cns_vwl_pstr_long)          # "əˈkiːnækænˈθɑləd͡ʒi"      (    + long marks)
print(s_ipa.cns_vwl_str)                # "əˈkinæˌkænˈθɑləd͡ʒi"      (  + stress marks)
print(s_ipa.cns_vwl_str_len)            # "əˈkiːnæˌkænˈθɑləd͡ʒi"     (    + length marks)
print(s_ipa.cns_vwl_str_len_wb)         # "əˈkiːn æˌkænˈθɑləd͡ʒi"    (      + word breaks)
print(s_ipa.cns_vwl_str_len_wb_sb)      # "əˈkiːn æˌkænˈθɑ.lə.d͡ʒi"  (        + syllable breaks)
cns = s_ipa.consonants                  # (store new IPA string)
cns == s_ipa.consonants                 # False (two different objects)
cns.is_equivalent(s_ipa.consonants)     # True
cns.is_equivalent(s_ipa)                # False

# print representation and name of all IPAChar objects in IPAString
for c in s_ipa:
    print(u"%s\t%s" % (c, c.name))
# ə vowel mid central unrounded
# ˈ suprasegmental primary-stress
# k consonant voiceless velar plosive
# i vowel close front unrounded
# ː suprasegmental long
# n consonant voiced alveolar nasal
#   suprasegmental word-break
# æ vowel near-open front unrounded
# ˌ suprasegmental secondary-stress
# k consonant voiceless velar plosive
# æ vowel near-open front unrounded
# n consonant voiced alveolar nasal
# ˈ suprasegmental primary-stress
# θ consonant voiceless dental non-sibilant-fricative
# ɑ vowel open back unrounded
# . suprasegmental syllable-break
# l consonant voiced alveolar lateral-approximant
# ə vowel mid central unrounded
# . suprasegmental syllable-break
# d͡ʒ   consonant voiced palato-alveolar sibilant-affricate
# i vowel close front unrounded

# compare IPAString objects
s_ipa_d = IPAString(unicode_string=u"diff")
s_ipa_1 = IPAString(unicode_string=u"at͡ʃe")
s_ipa_2 = IPAString(unicode_string=u"aʧe")
s_ipa_3 = IPAString(unicode_string=u"at͡ʃe", single_char_parsing=True)
s_ipa_d == s_ipa_1              # False
s_ipa_1 == s_ipa_2              # False (different objects)
s_ipa_1 == s_ipa_3              # False (different objects)
s_ipa_2 == s_ipa_3              # False (different objects)
s_ipa_d.is_equivalent(s_ipa_1)  # False
s_ipa_1.is_equivalent(s_ipa_2)  # True
s_ipa_2.is_equivalent(s_ipa_1)  # True
s_ipa_1.is_equivalent(s_ipa_3)  # True
s_ipa_2.is_equivalent(s_ipa_3)  # True

# compare IPAString and list of IPAChar objects
s_ipa_1.is_equivalent([my_a1, my_tS, my_ee])    # True

# compare IPAString and Unicode string
s_ipa_d.is_equivalent(u"diff")                  # True
s_ipa_1.is_equivalent(u"atse")                  # False
s_ipa_1.is_equivalent(u"atSe")                  # False
s_ipa_1.is_equivalent(u"at͡ʃe")                  # True
s_ipa_1.is_equivalent(u"at͜ʃe")                  # True
s_ipa_1.is_equivalent(u"aʧe")                   # True
s_ipa_1.is_equivalent(u"at͡ʃeLOL", ignore=True)  # True (characters that are not IPA valid are ignored)
s_ipa_1.is_equivalent(u"at͡ʃeLoL", ignore=True)  # False (only characters that are not IPA valid are ignored; the lowercase "o" is IPA valid, so it is kept)


########################
# CONVERSION FUNCTIONS #
########################
from ipapy.kirshenbaummapper import KirshenbaumMapper
kmapper = KirshenbaumMapper()                                    # mapper to Kirshenbaum ASCII IPA
s_k_ipa = kmapper.map_ipa_string(s_ipa)                          # u"@'ki:n#&,k&n'TA#l@#dZi"
s_k_uni = kmapper.map_unicode_string(s_uni)                      # u"@'ki:n#&,k&n'TA#l@#dZi"
s_k_ipa == s_k_uni                                               # True
s_k_lis = kmapper.map_unicode_string(s_uni, return_as_list=True) # [u'@', u"'", u'k', u'i', u':', u'n', u'#', u'&', u',', u'k', u'&', u'n', u"'", u'T', u'A', u'#', u'l', u'@', u'#', u'dZ', u'i']

from ipapy.arpabetmapper import ARPABETMapper
amapper = ARPABETMapper()                                                    # mapper to ARPABET ASCII IPA (stress marks not supported yet)
s_a = amapper.map_unicode_string(u"pɹuːf")                                   # error: long suprasegmental not mapped
s_a = amapper.map_unicode_string(u"pɹuːf", ignore=True)                      # u"PRUWF"
s_a = amapper.map_unicode_string(u"pɹuːf", ignore=True, return_as_list=True) # [u'P', u'R', u'UW', u'F']

As A Command Line Tool

ipapy comes with a command line tool that performs operations on a given UTF-8 encoded Unicode string representing an IPA string. It is therefore recommended to run it in a shell that supports UTF-8.

Currently, the supported operations are:

  • canonize: canonize the Unicode representation of the IPA string

  • chars: list all IPA characters appearing in the IPA string

  • check: check if the given Unicode string is IPA valid

  • clean: remove characters that are not IPA valid

  • u2a: print the corresponding ARPABET (ASCII IPA) string

  • u2k: print the corresponding Kirshenbaum (ASCII IPA) string

Run with the --help parameter to list all the available options:

$ python -m ipapy --help

usage: __main__.py [-h] [-i] [-p] [--separator [SEPARATOR]] [-s] [-u]
                   command string

ipapy perform a command on the given IPA/Unicode string

positional arguments:
  command               [canonize|chars|check|clean|u2a|u2k]
  string                String to canonize, check, clean, or convert

optional arguments:
  -h, --help            show this help message and exit
  -i, --ignore          Ignore Unicode characters that are not IPA valid
  -p, --print-invalid   Print Unicode characters that are not IPA valid
  --separator [SEPARATOR]
                        Print IPA chars separated by this character (default:
                        '')
  -s, --single-char-parsing
                        Perform single character parsing instead of maximal
                        parsing
  -u, --unicode         Print each Unicode character that is not IPA valid
                        with its Unicode codepoint and name

Examples:

$ python -m ipapy canonize "eʧiu"
et͡ʃiu

$ python -m ipapy canonize "eʧiu" --separator " "
e t͡ʃ i u

$ python -m ipapy chars "eʧiu"
'e' vowel close-mid front unrounded (U+0065)
't͡ʃ'   consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)
'i' vowel close front unrounded (U+0069)
'u' vowel close back rounded (U+0075)

$ python -m ipapy chars "et͡ʃiu"
'e' vowel close-mid front unrounded (U+0065)
't͡ʃ'   consonant voiceless palato-alveolar sibilant-affricate (U+0074 U+0361 U+0283)
'i' vowel close front unrounded (U+0069)
'u' vowel close back rounded (U+0075)

$ python -m ipapy chars "et͡ʃiu" -s
'e' vowel close-mid front unrounded (U+0065)
't' consonant voiceless alveolar plosive (U+0074)
'͡' diacritic tie-bar-above (U+0361)
'ʃ' consonant voiceless palato-alveolar sibilant-fricative (U+0283)
'i' vowel close front unrounded (U+0069)
'u' vowel close back rounded (U+0075)

$ python -m ipapy check "eʧiu"
True

$ python -m ipapy check "LoL"
False

$ python -m ipapy check "LoL" -p
False
LL

$ python -m ipapy check "LoLOL" -p -u
False
LLOL
'L' 0x4c    LATIN CAPITAL LETTER L
'O' 0x4f    LATIN CAPITAL LETTER O

$ python -m ipapy clean "/eʧiu/"
eʧiu

$ python -m ipapy u2k "eʧiu"
etSiu

$ python -m ipapy u2k "eTa"
The given string contains characters not IPA valid. Use the 'ignore' option to ignore them.

$ python -m ipapy u2k "eTa" -i
ea

$ python -m ipapy u2a "eʧiu" --separator " "
EH CH IH UW

Unit Testing

$ python run_all_unit_tests.py

License

ipapy is released under the MIT License.

Acknowledgments

  • Bram Vanroy provided a fix to setup.py for Windows users

Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

ipapy-0.0.9.0.tar.gz (37.4 kB view details)

Uploaded Source

Built Distribution

ipapy-0.0.9.0-py2-none-any.whl (38.7 kB view details)

Uploaded Python 2

File details

Details for the file ipapy-0.0.9.0.tar.gz.

File metadata

  • Download URL: ipapy-0.0.9.0.tar.gz
  • Upload date:
  • Size: 37.4 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/1.13.0 pkginfo/1.5.0.1 requests/2.21.0 setuptools/41.0.1 requests-toolbelt/0.9.1 tqdm/4.31.1 CPython/3.7.3rc1

File hashes

Hashes for ipapy-0.0.9.0.tar.gz
Algorithm Hash digest
SHA256 e1bc73f6a4861b9a0ff562b70b87dab8acf7a63badd98caabd6e248b3839f1c9
MD5 4c87b745b63f5a69571815a979af9cc6
BLAKE2b-256 410d7e8652df6af20a61bb3315f5c9d99fb9ea8f3779ff80fca9d71001230f90

See more details on using hashes here.

File details

Details for the file ipapy-0.0.9.0-py2-none-any.whl.

File metadata

  • Download URL: ipapy-0.0.9.0-py2-none-any.whl
  • Upload date:
  • Size: 38.7 kB
  • Tags: Python 2
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/1.13.0 pkginfo/1.5.0.1 requests/2.21.0 setuptools/41.0.1 requests-toolbelt/0.9.1 tqdm/4.31.1 CPython/3.7.3rc1

File hashes

Hashes for ipapy-0.0.9.0-py2-none-any.whl
Algorithm Hash digest
SHA256 b96d0435282103c7d893c8226a458b70a810d130ce65fabe127c8a7490d1f82b
MD5 6d88c2fcc87174fc67a6b829f0bf2565
BLAKE2b-256 5db6c170e49cd5d3aaa8cbbe3c836d2fe09c72f08cf3b8ea1e4b4f81fed7881b

See more details on using hashes here.

Supported by

AWS (Cloud computing and Security Sponsor) · Datadog (Monitoring) · Fastly (CDN) · Google (Download Analytics) · Microsoft (PSF Sponsor) · Pingdom (Monitoring) · Sentry (Error logging) · StatusPage (Status page)