Source code for pycantonese.jyutping.tipa

from __future__ import annotations

from pycantonese.jyutping.parse_jyutping import parse_jyutping

# Jyutping onset spelling -> LaTeX TIPA markup.
#
# Values containing LaTeX macros are raw strings, so each backslash below is a
# literal backslash in the emitted markup. Values that end in a space keep the
# macro argument separate from the final that is concatenated right after.
ONSETS_TIPA = {
    "b": "p",
    "d": "t",
    "g": "k",
    "gw": r"k\super w ",
    "z": "ts",
    "p": r"p\super h ",
    "t": r"t\super h ",
    "k": r"k\super h ",
    "kw": r"k\super w\super h ",
    "c": r"ts\super h ",
    "m": "m",
    "n": "n",
    "ng": "N",
    "f": "f",
    "h": "h",
    "s": "s",
    "l": "l",
    "w": "w",
    "j": "j",
    "v": "v",
    "": "",  # Empty onset: the syllable consists of its final only.
}

# Jyutping final spelling -> LaTeX TIPA markup.
#
# Values containing LaTeX macros are raw strings, so each backslash below is a
# literal backslash in the emitted markup. Finals ending in p/t/k carry a
# trailing "\textcorner " (note the space after the macro name).
FINALS_TIPA = {
    "i": "i",
    "ip": r"ip\textcorner ",
    "it": r"it\textcorner ",
    "ik": r"Ik\textcorner ",
    "im": "im",
    "in": "in",
    "ing": "IN",
    "iu": "iu",
    "yu": "y",
    "yut": r"yt\textcorner ",
    "yun": "yn",
    "u": "u",
    "ut": r"ut\textcorner ",
    "uk": r"Uk\textcorner ",
    "un": "un",
    "ung": "UN",
    "ui": "uY",
    "e": "E",
    "ek": r"Ek\textcorner ",
    "eng": "EN",
    "ei": "eI",
    "eot": r"8t\textcorner ",
    "eon": "8n",
    "eoi": "8Y",
    "oe": r"\oe ",
    "oek": r"\oe k\textcorner ",
    "oeng": r"\oe N",
    "o": "O",
    "ot": r"Ot\textcorner ",
    "ok": r"Ok\textcorner ",
    "on": "On",
    "ong": "ON",
    "oi": "OY",
    "ou": "ou",
    "ap": r"5p\textcorner ",
    "at": r"5t\textcorner ",
    "ak": r"5k\textcorner ",
    "am": "5m",
    "an": "5n",
    "ang": "5N",
    "ai": "5I",
    "au": "5u",
    "aa": "a",
    "aap": r"ap\textcorner ",
    "aat": r"at\textcorner ",
    "aak": r"ak\textcorner ",
    "aam": "am",
    "aan": "an",
    "aang": "aN",
    "aai": "aI",
    "aau": "au",
    # Finals consisting of a bare nasal are wrapped in TIPA's \s{...} macro.
    "m": r"\s{m}",
    "ng": r"\s{N}",
}

# Jyutping tone number -> two-digit string appended to the end of each
# converted syllable.
TONES_TIPA = {"1": "55", "2": "25", "3": "33", "4": "21", "5": "23", "6": "22"}


def jyutping_to_tipa(jp: str | list[str]) -> list[str]:
    r"""Convert Jyutping romanization into LaTeX TIPA.

    Args:
        jp (str or list[str]): Either the Jyutping romanization of a single
            word (any number of syllables, optionally separated by spaces),
            or a list of such strings where each element is one word, i.e.
            the caller supplies the word segmentation.

    Returns:
        list[str]: One element per input word. Each element is the TIPA
        markup of that word, its syllables joined by a single space. Empty
        input (``""``, ``[]``, ``None``) gives an empty list.

    Raises:
        ValueError: If the Jyutping romanization is illegal (e.g., with
            unrecognized elements).

    Examples:
        >>> jyutping_to_tipa("gwong2dung1waa2")  # 廣東話, Cantonese  # doctest: +SKIP
        ['k\\super w ON25 tUN55 wa25']
    """  # noqa: E501
    if not jp:
        return []

    # A bare string is one word; anything else is taken as a sequence of words.
    if isinstance(jp, str):
        words = [jp]
    else:
        words = jp

    per_word_syllables = map(_word_to_tipa_syllables, words)
    return [" ".join(syllables) for syllables in per_word_syllables]
def _word_to_tipa_syllables(word: str) -> list[str]:
    """Convert one Jyutping word into per-syllable TIPA strings.

    Args:
        word (str): Jyutping romanization of a single word. It is handed
            to ``parse_jyutping`` unchanged.

    Returns:
        list[str]: One TIPA string per parsed syllable, in parse order.
        Each string is the onset markup followed by the final markup, with
        surrounding whitespace stripped, followed by the tone digits.

    Raises:
        ValueError: If a parsed onset, final, or tone has no entry in
            ``ONSETS_TIPA``, ``FINALS_TIPA``, or ``TONES_TIPA``. Errors
            raised by ``parse_jyutping`` itself propagate unchanged.
    """
    tipa_syllables = []
    for syllable in parse_jyutping(word):
        # TODO: Separate "final" as "nucleus" and "coda" instead?
        try:
            onset = ONSETS_TIPA[syllable.onset]
            final = FINALS_TIPA[syllable.final]
            tone = TONES_TIPA[syllable.tone]
        except KeyError as err:
            # A missing table entry is an unconvertible romanization, not a
            # lookup bug: report it as ValueError, which is what the public
            # jyutping_to_tipa() documents, instead of leaking KeyError.
            raise ValueError(
                f"cannot convert Jyutping {word!r} to TIPA: "
                f"no TIPA mapping for {err.args[0]!r}"
            ) from err
        # Some table entries end in a space after a LaTeX macro. Strip it
        # when it is the last character so the tone digits attach directly.
        tipa_syllables.append((onset + final).strip() + tone)
    return tipa_syllables