Source code for pycantonese.jyutping.ipa

from __future__ import annotations

from functools import lru_cache

from .parse_jyutping import parse_jyutping

# Default mapping from Jyutping onsets to IPA symbols, used by
# ``jyutping_to_ipa`` unless the caller overrides entries via ``onsets``.
_ONSETS = {
    "b": "p",
    "d": "t",
    "g": "k",
    "gw": "kʷ",
    "z": "ts",
    "p": "pʰ",
    "t": "tʰ",
    "k": "kʰ",
    "kw": "kʷʰ",
    "c": "tsʰ",
    "m": "m",
    "n": "n",
    "ng": "ŋ",
    "f": "f",
    "h": "h",
    "s": "s",
    "l": "l",
    "w": "w",
    "j": "j",
    # A syllable parsed with an empty onset contributes nothing to the output.
    "": "",
}

# Default mapping from Jyutping nuclei to IPA symbols. The inline notes mark
# nuclei whose symbol ``jyutping_to_ipa`` swaps for a variant depending on the
# coda of the same syllable.
_NUCLEI = {
    "aa": "aː",
    "a": "ɐ",
    "i": "i",  # becomes ɪ before coda ng or k
    "yu": "y",
    "u": "u",  # becomes ʊ before coda ng or k
    "oe": "œ",
    "e": "ɛ",  # becomes e before coda i
    "eo": "ɵ",
    "o": "ɔ",  # becomes o before coda u
    # Nasals are listed so that a nasal parsed as the nucleus has a mapping.
    "m": "m",
    "n": "n",
    "ng": "ŋ",
}

# Default mapping from Jyutping codas to IPA symbols, used by
# ``jyutping_to_ipa`` unless the caller overrides entries via ``codas``.
_CODAS = {
    "p": "p̚",
    "t": "t̚",
    "k": "k̚",
    "m": "m",
    "n": "n",
    "ng": "ŋ",
    "i": "i",  # becomes y after nucleus eo, u, or o
    "u": "u",
    # A syllable parsed with an empty coda contributes nothing to the output.
    "": "",
}

# Default mapping from Jyutping tone numbers (as strings) to the two-digit
# tone strings appended to each IPA syllable, e.g. tone "2" is written "25".
# Callers can override entries via the ``tones`` argument of
# ``jyutping_to_ipa``.
_TONES = {
    "1": "55",
    "2": "25",
    "3": "33",
    "4": "21",
    "5": "23",
    "6": "22",
}


def _replace(current, parsed, part_to_match, matches, default):
    """Return ``default`` if a part of ``parsed`` is in ``matches``.

    Parameters
    ----------
    current : str
        Value returned unchanged when the condition does not hold.
    parsed : object
        Parsed syllable. Only the attribute named by ``part_to_match``
        is read.
    part_to_match : str
        Name of the attribute of ``parsed`` to inspect, e.g. ``"coda"``.
    matches : container of str
        Attribute values that trigger the replacement. Any object
        supporting ``in`` works (tuple, list, set, ...).
    default : str
        Value returned when the inspected attribute is in ``matches``.

    Returns
    -------
    str
        ``default`` if the condition holds, otherwise ``current``.
    """
    # Intentionally not memoized: the body is a single attribute read plus a
    # membership test, so a cache lookup (which hashes all five arguments)
    # saves nothing, and caching would reject unhashable arguments.
    if getattr(parsed, part_to_match) in matches:
        return default
    return current


def jyutping_to_ipa(
    jp_str: str,
    return_as: str = "list",
    *,
    onsets: dict[str, str] | None = None,
    nuclei: dict[str, str] | None = None,
    codas: dict[str, str] | None = None,
    tones: dict[str, str] | None = None,
) -> list[str] | str:
    """Convert Jyutping romanization into IPA.

    The input is split into syllables with ``parse_jyutping``; each syllable
    is rendered as onset + nucleus + coda + tone. The default symbols are
    based on Matthews and Yip (2011: 461-463) and include these
    context-dependent variants:

    * nucleus "i" is /ɪ/ and nucleus "u" is /ʊ/ before coda "ng" or "k";
    * nucleus "e" is /e/ before coda "i"; nucleus "o" is /o/ before coda "u";
    * coda "i" is /y/ after nucleus "eo", "u", or "o".

    Parameters
    ----------
    jp_str : str
        Jyutping romanization for one or multiple characters.
    return_as : str, optional
        If ``"list"`` (the default), return a list of strings, one IPA
        string per Cantonese / Chinese character in the input. If
        ``"string"``, return a single space-joined string. Any value other
        than ``"list"`` is treated like ``"string"``.
    onsets : dict[str, str], optional
        Custom mapping from Jyutping onsets to IPA symbols. For example,
        Jyutping "z" maps to IPA /ts/ by default; passing ``{"z": "tʃ"}``
        maps "z" to /tʃ/ instead.
    nuclei : dict[str, str], optional
        Custom mapping from Jyutping nuclei to IPA symbols. For example,
        Jyutping "i" maps to IPA /i/ by default; passing ``{"i": "iː"}``
        maps "i" to /iː/ instead.
    codas : dict[str, str], optional
        Custom mapping from Jyutping codas to IPA symbols. For example,
        Jyutping "p" maps to IPA /p̚/ by default; passing ``{"p": "p"}``
        maps "p" to /p/ instead.
    tones : dict[str, str], optional
        Custom mapping from Jyutping tones to IPA symbols. For example,
        Jyutping "2" (high-rising tone) maps to IPA /25/ by default;
        passing ``{"2": "35"}`` maps "2" to /35/ instead.

    Custom mappings are looked up by the Jyutping part itself and, when a
    key is present, win over the default symbol, including any
    context-dependent variant listed above.

    Returns
    -------
    list[str] | str
        The per-syllable IPA strings, or their space-joined form.

    Examples
    --------
    >>> jyutping_to_ipa('gwong2dung1waa2')  # 廣東話 Cantonese
    ['kʷɔŋ25', 'tʊŋ55', 'waː25']
    >>> jyutping_to_ipa('gwong2dung1waa2', return_as="string")
    'kʷɔŋ25 tʊŋ55 waː25'
    >>> jyutping_to_ipa('ci1', onsets={'c': "tʃ'"})
    ["tʃ'i55"]
    >>> jyutping_to_ipa('ci1', tones={'1': "˥"})
    ['tsʰi˥']
    """
    syllables = parse_jyutping(jp_str)

    # Resolve the optional overrides once instead of once per syllable.
    onset_overrides = onsets or {}
    nucleus_overrides = nuclei or {}
    coda_overrides = codas or {}
    tone_overrides = tones or {}

    # Jyutping nucleus -> (codas that trigger the variant, variant symbol).
    nucleus_variants = {
        "i": (("ng", "k"), "ɪ"),
        "u": (("ng", "k"), "ʊ"),
        "e": (("i",), "e"),
        "o": (("u",), "o"),
    }

    transcriptions = []
    for syllable in syllables:
        ipa_onset = _ONSETS[syllable.onset]
        ipa_nucleus = _NUCLEI[syllable.nucleus]
        ipa_coda = _CODAS[syllable.coda]
        ipa_tone = _TONES[syllable.tone]

        # Context-dependent nucleus: swap in the variant when the coda
        # is one of the triggering codas.
        variant = nucleus_variants.get(syllable.nucleus)
        if variant is not None:
            trigger_codas, variant_symbol = variant
            ipa_nucleus = _replace(
                ipa_nucleus, syllable, "coda", trigger_codas, variant_symbol
            )

        # Context-dependent coda: "i" surfaces as "y" after rounded nuclei.
        if syllable.coda == "i":
            ipa_coda = _replace(
                ipa_coda, syllable, "nucleus", ("eo", "u", "o"), "y"
            )

        # Caller-supplied symbols take precedence over everything above.
        ipa_onset = onset_overrides.get(syllable.onset, ipa_onset)
        ipa_nucleus = nucleus_overrides.get(syllable.nucleus, ipa_nucleus)
        ipa_coda = coda_overrides.get(syllable.coda, ipa_coda)
        ipa_tone = tone_overrides.get(syllable.tone, ipa_tone)

        transcriptions.append(ipa_onset + ipa_nucleus + ipa_coda + ipa_tone)

    if return_as != "list":
        return " ".join(transcriptions)
    return transcriptions