from __future__ import annotations
import unicodedata
from pycantonese.jyutping.parse_jyutping import parse_jyutping
# Jyutping onset -> Yale onset.
# Only three onsets are respelled ("z" -> "j", "c" -> "ch", "j" -> "y");
# every other entry, including the empty onset, maps to itself.
ONSETS_YALE = {
    "b": "b", "d": "d", "g": "g", "gw": "gw", "z": "j",
    "p": "p", "t": "t", "k": "k", "kw": "kw", "c": "ch",
    "m": "m", "n": "n", "ng": "ng",
    "f": "f", "h": "h", "s": "s", "l": "l", "w": "w", "j": "y",
    "v": "v",
    "": "",
}
# Jyutping nucleus -> Yale nucleus.
# Jyutping "oe" and "eo" both collapse to Yale "eu"; the rest are unchanged.
NUCLEI_YALE = {
    "aa": "aa", "a": "a",
    "i": "i", "yu": "yu", "u": "u",
    "oe": "eu", "e": "e", "eo": "eu", "o": "o",
    "m": "m", "ng": "ng",
}
# Jyutping coda -> Yale coda.  Every coda listed here (including the empty
# coda) is spelled the same way in both systems.
CODAS_YALE = {
    coda: coda for coda in ("p", "t", "k", "m", "n", "ng", "i", "u", "")
}
# Letter sequences checked at syllable edges by _needs_apostrophe: a syllable
# may end with one of these, and the following syllable may begin with one.
_YALE_AMBIGUOUS_CONSONANTS = ("ng", "h", "p", "t", "k", "m", "n")
# Vowel letters as they appear in displayed Yale text, with and without a
# precomposed tone diacritic.
_YALE_VOWEL_DISPLAY_LETTERS = frozenset(
    "aeiou"  # no diacritic
    "áéíóú"  # acute
    "àèìòù"  # grave
    "āēīōū"  # macron
)
def _needs_apostrophe(prev_syl: str, next_syl: str) -> bool:
    """Decide whether ``prev_syl`` and ``next_syl`` must be joined with ``'``.

    Two independent tests are applied:

    * a visual test (the heuristic historically used by
      ``jyutping_to_yale(..., return_as='string')``), and
    * a structural test: the glued string is re-split with ``_split_piece``
      and compared against the two original syllables.

    Args:
        prev_syl: The Yale syllable on the left of the boundary.
        next_syl: The Yale syllable on the right of the boundary.

    Returns:
        bool: True when either test says the plain concatenation would be
        ambiguous, or when the glued string cannot be re-split at all.
    """
    # --- Visual test -------------------------------------------------------
    # Exactly one side of the boundary looks ambiguous: either the left
    # syllable ends in an ambiguous consonant and a vowel follows, or the left
    # syllable ends otherwise and the right one starts with such a consonant.
    if prev_syl.endswith(_YALE_AMBIGUOUS_CONSONANTS):
        # ``next_syl[:1]`` is "" for an empty syllable, which is never a vowel.
        if next_syl[:1] in _YALE_VOWEL_DISPLAY_LETTERS:
            return True
    elif next_syl.startswith(_YALE_AMBIGUOUS_CONSONANTS):
        return True

    # --- Structural test ---------------------------------------------------
    # The visual test says nothing when BOTH edges are ambiguous consonants,
    # yet the splitter may still re-bind boundary letters.
    #
    # Needed:   "yih" + "pa"  ->  "yihpa" re-splits as ["yihp", "a"], because
    #           a low-tone "h" may sit before a stop coda inside one syllable.
    #           The split differs from the input, so an apostrophe is required
    #           ("yih'pa").
    # Agreeing: "m̀h" + "gōi"  ->  "m̀hgōi" re-splits as ["m̀h", "gōi"], the same
    #           two syllables, so no apostrophe is added.
    decomposed = [
        unicodedata.normalize("NFD", syllable) for syllable in (prev_syl, next_syl)
    ]
    try:
        resplit = _split_piece("".join(decomposed))
    except ValueError:
        # The glued form is not even parseable; keep the boundary explicit.
        return True
    return resplit != decomposed
[docs]
def stringify_yale(yale: list[str]) -> str:
    """Render a list of Yale words as a single display string.

    Each list element is one word whose syllables are separated by
    whitespace (the shape returned by :func:`jyutping_to_yale`). Syllables of
    a word are glued together; an apostrophe ``'`` is placed between two
    syllables only where ``_needs_apostrophe`` reports that the boundary would
    otherwise be ambiguous. Words are then joined with a single space, and
    words that contain no syllables are dropped.

    Args:
        yale (list[str]): Yale words, syllables separated by whitespace.

    Returns:
        str: The joined Yale string (``""`` for empty input).

    Examples:
        >>> stringify_yale(jyutping_to_yale("gwong2dung1waa2"))  # 廣東話
        'gwóngdūngwá'
        >>> stringify_yale(jyutping_to_yale("hei3hau6"))  # 氣候
        "hei'hauh"
        >>> stringify_yale(jyutping_to_yale(["gwong2dung1", "waa2"]))
        'gwóngdūng wá'
    """
    if not yale:
        return ""

    rendered_words = []
    for word in yale:
        syllables = word.split()
        if not syllables:
            continue
        previous = syllables[0]
        pieces = [previous]
        for current in syllables[1:]:
            separator = "'" if _needs_apostrophe(previous, current) else ""
            pieces.append(separator + current)
            previous = current
        rendered_words.append("".join(pieces))
    return " ".join(rendered_words)
[docs]
def jyutping_to_yale(jp: str | list[str]) -> list[str]:
    """Convert Jyutping romanization into Yale romanization.

    Args:
        jp (str or list[str]): Either one Jyutping string, treated as a
            single word (any number of syllables, optionally separated by
            spaces), or a list of such strings where each element is one
            word.

    Returns:
        list[str]: One element per input word; each element holds that
        word's Yale syllables separated by a single space. Empty input
        yields an empty list.

    Raises:
        ValueError: If the Jyutping romanization is illegal (e.g., with
            unrecognized elements).

    Examples:
        >>> jyutping_to_yale("gwong2dung1waa2")  # 廣東話, Cantonese
        ['gwóng dūng wá']
        >>> jyutping_to_yale(["gwong2dung1", "waa2"])
        ['gwóng dūng', 'wá']
        >>> jyutping_to_yale("hei3hau6")  # 氣候, climate
        ['hei hauh']
    """
    if not jp:
        return []
    # A bare string is a single word; a list already carries word boundaries.
    if isinstance(jp, str):
        words = [jp]
    else:
        words = jp
    converted = []
    for word in words:
        converted.append(" ".join(_word_to_yale_syllables(word)))
    return converted
def _add_tone_diacritic(letter: str, tone: str) -> str:
    """Return ``letter`` carrying the Yale tone diacritic for a Jyutping tone.

    Tone 1 adds a macron, tones 2 and 5 an acute, tone 4 a grave; any other
    tone (3 and 6 in practice) leaves the letter bare. The precomposed
    Unicode character is preferred; when Unicode has none for this letter,
    the letter is followed by the combining mark instead.
    """
    # tone -> (suffix of the precomposed character's Unicode name, combining mark)
    diacritics = {
        "1": ("MACRON", "\u0304"),
        "2": ("ACUTE", "\u0301"),
        "5": ("ACUTE", "\u0301"),
        "4": ("GRAVE", "\u0300"),
    }
    if tone not in diacritics:
        return letter
    name_suffix, combining_mark = diacritics[tone]
    try:
        return unicodedata.lookup(f"{unicodedata.name(letter)} WITH {name_suffix}")
    except KeyError:
        return letter + combining_mark


def _word_to_yale_syllables(word: str) -> list[str]:
    """Convert one Jyutping word into its Yale syllables.

    Args:
        word: Jyutping for a single word; it is passed to ``parse_jyutping``.

    Returns:
        list[str]: One Yale syllable per parsed Jyutping syllable, in order.

    Raises:
        KeyError: If a parsed onset, nucleus or coda has no entry in
            ``ONSETS_YALE`` / ``NUCLEI_YALE`` / ``CODAS_YALE``.

    Any exception raised by ``parse_jyutping`` propagates unchanged.
    """
    yale_list = []
    for jp_parsed in parse_jyutping(word):
        onset = ONSETS_YALE[jp_parsed.onset]
        nucleus = NUCLEI_YALE[jp_parsed.nucleus]
        coda = CODAS_YALE[jp_parsed.coda]
        tone = jp_parsed.tone  # still the Jyutping tone digit

        # Yale marks the three low tones with "h".
        low_tone_h = "h" if tone in {"4", "5", "6"} else ""

        # Long "aa" with no coda is written "a" in Yale.
        if nucleus == "aa" and coda == "":
            nucleus = "a"

        # Nucleus "yu": never write "yyu", and temporarily drop the "y" so
        # that the diacritic lands on "u" rather than on "y".
        if nucleus == "yu":
            if onset == "y":
                onset = ""
            nucleus = "u"

        # Nucleus "ng": the diacritic belongs on "g", so temporarily drop "n".
        if nucleus == "ng":
            nucleus = "g"

        # The tone diacritic goes on the first (remaining) nucleus letter.
        nucleus = _add_tone_diacritic(nucleus[0], tone) + nucleus[1:]

        # Restore the letters removed above.
        if jp_parsed.nucleus == "yu":
            nucleus = "y" + nucleus
        if jp_parsed.nucleus == "ng":
            nucleus = "n" + nucleus

        # Jyutping final "eu" is written "ew" in Yale, because Yale "eu"
        # already stands for Jyutping "oe"/"eo".  The check must use the
        # parsed Jyutping nucleus: ``nucleus`` may carry a diacritic by now
        # (e.g. "ē"), so comparing it with "e" would only match tones 3 and 6.
        if jp_parsed.nucleus == "e" and coda == "u":
            coda = "w"

        # Low-tone "h" follows a glide coda but precedes any other coda.
        if low_tone_h and coda in {"i", "u", "w"}:
            yale = onset + nucleus + coda + low_tone_h
        else:
            yale = onset + nucleus + low_tone_h + coda
        yale_list.append(yale)
    return yale_list
# Reverse lookup tables for Yale -> Jyutping.
# Each Yale onset/coda spelling in ONSETS_YALE / CODAS_YALE is produced by a
# single Jyutping spelling, so swapping keys and values loses nothing.
_ONSETS_JYUTPING = dict(zip(ONSETS_YALE.values(), ONSETS_YALE))
_CODAS_JYUTPING = dict(zip(CODAS_YALE.values(), CODAS_YALE))
# Yale nuclei that name exactly one Jyutping nucleus.  "eu" is deliberately
# absent: it stands for both Jyutping "oe" and "eo" and is resolved from the
# coda by _resolve_eu.  Yale "a" with no coda is promoted to Jyutping "aa" in
# _build_jyutping before this table is consulted.
_NUCLEI_JYUTPING_UNAMBIGUOUS = {
    nucleus: nucleus
    for nucleus in ("aa", "a", "i", "yu", "u", "e", "o", "m", "ng")
}
# Yale onsets, two-letter spellings first, so that a first-match scan picks
# e.g. "gw" rather than "g" and "ng" rather than "n".
_YALE_ONSETS_ORDERED = (
    "ch", "gw", "kw", "ng",
    "b", "d", "g", "j", "p", "t", "k", "m", "n",
    "f", "h", "s", "l", "w", "y", "v",
)
# Yale nuclei as base letters (no diacritic), two-letter spellings first.
_YALE_NUCLEI_ORDERED = ("aa", "eu", "yu", "ng") + ("a", "e", "i", "o", "u", "m")
# Plain vowel letters, used on NFD text where marks are separate characters.
_YALE_VOWEL_LETTERS = {"a", "e", "i", "o", "u"}
# Combining accent (as found after NFD decomposition) -> tone-marker name.
#   "macron" -> tone 1
#   "acute"  -> tone 2 or 5
#   "grave"  -> tone 4
# A syllable with no accent is tone 3 or 6; the trailing "h" tells them apart.
# The keys are spelled as escapes because the raw combining characters are
# invisible on their own in most editors.
_DIACRITIC_TO_MARK = {
    "\u0304": "macron",  # COMBINING MACRON
    "\u0301": "acute",  # COMBINING ACUTE ACCENT
    "\u0300": "grave",  # COMBINING GRAVE ACCENT
}
def _strip_diacritic(nucleus_chars):
    """Separate an NFD nucleus into its base letters and its tone mark.

    Args:
        nucleus_chars: NFD-decomposed nucleus (base letters and combining
            marks interleaved).

    Returns:
        tuple: ``(base_nucleus_str, tone_mark)`` where ``tone_mark`` is
        ``"macron"``, ``"acute"``, ``"grave"`` or ``None`` when the nucleus
        carries no combining mark.

    Raises:
        ValueError: On a combining mark that is not in ``_DIACRITIC_TO_MARK``,
            or when two different tone marks occur in the same nucleus.
    """
    letters = []
    tone_mark = None
    for char in nucleus_chars:
        if unicodedata.category(char) != "Mn":
            letters.append(char)
            continue
        # Nonspacing mark: it must be one of the three known tone accents.
        if char not in _DIACRITIC_TO_MARK:
            raise ValueError("unrecognized diacritic in nucleus -- " + repr(char))
        this_mark = _DIACRITIC_TO_MARK[char]
        # A repeat of the same accent is tolerated; a different one is not.
        if tone_mark is not None and tone_mark != this_mark:
            raise ValueError("multiple tone diacritics in one syllable")
        tone_mark = this_mark
    return "".join(letters), tone_mark
def _tone_from(mark, has_h):
    """Return the Jyutping tone digit for a Yale tone mark and low-tone flag.

    Args:
        mark: ``"macron"``, ``"acute"``, ``"grave"`` or ``None``.
        has_h: Whether the syllable carries the low-tone ``h``.

    Returns:
        str: ``"1"`` for macron; ``"2"``/``"5"`` for acute without/with
        ``h``; ``"4"`` for grave with ``h``; ``"3"``/``"6"`` for no mark
        without/with ``h``.

    Raises:
        ValueError: For a grave accent without the low-tone ``h``.
    """
    if mark == "grave":
        if has_h:
            return "4"
        raise ValueError("grave (tone 4) requires 'h' low-tone marker")
    if mark == "macron":
        return "1"
    if mark == "acute":
        high, low = "2", "5"
    else:
        high, low = "3", "6"
    return low if has_h else high
def _resolve_eu(coda_yale):
    """Pick the Jyutping nucleus behind Yale "eu" from the Yale coda.

    Returns "eo" when the coda is "n", "t" or "i", and "oe" for any other
    coda (including no coda).
    """
    return "eo" if coda_yale in {"n", "t", "i"} else "oe"
def _split_word_syllables(word):
    """Break one Yale word into raw syllable strings.

    Apostrophes and whitespace inside ``word`` are both treated as explicit
    syllable breaks; each remaining chunk is further divided by
    ``_split_piece``.
    """
    # Turning apostrophes into spaces lets one ``split()`` handle both kinds
    # of break; ``split()`` never yields empty chunks.
    chunks = word.replace("'", " ").split()
    return [syllable for chunk in chunks for syllable in _split_piece(chunk)]
def _split_piece(piece):
    """Split an apostrophe-free Yale string into syllables.

    The text is NFD-normalized first, so the returned syllables are in NFD
    form. Syllable ends are located one after another by
    ``_find_syllable_end``.
    """
    text = unicodedata.normalize("NFD", piece)
    found = []
    cursor = 0
    while cursor < len(text):
        stop = _find_syllable_end(text, cursor)
        found.append(text[cursor:stop])
        cursor = stop
    return found
def _find_syllable_end(s, start):
    """Find the end index (exclusive) of the syllable starting at s[start].

    Yale low-tone 'h' placement depends on coda type:
    - stop/nasal coda (p/t/k/m/n/ng): h comes BEFORE coda -> nucleus + h + coda
    - glide coda (i/u/w): h comes AFTER coda -> nucleus + coda + h
    - no coda: h comes at end -> nucleus + h

    Args:
        s: Yale text to scan. The caller in this module, ``_split_piece``,
            passes an NFD-normalized string, so tone accents are separate
            combining characters.
        start: Index in ``s`` where the syllable begins.

    Returns:
        int: Index one past the last character of the syllable.

    Raises:
        ValueError: If no onset matches and ``s[start]`` is not a vowel
            letter, or if no nucleus can be consumed after the onset.
            ``_consume_nucleus`` may raise ``ValueError`` as well.
    """
    i = start
    n = len(s)
    # ---- onset ----
    # First match wins; _YALE_ONSETS_ORDERED lists two-letter onsets first.
    onset = ""
    for cand in _YALE_ONSETS_ORDERED:
        if s.startswith(cand, i):
            onset = cand
            break
    nucleus_start = i + len(onset)
    # Backtrack: 'm'/'ng' may be a syllabic nasal nucleus, not an onset.
    if nucleus_start >= n or s[nucleus_start] not in _YALE_VOWEL_LETTERS:
        if onset in ("m", "ng") and _looks_like_syllabic(s, i, onset):
            onset = ""
            nucleus_start = i
        elif onset == "" and i < n and s[i] in _YALE_VOWEL_LETTERS:
            pass  # vowel-initial syllable
        else:
            # A non-empty onset followed by a non-vowel is let through here;
            # the nucleus step below decides whether it is parseable.
            if onset == "":
                raise ValueError(
                    "cannot parse Yale syllable starting at "
                    + repr(unicodedata.normalize("NFC", s[i:]))
                )
    # Backtrack onset "y" if it's really the prefix of nucleus "yu" (Jyutping
    # "jyu"). The "yu" nucleus only combines with codas in {"", "n", "t"} in
    # real Cantonese, so only backtrack when the rest fits that shape.
    if onset == "y":
        test_end, test_raw = _consume_nucleus(s, i)
        # Compare base letters only; drop any combining tone mark.
        test_base = "".join(c for c in test_raw if unicodedata.category(c) != "Mn")
        if test_base == "yu" and _yu_compatible_tail(s, test_end):
            onset = ""
            nucleus_start = i
    # ---- nucleus ----
    nuc_end, nuc_raw = _consume_nucleus(s, nucleus_start)
    if nuc_end == nucleus_start:
        # _consume_nucleus signals "no nucleus" by returning its start index.
        raise ValueError(
            "cannot find nucleus in Yale syllable -- "
            + repr(unicodedata.normalize("NFC", s[i:]))
        )
    # Syllabic nasals (onset-less "m" and "ng") never take a coda in Yale/Jyutping.
    nuc_base = "".join(c for c in nuc_raw if unicodedata.category(c) != "Mn")
    is_syllabic_nasal = onset == "" and nuc_base in ("m", "ng")
    # ---- coda + h-marker, accounting for h placement ----
    pos = nuc_end
    if is_syllabic_nasal and (pos >= n or s[pos] != "h"):
        # No 'h' follows: definitely no coda (prevents greedily consuming the
        # next syllable's onset/nucleus as a coda, e.g. ng3+ng5 -> "ngnǵh").
        return pos
    if pos < n and s[pos] == "h":
        # h before stop/nasal coda (low-tone case), or h at syllable end
        after_h = pos + 1
        # "ng" is tried before "n" so the longer coda is preferred.
        for cand in ("ng", "p", "t", "k", "m", "n"):
            if s.startswith(cand, after_h):
                return after_h + len(cand)  # h + stop/nasal coda
        # No stop/nasal coda after h: h is either a low-tone marker (end of
        # syllable) or the onset of the next syllable (followed by a vowel).
        if after_h >= n or s[after_h] not in _YALE_VOWEL_LETTERS:
            return after_h  # low-tone h, no coda
        return pos  # h is the next syllable's onset
    else:
        # Try glide coda (h follows the coda for low tones)
        for cand in ("i", "u", "w"):
            if s.startswith(cand, pos):
                coda_end = pos + len(cand)
                if coda_end < n and s[coda_end] == "h":
                    after_h = coda_end + 1
                    # An h followed by a vowel is left for the next syllable.
                    if after_h >= n or s[after_h] not in _YALE_VOWEL_LETTERS:
                        return after_h  # glide coda + low-tone h
                return coda_end  # glide coda, no h
        # Try stop/nasal coda without h (tones 1-3)
        for cand in ("ng", "p", "t", "k", "m", "n"):
            if s.startswith(cand, pos):
                return pos + len(cand)
        return pos  # no coda
def _yu_compatible_tail(s, pos):
    """Tell whether ``s[pos:]`` can follow a Yale "yu" nucleus.

    The accepted shapes are: nothing at all, a lone low-tone ``h`` at the end
    of the string, or an optional ``h`` followed by coda ``t`` or by coda
    ``n`` that is not the start of ``ng``.

    Args:
        s: The string being parsed.
        pos: Index just past the "yu" nucleus.

    Returns:
        bool: True if the tail fits one of the shapes above.
    """
    length = len(s)
    if pos >= length:
        return True  # bare "yu" nucleus
    cursor = pos
    if s[cursor] == "h":
        # Step over the low-tone marker; it may also end the string.
        cursor += 1
        if cursor >= length:
            return True
    first = s[cursor]
    if first == "t":
        return True
    if first == "n":
        # "n" is acceptable only when it is not the first half of "ng".
        return cursor + 1 >= length or s[cursor + 1] != "g"
    return False
def _looks_like_syllabic(s, start, onset):
    """Tell whether an initial 'm' or 'ng' is really a syllabic nasal nucleus.

    Args:
        s: The string being parsed.
        start: Index where ``onset`` begins in ``s``.
        onset: The matched onset letters ("m" or "ng").

    Returns:
        bool: False only when a plain vowel letter follows the onset letters
        (onset + nucleus, e.g. "ma"). True when nothing follows, or when the
        next character is anything else -- a combining diacritic, an "h", or
        another consonant.
    """
    after = start + len(onset)
    if after >= len(s):
        return True
    # Combining marks, "h" and other consonants are all non-vowels, so a
    # single membership test covers every "syllabic" case.
    return s[after] not in _YALE_VOWEL_LETTERS
def _consume_nucleus(s, start):
    """Consume the nucleus that begins at ``s[start]``.

    Up to two base letters (vowel letters or one of "m", "n", "g", "y") are
    collected, each together with any combining marks that directly follow
    it. The collected base letters are then matched against
    ``_YALE_NUCLEI_ORDERED`` (two-letter nuclei first).

    Args:
        s: The string being parsed; callers in this module pass NFD text.
        start: Index at which the nucleus is expected to begin.

    Returns:
        tuple: ``(end_index, raw_nucleus)``. ``raw_nucleus`` is
        ``s[start:end_index]`` and still contains the combining marks, so the
        caller can extract the tone via ``_strip_diacritic``. When no nucleus
        is found the result is ``(start, "")``.

    Raises:
        ValueError: If ``s[start]`` is itself a combining mark.
    """
    n = len(s)
    if start >= n:
        return start, ""
    # Try multi-letter nuclei first (only against base letters, ignoring marks).
    base_seq = []
    spans = []  # parallel: end index after each base letter (including its mark)
    j = start
    while j < n and len(base_seq) < 2:
        ch = s[j]
        if unicodedata.category(ch) == "Mn":
            # standalone mark with no preceding base — error
            if not base_seq:
                raise ValueError("orphan combining mark at start of nucleus")
            # NOTE(review): this skip looks unreachable -- after each base
            # letter the inner loop below already steps over all its marks.
            j += 1
            continue
        if ch not in _YALE_VOWEL_LETTERS and ch not in ("m", "n", "g", "y"):
            break
        base_seq.append(ch)
        # Attach every combining mark that follows this base letter.
        k = j + 1
        while k < n and unicodedata.category(s[k]) == "Mn":
            k += 1
        spans.append(k)
        j = k
    if not base_seq:
        return start, ""
    base_str = "".join(base_seq)
    # Choose the longest matching nucleus from the longest-first list.
    for cand in _YALE_NUCLEI_ORDERED:
        L = len(cand)
        if base_str.startswith(cand):
            # Special: "ng" and "m" as nuclei are syllabic; only allowed as
            # nucleus when nothing follows that could be a vowel (caller
            # already enforces vowel-or-syllabic structure).
            # spans[L - 1] is the end of the last base letter of the match,
            # marks included.
            end = spans[L - 1]
            return end, s[start:end]
    # No multi-letter match: only fall back to single-letter nuclei.
    # NOTE(review): every letter tested here is also a single-letter entry of
    # _YALE_NUCLEI_ORDERED, so the loop above appears to return first and this
    # branch looks unreachable.
    if base_seq[0] in {"a", "e", "i", "o", "u", "m"}:
        end = spans[0]
        return end, s[start:end]
    # 'y', 'n', 'g' alone are not valid nuclei.
    return start, ""
def _build_jyutping(onset_yale, nucleus_yale, coda_yale, tone):
    """Assemble a Jyutping syllable from decomposed Yale pieces.

    Args:
        onset_yale: Yale onset ("" when there is none).
        nucleus_yale: Yale nucleus as base letters, without diacritic.
        coda_yale: Yale coda ("" when there is none; "w" is accepted).
        tone: Jyutping tone digit as a string.

    Returns:
        str: Jyutping onset + nucleus + coda + tone.

    Raises:
        ValueError: If the onset, coda or nucleus is not a known Yale value.
    """
    if onset_yale not in _ONSETS_JYUTPING:
        raise ValueError("unknown Yale onset -- " + repr(onset_yale))
    if coda_yale != "w" and coda_yale not in _CODAS_JYUTPING:
        raise ValueError("unknown Yale coda -- " + repr(coda_yale))

    # An onset-less "yu" nucleus is read as Jyutping "j" + "yu": Yale writes
    # Jyutping "jyu" without a separate "y" onset.
    if onset_yale == "" and nucleus_yale == "yu":
        onset_jp = "j"
    else:
        onset_jp = _ONSETS_JYUTPING[onset_yale]

    # Yale coda "w" is the respelling of Jyutping coda "u" after nucleus "e"
    # (Jyutping final "eu" is written "ew" in Yale).
    coda_jp = "u" if coda_yale == "w" else _CODAS_JYUTPING[coda_yale]

    if nucleus_yale == "eu":
        # Shared by Jyutping "oe" and "eo"; the coda decides.
        nucleus_jp = _resolve_eu(coda_yale)
    elif nucleus_yale == "a" and coda_jp == "":
        # Coda-less Yale "a" stands for Jyutping "aa"; with a coda, "a" stays
        # "a" via the table below.
        nucleus_jp = "aa"
    elif nucleus_yale in _NUCLEI_JYUTPING_UNAMBIGUOUS:
        nucleus_jp = _NUCLEI_JYUTPING_UNAMBIGUOUS[nucleus_yale]
    else:
        raise ValueError("unknown Yale nucleus -- " + repr(nucleus_yale))

    return f"{onset_jp}{nucleus_jp}{coda_jp}{tone}"
def _convert_syllable(raw):
    """Convert a single NFD-form Yale syllable into a Jyutping string.

    The syllable is decomposed into onset, nucleus (with tone accent),
    optional low-tone ``h`` and coda, using the same onset/nucleus
    backtracking rules as ``_find_syllable_end``; the pieces are then handed
    to ``_tone_from`` and ``_build_jyutping``.

    Args:
        raw: One Yale syllable in NFD form (as produced by ``_split_piece``).

    Returns:
        str: The Jyutping syllable, tone digit included.

    Raises:
        ValueError: If no onset or nucleus can be parsed, or if
            ``_consume_nucleus``, ``_strip_diacritic``, ``_tone_from`` or
            ``_build_jyutping`` rejects a piece.

    NOTE(review): characters after the recognized coda / ``h`` are not
    checked here; the function relies on ``raw`` being exactly one syllable.
    """
    n = len(raw)
    # ---- onset ----
    # First match wins; _YALE_ONSETS_ORDERED lists two-letter onsets first.
    onset = ""
    for cand in _YALE_ONSETS_ORDERED:
        if raw.startswith(cand):
            onset = cand
            break
    nucleus_start = len(onset)
    # Backtrack: 'm'/'ng' may be a syllabic nasal nucleus.
    if nucleus_start >= n or raw[nucleus_start] not in _YALE_VOWEL_LETTERS:
        if onset in ("m", "ng") and _looks_like_syllabic(raw, 0, onset):
            onset = ""
            nucleus_start = 0
        elif onset == "" and raw and raw[0] in _YALE_VOWEL_LETTERS:
            pass
        else:
            # A non-empty onset followed by a non-vowel is let through; the
            # nucleus step below decides whether it is parseable.
            if onset == "":
                raise ValueError(
                    "cannot parse Yale syllable -- "
                    + repr(unicodedata.normalize("NFC", raw))
                )
    # Backtrack onset "y" if it's really the prefix of nucleus "yu".
    if onset == "y":
        test_end, test_raw = _consume_nucleus(raw, 0)
        # Compare base letters only; drop any combining tone mark.
        test_base = "".join(c for c in test_raw if unicodedata.category(c) != "Mn")
        if test_base == "yu" and _yu_compatible_tail(raw, test_end):
            onset = ""
            nucleus_start = 0
    # ---- nucleus ----
    nuc_end, nuc_chars = _consume_nucleus(raw, nucleus_start)
    if nuc_end == nucleus_start:
        # _consume_nucleus signals "no nucleus" by returning its start index.
        raise ValueError(
            "cannot parse Yale syllable -- " + repr(unicodedata.normalize("NFC", raw))
        )
    base_nucleus, mark = _strip_diacritic(nuc_chars)
    # ---- coda + h-marker ----
    # Yale low-tone 'h' placement: BEFORE stop/nasal coda, AFTER glide coda.
    # Syllabic nasals (no onset) never take a coda.
    is_syllabic_nasal = onset == "" and base_nucleus in ("m", "ng")
    pos = nuc_end
    has_h = False
    coda = ""
    if is_syllabic_nasal and (pos >= n or raw[pos] != "h"):
        # No 'h' follows: no coda (e.g. ng3 or m3 standalone syllables).
        tone = _tone_from(mark, has_h)
        return _build_jyutping(onset, base_nucleus, coda, tone)
    if pos < n and raw[pos] == "h":
        # "h" right after the nucleus: low-tone marker, optionally followed
        # by a stop/nasal coda ("ng" is tried before "n").
        after_h = pos + 1
        for cand in ("ng", "p", "t", "k", "m", "n"):
            if raw.startswith(cand, after_h):
                has_h = True
                coda = cand
                break
        else:
            # for-else: no stop/nasal coda followed the h.
            has_h = True
            coda = ""
    else:
        # No h directly after the nucleus: look for a glide coda first; a
        # low-tone h, if any, comes after the glide.
        for cand in ("i", "u", "w"):
            if raw.startswith(cand, pos):
                coda = cand
                coda_end = pos + len(cand)
                if coda_end < n and raw[coda_end] == "h":
                    has_h = True
                break
        else:
            # for-else: no glide coda, so try a stop/nasal coda without h.
            for cand in ("ng", "p", "t", "k", "m", "n"):
                if raw.startswith(cand, pos):
                    coda = cand
                    break
    tone = _tone_from(mark, has_h)
    return _build_jyutping(onset, base_nucleus, coda, tone)
[docs]
def yale_to_jyutping(yale: str | list[str]) -> list[str]:
    """Convert Yale romanization into Jyutping romanization.

    This is the inverse of :func:`jyutping_to_yale` and expects Yale written
    with tone diacritics and the low-tone ``h`` (the form that
    ``jyutping_to_yale`` produces).

    Args:
        yale (str or list[str]): Either one Yale string, treated as a single
            word, or a list of such strings where each element is one word.
            Within a word, whitespace and apostrophes ``'`` are both accepted
            as syllable-boundary hints; neither starts a new word. Use a
            ``list[str]`` to express word boundaries.

    Returns:
        list[str]: One element per input word; each element holds that
        word's Jyutping syllables separated by a single space. Empty input
        yields an empty list.

    Raises:
        ValueError: If the Yale romanization is illegal (e.g., with
            unrecognized elements or a missing low-tone marker on a tone-4
            grave-accented syllable).

    Examples:
        >>> yale_to_jyutping("gwóngdūngwá")  # 廣東話, Cantonese
        ['gwong2 dung1 waa2']
        >>> yale_to_jyutping(["gāmyaht", "góng", "gwóngdūngwá"])
        ['gam1 jat6', 'gong2', 'gwong2 dung1 waa2']
    """
    if not yale:
        return []
    # A bare string is a single word; a list already carries word boundaries.
    if isinstance(yale, str):
        words = [yale]
    else:
        words = yale
    return [
        " ".join(
            _convert_syllable(syllable) for syllable in _split_word_syllables(word)
        )
        for word in words
    ]