Modul:es-pronunc
Xuyakirin
Documentation for this module may be created at Modul:es-pronunc/belge
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Short local aliases for the mw.ustring / mw.text library functions used below.
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len
-- Substitute like rsubn(), but hand back only its first return value
-- (the resulting string); any further return values are dropped.
local function rsub(term, foo, bar)
	-- The surrounding parentheses truncate the call to exactly one value.
	return (rsubn(term, foo, bar))
end
-- Keep applying rsub(term, foo, bar) to its own output until a pass leaves
-- the string unchanged, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
--- Convert Spanish spelling into an IPA transcription string.
--
-- The text is pushed through an ordered series of substitutions; the order
-- matters because later rules rely on symbols produced by earlier ones.
-- ɟ, ʂ, and ʃ are used internally to represent [ʝ⁓ɟ͡ʝ], [ʃ], and [t͡ʃ];
-- ï stands for word-final "y" and "#" marks word boundaries. All of these
-- are converted or removed before returning.
--
-- @param text          Either the string to transcribe or a table with an
--                      `args` field (as passed by #invoke). For a table,
--                      args[1] is the text and args[4] replaces `do_debug`.
--                      When the text is nil or empty, the current page title
--                      is transcribed instead.
-- @param LatinAmerica  If truthy: "c" before i/e becomes "s" (otherwise "θ"),
--                      "ll" becomes ɟ (otherwise ʎ) and "z" ends up as "s"
--                      (otherwise "θ").
-- @param phonetic      If truthy: syllables are joined with "." and the
--                      allophonic rules (nasalization, voicing, fricative/stop
--                      alternation, assimilation, semivowels) are applied.
-- @param do_debug      If equal to "yes", the recorded intermediate stages are
--                      appended (without separators) to the returned string.
-- @return the transcription as a string
function export.show(text, LatinAmerica, phonetic, do_debug)
	-- Intermediate stages, only emitted when do_debug == "yes". Deliberately
	-- not called `debug`, which would shadow Lua's standard debug library.
	local debug_steps = {}

	if type(text) == "table" then
		do_debug = text.args[4]
		text = text.args[1]
	end

	-- An empty string is truthy in Lua, so `text or title` would not fall back
	-- for an empty argument (e.g. from `{{{1|}}}`); test for it explicitly.
	if text == nil or text == "" then
		text = mw.title.getCurrentTitle().text
	end
	text = ulower(text)
	-- drop every character that is not a handled letter, a space or "."
	text = rsub(text, "[^ abcdefghijklmnopqrstuvwxyzáéíóúüñ.]", "")

	-- put # at word beginning and end and double ## at text beginning/end
	text = "##" .. rsub(text, " ", "# #") .. "##"
	table.insert(debug_steps, text)

	local V = "[aeiouáéíóú]" -- vowel
	local W = "[jw]" -- glide
	local C = "[^aeiouáéíóú.# ]" -- consonant
	local T = "[^aeiouáéíóú.# hlrɾjw]" -- obstruent or nasal

	--determining whether "y" is a consonant or a vowel + diphthongs, "-mente" suffix
	text = rsub(text, "y(" .. C .. ")", "i%1")
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text, "hi(" .. V .. ")", "ɟ%1")
	-- NOTE(review): the next two replacements also consume the trailing "#",
	-- so the affected word loses one boundary marker. For a word that is not
	-- last in the text, the default-stress test "[^aeiouns#]#" below then no
	-- longer sees a final "ï#" -- confirm whether this is intended.
	text = rsub(text, "y#", "ï")
	text = rsub(text, "mente#", "ménte")

	--x
	text = rsub(text, "x", "ks")

	--"c" & "g" before "i" and "e" and all that stuff
	text = rsub(text, "c([ieíé])", (LatinAmerica and "s" or "θ") .. "%1")
	text = rsub(text, "gü([ieíé])", "ɡw%1")
	text = rsub(text, "ü", "")
	text = rsub(text, "gu([ieíé])", "ɡ%1")
	text = rsub(text, "g([ieíé])", "x%1")
	-- "c" here is temporary; it becomes "k" in the alphabet-to-phoneme table
	text = rsub(text, "qu([ieíé])", "c%1")
	text = rsub(text, "qu", "kw")
	table.insert(debug_steps, text)

	--alphabet-to-phoneme
	text = rsub(text, "ch", "ʃ") --not the real sound
	-- We want to keep desh- ([[deshuesar]]) as-is. Converting to des- won't work because we want it syllabified as
	-- 'des.we.saɾ' not #'de.swe.saɾ' (cf. [[desuelo]] /de.swe.lo/ from [[desolar]]).
	text = rsub(text, "#desh", "!") --temporary symbol
	text = rsub(text, "sh", "ʂ") --not the real sound
	text = rsub(text, "!", "#desh") --restore
	text = rsub(text, "#p([st])", "%1") -- [[psicología]], [[pterodáctilo]]
	-- Letters matched by the set but absent from the table (here "y") are
	-- left unchanged by the substitution.
	text = rsub(text, "[cgjñrvy]",
		--["g"]="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{["c"]="k", ["g"]="ɡ", ["j"]="x", ["ñ"]="ɲ", ["r"]="ɾ", ["v"]="b" })

	-- voiceless stop to voiced before obstruent or nasal
	local voice_stop = { ["p"] = "b", ["t"] = "d", ["k"] = "ɡ" }
	text = rsub(text, "([ptk])([# .]*" .. T .. ")",
		function(stop, after) return voice_stop[stop] .. after end)

	-- trill in #r, lr, nr, sr, rr
	text = rsub(text, "ɾɾ", "r")
	-- FIXME: does this also apply to /θr/ (e.g. [[Azrael]], [[cruzrojista]])?
	text = rsub(text, "([#lns])ɾ", "%1r")

	-- double l
	text = rsub(text, "ll", LatinAmerica and "ɟ" or "ʎ")

	-- reduce any remaining double consonants (Addis Abeba, cappa, ...); do this before handling of -nm-
	-- e.g. in [[inmigración]], which generates a double consonant
	text = rsub(text, "([^#])%1", "%1")

	-- In the Latin American case "z" is kept as-is for now and only turned
	-- into "s" after stress assignment.
	text = rsub(text, "z", LatinAmerica and "z" or "θ") -- not the real LatAm sound
	text = rsub(text, "n([# .]*[bm])", "m%1")
	table.insert(debug_steps, text)

	--syllable division
	text = rsub_repeatedly(text, "(" .. V .. ")(" .. C .. W .. "?" .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub(text, "([pbktdɡ])%.([lɾ])", ".%1%2")
	text = rsub(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	text = rsub(text, "([aeoáéíóú])([aeoáéíóú])", "%1.%2")
	text = rsub(text, "([ií])([ií])", "%1.%2")
	text = rsub(text, "([uú])([uú])", "%1.%2")
	table.insert(debug_steps, text)

	--diphthongs
	text = rsub(text, "ih?([aeouáéóú])", "j%1")
	text = rsub(text, "uh?([aeioáéíó])", "w%1")
	table.insert(debug_steps, text)

	-- Lookup tables for the per-syllable pass; built once rather than per word.
	local remove_accent = { ["á"] = "a", ["é"] = "e", ["í"] = "i", ["ó"] = "o", ["ú"] = "u"}
	local nasalize = { ["a"] = "ã", ["e"] = "ẽ", ["i"] = "ĩ", ["o"] = "õ", ["u"] = "ũ" }

	local words = rsplit(text, " ")
	for j, word in ipairs(words) do
		-- accentuation
		local syllables = rsplit(word, "%.")
		if rfind(word, "[áéíóú]") then
			-- written accent: mark every syllable that carries one
			for i = 1, #syllables do
				if rfind(syllables[i], "[áéíóú]") then
					syllables[i] = "ˈ" .. syllables[i]
				end
			end
		else
			if rfind(word, "[^aeiouns#]#") then
				-- word ends in something other than a vowel, "n" or "s":
				-- mark the last syllable
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			else
				-- otherwise mark the penultimate syllable, if there is one
				if #syllables > 1 then
					syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
				end
			end
		end

		-- strip the written accents; in phonetic mode also nasalize the vowels
		-- of any syllable that ends in a nasal plus optional further consonants
		for i = 1, #syllables do
			syllables[i] = rsub(syllables[i], "[áéíóú]", remove_accent)
			if phonetic and (
				rfind(syllables[i], "[mnɲ]" .. C .. "*#") or rfind(syllables[i], "[mnɲ]" .. C .. "*$")
			) then
				syllables[i] = rsub(syllables[i], "[aeiou]", nasalize)
			end
		end
		-- syllable dots are only kept in the phonetic transcription
		words[j] = table.concat(syllables, phonetic and "." or "")
	end
	text = table.concat(words, " ")

	--real sound of LatAm Z
	text = rsub(text, "z", "s")

	--secondary stress
	text = rsub(text, "ˈ(.+)ˈ", "ˌ%1ˈ")
	text = rsub(text, "ˈ(.+)ˌ", "ˌ%1ˌ")
	text = rsub(text, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")

	--phonetic transcription
	if phonetic then
		-- θ, s, f before voiced consonants
		local voiced = "mnɲbdɟɡʎ"
		local r = "ɾr"
		local tovoiced = {
			["θ"] = "θ̬",
			["s"] = "z",
			["f"] = "v",
		}
		local function voice(sound, following)
			return tovoiced[sound] .. following
		end
		text = rsub(text, "([θs])([ˈˌ# .]*[" .. voiced .. r .. "])", voice)
		text = rsub(text, "(f)([ˈˌ# .]*[" .. voiced .. "])", voice)

		-- fricative vs. stop allophones; first convert stops to fricatives, then back to stops
		-- after nasals and sometimes after l
		local stop_to_fricative = {["b"] = "β", ["d"] = "ð", ["ɟ"] = "ʝ", ["ɡ"] = "ɣ"}
		local fricative_to_stop = {["β"] = "b", ["ð"] = "d", ["ʝ"] = "ɟ", ["ɣ"] = "ɡ"}
		text = rsub(text, "[bdɟɡ]", stop_to_fricative)
		text = rsub(text, "([mnɲ][ˈˌ# .]*)([βɣ])",
			function(nasal, fricative) return nasal .. fricative_to_stop[fricative] end
		)
		text = rsub(text, "([lʎmnɲ][ˈˌ# .]*)([ðʝ])",
			function(nasal_l, fricative) return nasal_l .. fricative_to_stop[fricative] end
		)
		-- also a stop at the very beginning of the text
		text = rsub(text, "(##[ˈˌ]*)([βɣðʝ])",
			function(stress, fricative) return stress .. fricative_to_stop[fricative] end
		)
		text = rsub(text, "[td]", {["t"] = "t̪", ["d"] = "d̪"})

		-- nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
			"ɱ", "n̪", "n̟", "nʲ", "ɲ", "ŋ"
		local nasal_assimilation = {
			["f"] = labiodental,
			["t"] = dentialveolar, ["d"] = dentialveolar,
			["θ"] = dental,
			["ʃ"] = alveolopalatal,
			["ʂ"] = alveolopalatal,
			["ɟ"] = palatal, ["ʎ"] = palatal,
			["k"] = velar, ["x"] = velar, ["ɡ"] = velar,
		}
		-- `following` is the single character after any stress/boundary marks;
		-- characters without a table entry leave the "n" unchanged
		text = rsub(text, "n([ˈˌ# .]*)(.)",
			function(stress, following) return (nasal_assimilation[following] or "n") .. stress .. following end
		)

		-- lateral assimilation before consonants
		text = rsub(text, "l([ˈˌ# .]*)(.)",
			function(stress, following)
				local l = "l"
				if following == "t" or following == "d" then -- dentialveolar
					l = "l̪"
				elseif following == "θ" then -- dental
					l = "l̟"
				elseif following == "ʃ" then -- alveolopalatal
					l = "lʲ"
				end
				return l .. stress .. following
			end)

		--semivowels
		text = rsub(text, "([aeouãẽõũ][iïĩ])", "%1̯")
		text = rsub(text, "([aeioãẽĩõ][uũ])", "%1̯")
	end
	table.insert(debug_steps, text)

	-- remove silent "h" and convert fake symbols to real ones
	local final_conversions = {
		["h"] = "", -- silent "h"
		["ʃ"] = "t͡ʃ", -- fake "ch" to real "ch"
		["ʂ"] = "ʃ", -- fake "sh" to real "sh"
		["ɟ"] = "ɟ͡ʝ", -- fake "y" to real "y"
		["ï"] = "i", -- fake "y$" to real "y$"
	}
	text = rsub(text, "[hʃʂɟï]", final_conversions)

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")

	if do_debug == "yes" then
		return text .. table.concat(debug_steps, "")
	else
		return text
	end
end
-- Entry point: transcription with the LatinAmerica flag of export.show set
-- and the phonetic flag left unset.
-- `frame` is handed to export.show unchanged as its first argument.
function export.LatinAmerica(frame)
	local latin_america = true
	return export.show(frame, latin_america)
end
-- Entry point: transcription with the phonetic flag of export.show set
-- and the LatinAmerica flag explicitly false.
-- `frame` is handed to export.show unchanged as its first argument.
function export.phonetic(frame)
	local latin_america, phonetic = false, true
	return export.show(frame, latin_america, phonetic)
end
-- Entry point: transcription with both the LatinAmerica and the phonetic
-- flags of export.show set.
-- `frame` is handed to export.show unchanged as its first argument.
function export.phoneticLatinAmerica(frame)
	local latin_america, phonetic = true, true
	return export.show(frame, latin_america, phonetic)
end
return export