-- Modul:es-pronunc
--
-- Documentation for this module may be created at Modul:es-pronunc/belge
local export = {}

local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len

-- Single-result wrapper around rsubn(): performs the substitution and returns only the
-- resulting string, dropping the replacement count.
local function rsub(term, foo, bar)
	-- the extra parentheses truncate rsubn()'s multiple results to the first one
	return (rsubn(term, foo, bar))
end

-- Keep applying rsub() with the same pattern and replacement until a pass leaves the
-- string unchanged, then return that stable string.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- Convert Spanish text to an IPA transcription.
--
-- ɟ, ʂ, and ʃ are used internally to represent [ʝ⁓ɟ͡ʝ], [ʃ], and [t͡ʃ]; "ï" stands for word-final "y".
-- While processing, "#" marks word boundaries ("##" at the text edges) and "." marks syllable
-- boundaries; the placeholders are converted or removed at the end.
--
-- Parameters:
--   text         - string to transcribe, or a table with an `args` field, in which case args[1] is
--                  the text and args[4] replaces do_debug. If the text is nil, the current page
--                  title is used.
--   LatinAmerica - truthy: "c" before e/i and "z" become s, "ll" becomes ɟ;
--                  falsy: they become θ and ʎ respectively.
--   phonetic     - truthy: also apply the allophonic rules (voicing, fricative/stop alternation,
--                  nasal/lateral assimilation, nasalization, semivowels) and keep syllable dots.
--   do_debug     - when equal to the string "yes", the intermediate stages are appended to the result.
-- Returns the transcription as a string.
function export.show(text, LatinAmerica, phonetic, do_debug)
	-- snapshots of intermediate stages (named debug_log to avoid shadowing the `debug` library)
	local debug_log = {}

	if type(text) == "table" then
		do_debug = text.args[4]
		text = text.args[1]
	end
	text = ulower(text or mw.title.getCurrentTitle().text)
	-- drop everything except letters of the Spanish alphabet, spaces and explicit syllable dots
	text = rsub(text, "[^ abcdefghijklmnopqrstuvwxyzáéíóúüñ.]", "")
	-- put # at word beginning and end and double ## at text beginning/end
	text = "##" .. rsub(text, " ", "# #") .. "##"

	table.insert(debug_log, text)

	local V = "[aeiouáéíóú]" -- vowel
	local W = "[jw]"
	local C = "[^aeiouáéíóú.# ]" -- consonant
	local T = "[^aeiouáéíóú.# hlrɾjw]" -- obstruent or nasal
	--determining whether "y" is a consonant or a vowel + diphthongs, "-mente" suffix
	text = rsub(text, "y(" .. C .. ")", "i%1")
	text = rsub(text, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	text = rsub(text, "hi(" .. V .. ")", "ɟ%1")
	text = rsub(text, "y#", "ï")
	-- give "-mente" its own written accent so it receives a stress mark; the word-final "#" is kept so
	-- that every word stays delimited by "#" like all other words
	text = rsub(text, "mente#", "ménte#")

	--x
	text = rsub(text, "x", "ks")

	--"c" & "g" before "i" and "e" and all that stuff
	text = rsub(text, "c([ieíé])", (LatinAmerica and "s" or "θ") .. "%1")
	text = rsub(text, "gü([ieíé])", "ɡw%1")
	text = rsub(text, "ü", "")
	text = rsub(text, "gu([ieíé])", "ɡ%1")
	text = rsub(text, "g([ieíé])", "x%1")
	-- "c" here is a temporary spelling; it is turned into "k" by the letter table below
	text = rsub(text, "qu([ieíé])", "c%1")
	text = rsub(text, "qu", "kw")

	table.insert(debug_log, text)

	--alphabet-to-phoneme
	text = rsub(text, "ch", "ʃ") --not the real sound
	-- We want to keep desh- ([[deshuesar]]) as-is. Converting to des- won't work because we want it syllabified as
	-- 'des.we.saɾ' not #'de.swe.saɾ' (cf. [[desuelo]] /de.swe.lo/ from [[desolar]]).
	text = rsub(text, "#desh", "!") --temporary symbol
	text = rsub(text, "sh", "ʂ") --not the real sound
	text = rsub(text, "!", "#desh") --restore
	text = rsub(text, "#p([st])", "%1") -- [[psicología]], [[pterodáctilo]]
	-- "y" is in the character class but has no table entry, so a matched "y" is left unchanged
	text = rsub(text, "[cgjñrvy]",
		--["g"]="ɡ":  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{["c"]="k", ["g"]="ɡ", ["j"]="x", ["ñ"]="ɲ", ["r"]="ɾ", ["v"]="b" })

	-- voiceless stop to voiced before obstruent or nasal
	local voice_stop = { ["p"] = "b", ["t"] = "d", ["k"] = "ɡ" }
	text = rsub(text, "([ptk])([# .]*" .. T .. ")",
		function(stop, after) return voice_stop[stop] .. after end)

	-- trill in #r, lr, nr, sr, rr
	text = rsub(text, "ɾɾ", "r")
	-- FIXME: does this also apply to /θr/ (e.g. [[Azrael]], [[cruzrojista]])?
	text = rsub(text, "([#lns])ɾ", "%1r")

	-- double l
	text = rsub(text, "ll", LatinAmerica and "ɟ" or "ʎ")

	-- reduce any remaining double consonants (Addis Abeba, cappa, ...); do this before handling of -nm-
	-- e.g. in [[inmigración]], which generates a double consonant.
	-- Vowels are excluded from the class: identical adjacent vowels ([[leer]], [[cooperar]]) form a hiatus
	-- and must survive until the syllable-division rules below split them.
	text = rsub(text, "([^#aeiouáéíóú])%1", "%1")

	text = rsub(text, "z", LatinAmerica and "z" or "θ") -- not the real LatAm sound
	text = rsub(text, "n([# .]*[bm])", "m%1")

	table.insert(debug_log, text)

	--syllable division
	text = rsub_repeatedly(text, "(" .. V .. ")(" .. C .. W .. "?" .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	-- stop + liquid clusters belong to the following syllable
	text = rsub(text, "([pbktdɡ])%.([lɾ])", ".%1%2")
	-- consonant + s + consonant: the s closes the preceding syllable
	text = rsub(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- hiatus between adjacent non-high or accented vowels, and between two i's or two u's
	text = rsub(text, "([aeoáéíóú])([aeoáéíóú])", "%1.%2")
	text = rsub(text, "([ií])([ií])", "%1.%2")
	text = rsub(text, "([uú])([uú])", "%1.%2")

	table.insert(debug_log, text)

	--diphthongs
	text = rsub(text, "ih?([aeouáéóú])", "j%1")
	text = rsub(text, "uh?([aeioáéíó])", "w%1")

	table.insert(debug_log, text)

	-- lookup tables used for every word (built once, outside the loop)
	local remove_accent = { ["á"] = "a", ["é"] = "e", ["í"] = "i", ["ó"] = "o", ["ú"] = "u"}
	local nasalize = { ["a"] = "ã", ["e"] = "ẽ", ["i"] = "ĩ", ["o"] = "õ", ["u"] = "ũ" }

	local words = rsplit(text, " ")
	for j, word in ipairs(words) do
		-- accentuation
		local syllables = rsplit(word, "%.")
		if rfind(word, "[áéíóú]") then
			-- written accent: mark every syllable that carries one
			for i = 1, #syllables do
				if rfind(syllables[i], "[áéíóú]") then
					syllables[i] = "ˈ" .. syllables[i]
				end
			end
		else
			if rfind(word, "[^aeiouns#]#") then
				-- word ends in a consonant other than n/s: stress the last syllable
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			else
				-- otherwise stress the penultimate syllable; monosyllables get no mark
				if #syllables > 1 then
					syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
				end
			end
		end

		-- strip written accents; in phonetic mode, nasalize the vowels of syllables that end in a nasal
		-- (optionally followed by further consonants)
		for i = 1, #syllables do
			syllables[i] = rsub(syllables[i], "[áéíóú]", remove_accent)
			if phonetic and (
				rfind(syllables[i], "[mnɲ]" .. C .. "*#") or rfind(syllables[i], "[mnɲ]" .. C .. "*$")
			) then
				syllables[i] = rsub(syllables[i], "[aeiou]", nasalize)
			end
		end
		-- syllable dots are kept only in the phonetic transcription
		words[j] = table.concat(syllables, phonetic and "." or "")
	end

	text = table.concat(words, " ")

	--real sound of LatAm Z
	text = rsub(text, "z", "s")
	--secondary stress
	text = rsub(text, "ˈ(.+)ˈ", "ˌ%1ˈ")
	text = rsub(text, "ˈ(.+)ˌ", "ˌ%1ˌ")
	text = rsub(text, "ˌ(.+)ˈ(.+)ˈ", "ˌ%1ˌ%2ˈ")

	--phonetic transcription
	if phonetic then
		-- θ, s, f before voiced consonants
		local voiced = "mnɲbdɟɡʎ"
		local r = "ɾr"
		local tovoiced = {
			["θ"] = "θ̬",
			["s"] = "z",
			["f"] = "v",
		}
		local function voice(sound, following)
			return tovoiced[sound] .. following
		end
		text = rsub(text, "([θs])([ˈˌ# .]*[" .. voiced .. r .. "])", voice)
		text = rsub(text, "(f)([ˈˌ# .]*[" .. voiced .. "])", voice)

		-- fricative vs. stop allophones; first convert stops to fricatives, then back to stops
		-- after nasals and sometimes after l
		local stop_to_fricative = {["b"] = "β", ["d"] = "ð", ["ɟ"] = "ʝ", ["ɡ"] = "ɣ"}
		local fricative_to_stop = {["β"] = "b", ["ð"] = "d", ["ʝ"] = "ɟ", ["ɣ"] = "ɡ"}
		text = rsub(text, "[bdɟɡ]", stop_to_fricative)
		text = rsub(text, "([mnɲ][ˈˌ# .]*)([βɣ])",
			function(nasal, fricative) return nasal .. fricative_to_stop[fricative] end
		)
		text = rsub(text, "([lʎmnɲ][ˈˌ# .]*)([ðʝ])",
			function(nasal_l, fricative) return nasal_l .. fricative_to_stop[fricative] end
		)
		-- stops are also restored at the very beginning of the text
		text = rsub(text, "(##[ˈˌ]*)([βɣðʝ])",
			function(stress, fricative) return stress .. fricative_to_stop[fricative] end
		)
		text = rsub(text, "[td]", {["t"] = "t̪", ["d"] = "d̪"})

		-- nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
			"ɱ", "n̪", "n̟", "nʲ", "ɲ", "ŋ"
		local nasal_assimilation = {
			["f"] = labiodental,
			["t"] = dentialveolar, ["d"] = dentialveolar,
			["θ"] = dental,
			["ʃ"] = alveolopalatal,
			["ʂ"] = alveolopalatal,
			["ɟ"] = palatal, ["ʎ"] = palatal,
			["k"] = velar, ["x"] = velar, ["ɡ"] = velar,
		}
		text = rsub(text, "n([ˈˌ# .]*)(.)",
			function(stress, following) return (nasal_assimilation[following] or "n") .. stress .. following end
		)

		-- lateral assimilation before consonants
		text = rsub(text, "l([ˈˌ# .]*)(.)",
			function(stress, following)
				local l = "l"
				if following == "t" or following == "d" then -- dentialveolar
					l = "l̪"
				elseif following == "θ" then -- dental
					l = "l̟"
				elseif following == "ʃ" then -- alveolopalatal
					l = "lʲ"
				end
				return l .. stress .. following
			end)

		--semivowels
		text = rsub(text, "([aeouãẽõũ][iïĩ])", "%1̯")
		text = rsub(text, "([aeioãẽĩõ][uũ])", "%1̯")
	end

	table.insert(debug_log, text)

	-- remove silent "h" and convert fake symbols to real ones
	local final_conversions =  {
		["h"] =  "", -- silent "h"
		["ʃ"] = "t͡ʃ", -- fake "ch" to real "ch"
		["ʂ"] = "ʃ", -- fake "sh" to real "sh"
		["ɟ"] = "ɟ͡ʝ", -- fake "y" to real "y"
		["ï"] = "i", -- fake "y$" to real "y$"
	}
	text = rsub(text, "[hʃʂɟï]", final_conversions)

	-- remove # symbols at word and text boundaries
	text = rsub(text, "#", "")

	if do_debug == "yes" then
		return text .. table.concat(debug_log, "")
	else
		return text
	end
end

-- Calls export.show() on `frame` with the LatinAmerica flag set and the phonetic flag left unset.
function export.LatinAmerica(frame)
	local latin_america = true
	return export.show(frame, latin_america)
end

-- Calls export.show() on `frame` with the LatinAmerica flag off and the phonetic flag on.
function export.phonetic(frame)
	local latin_america, phonetic = false, true
	return export.show(frame, latin_america, phonetic)
end

-- Calls export.show() on `frame` with both the LatinAmerica and the phonetic flags on.
function export.phoneticLatinAmerica(frame)
	local latin_america, phonetic = true, true
	return export.show(frame, latin_america, phonetic)
end

return export