-- Módulo:generar-pron/ca
--
-- La documentación para este módulo puede ser creada en Módulo:generar-pron/ca/doc
local export = {}

local insert = table.insert
local concat = table.concat

local m_table = require("Módulo:tabla")
local listToSet = m_table.listToSet

-- Join every key of `tab` into one string with no delimiter. Keys are visited with pairs(), so the order in which
-- they appear in the result is unspecified.
local function concat_keys(tab)
	local keys = {}
	for key in pairs(tab) do
		keys[#keys + 1] = key
	end
	return concat(keys)
end

-- Join every value of `tab` into one string with no delimiter. Values are visited with pairs(), so the order in which
-- they appear in the result is unspecified.
local function concat_vals(tab)
	local values = {}
	for _, value in pairs(tab) do
		values[#values + 1] = value
	end
	return concat(values)
end

local m_str = require("Módulo:String")

local substr = m_str.sub
local strfind = m_str.find
local strmatch = m_str.match
local strsplit = m_str.split
local strsubn = m_str.gsub
local strsubb = m_str.gsubb
local strsubrep = m_str.gsub_rep
local strlower = m_str.lower
local strstrip = m_str.strip
local strmatchit = m_str.gmatch
local u = m_str.char
local strexplode = m_str.explode_utf8
local strnfc = m_str.toNFC
local strhtml = m_str.encode_html

-- Wrapper around strsubn() that yields only its first return value (the resulting string); the parentheses around
-- the call truncate the result list to exactly one value.
local function strsub(term, foo, bar)
	return (strsubn(term, foo, bar))
end

-- Character class of punctuation that delimits fragments; normalizar() replaces each match with " | ".
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Same class extended with quotation marks; normalizar() deletes any of these still present after the step above.
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹]"

-- Dialect codes compared against the `dialect` argument of the functions below.
local BALEARICO = "bal"
local CENTRAL = "cen"
local VALENCIANO = "val"

-- Vowel inventories. Names ending in `_l` are bare lists of characters meant to be spliced into a Lua pattern
-- character class; `V` is the ready-made class matching any written or IPA vowel.
local written_unaccented_vowel_l = "aeiouyAEIOUY"
-- Written vowels whose diacritic marks stress (split_syllables() uses this list to detect stressed syllables).
local written_stressed_vowel_l = "àèéêëíòóôúýÀÈÉÊËÍÒÓÔÚÝ"
-- Written vowels with a diaeresis, which does not mark stress.
local written_accented_not_stressed_vowel_l = "ïüÏÜ"
local written_accented_vowel_l = written_stressed_vowel_l .. written_accented_not_stressed_vowel_l
local ipa_vowel_l = "ɔɛə"
local written_vowel_l = written_unaccented_vowel_l .. written_accented_vowel_l
local vowel_l = written_vowel_l .. ipa_vowel_l
local V = "[" .. vowel_l .. "]"

-- Maps each accented written vowel (lowercase and uppercase) to the same letter without its diacritic.
-- NOTE(review): no use of this table is visible in this part of the file.
local written_accented_to_plain_vowel = {
	["à"] = "a",
	["è"] = "e",
	["é"] = "e",
	["ê"] = "e",
	["ë"] = "e",
	["í"] = "i",
	["ï"] = "i",
	["ò"] = "o",
	["ó"] = "o",
	["ô"] = "o",
	["ú"] = "u",
	["ü"] = "u",
	["ý"] = "y",
	["À"] = "A",
	["È"] = "E",
	["É"] = "E",
	["Ê"] = "E",
	["Ë"] = "E",
	["Í"] = "I",
	["Ï"] = "I",
	["Ò"] = "O",
	["Ó"] = "O",
	["Ô"] = "O",
	["Ú"] = "U",
	["Ü"] = "U",
	["Ý"] = "Y",
}

-- Combining diacritics, built from their code points with u().
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex accent
local DOTOVER = u(0x0307) -- combining dot above; marks a word as unstressed (see handle_unstressed_words())
local DIA = u(0x0308) -- combining diaeresis
local LINEUNDER = u(0x0331) -- combining macron below; a stressed vowel carrying it is not taken as primary stress

-- Separator inventories. As with the vowel lists, `_l` names are bare character lists for splicing into a pattern
-- character class and `_c` names are the corresponding complete classes.
local stress_l = AC .. GR
local stress_c = "[" .. stress_l .. "]"
local ipa_stress_l = "ˈˌ"
local ipa_stress_c = "[" .. ipa_stress_l .. "]"
local sylsep_l = "%-."..ipa_stress_l -- hyphen included for syllabifying from spelling; FIXME: formerly included SYLDIV
local sylsep_c = "[" .. sylsep_l .. "]"
local tie_l = "‿'"
local tie_c = "[" .. tie_l .. "]"
-- ipa_stress_l is already part of sylsep_l; listing it twice is harmless inside a character class.
local charsep_l = sylsep_l .. tie_l .. stress_l .. ipa_stress_l
local charsep_c = "[" .. charsep_l .. "]"
local wordsep_l = "# "
local wordsep_c = "[" .. wordsep_l .. "]"
local separator_l = charsep_l .. wordsep_l
local separator_c = "[" .. separator_l .. "]"
-- Everything that is NOT a consonant: vowels plus all separators. Negated below to obtain the consonant class.
local neg_guts_of_cons = vowel_l .. separator_l
local C = "[^" .. neg_guts_of_cons .. "]" -- consonant class including h

-- Exported list (and matching character class) of the written vowels that act as mid-vowel quality hints.
export.mid_vowel_hints = "éèêëóòô"
export.mid_vowel_hint_c = "[" .. export.mid_vowel_hints .. "]"

-- Single-character placeholders substituted by word_fixes() for the respellings "(r)" and "(rr)".
local TEMP_PAREN_R = u(0xFFF1)
local TEMP_PAREN_RR = u(0xFFF2)
-- Pseudo-consonant at the edge of prefixes ending in a vowel and suffixes beginning with a vowel; FIXME: not currently
-- used.
local PSEUDOCONS = u(0xFFF3)
-- local PREFIX_MARKER = u(0xFFF4) -- marker indicating a prefix so we can convert primary to secondary accents


-- Set of letter sequences accepted as a syllable onset. split_syllables() moves leading letters of an onset into the
-- preceding syllable's coda until what remains is empty or a member of this set.
local valid_onsets = listToSet {
	"b", "bl", "br",
	"c", "cl", "cr",
	"ç",
	"d", "dj", "dr",
	"f", "fl", "fr",
	"g", "gl", "gr", "gu", "gü",
	"h",
	"i",
	"j",
	"k", "kl", "kr",
	"l", "ll",
	"m",
	"n", "ny", "ñ",
	"p", "pl", "pr",
	"qu", "qü",
	"r", "rr",
	"s", "ss",
	"t", "tg", "tj", "tr", "tx", "tz",
	"u",
	"v", "vl", "vr",
	"w",
	"x",
	"ʃ", -- e.g. 'χruʃóf' respelling of [[Khrusxov]]
	"χ", -- in case of respelling
	"y",
	"z",
} 

-- Maps precomposed letters with a dot above to the plain letter followed by the combining DOTOVER character.
-- descomponer() uses this table as the replacement argument of a substitution.
local decompose_dotover = {
	-- No composed i, u or U with DOTOVER.
	["ȧ"] = "a" .. DOTOVER,
	["ė"] = "e" .. DOTOVER,
	["ȯ"] = "o" .. DOTOVER,
	["ẏ"] = "y" .. DOTOVER,
	["Ȧ"] = "A" .. DOTOVER,
	["Ė"] = "E" .. DOTOVER,
	["İ"] = "I" .. DOTOVER,
	["Ȯ"] = "O" .. DOTOVER,
	["Ẏ"] = "Y" .. DOTOVER,
}

-- All keys of decompose_dotover joined into one string, for use inside a character class (key order is irrelevant
-- there).
local dotover_keys = concat_keys(decompose_dotover)

-- Set of words that are treated as unstressed. handle_unstressed_words() looks each word of the input up in this set
-- to decide whether to mark it with DOTOVER.
local unstressed_words = listToSet {
	-- proclitic object pronouns
	"em", "et", "es", "el", "la", "els", "les", "li", "ens", "us", "ho", "hi", "en",
	-- enclitic object pronouns usually attach with hyphen to preceding verb but not always, cf. [[tant me fa]]
	"me", "te", "se", "lo", "los", "nos", "vos", "ne",
	-- contracted object pronouns and articles attached with apostrophe so no need to include
	-- unstressed possessives
	"mon", "ma", "mos", "mes", "ton", "ta", "tos", "tes", "son", "sa", "sos", "ses",
	-- prepositions
	"a", "de", "per", "amb", "ab", -- 'en' already included as proclitic object pronouns
	-- prepositional contractions
	"al", "als", "del", "dels", "pel", "pels",
	-- articles 'el', 'la', 'els', 'les' already included as proclitic pronouns
	-- personal articles
	"na", -- 'en' already included above
	-- indefinite articles
	"un", "uns",
	-- salat articles
	"ets", "so", -- 'es' already included as proclitic object pronouns and 'ses', 'sa', 'sos' as possessives
	-- conjunctions
	"i", "o", "si", "ni", "que",
}

-- Maps a single letter (uppercase and lowercase forms, plus Ç and the geminate L·L) to a list of spelled-out names
-- for that letter.
-- NOTE(review): presumably used to generate the pronunciation of entries that are individual letters; the code that
-- reads this table is not visible in this part of the file -- confirm.
local pron_abc = {
    ["A"] = {"a"},
    ["a"] = {"a"},
    ["B"] = {"be", "be alta"},
    ["b"] = {"be", "be alta"},
    ["C"] = {"ce"},
    ["c"] = {"ce"},
    ["D"] = {"de"},
    ["d"] = {"de"},
    ["E"] = {"e"},
    ["e"] = {"e"},
    ["F"] = {"efa", "efe", "ef"},
    ["f"] = {"efa", "efe", "ef"},
    ["G"] = {"ge"},
    ["g"] = {"ge"},
    ["H"] = {"hac"},
    ["h"] = {"hac"},
    ["I"] = {"i"},
    ["i"] = {"i"},
    ["J"] = {"jota"},
    ["j"] = {"jota"},
    ["K"] = {"ca", "ka"},
    ["k"] = {"ca", "ka"},
    ["L"] = {"ela", "ele", "el"},
    ["l"] = {"ela", "ele", "el"},
    ["M"] = {"ema", "eme", "em"},
    ["m"] = {"ema", "eme", "em"},
    ["N"] = {"ena", "ene", "en"},
    ["n"] = {"ena", "ene", "en"},
    ["O"] = {"o"},
    ["o"] = {"o"},
    ["P"] = {"pe"},
    ["p"] = {"pe"},
    ["Q"] = {"cu"},
    ["q"] = {"cu"},
    ["R"] = {"erra", "erre", "er"},
    ["r"] = {"erra", "erre", "er"},
    ["S"] = {"essa", "esse", "es"},
    ["s"] = {"essa", "esse", "es"},
    ["T"] = {"te"},
    ["t"] = {"te"},
    ["U"] = {"u"},
    ["u"] = {"u"},
    ["V"] = {"ve"},
    ["v"] = {"ve"},
    ["W"] = {"doble ve", "ve doble"},
    ["w"] = {"doble ve", "ve doble"},
    ["X"] = {"ics", "xeix"},
    ["x"] = {"ics", "xeix"},
    ["Y"] = {"i grega"},
    ["y"] = {"i grega"},
    ["Z"] = {"zeta"},
    ["z"] = {"zeta"},
    ["Ç"] = {"ce trencada"},
    ["ç"] = {"ce trencada"},
    ["L·L"] = {"ela geminada", "ele geminada", "el geminada"},
    ["l·l"] = {"ela geminada", "ele geminada", "el geminada"},
}

-- Numeric codes for grammatical categories (noun, adjective, verb, adverb).
local SUST = 1
local ADJ = 2
local VERB = 3
local ADV = 4

-- Maps the accepted spellings of a grammatical category (abbreviations and full Spanish names) to its numeric code.
-- NOTE(review): the code that consumes this table is not visible in this part of the file.
local normalizar_cg = {
	["s"] = SUST,
	["sust"] = SUST,
	["sustantivo"] = SUST,
	["adj"] = ADJ,
	["adjetivo"] = ADJ,
	["v"] = VERB,
	["verb"] = VERB,
	["verbo"] = VERB,
	["adv"] = ADV,
	["adverbio"] = ADV,
}

-- Replace every precomposed dot-above letter in `text` by its decomposed form (base letter + DOTOVER), using
-- decompose_dotover as the lookup table for the substitution.
local function descomponer(text)
	local dotted_letter = "[" .. dotover_keys .. "]"
	return strsub(text, dotted_letter, decompose_dotover)
end

-- Normalize raw input text before it is split into words: lowercase it, decompose dot-above letters, turn
-- fragment-delimiting punctuation into " | ", drop remaining punctuation, turn hyphens into spaces, and tidy up the
-- resulting bars and whitespace. Returns the normalized string.
local function normalizar(texto)
	texto = strlower(texto)
	texto = descomponer(texto)

	-- Ordered {pattern, replacement} steps; each one is applied with strsubrep().
	local pasos = {
		{PUNTUACION, " | "}, -- fragment delimiters become IPA foot boundaries |
		{PUNTUACION_EXTRA, ""}, -- drop whatever punctuation is still left
		{"[%-‐]", " "}, -- hyphens become spaces (austro-húngaro, franco-italiano)
		{"%s*|%s*|%s*", " | "}, -- finally, collapse surplus bars and the spaces around them
		{"%s+", " "}, -- collapse runs of whitespace
	}
	for _, paso in ipairs(pasos) do
		texto = strsubrep(texto, paso[1], paso[2])
	end

	-- Trim whitespace and bars at the edges.
	texto = strstrip(texto, "[%s|]+")
	return texto
end

-- Mark unstressed function words and apply their special-case respellings.
--
-- `words_` is a list of word strings. It is deep-copied first, so the caller's table is never modified. Returns a new
-- list in which:
--   * every word found in `unstressed_words` carries DOTOVER after its last vowel;
--   * [[amb]] directly followed by a word beginning with a vowel (optionally preceded by h) is joined to that word
--     with the tie character "‿", so the result may be shorter than the input;
--   * any remaining [[amb]] and [[per]] are respelled as described below.
local function handle_unstressed_words(words_)
	local words = m_table.deepcopy(words_)

	-- Check if the word at index `i` in `words` is "amb" and the following word begins with a vowel.
	local function is_amb_to_join(words, i)
		return i < #words and words[i] == "a" .. DOTOVER .. "mb" and strfind(words[i + 1], "^h?" .. V)
	end
	-- Only set once a joinable [[amb]] is actually seen, so the rebuild pass below can be skipped otherwise.
	local saw_amb_to_join = false

	-- Mark all unstressed words with DOTOVER, so that split_syllables() doesn't assign stress. We need to do this
	-- before special handling for [[amb]], because [[amb]] may join to another unstressed word like [[el]], in the
	-- process losing the identity of the two words. In the process, see if [[amb]] occurs before a following
	-- vowel-initial word (which may begin with h-).
	for i, word in ipairs(words) do
		-- Put DOTOVER after the last vowel (to handle the case of [[que]]). It doesn't actually matter where we put
		-- it, because split_syllables() just looks for DOTOVER anywhere in the word.
		if unstressed_words[word] then
			-- Store the result in the list itself: assigning to the loop variable `word` would only change a local
			-- copy and the mark would be lost.
			words[i] = strsub(word, "^(.*" .. V .. ")", "%1" .. DOTOVER)
		end
		-- Must run after words[i] has been marked, since is_amb_to_join() compares against the marked form.
		if is_amb_to_join(words, i) then
			saw_amb_to_join = true
		end
	end

	-- Join [[amb]] before vowel-initial word with following word.
	if saw_amb_to_join then
		local new_words = {}
		local i = 1
		while i <= #words do
			if is_amb_to_join(words, i) then
				insert(new_words, words[i] .. "‿" .. words[i + 1])
				i = i + 2
			else
				insert(new_words, words[i])
				i = i + 1
			end
		end
		words = new_words
	end

	-- Finally, rewrite some unstressed words to get the right pronunciation. Any remaining [[amb]] not before a
	-- vowel-initial word is pronounced [am] even in Valencian (where [amp]/[amb] would be expected), and [[per]] always
	-- has a pronounced <r>.
	local unstressed_word_replacement = {
		["a" .. DOTOVER .. "mb"] = "a" .. DOTOVER .. "m",
		["pe" .. DOTOVER .. "r"] = "pe" .. DOTOVER .. "rr",
	}

	for i, word in ipairs(words) do
		-- As above, write back into the list rather than into the loop variable.
		words[i] = unstressed_word_replacement[word] or word
	end

	return words
end


-- Respell certain prefixed words so that later stages pronounce the prefix boundary correctly. Returns the respelled
-- word.
local function fix_prefixes(word)
	-- Ordered {pattern, replacement} pairs.
	local rules = {
		-- Voiced s in prefix roots -fons-, -dins-, -trans-
		{"^enfons([aàeèéiíoòóuú])", "enfonz%1"},
		{"^endins([aàeèéiíoòóuú])", "endinz%1"},
		{"tr([aà])ns([aàeèéiíoòóuúbdghlmv])", "tr%1nz%2"},
		-- in + ex > ineks/inegz
		{"^inex", "in.ex"},
	}
	for _, rule in ipairs(rules) do
		word = strsub(word, rule[1], rule[2])
	end
	return word
end

-- Some word endings are written without a diaeresis (to save diacritics) even though the vowels are in hiatus. Add an
-- explicit diacritic to those endings so the hiatus can be identified later. Returns the respelled word.
local function restore_diaereses(word)
	-- Ordered {pattern, replacement} pairs; every pattern is anchored at the end of the word.
	local rules = {
		{"([iu])um(s?)$", "%1üm%2"}, -- Latinisms (-ius is ambiguous but rare)

		{"([aeiou])isme(s?)$", "%1ísme%2"}, -- suffix -isme
		{"([aeiou])ist([ae]s?)$", "%1íst%2"}, -- suffix -ista

		{"([aeou])ir$", "%1ír"}, -- verbs -ir
		{"([aeou])int$", "%1ínt"}, -- present participle
		-- future
		{"([aeo])ir([éà])$", "%1ïr%2"},
		{"([^gq]u)ir([éà])$", "%1ïr%2"},
		{"([aeo])iràs$", "%1ïràs"},
		{"([^gq]u)iràs$", "%1ïràs"},
		{"([aeo])ir(e[mu])$", "%1ïr%2"},
		{"([^gq]u)ir(e[mu])$", "%1ïr%2"},
		{"([aeo])iran$", "%1ïran"},
		{"([^gq]u)iran$", "%1ïran"},
		-- conditional
		{"([aeo])iria$", "%1ïria"},
		{"([^gq]u)iria$", "%1ïria"},
		{"([aeo])ir(ie[sn])$", "%1ïr%2"},
		{"([^gq]u)ir(ie[sn])$", "%1ïr%2"},
	}
	for _, rule in ipairs(rules) do
		word = strsub(word, rule[1], rule[2])
	end
	return word
end

-- Resolve written <y>: the digraph "ny" becomes "ñ"; any other <y> that is not adjacent to a vowel on the relevant
-- side becomes the vowel "i"; a <y> left over stays as is (consonantal /j/). Returns the respelled word.
local function fix_y(word)
	local vowels = "aeiouàèéêëíòóôúïü"

	word = strsub(word, "ny", "ñ")

	-- <y> followed by a non-vowel is a vowel.
	word = strsub(word, "y([^" .. vowels .. "])", "i%1")
	-- <y> preceded by something that is neither a vowel nor a syllable separator is a vowel.
	word = strsub(word, "([^" .. vowels .. "·%-%.])y", "%1i")

	return word
end

-- Add a mid-vowel quality hint (an accented vowel) to certain word endings written with plain <e>/<o>/<è>. Each time
-- a rule changes the word, the tracking module is invoked with the key "ca-vocales". Returns the respelled word.
local function mid_vowel_fixes(word)
	-- Ordered {pattern, replacement} pairs, all anchored at the end of the word.
	local rules = {
		{"e(nts?)$", "é%1"},
		{"e(rs?)$", "é%1"},
		{"o(rs?)$", "ó%1"},
		{"è(s?)$", "ê%1"},
		{"e(s[oe]s)$", "ê%1"},
		{"e(sa)$", "ê%1"},
	}
	for _, rule in ipairs(rules) do
		local changed
		word, changed = strsubb(word, rule[1], rule[2])
		if changed then
			require("Módulo:traza")("ca-vocales")
		end
	end
	return word
end

-- Apply all spelling-level respellings to a single word before it is split into syllables. `dialect` is one of the
-- dialect codes; only VALENCIANO triggers a dialect-specific rule here. Returns the respelled word.
local function word_fixes(word, dialect)
	-- Apply one substitution to the word being rewritten.
	local function rewrite(pattern, replacement)
		word = strsub(word, pattern, replacement)
	end

	-- Protect the respellings "(rr)" and "(r)" behind single-character placeholders.
	rewrite("%(rr%)", TEMP_PAREN_RR)
	rewrite("%(r%)", TEMP_PAREN_R)
	-- Double an r or s directly after a hyphen.
	rewrite("%-([rs]?)", "-%1%1")
	if dialect == VALENCIANO then
		rewrite("%-x", "-tx")
	end
	rewrite("rç$", "rrs") -- silent r only in plurals -rs

	-- In order: internal pause after a prefix; no diaeresis saving; ny > ñ else y > i vowel or consonant; mid-vowel
	-- hints.
	word = mid_vowel_fixes(fix_y(restore_diaereses(fix_prefixes(word))))

	-- all words in pn- (e.g. [[pneumotòrax]] and mn- (e.g. [[mnemònic]]) have silent p/m in both Central and Valencian
	rewrite("^[pm]n", "n")
	-- Respell ch + vowel as tx, before we remove other h's after consonants.
	rewrite("ch(" .. V ..")", "tx%1")
	-- Delete h after a consonant. This must happen here, before split_syllables(). We don't delete h after a vowel
	-- yet because it indicates a hiatus.
	rewrite("(" .. C .. ")h", "%1")

	return word
end

-- Split a run of adjacent vowels into syllable objects. The first vowel always forms the first syllable, which also
-- records `saw_dotover` and `saw_lineunder`. In the rest of the run, an "i" or "u" followed by another vowel becomes
-- the onset (glide) of that vowel's syllable. If the last syllable ends up being a bare "i" or "u", it is folded into
-- the coda of the preceding syllable. Returns the list of syllable objects.
local function split_vowels(vowels, saw_dotover, saw_lineunder)
	local first = {
		onset = "", vowel = substr(vowels, 1, 1), coda = "", separator = "",
		has_dotover = saw_dotover, has_lineunder = saw_lineunder,
	}
	local result = {first}

	local rest = substr(vowels, 2)
	while rest ~= "" do
		local glide, nucleus
		glide, nucleus, rest = strmatch(rest, "^([iu]?)(.)(.-)$")
		insert(result, {onset = glide, vowel = nucleus, coda = ""})
	end

	local n = #result
	local last = result[n]
	if n >= 2 and (last.vowel == "i" or last.vowel == "u") then
		-- Falling diphthong: the final i/u closes the previous syllable instead of forming its own.
		result[n - 1].coda = last.vowel
		result[n] = nil
	end

	return result
end

-- Split the word into syllables. Return a list of syllable objects, each of which contains fields `onset`, `vowel`,
-- `coda`, `separator` (a user-specified syllable divider that goes before the syllable; one of '·', '-' or '.') and
-- `stressed` (a boolean indicating that the syllable is stressed). In addition, the list has fields `stress` (the
-- index of the syllable with primary stress) and `is_prefix` (true if the word is a prefix, i.e. it ends in '-').
-- Normally, prefixes are treated as unstressed if a stressed syllable isn't explicitly marked, but this can be
-- overridden with `stress_prefixes`, which causes the automatic stress-assignment algorithm to run for these terms.
--
-- Parameters:
--   `word`: the (respelled) word to split; a trailing '-' marks a prefix and a leading '-' marks a suffix.
--   `stress_prefixes`: if true, default stress is assigned to prefixes as well.
--   `may_be_uppercase`: if true, uppercase vowels are also recognized as vowels while scanning.
-- Besides the fields above, the returned list also carries `is_suffix` (true if the word began with '-'), and the
-- first syllable of each vowel run carries `has_dotover` and `has_lineunder` (set by split_vowels()).
-- No default stress is assigned when DOTOVER occurs anywhere in the word.
local function split_syllables(word, stress_prefixes, may_be_uppercase)
	local syllables = {}
	local saw_dotover = false

	local remainder = word
	local is_prefix = false
	if remainder:find("%-$") then -- prefix
		is_prefix = true
		-- gsub returns (string, count); the single-target assignment keeps only the string.
		remainder = remainder:gsub("%-$", "")
	end
	local is_suffix = false
	if remainder:find("^%-") then -- suffix
		is_suffix = true
		remainder = remainder:gsub("^%-", "")
	end

	-- Consume the word left to right as alternating runs of non-vowels and vowels.
	while remainder ~= "" do
		local consonants, vowels

		-- FIXME: Using C and V below instead of the existing patterns slows things down TREMENDOUSLY.
		-- Not sure why.
		-- DOTOVER and LINEUNDER are part of the vowel list so that they stay attached to the vowel run they mark.
		local vowel_list = may_be_uppercase and "aeiouàèéêëíòóôúïüAEIOUÀÈÉÊËÍÒÓÔÚÏÜ" .. DOTOVER .. LINEUNDER or
			"aeiouàèéêëíòóôúïü" .. DOTOVER .. LINEUNDER
		consonants, remainder = strmatch(remainder, "^([^" .. vowel_list .. "]*)(.-)$")
		vowels, remainder = strmatch(remainder, "^([" .. vowel_list .. "]*)(.-)$")
		-- Record the markers for this vowel run, then strip them from the run.
		local this_saw_dotover = not not strfind(vowels, DOTOVER)
		if this_saw_dotover then
			saw_dotover = true
			vowels = vowels:gsub(DOTOVER, "")
		end
		local this_saw_lineunder = not not strfind(vowels, LINEUNDER)
		if this_saw_lineunder then
			vowels = vowels:gsub(LINEUNDER, "")
		end

		if vowels == "" then
			-- Trailing consonants: attach them to the coda of the last syllable, if there is one.
			if #syllables > 0 then
				syllables[#syllables].coda = syllables[#syllables].coda .. consonants
			else
				-- word without vowels, e.g. foot boundary |
				insert(syllables, {onset = consonants, vowel = "", coda = "", separator = ""})
			end
		else
			local onset = consonants
			local first_vowel = substr(vowels, 1, 1)

			-- The first vowel letter is moved into the onset in two cases:
			-- (1) after g/q, a "ü", or a "u" that is followed by further vowels;
			-- (2) at the start of the word (optionally after h), an "i" that is followed by further vowels.
			if (strfind(onset, "[gqGQ]$") and (first_vowel == "ü" or (first_vowel == "u" and vowels ~= "u")))
			or ((onset == "" or onset == "h" or onset == "H") and #syllables == 0 and
				(first_vowel == "i" or first_vowel == "I") and (vowels ~= "i" and vowels ~= "I"))
			then
				onset = onset .. substr(vowels, 1, 1)
				vowels = substr(vowels, 2)
			end

			local vsyllables = split_vowels(vowels, this_saw_dotover, this_saw_lineunder)
			vsyllables[1].onset = onset .. vsyllables[1].onset

			for _, s in ipairs(vsyllables) do
				insert(syllables, s)
			end
		end
	end

	-- Shift over consonants from the onset to the preceding coda, until the syllable onset is valid
	for i = 2, #syllables do
		local current = syllables[i]
		local previous = syllables[i-1]

		-- The onset is looked up after removing a trailing tie character (plus optional h) and any "_".
		while not (current.onset == "" or valid_onsets[strsub(strsub(current.onset, tie_c .. "[hH]?$", ""), "_", "")]) do
			local letter = substr(current.onset, 1, 1)
			current.onset = substr(current.onset, 2)
			if strfind(letter, "[·%-%.]") then -- syllable separators
				-- An explicit separator fixes the boundary: record it and stop shifting.
				current.separator = letter
				break
			else
				previous.coda = previous.coda .. letter
				-- A tie character also ends the shifting once it has been moved to the coda.
				if strfind(letter, tie_c) then
					break
				end
			end
		end
	end

	-- Detect stress
	for i, syll in ipairs(syllables) do
		if strfind(syll.vowel, "^[" .. written_stressed_vowel_l .. "]$") then
			syll.stressed = true
			-- primary stress: the last one stressed without LINEUNDER
			if not syll.has_lineunder then
				syllables.stress = i
			end
		end
	end

	-- Assign default stress
	if not syllables.stress and not saw_dotover and (stress_prefixes or not is_prefix) then
		local count = #syllables

		if count == 1 then
			if syllables[1].vowel ~= "" then -- vowel-less words don't get stress
				syllables.stress = 1
			end
		else
			local final = syllables[count]

			-- Take account of tie symbols (apostrophes and ‿).
			-- Penultimate stress if the final coda is empty or consists only of s/ties, or if it is n (possibly
			-- with ties) after e, i or ï; otherwise final stress.
			if strfind(final.coda, "^[s" .. tie_l .. "]*$") or (strfind(final.coda, "^" .. tie_c .. "*n" .. tie_c .. "*$") and (
				final.vowel == "e" or final.vowel == "i" or final.vowel == "ï")) then
				syllables.stress = count - 1
			else
				syllables.stress = count
			end
		end
		if syllables.stress then
			syllables[syllables.stress].stressed = true
		end
	end

	syllables.is_prefix = is_prefix
	syllables.is_suffix = is_suffix
	return syllables
end

-- Per-dialect IPA values for the mid-vowel hint letters ê, ë and ô.
-- NOTE(review): the code that reads these three tables is not visible in this part of the file.
local IPA_vowels_central = {
	["ê"] = "ɛ", ["ë"] = "ɛ", ["ô"] = "ɔ",
}
local IPA_vowels_balearic = {
	["ê"] = "ə", ["ë"] = "ɛ", ["ô"] = "ɔ",
}
local IPA_vowels_valencian = {
	["ê"] = "e", ["ë"] = "e", ["ô"] = "o",
}

-- IPA value of each accented written vowel.
local IPA_vowels = {
	["à"] = "a",
	["è"] = "ɛ", ["ê"] = "ɛ", ["ë"] = "ɛ", ["é"] = "e",
	["í"] = "i", ["ï"] = "i",
	["ò"] = "ɔ", ["ô"] = "ɔ", ["ó"] = "o",
	["ú"] = "u", ["ü"] = "u",
}

-- Character class built from the values of IPA_vowels. The values repeat and their order is unspecified, neither of
-- which matters inside a character class.
local IPA_VOWEL_CLUSTER = "[" .. concat_vals(IPA_vowels) .. "]"

-- Apply the substitutions that do not depend on surrounding context to a consonant string, mapping written
-- consonants and glide letters to (mostly) IPA symbols. The rules are order-sensitive. Returns the converted string.
local function replace_context_free(cons)
	-- Ordered {pattern, replacement} pairs.
	local rules = {
		{"ŀ", "l"},

		-- Single r becomes the tap; the doubled tap produced from rr then becomes the trill.
		{"r", "ɾ"},
		{"ɾɾ", "r"},
		{"ss", "s"},
		{"ll", "ʎ"},
		{"ñ", "ɲ"}, -- hint ny > ñ

		-- NOTE: We use single-character affricate symbols during processing for ease in handling, and convert them
		-- to tied multi-character affricates at the end of join_syllables().
		{"[dt]j", "ʤ"},
		{"tx", "ʧ"},
		{"[dt]z", "ʣ"},

		{"ç", "s"},
		{"[cq]", "k"},
		{"h", ""},
		{"j", "ʒ"},
		-- Don't replace x -> ʃ yet so we can distinguish x from manually specified ʃ.

		{"i", "j"}, -- must be after j > ʒ
		{"y", "j"}, -- must be after j > ʒ and fix_y
		{"[uü]", "w"},
		{"'", "‿"},
	}
	for _, rule in ipairs(rules) do
		cons = strsub(cons, rule[1], rule[2])
	end
	return cons
end


-- Do context-sensitive phonological changes. Formerly this was all done syllable-by-syllable but that made the code
-- tricky (since it often had to look at adjacent syllables) and full of subtle bugs. Now we first concatenate the
-- syllables back to words and the words to the combined text and work on the text as a whole. FIXME: We should move
-- more of the work done in preprocess_word(), e.g. most of replace_context_free(), here.
local function postprocess_general(text, dialect)
	local voiced = listToSet({"b", "d", "g", "m", "n", "ɲ", "l", "ʎ", "r", "ɾ", "v", "z", "ʒ", "ʣ", "ʤ"})
	--local voiced_keys = concat_keys(voiced)
	local voiceless = listToSet({"p", "t", "k", "f", "s", "ʃ", "ʦ", "ʧ"})
	--local voiceless_keys = concat_keys(voiceless)
	local voicing = {["p"] = "b", ["t"] = "d", ["k"] = "g", ["f"] = "v", ["s"] = "z", ["ʃ"] = "ʒ", ["ʦ"] = "ʤ",
		["ʧ"] = "ʤ"}
	--local voicing_keys = concat_keys(voicing)
	local devoicing = {}
	for k, v in pairs(voicing) do
		devoicing[v] = k
	end
	--local devoicing_keys = concat_keys(devoicing)

	------------------ Handle <x>

	-- Handle ex- + vowel > -egz-. We handle -x- on either side of the syllable boundary. Note that this also handles
	-- inex- + vowel because in fix_prefixes we respell inex- as in.ex-, which ends up at this stage as in.e.xV.
	text = strsubrep(text, "([.#][eɛ]" .. stress_c .. "*)(" .. charsep_c .. "*)x(" .. charsep_c .. "*" .. V ..
		")", function(e, pre, post)
			-- Preserve other character separators (especially the tie character ‿).
			pre = pre:gsub("%.", "")
			post = post:gsub("%.", "")
			return e .. pre .. "g.z" .. post
		end)
	-- -x- at the beginning of a coda becomes [ks], e.g. [[annex]], [[apèndix]], [[extracció]]; but not elsewhere in
	-- the coda, e.g. in [[romanx]], [[ponx]]; words with [ks] in -nx such as [[esfinx]], [[linx]], [[manx]] need
	-- respelling with [ks]; words ending in vowel + x like [[ídix]] need respelling with [ʃ]
	text = strsub(text, "(" .. V .. stress_c .. "*)x", "%1ks")
	if dialect == VALENCIANO then
		-- Word-initial <x> as well as <x> after a consonant other than /j/ (including in the coda, e.g. [[ponx]])
		-- becomes [t͡ʃ].
		text = strsub(text, "#x", "#ʧ")
		text = strsub(text, "([^" .. vowel_l .. separator_l .. "j]" .. charsep_c .. "*)x", "%1ʧ")
	end
	-- Other x becomes [ʃ]
	text = strsub(text, "x", "ʃ")

	-- Doubled ss -> s e.g. in exs-, exc(e/i)-, sc(e/i)-; FIXME: should this apply across word boundaries?
	text = strsub(text, "s(" .. charsep_c .. "*)s", "%1s")

	------------------ Coda consonant losses

	-- In Central Catalan, coda losses happen everywhere, but otherwise they don't happen when
	-- absolutely word-finally before a vowel or end of utterance (e.g. [[blanc]] has /k/ in Balearic and
	-- Valencian but not [[blancs]]). Must precede consonant assimilations.
	local boundary = dialect == CENTRAL and "(.)" or "([^#])"
	text = strsub(text, "m[pb]" .. boundary, "m%1")
	text = strsub(text, "([ln])[td]" .. boundary, "%1%2")
	text = strsub(text, "[nŋ][kg]" .. boundary, "ŋ%1")
	if dialect == VALENCIANO or dialect == BALEARICO then
		local before_cons = "(" .. separator_c .. "*" .. C .. ")"
		text = strsub(text, "m[pb]" .. before_cons, "m%1")
		text = strsub(text, "([ln])[td]" .. before_cons, "%1%2")
		text = strsub(text, "[nŋ][kg]" .. before_cons, "ŋ%1")
	end

	-- Delete /t/ between /s/ and any consonant other than /s/ or /ɾ/. Must precede voicing assimilation and
	-- t + lateral/nasal assimilation.
	text = strsub(text, "st(" .. sylsep_c .. "*[^" .. neg_guts_of_cons .. "sɾ])", "s%1")
	
	------------------ Consonant assimilations

	if dialect == CENTRAL then
		-- v > b in onsets (not in codas, e.g. [[ovni]] [ɔ́vni] and [[hafni]] [ávni]). This needs to precede
		-- assimilation of nb -> mb.
		text = strsub(text, "v(" .. C .. "*" .. V ..")", "b%1")
	end

	-- t + lateral assimilation -> geminate across syllable boundary. We don't any more do t + nasal assimiation
	-- because there are too many exceptions, e.g. [[aritmètic]], [[atmosfèric]], [[ètnia]]. Instead, we require that
	-- cases where it does happen use respelling to effect this. FIXME: this doesn't always happen in -tl- either,
	-- e.g. [[atlàntic]] has [əllántik] in GDLC but [adlántik] in DNV.
	--
	-- FIXME: Clean this up, maybe move below voicing assimilation, investigate whether it operates across words,
	-- move stuff below that special-cases tll in Valencian here.
	text = strsub(text, "t(" .. sylsep_c .. ")([lʎ])", "%2%1%2")

	-- n + labial > labialized assimilation
	text = strsub(text, "n(" .. separator_c .. "*[mbp])", "m%1")
	text = strsub(text, "n(" .. separator_c .. "*[fv])", "ɱ%1")

	-- n + velar > velarized assimilation
	text = strsub(text, "n(" .. separator_c .. "*[kg])", "ŋ%1")

	-- l/n + palatal > palatalized assimilation
	text = strsub(text, "([ln])(" .. separator_c .. "*[ʎɲʃʒʧʤ])", function(ln, palatal)
		ln = ({["l"] = "ʎ", ["n"] = "ɲ"})[ln]
		return ln .. palatal
	end)

	-- ɲs > ɲʃ; FIXME: not sure the purpose of this; it doesn't apply in [[menys]] or derived terms like [[menyspreu]]
	-- NOTE: Per [https://giec.iec.cat/textgramatica/codi/4.4], it does apply in these scenarios but the result is
	-- somewhere between [s] and [ʃ], which is why it isn't shown in GDLC.
	-- text = strsub(text, "ɲs", "%1ʃ")

	------------------ Handle <r>

	-- In replace_context_free(), we converted single r to ɾ and double rr to r.
	if dialect == CENTRAL then
		text = strsub(text, TEMP_PAREN_R, "")
		text = strsub(text, TEMP_PAREN_RR, "r")
	elseif dialect == BALEARICO then
		text = strsub(text, TEMP_PAREN_R, "")
		text = strsub(text, TEMP_PAREN_RR, "")
	else
		assert(dialect == VALENCIANO, ("Unrecognized dialect '%s'"):format(dialect))
		text = strsub(text, TEMP_PAREN_R, "ɾ")
		text = strsub(text, TEMP_PAREN_RR, "ɾ")
	end
	if dialect ~= VALENCIANO then
		-- Coda /ɾ/ -> /r/
		-- FIXME: This is inherited from the older code. Correct?
		text = strsub(text, "(" .. V .. stress_c .. "*" .. C .. "*)ɾ", "%1r")
	end		

	-- ɾ -> r word-initially or after [lns]; needs to precede voicing assimilation as <s> will be voiced to [z] before
	-- /ɾ/.
	text = strsub(text, "([#lns]" .. sylsep_c .. "*)ɾ", "%1r")

	------------------ Voicing assimilation

	-- Voicing or devoicing; we want to proceed from right to left, and due to the limitations of patterns (in
	-- particular, the lack of support for alternations), it's difficult to do this cleanly using Lua patterns, so we
	-- do it character by character.
	local chars = strexplode(text)
	-- We need to look two characters ahead in some cases, so start two characters from the end. This is safe because
	-- the overall respelling ends in "##". (Similarly, as an optimization, don't check the first two characters, which
	-- are always "##".)
	for i = #chars - 2, 3, -1 do
		-- We are looking for two consonants next to each other, possibly separated by a syllable or word divider.
		-- We also handle a consonant followed by a syllable divider then a vowel, and a consonant word-finally.
		-- Note that only coda consonants can change voicing, so we need to check to make sure we're in the coda.
		local first = chars[i]
		-- If `second` is nil, no assimilation occurs. Otherwise, `second` should be a consonant or empty string (which
		-- represents a syllable or word boundary followed by a vowel or end of string), and we assimilate to that
		-- consonant (empty string forces devoicing).
		local second
		-- If set to true, we're processing a consonant directly before a word boundary followed by a word beginning
		-- with a vowel. In this context, voiceless sibilants voice. Note that we handle voicing of <s> word-internally
		-- separately, in preprocess_word() [FIXME: maybe move much of the processing in preprocess_word() into this
		-- function].
		local word_boundary_before_vowel
		if not strfind(first, C) then
			-- leave `second` at nil; no assimilation
		elseif chars[i + 1] == "#" then -- word boundary
			if chars[i + 2] == " " then
				-- chars[i + 3] should always be "#"
				assert(chars[i + 3] == "#", "Word boundary followed by space but not #")
				if strfind(chars[i + 4], C) then
					second = chars[i + 4]
				else
					second = ""
					word_boundary_before_vowel = true
				end
			else
				second = ""
			end
		elseif strfind(chars[i + 1], sylsep_c) then -- syllable boundary
			if strfind(chars[i + 2], C) then
				second = chars[i + 2]
			else
				second = ""
			end
		elseif strfind(chars[i + 1], C) then
			second = chars[i + 1]
		else
			-- followed by a vowel not across a syllable or word boundary; leave `second` as nil, no assimilation
		end
		if second then
			-- Make sure we're in the coda. We have to look backwards until we find a vowel or syllable/word boundary.
			local in_coda = false
			local j = i - 1
			while true do
				assert(j > 0, "Missing word boundary at beginning of overall respelling")
				if strfind(chars[j], "[" .. sylsep_l .. wordsep_l .. "]") then
					break
				elseif strfind(chars[j], V) then
					in_coda = true
					break
				end
				j = j - 1
			end
			if in_coda then
				if word_boundary_before_vowel and strfind(first, "[zʒʣʤ]") then
					-- leave alone
				elseif voiced[second] and voicing[first] or word_boundary_before_vowel and strfind(first, "[sʃʦʧ]") then
					chars[i] = voicing[first]
				elseif (voiceless[second] or second == "") and devoicing[first] then
					chars[i] = devoicing[first]
				end
			end
		end
	end
	text = concat(chars)

	-- gn -> ŋn e.g. [[regnar]] (including word-initial gn- e.g. [[gnòmic]], [[gneis]]) 
	-- FIXME: This should be moved below voicing assimilation, and we need to investigate if it operates across words
	-- (here I'm guessing yes).
	if dialect ~= CENTRAL then
		text = strsub(text, "#gn", "#n")
	end
	text = strsub(text, "g(" .. separator_c .. "*n)", "ŋ%1")

	-- gʒ > d͡ʒ
	-- FIXME: We need to investigate if it operates across words
	text = strsub(text, "g(" .. sylsep_c .. "*)ʒ", "%1ʤ")
	-- sʃ -> ʃ ([[desxifrar]]), zʒ -> ʒ ([[disjuntor]])
	if dialect ~= VALENCIANO then
		text = strsub(text, "s(" .. separator_c .. "*ʃ)", "%1")
		text = strsub(text, "z(" .. separator_c .. "*ʒ)", "%1")
	end

	------------------ Gemination of <bl>, <gl>

	if dialect ~= VALENCIANO then
		-- bl -> bbl, gl -> ggl after the stress when following a vowel; to avoid this, use <b_l> or <g_l>.
		-- This must follow v > b above. To force a hard ungeminated [b] or [g], use <_b> or <_g>.
		text = strsub(text, "(" .. stress_c .. ")(" .. sylsep_c .. ")([bg])l", "%1%3%2%3l")
	else -- Valencian; undo manually written 'bbl', 'ggl' in words like [[poblar]], [[reglament]]
		text = strsub(text, "([bg])(" .. sylsep_c .. ")%1l", "%2%1l")
	end

	------------------ Lenition of voiced stops

	-- In Central Catalan, b/d/g become fricatives (actually approximants, like in Spanish) in the onset following a
	-- vowel and (except for <d>) after <l> and <ll> (cf. GDLC [[cabellblanc]] [kəβɛ̀ʎβláŋ]). This also happens across
	-- word boundaries but doesn't happen after stops, nor in Central Catalan after [r], [ɾ] or [z] (and hence probably
	-- not after [ʒ] either, although I can't find any examples in GDLC).
	--
	-- In Valencian, <b> doesn't lenite (at least formally?), but <d> and <g> do lenite after [r], [ɾ] or [z].
	--
	-- Balearic is like Valencian in not leniting <b>, and probably like Central Catalan otherwise.
	local lenite_bdg = {["b"] = "β", ["d"] = "ð", ["g"] = "ɣ"}
	if dialect == CENTRAL then
		text = strsub(text, "([" .. vowel_l .. "jwlʎv]" .. separator_c .. "*[.#]" .. separator_c .. "*)([bdg])",
			function(before, bdg) return before .. lenite_bdg[bdg] end)
	elseif dialect == VALENCIANO then
		text = strsub(text, "([" .. vowel_l .. "jwlʎvrɾzʣ]" .. separator_c .. "*[.#]" .. separator_c .. "*)([dg])",
			function(before, dg) return before .. lenite_bdg[dg] end)
	else
		assert(dialect == BALEARICO, ("Unrecognized dialect '%s'"):format(dialect))
		text = strsub(text, "([" .. vowel_l .. "jwlʎv]" .. separator_c .. "*[.#]" .. separator_c .. "*)([dg])",
			function(before, dg) return before .. lenite_bdg[dg] end)
	end

	------------------ Vowel reduction

	-- Reduction of unstressed a,e in Central and Balearic (Eastern Catalan).
	if dialect ~= VALENCIANO then
		-- The following rules seem to apply, based on the old code:
		-- (1) Stressed a and e are never reduced.
		-- (2) Unstressed e directly following ə is not reduced.
		-- (3) Unstressed e directly before written <a> or before /ɔ/ is not reduced.
		-- (4) Written <ee> when both vowels precede the primary stress is reduced to [əə]. (This rule preempts #2.)
		-- (5) Written <ee> when both vowels follow the primary stress isn't reduced at all.
		-- Rule #2 in particular seems to require that we proceed left to right, which is how the old code was
		-- implemented.
		-- FIXME: These rules seem overly complex and may produce incorrect results in some circumstances.
		local words = strsplit(text, " ")
		for j, word in ipairs(words) do
			local chars = strexplode(word)
			-- See above where voicing assimilation is handled. The overall respelling begins and ends in #, which we
			-- can ignore. We need to look ahead three chars in some circumstances, but in all those circumstances we
			-- shouldn't run off the end (and have assertions to check this).
			local seen_primary_stress = false
			for i = 2, #chars - 1 do
				local this = chars[i]
				if chars[i] == AC then
					seen_primary_stress = true
				end
				if (this ~= "a" and this ~= "e") or strfind(chars[i + 1], stress_c) then
					-- Not a/e, or a stressed vowel; continue
				else
					local reduction = true
					local prev, prev_stress, nxt, nxt_stress
					if not strfind(chars[i - 1], sylsep_c) then
						prev = ""
					else
						prev = chars[i - 2] -- this should be non-nil as chars[i - 1] is a syllable separator (not #)
						assert(prev, "Missing # at word boundary")
						prev_stress = ""
						if strfind(prev, stress_c) then
							prev_stress = prev
							prev = chars[i - 3]
							-- As above; chars[i - 2] is a stress indicator (not #).
							assert(prev, "Missing # at word boundary")
						end
					end
					if not strfind(chars[i + 1], sylsep_c) then
						nxt = ""
						-- leave nxt_stress at nil
					else
						nxt = chars[i + 2]
						nxt_stress = chars[i + 3]
						-- chars[i + 1] is a syllable separator, so chars[i + 2] should not be a word boundary, so
						-- chars[i + 3] should exist.
						assert(nxt and nxt_stress, "Syllable separator at word boundary or missing # at word boundary")
					end
					if this == "e" and strfind(prev, "ə") then
						reduction = false
					elseif this == "e" and strfind(nxt, "[aɔ]") then
						reduction = false
					elseif this == "e" and nxt == "e" and not strfind(nxt_stress, AC) then
						-- FIXME: Check specifically for AC duplicates previous logic but is probably wrong or unnecessary.
						if not seen_primary_stress then
							chars[i + 2] = "ə"
						else
							reduction = false
						end
					end
					if reduction then
						chars[i] = "ə"
					end
				end
			end
			words[j] = concat(chars)
		end
		text = concat(words, " ")
	end

	if dialect == CENTRAL then
		-- Reduction of unstressed o (not before w)
		text = strsub(text, "o([^" .. stress_l .. "w])", "u%1")
	elseif dialect == BALEARICO then
		-- Reduction of unstressed o per vowel harmony: unstressed /o/ -> /u/ directly before stressed /i/ or /u/;
		-- as a Lua pattern, o can be followed only by consonants and/or syllable separators (no vowels, stress marks
		-- or word separators).
		text = strsub(text, "o([^" .. vowel_l .. stress_l .. wordsep_l .. "]*[iu]" .. stress_c .. ")", "u%1")
	end

	-- Final losses.
	text = strsub(text, "j(ʧs?#)", "%1") -- boigs /bɔt͡ʃ/
	text = strsub(text, "([ʃʧs])s#", "%1#") -- homophone plurals -xs, -igs, -çs

	if dialect ~= VALENCIANO then
		-- Remove j before palatal obstruents
		text = strsub(text, "j(" .. sylsep_c .. "*[ʃʒʧʤ])", "%1")
	else -- Valencian
		-- Fortition of palatal fricatives
		text = strsub(text, "ʒ", "ʤ")
		text = strsub(text, "(i" .. stress_c .. "*" .. sylsep_c .. ")ʣ", "%1z")
	end

	if dialect ~= CENTRAL then
		-- No palatal gemination ʎʎ > ll or ʎ, in Valencian and Balearic.
		-- FIXME: These conditions seem to be targeting specific words and should probably be fixed using respelling
		-- instead.
		text = strsub(text, "([bpw]a" .. stress_c .. "*)ʎ(" .. sylsep_c .. "*)ʎ", "%1l%2l")
		text = strsub(text, "([mv]e" .. stress_c .. "*)ʎ(" .. sylsep_c .. "*)ʎ", "%1l%2l")
		text = strsub(text, "(ti" .. stress_c .. "*)ʎ(" .. sylsep_c .. "*)ʎ", "%1l%2l")
		text = strsub(text, "(m[oɔ]" .. stress_c .. "*)ʎ(" .. sylsep_c .. "*)ʎ", "%1l%2l")
		text = strsub(text, "(u" .. stress_c .. "*)ʎ(" .. sylsep_c .. "*)ʎ", "%1l%2l")
		text = strsub(text, "ʎ(" .. sylsep_c .. "*ʎ)", "%1")
	end

	---------- Convert pseudo-symbols to real ones.

	-- Convert g to IPA ɡ.
	text = strsub(text, "g", "ɡ")

	-- Convert pseudo-affricate symbols to full affricates.
	local full_affricates = { ["ʦ"] = "t͡s", ["ʣ"] = "d͡z", ["ʧ"] = "t͡ʃ", ["ʤ"] = "d͡ʒ" }
	text = strsub(text, "([ʦʣʧʤ])", full_affricates)

	---------- Generate IPA stress marks.

	-- Convert acute and grave to IPA stress marks.
	text = strsub(text, AC, "ˈ")
	text = strsub(text, GR, "ˌ")
	-- Move IPA stress marks to the beginning of the syllable.
	text = strsubrep(text, "([#.])([^#.]*)(" .. ipa_stress_c .. ")", "%1%3%2")
	-- Suppress syllable divider before IPA stress indicator.
	text = strsub(text, "%.(#?" .. ipa_stress_c .. ")", "%1")
	-- Make all primary stresses but the last one in a given word be secondary. May be fed by the first rule above.
	-- FIXME: Currently this is handled earlier, but we might want to move it here, as is done in [[Module:pt-pronunc]].
	-- text = strsubrep(text, "ˈ([^ ]+)ˈ", "ˌ%1ˈ")
	-- Make primary stresses in prefixes become secondary. (FIXME: Handled earlier now.)
	-- text = strsubrep(text, "ˈ([^#]*#" .. PREFIX_MARKER .. ")", "ˌ%1")

	-- Remove # symbols at word/text boundaries, as well as _ (which forces separate interpretation), pseudo-consonant
	-- markers (at edges of some prefixes/suffixes), and prefix markers, and recompose.
	text = strsub(text, "[#_" .. PSEUDOCONS .. "]", "")
	text = strnfc(text)

	return text
end


-- Resolve the spelling ambiguities of one syllabified word and convert its letters to (pseudo-)IPA symbols.
--
-- Parameters:
--   syllables: list of syllables, each a table with `onset`, `vowel`, `coda` and `stressed` fields. The list itself
--     also carries `stress` (index of the stressed syllable, may be nil), `is_prefix` and `is_suffix`. NOTE: the
--     stressed vowel and the coda of the last syllable are disambiguated IN PLACE on this table.
--   suffix_syllables: list of syllables appended as-is (no conversion) after the converted ones.
--   dialect: CENTRAL, BALEARICO or VALENCIANO; selects the dialect-specific vowel table.
--   pos, orig_word: not used by the active code (orig_word was only used in the error messages that are now
--     disabled); kept so that callers do not need to change.
--
-- Returns a new list with fresh per-syllable tables (same four fields) plus the `stress`, `is_prefix` and
-- `is_suffix` fields copied from `syllables`, followed by the entries of `suffix_syllables`.
local function preprocess_word(syllables, suffix_syllables, dialect, pos, orig_word)
	-- A stressed plain <e> or <o> does not say whether the vowel is close or open. The code this was derived from
	-- raised an error asking the user to mark the vowel explicitly; here we assume the close vowel (é, ó) instead,
	-- on the grounds that it is the most frequent case. There is no rule to decide, so the user has to respell
	-- the word when the vowel is actually open.
	if syllables.stress then
		local stressed_syll = syllables[syllables.stress]
		if strfind(stressed_syll.vowel, "[eo]") then
			stressed_syll.vowel = strsub(stressed_syll.vowel, "[eo]", {["e"] = "é", ["o"] = "ó"})
		end
	end

	-- Final -r is ambiguous in many cases.
	local final = syllables[#syllables]
	-- Stressed final r after a or i in non-monosyllables is treated as (r), i.e. verbal infinitives are assumed (NOTE:
	-- not always the case, e.g. there are many adjectives and nouns in -ar that should be marked as '(rr)', and
	-- several loanword nouns in -ir that should be marked as 'rr'). Likewise for stressed final r or rs after é in
	-- non-monosyllables (which are usually adjectives or nouns with the -er ending, but may be verbal infinitives,
	-- which should be marked as 'ê(r)'), and for stressed -or(s) after s, t, d or ç. That is, the r disappears other
	-- than in Valencian.
	if #syllables > 1 and final.stressed then
		local is_silent_r_ending =
			final.coda == "r" and strfind(final.vowel, "[aàiíé]") or
			final.coda == "rs" and final.vowel == "é" or
			final.vowel == "ó" and strfind(final.coda, "^rs?$") and strfind(final.onset, "[stdç]")
		if is_silent_r_ending then
			-- Replace only the <r>. Overwriting the whole coda would silently drop the plural <s> of -ers / -ors,
			-- unlike the fallback below, which keeps it.
			final.coda = strsub(final.coda, "^r(s?)$", TEMP_PAREN_R .. "%1")
		end
	end

	-- Every other final -r / -rs is ambiguous as well. The code this was derived from raised an error here, asking
	-- for one of 'rr' (pronounced everywhere), '(rr)' (pronounced everywhere but Balearic) or '(r)' (pronounced only
	-- in Valencian). Instead of failing we assume '(r)'.
	if strfind(final.coda, "^rs?$") or strfind(final.coda, "[^r]rs?$") then
		final.coda = strsub(final.coda, "r(s?)$", TEMP_PAREN_R .. "%1")
	end

	-- Work on copies from here on so that the letter -> IPA replacements do not touch the caller's syllables.
	local syllables_IPA = {stress = syllables.stress, is_prefix = syllables.is_prefix, is_suffix = syllables.is_suffix}

	for key, val in ipairs(syllables) do
		syllables_IPA[key] = {onset = val.onset, vowel = val.vowel, coda = val.coda, stressed = val.stressed}
	end

	-- The dialect-specific vowel table does not change between syllables, so pick it once.
	local dialect_vowels =
		dialect == CENTRAL and IPA_vowels_central or
		dialect == BALEARICO and IPA_vowels_balearic or
		IPA_vowels_valencian

	-- Replace letters with IPA equivalents
	for i, syll in ipairs(syllables_IPA) do
		-- Voicing of s: a lone onset <s> becomes z when the previous syllable's (written) coda is empty or a
		-- single <i>/<u>.
		if syll.onset == "s" and i > 1 and strfind(syllables[i - 1].coda, "^[iu]?$") then
			syll.onset = "z"
		end

		-- Soft <c>/<g> and the digraphs <tg>, <qu>, <gu> before a front vowel.
		if strfind(syll.vowel, "^[eèéêëií]$") then
			syll.onset = strsub(syll.onset, "tg$", "ʤ")
			syll.onset = strsub(syll.onset, "[cg]$", {["c"] = "s", ["g"] = "ʒ"})
			syll.onset = strsub(syll.onset, "[qg]u$", {["qu"] = "k", ["gu"] = "g"})
		end

		-- Final -ig / -igs: both are rewritten to the same affricate.
		syll.coda = strsub(syll.coda, "igs?$", "iʤ")

		syll.onset = replace_context_free(syll.onset)
		syll.coda = replace_context_free(syll.coda)

		-- Dialect-specific vowel values first, then the table shared by all dialects.
		syll.vowel = strsub(syll.vowel, ".", dialect_vowels)
		syll.vowel = strsub(syll.vowel, ".", IPA_vowels)
	end

	for _, suffix_syl in ipairs(suffix_syllables) do
		insert(syllables_IPA, suffix_syl)
	end

	return syllables_IPA
end

-- Convert one respelled word into a dot-separated string of syllables with AC (primary stress) and GR (secondary
-- stress) marks placed right after the vowel of each stressed syllable.
--
-- `pos` is the part-of-speech hint for this word (may be nil). When it is nil or ADV, a final -ment / -mént may be
-- split off and handled as a separately stressed suffix; in that case the remaining stem is processed as ADJ.
local function convertir_palabra(word, dialect, pos)
	local orig_word = word
	local suffix_syllables = {}

	if pos == ADV or not pos then
		-- Only the part before the ending is needed; the ending itself is rebuilt below.
		local stem = strmatch(word, "^(.*)(m[eé]nt)$")
		if stem then
			-- With an explicit ADV hint the split always happens. Otherwise the stem must not end in i/ï and must
			-- contain at least two vowels.
			local split_ment = pos == ADV
			if not split_ment then
				split_ment = not strfind(stem, "[iï]$") and strfind(stem, V .. ".*" .. V)
			end
			if split_ment then
				suffix_syllables = {{onset = "m", vowel = "e", coda = "nt", stressed = true}}
				pos = ADJ
				word = stem
			end
		end
	end

	word = word_fixes(word, dialect)
	local syllables = preprocess_word(split_syllables(word), suffix_syllables, dialect, pos, orig_word)

	-- Combine syllables.
	local has_ment = #suffix_syllables > 0
	local last = #syllables
	local pieces = {}
	for i, syll in ipairs(syllables) do
		-- With a split-off -ment the primary stress falls on that last syllable; otherwise it falls on the
		-- word's stressed syllable, unless the word is a prefix.
		local is_primary
		if has_ment then
			is_primary = i == last
		else
			is_primary = i == syllables.stress and not syllables.is_prefix
		end

		local mark
		if is_primary then
			mark = AC -- primary stress
		elseif syll.stressed then
			mark = GR -- secondary stress
		else
			mark = ""
		end
		pieces[#pieces + 1] = syll.onset .. syll.vowel .. mark .. syll.coda
	end
	return concat(pieces, ".")

end


-- Generate the pronunciation of a whole respelling for one dialect.
--
-- `text` may contain several fragments separated by "|". Each fragment is split into words on whitespace, the
-- words are passed through handle_unstressed_words() and then converted one by one. `pos` is a list indexed by the
-- running word number across all fragments; pos[n] is handed to the conversion of the n-th word.
local function generar_pron_aux(text, dialect, pos)
	local fragmentos_convertidos = {}
	local n_palabra = 1

	for _, fragmento in ipairs(strsplit(text, "%s*|%s*")) do
		local palabras = handle_unstressed_words(strsplit(fragmento, "%s"))
		local convertidas = {}
		for _, palabra in ipairs(palabras) do
			convertidas[#convertidas + 1] = convertir_palabra(palabra, dialect, pos[n_palabra])
			n_palabra = n_palabra + 1
		end
		fragmentos_convertidos[#fragmentos_convertidos + 1] = concat(convertidas, " ")
	end

	-- Put double ## at utterance boundaries (beginning/end of string) and at foot boundaries (marked with |).
	-- Note that if the string without pound signs is 'foo bar baz | bat quux', the final string will be
	-- '##foo# #bar# #baz## #|# ##bat# #quux##'.
	local con_limites = strsub("##" .. concat(fragmentos_convertidos, "# | #") .. "##", " ", "# #")
	return postprocess_general(con_limites, dialect)
end

-- Generate the Central, Valencian and Balearic pronunciations of a respelling.
--
-- Returns two parallel lists: the dialect labels, each wrapped in a one-element table, and the HTML-encoded
-- transcriptions, wrapped the same way. Raises an error if the respelling contains á, ì or ù (either case).
local function generar_pron(text, pos)
	if strfind(text, "[áìùÁÌÙ]") then
		error(("Invalid accented character in respelling '%s'; use accented à í ú, not the reversed versions"):format(text))
	end

	text = normalizar(text)

	-- Dialect code paired with the label shown for it, in output order.
	local dialectos = {
		{CENTRAL, "central"},
		{VALENCIANO, "valenciano"},
		{BALEARICO, "baleárico"},
	}

	local etiquetas, transcripciones = {}, {}
	for i, par in ipairs(dialectos) do
		etiquetas[i] = {par[2]}
		transcripciones[i] = {strhtml(generar_pron_aux(text, par[1], pos))}
	end

	return etiquetas, transcripciones
end

-- Determine the stress type of a transcribed word.
--
-- `w` is a transcription whose syllables are delimited by the characters in `sylsep_l` and whose primary stress
-- is marked with "ˈ". Returns nil when `w` is not a string; otherwise returns the stress type and the syllable
-- count. The type is "monosílaba" for one syllable, "doble" when there are at least four syllables and the last
-- two are "men" and "te", and otherwise "aguda", "llana", "esdrújula" or "sobreesdrújula" according to how many
-- syllables follow the first one carrying "ˈ". Raises an error when no "ˈ" is found.
local function determinar_acentuacion(w)
	if type(w) ~= "string" then
		return nil
	end

	local silabas = {}
	for silaba in strmatchit(w, "[^"..sylsep_l.."]+") do
		silabas[#silabas + 1] = silaba
	end
	local L = #silabas

	if L >= 4 and silabas[L-1] == "men" and silabas[L] == "te" then
		return "doble", L
	end
	if L == 1 then
		return "monosílaba", L
	end

	-- Name of the stress type by number of syllables after the stressed one; anything else is "sobreesdrújula".
	local tipos = {[0] = "aguda", [1] = "llana", [2] = "esdrújula"}

	-- Walk the syllables again, this time keeping any leading separators attached to each one.
	local i = 1
	for silaba in strmatchit(w, sylsep_c..'*'.."[^"..sylsep_l.."]+") do
		if strfind(silaba, "ˈ") then
			return tipos[L - i] or "sobreesdrújula", L
		end
		i = i + 1
	end
	error("Se esperaba que la pronunciación de la palabra hubiera sido generada con las marcas de acentuación")
end


-- Fill in the pronunciation-related fields of `args` for the page titled `titulo` and return `args`.
--
-- `args` is modified in place. The list-valued fields read here are "ayuda" (respellings), "ayudaextra"
-- (";"-separated flags per respelling), "fone" and "fono" (transcriptions). The fields written are "ayuda", "tl",
-- "pron", "fone", "fgraf", "rima", "ls" (syllable counts) and "ac" (stress types).
--
-- Steps:
--   1. With no respelling given, the page title is used as the respelling.
--   2. With neither "fone" nor "fono" given, transcriptions are generated from the respellings (at most 9
--      transcriptions in total). A match in `pron_abc` for the first respelling replaces the respelling list.
--   3. The rhyme is derived from the last word of the first transcription.
--   4. For titles without whitespace, the syllable counts and stress types of all transcriptions are collected.
function export.procesar_pron_args(titulo, args)
	local tit = titulo
	local vino_ayuda, x

	-- Remember whether the respelling came from the caller, as only then is it recorded in "fgraf".
	if #args["ayuda"] < 1 then
		args["ayuda"][1] = tit
	else
		vino_ayuda = true
	end

	if #args["fone"] < 1 and #args["fono"] < 1 then
		x = pron_abc[args["ayuda"][1]]
		if x then
			args["ayuda"] = x
			args["tl"] = x
		end

		local A = #args["ayuda"]
		local j = 1 -- index of the current respelling
		local k = 1 -- one more than the number of pronunciations inserted so far (at most 9 are inserted)
		while k <= 9 and j <= A do
			-- Translate the flags for this respelling through normalizar_cg; unknown flags are dropped.
			local cg = {}
			local flags = args["ayudaextra"][j] and strsplit(args["ayudaextra"][j], ";") or {}
			for _,flag in ipairs(flags) do
				local z = normalizar_cg[flag]
				if z then
					insert(cg, z)
				end
			end
			local pron, fone = generar_pron(args["ayuda"][j], cg)
			for i,_ in ipairs(fone) do
				insert(args["pron"], pron[i])
				insert(args["fone"], fone[i])
				if vino_ayuda then
					insert(args["fgraf"], {args["ayuda"][j]})
				end
				k = k + 1
				if k > 9 then
					break
				end
			end
			j = j + 1
		end
	end

	local tiene_espacios = strfind(tit, "%s")
	if args["fone"][1] and args["fone"][1][1] then
		local rim = strsub(args["fone"][1][1], ".*%s([^%s]+)$", "%1") -- keep only the last word
		rim = strsub(rim, "^.*ˈ(.-)$", "%1") -- keep what follows the last primary stress mark
		-- Drop everything before the first match of IPA_VOWEL_CLUSTER.
		args["rima"][1] = strsub(rim, ".-".."("..IPA_VOWEL_CLUSTER..".*"..")".."$", "%1")
	end

	if not tiene_espacios then
		if args["fone"][1] and args["fone"][1][1] then
			-- Used as sets so that each syllable count / stress type is listed only once.
			local ls, ac = {}, {}
			for _,f in ipairs(args["fone"]) do
				local ace, lon = determinar_acentuacion(f[1])
				-- determinar_acentuacion() returns nil for a non-string transcription. Skip such entries, as
				-- using nil as a table key would raise "table index is nil".
				if ace and lon then
					ls[lon] = true
					ac[ace] = true
				end
			end
			for lon,_ in pairs(ls) do
				insert(args["ls"], lon)
			end
			for ace,_ in pairs(ac) do
				insert(args["ac"], ace)
			end
		end
	end

	return args
end


return export