Module:uk-common

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

local export = {}

local lang = require("Module:languages").getByCode("uk")
local m_links = require("Module:links")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local m_uk_translit = require("Module:uk-translit")

-- Short local aliases for the Scribunto library functions used throughout this module.
-- All of them come from mw.ustring except rsplit, which is mw.text.split.
local u = mw.ustring.char
local rsplit = mw.text.split
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
-- rsubn returns every value mw.ustring.gsub returns (the new string and more);
-- use rsub() when only the string is wanted.
local rsubn = mw.ustring.gsub
local ulen = mw.ustring.len
local ulower = mw.ustring.lower

-- Like rsubn(), but yields only the substituted string; the parentheses
-- around the call drop every return value after the first.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end


local AC = u(0x0301) -- combining acute accent (U+0301)
local GR = u(0x0300) -- combining grave accent (U+0300)

-- Variant-code marker characters (U+FFF0 through U+FFF2); remove_variant_codes()
-- strips them from a word.
export.VAR1 = u(0xFFF0)
export.VAR2 = u(0xFFF1)
export.VAR3 = u(0xFFF2)
-- Character class matching any single variant-code marker.
export.var_code_c = "[" .. export.VAR1 .. export.VAR2 .. export.VAR3 .. "]"


-- Naming convention: a bare name is a string of characters; the corresponding
-- name ending in _c is the same set wrapped in [...] for use as a pattern
-- character class.
-- Cyrillic vowels (lowercase and uppercase)
export.vowel = "аеиоуіїяєюАЕИОУІЇЯЄЮ"
export.vowel_c = "[" .. export.vowel .. "]"
-- Matches anything that is not one of the vowels above (including accents,
-- hyphens and other non-letters).
export.non_vowel_c = "[^" .. export.vowel .. "]"
-- Consonants other than the hushing consonants and ц; note that this set also
-- includes the soft sign ь and the apostrophe.
export.cons_except_hushing_or_ts = "бдфгґйклмнпрствхзь'БДФГҐЙКЛМНПРСТВХЗЬ"
export.cons_except_hushing_or_ts_c = "[" .. export.cons_except_hushing_or_ts .. "]"
-- Hushing consonants
export.hushing = "чшжщЧШЖЩ"
export.hushing_c = "[" .. export.hushing .. "]"
export.hushing_or_ts = export.hushing .. "цЦ"
export.hushing_or_ts_c = "[" .. export.hushing_or_ts .. "]"
-- All consonants (union of the two sets above; so also includes ь and the apostrophe)
export.cons = export.cons_except_hushing_or_ts .. export.hushing_or_ts
export.cons_c = "[" .. export.cons .. "]"
-- Cyrillic velar consonants
export.velar = "кгґхКГҐХ"
export.velar_c = "[" .. export.velar .. "]"
-- uppercase Cyrillic letters (vowels, consonants and the soft sign)
export.uppercase = "АЕИОУІЇЯЄЮБЦДФГҐЧЙКЛМНПРСТВШХЗЖЬЩ"
export.uppercase_c = "[" .. export.uppercase .. "]"
-- Matches a combining acute or grave accent.
export.accents_c = "[" .. AC .. GR .. "]"


-- Replacement for a word-final consonant under the first palatalization;
-- looked up by export.apply_first_palatalization(). Only lowercase keys are present.
local first_palatalization = {
	["к"] = "ч",
	["г"] = "ж",
	["ґ"] = "ж",
	["х"] = "ш",
	["ц"] = "ч",
}


-- Replacement for a word-final velar under the second palatalization;
-- looked up by export.apply_second_palatalization(). Only lowercase keys are present.
local second_palatalization = {
	["к"] = "ц",
	["г"] = "з",
	["ґ"] = "з",
	["х"] = "с",
}


-- Transliterate `text` after stripping links from it: the result of
-- m_links.remove_links(text) is handed to m_uk_translit.tr() and that
-- function's result is returned as-is.
-- NOTE(review): because the inner call is the sole argument, every value
-- returned by remove_links() is forwarded to tr() -- confirm remove_links()
-- returns only the text.
function export.translit_no_links(text)
	return m_uk_translit.tr(m_links.remove_links(text))
end


-- Maps each precomposed Cyrillic letter carrying a grave accent to the plain
-- letter followed by the combining grave accent; used by export.decompose_grave().
local grave_decomposer = {
	["ѐ"] = "е" .. GR,
	["Ѐ"] = "Е" .. GR,
	["ѝ"] = "и" .. GR,
	["Ѝ"] = "И" .. GR,
}

-- Split precomposed Cyrillic letters bearing a grave accent into base letter +
-- combining grave. Nothing similar is done for the acute accent because there
-- are no precomposed Cyrillic letters with an acute, and full decomposition
-- would be undesirable for precomposed й, Й, ї, Ї, etc.
function export.decompose_grave(text)
	local decomposed = rsub(text, "[ѐЀѝЍ]", grave_decomposer)
	return decomposed
end


-- Return true if any word in `text` still needs an accent mark, false otherwise.
-- The text is split on whitespace; chunks that begin or end with a hyphen
-- (prefixes and suffixes) are ignored. Every remaining chunk is split on
-- hyphens, and a part needs an accent when it carries no acute/grave accent
-- and has more than one vowel.
function export.needs_accents(text)
	local decomposed = export.decompose_grave(text)
	for _, chunk in ipairs(rsplit(decomposed, "%s+")) do
		local is_affix = rfind(chunk, "^%-") or rfind(chunk, "%-$")
		if not is_affix then
			for _, part in ipairs(rsplit(chunk, "%-")) do
				local accented = rfind(part, export.accents_c)
				if not (accented or export.is_monosyllabic(part)) then
					return true
				end
			end
		end
	end
	return false
end


-- Check whether `word` contains an acute accent. The result is whatever
-- rfind() returns (match positions when an accent is present, nil otherwise),
-- so it should be used for its truthiness rather than compared to true/false.
function export.is_stressed(word)
	return rfind(word, AC)
end


-- Return true if some word of `text` (words being separated by whitespace
-- and/or hyphens) carries more than one acute accent, false otherwise.
function export.is_multi_stressed(text)
	for _, word in ipairs(rsplit(text, "[%s%-]+")) do
		-- Delete everything except acute accents, then count what remains.
		local accents_only = rsub(word, "[^́]", "")
		local num_accents = ulen(accents_only)
		if num_accents > 1 then
			return true
		end
	end
	return false
end


-- Return `word` with every acute accent deleted.
function export.remove_stress(word)
	local unstressed = rsub(word, AC, "")
	return unstressed
end


-- Return `word` with every variant-code marker (export.VAR1/VAR2/VAR3) deleted.
function export.remove_variant_codes(word)
	local cleaned = rsub(word, export.var_code_c, "")
	return cleaned
end


-- Apply the alternation between word-initial і/у and й/в.
-- `word` is the word to adjust and `previous` is the text preceding it.
-- A word beginning with і (or with й followed by a non-vowel) gets initial й
-- when `previous` ends in a vowel (optionally accented) and initial і
-- otherwise; likewise a word beginning with у (or with в followed by a
-- non-vowel) gets initial в after a vowel and у otherwise. Any other word is
-- returned unchanged.
function export.initial_alternation(word, previous)
	local final_vowel = export.vowel_c .. AC .. "?$"

	local i_j_initial = rfind(word, "^[іІ]") or rfind(word, "^[йЙ]" .. export.non_vowel_c)
	if i_j_initial then
		if rfind(previous, final_vowel) then
			return rsub(word, "^[іІ]", {["і"] = "й", ["І"] = "Й"})
		end
		return rsub(word, "^[йЙ]", {["й"] = "і", ["Й"] = "І"})
	end

	local u_v_initial = rfind(word, "^[уУ]") or rfind(word, "^[вВ]" .. export.non_vowel_c)
	if u_v_initial then
		if rfind(previous, final_vowel) then
			return rsub(word, "^[уУ]", {["у"] = "в", ["У"] = "В"})
		end
		return rsub(word, "^[вВ]", {["в"] = "у", ["В"] = "У"})
	end

	return word
end


-- Return true if `word` has at most one vowel, so a word with no vowels at
-- all also counts as monosyllabic.
function export.is_monosyllabic(word)
	local vowels_only = rsub(word, export.non_vowel_c, "")
	return ulen(vowels_only) <= 1
end


-- Put an acute accent on the vowel of a monosyllabic word. The word is
-- returned untouched if it is not monosyllabic, begins or ends with a hyphen,
-- or already has an acute accent.
function export.add_monosyllabic_stress(word)
	if not export.is_monosyllabic(word) then
		return word
	end
	if rfind(word, "^%-") or rfind(word, "%-$") or rfind(word, AC) then
		return word
	end
	return rsub(word, "(" .. export.vowel_c .. ")", "%1" .. AC)
end


-- Strip the acute accent from a monosyllabic word. Words that are not
-- monosyllabic, or that begin or end with a hyphen, are returned untouched.
function export.remove_monosyllabic_stress(word)
	if not export.is_monosyllabic(word) then
		return word
	end
	if rfind(word, "^%-") or rfind(word, "%-$") then
		return word
	end
	return export.remove_stress(word)
end


-- Return true if `word` contains no vowels at all.
function export.is_nonsyllabic(word)
	local vowels_only = rsub(word, export.non_vowel_c, "")
	return ulen(vowels_only) == 0
end


-- Check if word ends in a vowel, optionally followed by an acute accent.
-- The result is whatever rfind() returns (match positions, or nil when there
-- is no match), so it should be used for its truthiness.
function export.ends_in_vowel(stem)
	return rfind(stem, export.vowel_c .. AC .. "?$")
end


-- Stress the first syllable of a word that has no acute accent yet; a word
-- that already has one is returned unchanged.
function export.maybe_stress_initial_syllable(word)
	if rfind(word, AC) then
		return word
	end
	-- The lazy ".-" stops at the first vowel, which then receives the accent.
	return rsub(word, "^(.-" .. export.vowel_c .. ")", "%1" .. AC)
end


-- Stress the last syllable of a word that has no acute accent yet; a word
-- that already has one is returned unchanged.
function export.maybe_stress_final_syllable(word)
	if rfind(word, AC) then
		return word
	end
	-- The greedy ".*" runs up to the last vowel, which then receives the accent.
	return rsub(word, "(.*" .. export.vowel_c .. ")", "%1" .. AC)
end


-- Ordered {pattern, replacement} pairs applied to the end of a stem by
-- export.iotate(). Order matters: clusters are handled before the single
-- consonants they contain.
local iotation_rules = {
	{"с[кт]$", "щ"},
	{"з[дгґ]$", "ждж"},
	{"к?т$", "ч"},
	{"зк$", "жч"},
	{"[кц]$", "ч"},
	{"[сх]$", "ш"},
	{"[гз]$", "ж"},
	{"д$", "дж"},
	{"([бвмпф])$", "%1л"},
}

-- Return the iotated form of `stem`, obtained by running every rule in
-- iotation_rules, in order, against the stem-final consonant(s). A stem whose
-- ending matches no rule is returned unchanged.
function export.iotate(stem)
	local result = stem
	for _, rule in ipairs(iotation_rules) do
		result = rsub(result, rule[1], rule[2])
	end
	return result
end


-- Replace a word-final к, г, ґ, х or ц with its first-palatalization
-- counterpart from the first_palatalization table. A word with any other
-- ending is returned unchanged.
function export.apply_first_palatalization(word)
	local prefix, lastchar = rmatch(word, "^(.*)([кгґхц])$")
	if not prefix then
		return word
	end
	return prefix .. first_palatalization[lastchar]
end


-- Replace a word-final к, г, ґ or х with its second-palatalization
-- counterpart from the second_palatalization table. A word with any other
-- ending is returned unchanged.
function export.apply_second_palatalization(word)
	local prefix, lastchar = rmatch(word, "^(.*)([кгґх])$")
	if not prefix then
		return word
	end
	return prefix .. second_palatalization[lastchar]
end


-- Produce the reduced form of `word` by removing the final vowel of the stem.
-- The word must end in one of о, е, є, і (either case, optionally followed by
-- an acute accent) plus one or more consonants; otherwise nil is returned.
-- nil is also returned when the vowel is о and the following consonant is й.
-- Depending on context the removed vowel is replaced by nothing, by й (for є),
-- by an apostrophe (when the following consonant is й, which is dropped as
-- well), or by ь. Any acute accent on the removed vowel is discarded.
function export.reduce(word)
	-- pre = everything before the vowel; letter = the vowel; post = trailing consonant(s)
	local pre, letter, post = rmatch(word, "^(.*)([оОеЕєЄіІ])́?(" .. export.cons_c .. "+)$")
	if not pre then
		return nil
	end
	if letter == "о" or letter == "О" then
		-- FIXME, what about when the accent is on the removed letter?
		if post == "й" or post == "Й" then
			-- FIXME, is this correct?
			return nil
		end
		letter = ""
	else
		-- Case of any replacement letter follows the case of the trailing consonant(s).
		local is_upper = rfind(post, export.uppercase_c)
		if letter == "є" or letter == "Є" then
			-- англі́єц -> англі́йц-
			letter = is_upper and "Й" or "й"
		elseif post == "й" or post == "Й" then
			-- солове́й -> солов'-
			letter = "'"
			post = ""
		elseif (rfind(post, export.velar_c .. "$") and rfind(pre, export.cons_except_hushing_or_ts_c .. "$")) or
			(rfind(post, "[^йЙ" .. export.velar .. "]$") and rfind(pre, "[лЛ]$")) then
			-- FIXME, is this correct? This logic comes from ru-common.lua. The second clause that
			-- adds ь after л is needed but I'm not sure about the first one.
			letter = is_upper and "Ь" or "ь"
		else
			letter = ""
		end
	end
	return pre .. letter .. post
end


-- Produce the dereduced form of `stem` by inserting an epenthetic vowel before
-- its final consonant. Returns nil if `stem` does not end in some character
-- followed by a single character from export.cons_c.
-- If `epenthetic_stress` is truthy, any existing acute accent is first removed
-- from the stem and the inserted vowel receives the acute accent instead.
function export.dereduce(stem, epenthetic_stress)
	if epenthetic_stress then
		stem = export.remove_stress(stem)
	end
	-- We don't require there to be two consonants at the end because of ону́ка (gen pl ону́ок).
	-- pre = everything before the last two characters; letter = second-to-last
	-- character; post = final consonant (which may be ь or an apostrophe).
	local pre, letter, post = rmatch(stem, "^(.*)(.)(" .. export.cons_c .. ")$")
	if not pre then
		return nil
	end
	local is_upper = rfind(post, export.uppercase_c)
	local epvowel
	if rfind(letter, export.velar_c) or rfind(post, export.velar_c) or rfind(post, "[вВ]") then
		-- A velar on either side, or a following в, selects о.
		epvowel = is_upper and "О" or "о"
	elseif rfind(post, "['ьЬ]") then
		-- сім'я́ -> gen pl сіме́й
		-- ескадри́лья -> gen pl ескадри́лей
		-- The final apostrophe/soft sign is dropped; case follows the preceding letter.
		epvowel = rfind(letter, export.uppercase_c) and "Е" or "е"
		post = ""
	elseif rfind(letter, "[йЙ]") then
		-- яйце́ -> gen pl я́єць
		-- The й is absorbed into the inserted є.
		epvowel = is_upper and "Є" or "є"
		letter = ""
	else
		if rfind(letter, "[ьЬ]") then
			-- кільце́ -> gen pl кі́лець
			letter = ""
		end
		epvowel = is_upper and "Е" or "е"
	end
	if epenthetic_stress then
		epvowel = epvowel .. AC
	end
	return pre .. letter .. epvowel .. post
end


-- Apply a vowel alternation to the last vowel of `stem`.
-- `ialt` selects the alternation:
--   "io"  : і -> о, ї -> йо
--   "ijo" : і -> ьо (lowercase і only)
--   "ie"  : і -> е, ї -> є
--   "i"   : о or е -> і (a directly preceding ь is dropped)
-- In each case the vowel must be the last vowel of the stem, i.e. followed
-- only by an optional acute accent and zero or more consonants.
-- Returns two values: the modified stem and the original vowel that was
-- replaced. For any other value of `ialt` (including nil) the stem is returned
-- unchanged together with nil.
-- Raises an error if the requested alternation leaves the stem unchanged.
function export.apply_vowel_alternation(ialt, stem)
	local modstem, origvowel
	if ialt == "io" then
		-- ріг, gen sg. ро́га; плід, gen sg. плода́/пло́ду; безкра́їсть gen sg. безкра́йості
		modstem = rsub(stem, "([іІїЇ])(́?" .. export.cons_c .. "*)$",
			function(vowel, post)
				-- Record the replaced vowel for the second return value.
				origvowel = vowel
				if vowel == "і" then
					return "о" .. post
				elseif vowel == "І" then
					return "О" .. post
				elseif vowel == "ї" then
					return "йо" .. post
				else
					return "Йо" .. post
				end
			end
		)
		if modstem == stem then
			error("Indicator 'io' can't be applied because stem '" .. stem .. "' doesn't have an і as its last vowel")
		end
	elseif ialt == "ijo" then
		-- ко́лір, gen sg. ко́льору; вертолі́т, gen sg. вертольо́та
		modstem = rsub(stem, "і(́?" .. export.cons_c .. "*)$", "ьо%1")
		if modstem == stem then
			error("Indicator 'ijo' can't be applied because stem '" .. stem .. "' doesn't have an і as its last vowel")
		end
		origvowel = "і"
	elseif ialt == "ie" then
		modstem = rsub(stem, "([іїІЇ])(́?" .. export.cons_c .. "*)$",
			function(vowel, post)
				-- Record the replaced vowel for the second return value.
				origvowel = vowel
				if vowel == "і" then
					-- ведмі́дь gen sg. ведме́дя
					return "е" .. post
				elseif vowel == "І" then
					return "Е" .. post
				elseif vowel == "ї" then
					-- Ки́їв gen sg. Ки́єва
					return "є" .. post
				else
					return "Є" .. post
				end
			end
		)
		if modstem == stem then
			error("Indicator 'ie' can't be applied because stem '" .. stem .. "' doesn't have an і or ї as its last vowel")
		end
	elseif ialt == "i" then
		-- The optional ь before the vowel is matched outside the captures, so it
		-- is not carried over into the result.
		modstem = rsub(stem, "ь?([оеОЕ])(́?" .. export.cons_c .. "*)$",
			function(vowel, post)
				-- Record the replaced vowel for the second return value.
				origvowel = vowel
				if vowel == "о" or vowel == "е" then
					return "і" .. post
				else
					return "І" .. post
				end
			end
		)
		if modstem == stem then
			error("Indicator 'i' can't be applied because stem '" .. stem .. "' doesn't have an о or е as its last vowel")
		end
	else
		return stem, nil
	end
	return modstem, origvowel
end


-- Given a list of forms (each of which is a table of the form
-- {form=FORM, footnotes=FOOTNOTES}), join the FORM strings into a single
-- comma-separated string FORM,FORM,..., replacing embedded | signs with <!>.
-- Footnotes are ignored. Returns nil when `forms` is nil or false.
function export.concat_forms_in_slot(forms)
	if not forms then
		return nil
	end
	local escaped = {}
	for _, entry in ipairs(forms) do
		escaped[#escaped + 1] = rsub(entry.form, "|", "<!>")
	end
	return table.concat(escaped, ",")
end


-- Join `stem` and `ending` into a single form. An unknown stem "?" yields "?".
-- When the ending carries an acute accent, any acute accent in the stem is
-- removed first so that the result is stressed only on the ending.
function export.combine_stem_ending(stem, ending)
	if stem == "?" then
		return "?"
	end
	local base = stem
	if export.is_stressed(ending) then
		base = export.remove_stress(stem)
	end
	return base .. ending
end


-- Package `form` together with its footnotes. A single footnote given as a
-- string is wrapped in a one-element list. With footnotes the result is a
-- table {form = FORM, footnotes = FOOTNOTES}; without them (nil or false) the
-- bare `form` is returned.
function export.generate_form(form, footnotes)
	local notes = footnotes
	if type(notes) == "string" then
		notes = {notes}
	end
	if not notes then
		return form
	end
	return {form = form, footnotes = notes}
end


-- Shared implementation of the usage-note generators below.
--
-- Builds the sentence describing a pair of alternating word forms whose
-- initial letter is a vowel letter after consonants / clause-initially and
-- the matching consonant letter after vowels.
--
-- `frame` is the Scribunto frame of the invocation; the alternant is taken
-- from parameter 1 of the parent (template) frame, defaulting to the text of
-- the current page title.
-- `vowel_lc`/`vowel_uc` are the lowercase/uppercase vowel-initial letters and
-- `cons_lc`/`cons_uc` the corresponding consonant-initial letters.
--
-- The other member of the pair is derived by swapping the initial letter.
-- Going from the consonant form to the vowel form, stress is first added if
-- the consonant form is monosyllabic; going the other way, stress is removed
-- if the resulting consonant form is monosyllabic. The form that was supplied
-- is mentioned first in the returned sentence.
local function alternation_msg(frame, vowel_lc, vowel_uc, cons_lc, cons_uc)
	local params = {
		[1] = {}
	}
	local parargs = frame:getParent().args
	local args = require("Module:parameters").process(parargs, params)
	local alternant = args[1] or mw.title.getCurrentTitle().text

	local cons_class = "[" .. cons_lc .. cons_uc .. "]"
	local vowel_class = "[" .. vowel_lc .. vowel_uc .. "]"

	local vowel_alt, cons_alt, vowel_first
	if rfind(alternant, "^" .. cons_class) then
		cons_alt = alternant
		vowel_alt = rsub(export.add_monosyllabic_stress(cons_alt), "^(" .. cons_class .. ")",
			{[cons_lc] = vowel_lc, [cons_uc] = vowel_uc})
		vowel_first = false
	else
		vowel_alt = alternant
		cons_alt = export.remove_monosyllabic_stress(rsub(vowel_alt, "^(" .. vowel_class .. ")",
			{[vowel_lc] = cons_lc, [vowel_uc] = cons_uc}))
		vowel_first = true
	end

	vowel_alt = m_links.full_link({lang = lang, term = vowel_alt}, "term") .. " (used after consonants or at the beginning of a clause)"
	cons_alt = m_links.full_link({lang = lang, term = cons_alt}, "term") .. " (used after vowels)"

	local first, second
	if vowel_first then
		first, second = vowel_alt, cons_alt
	else
		first, second = cons_alt, vowel_alt
	end
	return "The forms " .. first .. " and " .. second .. " differ in pronunciation but are considered variants of the same word."
end

-- Template entry point: usage note for a word pair alternating between
-- initial у (after consonants or clause-initially) and в (after vowels).
function export.u_v_alternation_msg(frame)
	return alternation_msg(frame, "у", "У", "в", "В")
end

-- Template entry point: usage note for a word pair alternating between
-- initial і (after consonants or clause-initially) and й (after vowels).
function export.i_j_alternation_msg(frame)
	return alternation_msg(frame, "і", "І", "й", "Й")
end

return export