Module:grc-utilities/data

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Table that collects every value this module exports; it is returned at the
-- end of the file.
local data = {}

local U = require("Module:string/char")
-- Diacritic characters, each created from its Unicode code point. The trailing
-- comment gives the Unicode character name for that code point.

-- Vowel-length marks
local macron = U(0x0304)          -- COMBINING MACRON
local spacing_macron = U(0x00AF)  -- MACRON (spacing)
local modifier_macron = U(0x02C9) -- MODIFIER LETTER MACRON
local breve = U(0x0306)           -- COMBINING BREVE
local spacing_breve = U(0x02D8)   -- BREVE (spacing)

-- Breathings and diaeresis
local rough = U(0x0314)           -- COMBINING REVERSED COMMA ABOVE
local smooth = U(0x0313)          -- COMBINING COMMA ABOVE
local diaeresis = U(0x0308)       -- COMBINING DIAERESIS

-- Accents
local acute = U(0x0301)           -- COMBINING ACUTE ACCENT
local grave = U(0x0300)           -- COMBINING GRAVE ACCENT
local circum = U(0x0342)          -- COMBINING GREEK PERISPOMENI
local Latin_circum = U(0x0302)    -- COMBINING CIRCUMFLEX ACCENT

-- Other marks
local coronis = U(0x0343)         -- COMBINING GREEK KORONIS
local subscript = U(0x0345)       -- COMBINING GREEK YPOGEGRAMMENI (iota subscript)
-- Used as an undertie, although the character is really
-- COMBINING DOUBLE BREVE BELOW.
local undertie = U(0x035C)

-- Diacritic characters indexed by name, so that each one can be looked up as
-- data.diacritics.<name>.
data.diacritics = {
	macron = macron,
	spacing_macron = spacing_macron,
	modifier_macron = modifier_macron,
	breve = breve,
	spacing_breve = spacing_breve,
	rough = rough,
	smooth = smooth,
	diaeresis = diaeresis,
	acute = acute,
	grave = grave,
	circum = circum,
	Latin_circum = Latin_circum,
	coronis = coronis,
	subscript = subscript,
}

-- Build data.diacritics.all: a string containing every named diacritic exactly
-- once.
--
-- The characters are gathered in a separate buffer and "all" is stored only at
-- the end. Storing "all" in data.diacritics first and then appending to it
-- while iterating that same table with pairs() makes the loop visit the "all"
-- entry itself and append the partial result to itself, duplicating
-- characters. The names are sorted so that the result does not depend on the
-- unspecified traversal order of pairs().
do
	local names = {}
	for name in pairs(data.diacritics) do
		-- Skip a pre-existing "all" entry so it can never be folded into itself.
		if name ~= "all" then
			names[#names + 1] = name
		end
	end
	table.sort(names)

	local chars = {}
	for i, name in ipairs(names) do
		chars[i] = data.diacritics[name]
	end
	data.diacritics.all = table.concat(chars)
end

-- data.named is a second name for the data.diacritics table (same table, not a
-- copy).
data.named = data.diacritics

-- All named diacritics wrapped in square brackets, i.e. in the form of a
-- pattern character set; data.all is an alias for the same string.
data.diacritic = "[" .. data.diacritics.all .. "]"
data.all = data.diacritic

-- Diacritics sorted into four numbered groups. Groups 1-3 are bracketed
-- character sets; group 4 is the bare iota subscript character.
data.diacritic_groups = {
	"[" .. macron .. breve .. "]",               -- 1: length marks
	"[" .. diaeresis .. smooth .. rough .. "]",  -- 2: diaeresis and breathings
	"[" .. acute .. grave .. circum .. "]",      -- 3: accents
	subscript,                                   -- 4: iota subscript
}
-- data.groups is the same table under a shorter name.
data.groups = data.diacritic_groups
-- Named access to the accent group (group 3).
data.diacritic_groups.accents = data.groups[3]

-- Map each combining diacritic to the number of the group it belongs to
-- (1 = length marks, 2 = breathings/diaeresis, 3 = accents, 4 = subscript).
data.diacritic_order = {}
for rank, marks in ipairs{
	{ macron, breve },
	{ rough, smooth, diaeresis },
	{ acute, grave, circum },
	{ subscript },
} do
	for _, mark in ipairs(marks) do
		data.diacritic_order[mark] = rank
	end
end

-- Maps spacing (standalone) and look-alike diacritics to the combining
-- diacritic, or sequence of combining diacritics, that corresponds to them.
data.diacritical_conversions = {
	-- Length marks: spacing and modifier-letter forms
	[spacing_macron] = macron,
	[modifier_macron] = macron,
	[spacing_breve] = breve,

	-- Rough breathing: spacing form, modifier letter reversed comma
	["῾"] = rough,
	["ʽ"] = rough,

	-- Smooth breathing: spacing form, modifier letter apostrophe,
	-- combining coronis
	["᾿"] = smooth,
	["ʼ"] = smooth,
	[coronis] = smooth,

	-- Acute and grave accents
	["´"] = acute,
	["`"] = grave,

	-- Circumflex: Greek perispomeni, modifier circumflex, combining Latin
	-- circumflex
	["῀"] = circum,
	["ˆ"] = circum,
	[Latin_circum] = circum,

	-- Smooth breathing combined with an accent
	["῎"] = smooth .. acute,
	["῍"] = smooth .. grave,
	["῏"] = smooth .. circum,

	-- Rough breathing combined with an accent
	["῞"] = rough .. acute,
	["῝"] = rough .. grave,
	["῟"] = rough .. circum,

	-- Diaeresis, alone and combined with an accent
	["¨"] = diaeresis,
	["΅"] = diaeresis .. acute,
	["῭"] = diaeresis .. grave,
	["῁"] = diaeresis .. circum,
}
-- data.conversions is the same table under a shorter name.
data.conversions = data.diacritical_conversions

-- Plain Greek consonant and vowel letters, each as a bare string of characters
-- and as the same string wrapped in square brackets (character-set form).
data.consonants = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ"
data.consonant = "[" .. data.consonants .. "]"
data.vowels = "ΑαΕεΗηΙιΟοΥυΩω"
data.vowel = "[" .. data.vowels .. "]"

-- The combining diacritics, listed in the same order as the diacritic groups:
-- length marks, breathings/diaeresis, accents, iota subscript.
data.combining_diacritics = macron .. breve
	.. rough .. smooth .. diaeresis
	.. acute .. grave .. circum
	.. subscript
data.combining_diacritic = "[" .. data.combining_diacritics .. "]"

-- Ranges covering the basic Greek letters, with and without diacritics,
-- written in character-set range notation.
local greek_letter_ranges = table.concat{
	"ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-",
	-- Capital iota with oxia is produced from its code point: typed literally
	-- in a string it would be normalized to capital iota with tonos.
	U(0x1FDB),
	"Ὶῠ-Ῥῲ-ῼ",
}

-- Everything that may occur inside a word: the letter ranges, the combining
-- diacritics and the undertie. word_character is the bracketed set form.
data.word_characters = table.concat{
	greek_letter_ranges,
	data.combining_diacritics,
	undertie,
}
data.word_character = "[" .. data.word_characters .. "]"

return data