Pergi ke kandungan

Modul:languages: Perbezaan antara semakan

Daripada Wikikamus
Kandungan dihapus Kandungan ditambah
Tofeiku (bincang | sumb.)
ms
Hayate891 (bincang | sumb.)
Kemaskini
Teg-teg: Suntingan mudah alih Suntingan web mudah alih
Baris 1: Baris 1:
local export = {}
local export = {}


--[=[
--[=[ This function checks for things that could plausibly be a language code:
Throw an error for an invalid language code or script code.
two or three lowercase letters, two or three groups of three lowercase
letters with hyphens between them. If such a pattern is not found,
it is likely the editor simply forgot to enter a language code. ]=]


`lang_code` (required) is the bad code and can be nil or a non-string.
function export.err(langCode, param, text, template_tag, not_real_lang)

`param` (required) is the name of the parameter in which the code was contained. It can be a string, a number
(for a numeric param, in which case the param will show up in the error message as an ordinal such as
"first" or "second"), or `true` if no parameter can be clearly identified.

`code_desc` (optional) is text describing what the code is; by default, "language code".

`template_text` (optional) is a string specifying the template that generated the error, or a function
to generate this string. If given, it will be displayed in the error message.

`not_real_lang` (optional), if given, indicates that the code is not in the form of a language code
(e.g. it's a script code). Normally, this function checks for things that could plausibly be a language code:
two or three lowercase letters, two or three groups of three lowercase letters with hyphens between them.
If such a pattern is found, a different error message is displayed (indicating an invalid code) than otherwise
(indicating a missing code). If `not_real_lang` is given, this check is suppressed.
]=]

function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
local ordinals = {
local ordinals = {
"pertama", "kedua", "ketiga", "keempat", "fifth", "sixth",
"pertama", "kedua", "ketiga", "keempat", "fifth", "sixth",
Baris 14: Baris 30:
}
}
text = text or "language code"
code_desc = code_desc or "language code"
if not template_tag then
if not template_tag then
Baris 24: Baris 40:
template_tag = " (Templat asal: " .. template_tag .. ")"
template_tag = " (Templat asal: " .. template_tag .. ")"
end
end
local paramType = type(param)
local function err(msg)
error(msg .. template_tag, 3)
if paramType == "number" then
end
ordinal = ordinals[param]
local param_type = type(param)
param = ordinal .. ' parameter'
local in_the_param
elseif paramType == "string" then
param = 'parameter "' .. param .. '"'
if param == true then
-- handled specially below
in_the_param = ""
else
else
if param_type == "number" then
error("The parameter name is "
param = ordinals[param] .. " parameter"
.. (paramType == "table" and "a table" or tostring(param))
elseif param_type == "string" then
.. ", but it should be a number or a string." .. template_tag, 2)
param = 'parameter "' .. param .. '"'
else
err("The parameter name is "
.. (param_type == "table" and "a table" or tostring(param))
.. ", but it should be a number or a string.")
end
in_the_param = " in the " .. param
end
end
if not lang_code or lang_code == "" then
if param == true then
err("The " .. code_desc .. " is missing.")
else
err("The " .. param .. " (" .. code_desc .. ") is missing.")
end
elseif type(lang_code) ~= "string" then
err("The " .. code_desc .. in_the_param .. " is supposed to be a string but is a " .. type(lang_code) .. ".")
-- Can use string.find because language codes only contain ASCII.
-- Can use string.find because language codes only contain ASCII.
elseif not_real_lang or lang_code:find("^%l%l%l?$")
if not langCode or langCode == "" then
or lang_code:find("^%l%l%l%-%l%l%l$")
error("The " .. param .. " (" .. text .. ") is missing." .. template_tag, 2)
elseif not_real_lang or langCode:find("^%l%l%l?$")
or lang_code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then
err("The " .. code_desc .. " \"" .. lang_code .. "\"" .. in_the_param .. " is not valid. See [[Wiktionary:List of languages]].")
or langCode:find("^%l%l%l%-%l%l%l$")
or langCode:find("^%l%l%l%-%l%l%l%-%l%l%l$") then
error("The " .. text .. " \"" .. langCode .. "\" is not valid." .. template_tag, 2)
else
else
error("Please enter a " .. text .. " in the " .. param .. "." .. template_tag, 2)
err("Please specify a " .. code_desc .. in_the_param .. ". The value \"" .. lang_code .. "\" is not valid. See [[Wiktionary:List of languages]].")
end
end
end
end
Baris 76: Baris 107:
function Language:getCanonicalName()
function Language:getCanonicalName()
return self._rawData[1] or self._rawData.canonicalName
return self._rawData[1] or self._rawData.canonicalName
end


function Language:getDisplayForm()
return self:getCanonicalName()
end
end




function Language:getOtherNames(onlyOtherNames)
function Language:getOtherNames(onlyOtherNames)
self:loadInExtraData()
return require("Module:language-like").getOtherNames(self, onlyOtherNames)
return require("Module:language-like").getOtherNames(self, onlyOtherNames)
end
end
Baris 85: Baris 122:


function Language:getAliases()
function Language:getAliases()
self:loadInExtraData()
return self._rawData.aliases or {}
return self._extraData.aliases or {}
end
end




function Language:getVarieties(flatten)
function Language:getVarieties(flatten)
self:loadInExtraData()
return require("Module:language-like").getVarieties(self, flatten)
return require("Module:language-like").getVarieties(self, flatten)
end
end
Baris 132: Baris 171:


function Language:getWikidataItem()
function Language:getWikidataItem()
return self._rawData[2] or self._rawData.wikidata_item
local item = self._rawData[2]
if type(item) == "number" then
return "Q" .. item
else
return item
end
end
end


Baris 140: Baris 185:
self._scriptObjects = {}
self._scriptObjects = {}
for _, sc in ipairs(self._rawData.scripts or { "None" }) do
for _, sc in ipairs(self:getScriptCodes()) do
table.insert(self._scriptObjects, m_scripts.getByCode(sc))
table.insert(self._scriptObjects, m_scripts.getByCode(sc))
end
end
Baris 149: Baris 194:


function Language:getScriptCodes()
function Language:getScriptCodes()
return self._rawData.scripts or { "None" }
return self._rawData.scripts or self._rawData[4] or { "None" }
end
end


Baris 232: Baris 277:




function Language:getCategoryName()
function Language:getCategoryName(nocap)
local name = self:getCanonicalName()
local name = self:getCanonicalName()
-- If the name already has "language" in it, don't add it.
-- If the name already has "language" in it, don't add it.
if name:find("[Bb]ahasa$") then
if not name:find("[Bb]ahasa$") then
return name
name = "Bahasa " .. name
end
else
if not nocap then
return "Bahasa " .. name
name = mw.getContentLanguage():ucfirst(name)
end
end
return name
end
end




function Language:makeCategoryLink()
function Language:makeCategoryLink()
return "[[:Kategori:" .. self:getCategoryName() .. "|" .. self:getCanonicalName() .. "]]"
return "[[:Kategori:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]"
end
end


Baris 271: Baris 318:
if type(self._rawData.entry_name) == "table" then
if type(self._rawData.entry_name) == "table" then
text = do_entry_name_or_sort_key_replacements(text, self._rawData.entry_name)
text = do_entry_name_or_sort_key_replacements(text, self._rawData.entry_name)
end
return text
end


-- Return true if the language has display processing enabled, i.e. lang:makeDisplayText()
-- does non-trivial processing.
function Language:hasDisplayProcessing()
return not not self._rawData.display
end


-- Apply display-text replacements to `text`, if any.
function Language:makeDisplayText(text)
if type(self._rawData.display) == "table" then
text = do_entry_name_or_sort_key_replacements(text, self._rawData.display)
end
end
Baris 366: Baris 430:
ancestors = self._rawData.ancestors,
ancestors = self._rawData.ancestors,
canonicalName = self:getCanonicalName(),
canonicalName = self:getCanonicalName(),
categoryName = self:getCategoryName(),
categoryName = self:getCategoryName("nocap"),
code = self._code,
code = self._code,
entryNamePatterns = entryNamePatterns,
entryNamePatterns = entryNamePatterns,
Baris 374: Baris 438:
aliases = self:getAliases(),
aliases = self:getAliases(),
varieties = self:getVarieties(),
varieties = self:getVarieties(),
scripts = self._rawData.scripts,
scripts = self._rawData.scripts or self._rawData[4],
type = self:getType(),
type = self:getType(),
wikimediaLanguages = self._rawData.wikimedia_codes,
wikimediaLanguages = self._rawData.wikimedia_codes,
Baris 384: Baris 448:




-- Do NOT use this method!
-- Do NOT use these methods!
-- All uses should be pre-approved on the talk page!
-- All uses should be pre-approved on the talk page!
function Language:getRawData()
function Language:getRawData()
return self._rawData
return self._rawData
end

function Language:getRawExtraData()
self:loadInExtraData()
return self._extraData
end
end


Baris 401: Baris 470:
elseif code:find("^[%l-]+$") then
elseif code:find("^[%l-]+$") then
return "languages/datax"
return "languages/datax"
else
return nil
end
end


function export.getExtraDataModuleName(code)
if code:find("^%l%l$") then
return "languages/extradata2"
elseif code:find("^%l%l%l$") then
local prefix = code:sub(1, 1)
return "languages/extradata3/" .. prefix
elseif code:find("^[%l-]+$") then
return "languages/extradatax"
else
else
return nil
return nil
Baris 410: Baris 493:
local modulename = export.getDataModuleName(code)
local modulename = export.getDataModuleName(code)
return modulename and mw.loadData("Module:" .. modulename)[code] or nil
return modulename and mw.loadData("Module:" .. modulename)[code] or nil
end


local function getRawExtraLanguageData(code)
local modulename = export.getExtraDataModuleName(code)
return modulename and mw.loadData("Module:" .. modulename)[code] or nil
end


function Language:loadInExtraData()
if not self._extraData then
-- load extra data from module and assign to meta table
-- use empty table as a fallback if extra data is nil
local meta = getmetatable(self)
meta._extraData = getRawExtraLanguageData(self._code) or {}
setmetatable(self, meta)
end
end
end


Baris 448: Baris 548:
codetext = "language code"
codetext = "language code"
end
end
if paramForError == true then
export.err(code, paramForError, codetext)
error("The " .. codetext .. " \"" .. code .. "\" is not valid.")
else
export.err(code, paramForError, codetext)
end
end
end
return retval
return retval
Baris 464: Baris 560:
if not code then
if not code then
if errorIfInvalid then
if errorIfInvalid then
error("The language name \"" .. name .. "\" is not valid.")
error("The language name \"" .. name .. "\" is not valid. See [[Wiktionary:List of languages]].")
else
else
return nil
return nil
Baris 479: Baris 575:
local retval = code and export.makeObject(code, getRawLanguageData(code)) or nil
local retval = code and export.makeObject(code, getRawLanguageData(code)) or nil
if not retval and allowEtymLang then
if not retval and allowEtymLang then
retval = require("Module:etymology languages").getByCanonicalName(code)
retval = require("Module:etymology languages").getByCanonicalName(name)
end
end
if not retval and allowFamily then
if not retval and allowFamily then
local famname = name:match("^(.*) languages$")
retval = require("Module:families").getByCanonicalName(code)
famname = famname or name
retval = require("Module:families").getByCanonicalName(famname)
end
end
if not retval and errorIfInvalid then
if not retval and errorIfInvalid then

Semakan pada 15:38, 12 Disember 2021

local export = {}

--[=[
Throw an error for an invalid language code or script code.

`lang_code` (required) is the bad code and can be nil or a non-string.

`param` (required) is the name of the parameter in which the code was contained. It can be a string, a number
	(for a numeric param, in which case the param will show up in the error message as an ordinal such as
	"first" or "second"), or `true` if no parameter can be clearly identified.

`code_desc` (optional) is text describing what the code is; by default, "language code".

`template_text` (optional) is a string specifying the template that generated the error, or a function
	to generate this string. If given, it will be displayed in the error message.

`not_real_lang` (optional), if given, indicates that the code is not in the form of a language code
	(e.g. it's a script code). Normally, this function checks for things that could plausibly be a language code:
	two or three lowercase letters, two or three groups of three lowercase letters with hyphens between them.
	If such a pattern is found, a different error message is displayed (indicating an invalid code) than otherwise
	(indicating a missing code). If `not_real_lang` is given, this check is suppressed.
]=]

function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
	local ordinals = {
		"pertama", "kedua", "ketiga", "keempat", "fifth", "sixth",
		"seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth",
		"thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth",
		"eighteenth", "nineteenth", "twentieth"
	}
	
	code_desc = code_desc or "language code"
	
	if not template_tag then
		template_tag = ""
	else
		if type(template_tag) ~= "string" then
			template_tag = template_tag()
		end
		template_tag = " (Templat asal: " .. template_tag .. ")"
	end
	local function err(msg)
		error(msg .. template_tag, 3)
	end
	local param_type = type(param)
	local in_the_param
	if param == true then
		-- handled specially below
		in_the_param = ""
	else
		if param_type == "number" then
			param = ordinals[param] .. " parameter"
		elseif param_type == "string" then
			param = 'parameter "' .. param .. '"'
		else
			err("The parameter name is "
					.. (param_type == "table" and "a table" or tostring(param))
					.. ", but it should be a number or a string.")
		end
		in_the_param = " in the " .. param
	end
	
	if not lang_code or lang_code == "" then
		if param == true then
			err("The " .. code_desc .. " is missing.")
		else
			err("The " .. param .. " (" .. code_desc .. ") is missing.")
		end
	elseif type(lang_code) ~= "string" then
		err("The " .. code_desc .. in_the_param .. " is supposed to be a string but is a " .. type(lang_code) .. ".")
	-- Can use string.find because language codes only contain ASCII.
	elseif not_real_lang or lang_code:find("^%l%l%l?$")
			or lang_code:find("^%l%l%l%-%l%l%l$")
			or lang_code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then
		err("The " .. code_desc .. " \"" .. lang_code .. "\"" .. in_the_param .. " is not valid. See [[Wiktionary:List of languages]].")
	else
		err("Please specify a " .. code_desc .. in_the_param .. ". The value \"" .. lang_code .. "\" is not valid. See [[Wiktionary:List of languages]].")
	end
end

local function do_entry_name_or_sort_key_replacements(text, replacements)
	if replacements.from then
		for i, from in ipairs(replacements.from) do
			local to = replacements.to[i] or ""
			text = mw.ustring.gsub(text, from, to)
		end
	end
	
	if replacements.remove_diacritics then
		text = mw.ustring.toNFD(text)
		text = mw.ustring.gsub(text,
			'[' .. replacements.remove_diacritics .. ']',
			'')
		text = mw.ustring.toNFC(text)
	end
	
	return text
end

local Language = {}

function Language:getCode()
	return self._code
end


function Language:getCanonicalName()
	return self._rawData[1] or self._rawData.canonicalName
end


function Language:getDisplayForm()
	return self:getCanonicalName()
end


function Language:getOtherNames(onlyOtherNames)
	self:loadInExtraData()
	return require("Module:language-like").getOtherNames(self, onlyOtherNames)
end


function Language:getAliases()
	self:loadInExtraData()
	return self._extraData.aliases or {}
end


function Language:getVarieties(flatten)
	self:loadInExtraData()
	return require("Module:language-like").getVarieties(self, flatten)
end


function Language:getType()
	return self._rawData.type or "regular"
end


function Language:getWikimediaLanguages()
	if not self._wikimediaLanguageObjects then
		local m_wikimedia_languages = require("Module:wikimedia languages")
		self._wikimediaLanguageObjects = {}
		local wikimedia_codes = self._rawData.wikimedia_codes or { self._code }
		
		for _, wlangcode in ipairs(wikimedia_codes) do
			table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode))
		end
	end
	
	return self._wikimediaLanguageObjects
end

function Language:getWikipediaArticle()
	if self._rawData.wikipedia_article then
		return self._rawData.wikipedia_article 
	elseif self._wikipedia_article then
		return self._wikipedia_article
	elseif self:getWikidataItem() and mw.wikibase then
		self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), 'mswiki')
	end
	if not self._wikipedia_article then
		self._wikipedia_article = mw.ustring.gsub(self:getCategoryName(), "bahasa Kreol", "Kreol")
	end
	return self._wikipedia_article
end

function Language:makeWikipediaLink()
	return "bahasa [[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]"
end

function Language:getWikidataItem()
	local item = self._rawData[2]
	
	if type(item) == "number" then
		return "Q" .. item
	else
		return item
	end
end

function Language:getScripts()
	if not self._scriptObjects then
		local m_scripts = require("Module:scripts")
		self._scriptObjects = {}
		
		for _, sc in ipairs(self:getScriptCodes()) do
			table.insert(self._scriptObjects, m_scripts.getByCode(sc))
		end
	end
	
	return self._scriptObjects
end

function Language:getScriptCodes()
	return self._rawData.scripts or self._rawData[4] or { "None" }
end

function Language:getFamily()
	if self._familyObject then
		return self._familyObject
	end
		
	local family = self._rawData[3] or self._rawData.family 
	if family then
		self._familyObject = require("Module:families").getByCode(family)
	end
	
	return self._familyObject
end


function Language:getAncestors()
	if not self._ancestorObjects then
		self._ancestorObjects = {}
		
		if self._rawData.ancestors then
			for _, ancestor in ipairs(self._rawData.ancestors) do
				table.insert(self._ancestorObjects, export.getByCode(ancestor) or require("Module:etymology languages").getByCode(ancestor))
			end
		else
			local fam = self:getFamily()
			local protoLang = fam and fam:getProtoLanguage() or nil
			
			-- For the case where the current language is the proto-language
			-- of its family, we need to step up a level higher right from the start.
			if protoLang and protoLang:getCode() == self:getCode() then
				fam = fam:getFamily()
				protoLang = fam and fam:getProtoLanguage() or nil
			end
			
			while not protoLang and not (not fam or fam:getCode() == "qfa-not") do
				fam = fam:getFamily()
				protoLang = fam and fam:getProtoLanguage() or nil
			end
			
			table.insert(self._ancestorObjects, protoLang)
		end
	end
	
	return self._ancestorObjects
end

local function iterateOverAncestorTree(node, func)
	for _, ancestor in ipairs(node:getAncestors()) do
		if ancestor then
			local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func)
			if ret then
				return ret
			end
		end
	end
end

function Language:getAncestorChain()
	if not self._ancestorChain then
		self._ancestorChain = {}
		local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil
		
		while step do
			table.insert(self._ancestorChain, 1, step)
			step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil
		end
	end
	
	return self._ancestorChain
end


function Language:hasAncestor(otherlang)
	local function compare(ancestor)
		return ancestor:getCode() == otherlang:getCode()
	end
	
	return iterateOverAncestorTree(self, compare) or false
end


function Language:getCategoryName(nocap)
	local name = self:getCanonicalName()
	
	-- If the name already has "language" in it, don't add it.
	if not name:find("[Bb]ahasa$") then
		name = "Bahasa " .. name
	end
	if not nocap then
		name = mw.getContentLanguage():ucfirst(name)
	end
	return name
end


function Language:makeCategoryLink()
	return "[[:Kategori:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]"
end


function Language:getStandardCharacters()
	return self._rawData.standardChars
end


function Language:makeEntryName(text)
	text = mw.ustring.match(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text
	
	if self:getCode() == "ar" then
		local U = mw.ustring.char
		local taTwiil = U(0x640)
		local waSla = U(0x671)
		-- diacritics ordinarily removed by entry_name replacements
		local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670)
		
		if text == waSla or mw.ustring.find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. "$") then
			return text
		end
	end
	
	if type(self._rawData.entry_name) == "table" then
		text = do_entry_name_or_sort_key_replacements(text, self._rawData.entry_name)
	end
	
	return text
end


-- Return true if the language has display processing enabled, i.e. lang:makeDisplayText()
-- does non-trivial processing.
function Language:hasDisplayProcessing()
	return not not self._rawData.display
end


-- Apply display-text replacements to `text`, if any.
function Language:makeDisplayText(text)
	if type(self._rawData.display) == "table" then
		text = do_entry_name_or_sort_key_replacements(text, self._rawData.display)
	end
	
	return text
end


-- Add to data tables?
local has_dotted_undotted_i = {
	["az"] = true,
	["crh"] = true,
	["gag"] = true,
	["kaa"] = true,
	["tt"] = true,
	["tr"] = true,
	["zza"] = true,
}

function Language:makeSortKey(name, sc)
	if has_dotted_undotted_i[self:getCode()] then
		name = name:gsub("I", "ı")
	end
	
	name = mw.ustring.lower(name)
	
	-- Remove initial hyphens and *
	local hyphens_regex = "^[-־ـ*]+(.)"
	name = mw.ustring.gsub(name, hyphens_regex, "%1")
	
	-- If there are language-specific rules to generate the key, use those
	if type(self._rawData.sort_key) == "table" then
		name = do_entry_name_or_sort_key_replacements(name, self._rawData.sort_key)
	elseif type(self._rawData.sort_key) == "string" then
		name = require("Module:" .. self._rawData.sort_key).makeSortKey(name, self:getCode(), sc and sc:getCode())
	end
	
	-- Remove parentheses, as long as they are either preceded or followed by something
	name = mw.ustring.gsub(name, "(.)[()]+", "%1")
	name = mw.ustring.gsub(name, "[()]+(.)", "%1")
	
	if has_dotted_undotted_i[self:getCode()] then
		name = name:gsub("i", "İ")
	end
	
	return mw.ustring.upper(name)
end

function Language:overrideManualTranslit()
	if self._rawData.override_translit then
		return true
	else
		return false
	end
end


function Language:transliterate(text, sc, module_override)
	if not ((module_override or self._rawData.translit_module) and text) then
		return nil
	end
	
	if module_override then
		require("Module:debug").track("module_override")
	end
	
	return require("Module:" .. (module_override or self._rawData.translit_module)).tr(text, self:getCode(), sc and sc:getCode() or nil)
end

function Language:hasTranslit()
	return self._rawData.translit_module and true or false
end


function Language:link_tr()
	return self._rawData.link_tr and true or false
end


function Language:toJSON()
	local entryNamePatterns = nil
	local entryNameRemoveDiacritics = nil
	
	if self._rawData.entry_name then
		entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics
		if self._rawData.entry_name.from then
			entryNamePatterns = {}
			for i, from in ipairs(self._rawData.entry_name.from) do
				local to = self._rawData.entry_name.to[i] or ""
				table.insert(entryNamePatterns, { from = from, to = to })
			end
		end
	end
	
	local ret = {
		ancestors = self._rawData.ancestors,
		canonicalName = self:getCanonicalName(),
		categoryName = self:getCategoryName("nocap"),
		code = self._code,
		entryNamePatterns = entryNamePatterns,
		entryNameRemoveDiacritics = entryNameRemoveDiacritics,
		family = self._rawData[3] or self._rawData.family,
		otherNames = self:getOtherNames(true),
		aliases = self:getAliases(),
		varieties = self:getVarieties(),
		scripts = self._rawData.scripts or self._rawData[4],
		type = self:getType(),
		wikimediaLanguages = self._rawData.wikimedia_codes,
		wikidataItem = self:getWikidataItem(),
	}
	
	return require("Module:JSON").toJSON(ret)
end


-- Do NOT use these methods!
-- All uses should be pre-approved on the talk page!
function Language:getRawData()
	return self._rawData
end

function Language:getRawExtraData()
	self:loadInExtraData()
	return self._extraData
end

Language.__index = Language


function export.getDataModuleName(code)
	if code:find("^%l%l$") then
		return "languages/data2"
	elseif code:find("^%l%l%l$") then
		local prefix = code:sub(1, 1)
		return "languages/data3/" .. prefix
	elseif code:find("^[%l-]+$") then
		return "languages/datax"
	else
		return nil
	end
end


function export.getExtraDataModuleName(code)
	if code:find("^%l%l$") then
		return "languages/extradata2"
	elseif code:find("^%l%l%l$") then
		local prefix = code:sub(1, 1)
		return "languages/extradata3/" .. prefix
	elseif code:find("^[%l-]+$") then
		return "languages/extradatax"
	else
		return nil
	end
end


local function getRawLanguageData(code)
	local modulename = export.getDataModuleName(code)
	return modulename and mw.loadData("Module:" .. modulename)[code] or nil
end


local function getRawExtraLanguageData(code)
	local modulename = export.getExtraDataModuleName(code)
	return modulename and mw.loadData("Module:" .. modulename)[code] or nil
end


function Language:loadInExtraData()
	if not self._extraData then
		-- load extra data from module and assign to meta table
		-- use empty table as a fallback if extra data is nil
		local meta = getmetatable(self)
		meta._extraData = getRawExtraLanguageData(self._code) or {}
		setmetatable(self, meta)
	end
end


function export.makeObject(code, data)
	if data and data.deprecated then
		require("Module:debug").track {
			"languages/deprecated",
			"languages/deprecated/" .. code
		}
	end
	
	return data and setmetatable({ _rawData = data, _code = code }, Language) or nil
end


function export.getByCode(code, paramForError, allowEtymLang, allowFamily)
	if type(code) ~= "string" then
		error("The function getByCode expects a string as its first argument, but received " .. (code == nil and "nil" or "a " .. type(code)) .. ".")
	end
	
	local retval = export.makeObject(code, getRawLanguageData(code))
	if not retval and allowEtymLang then
		retval = require("Module:etymology languages").getByCode(code)
	end
	if not retval and allowFamily then
		retval = require("Module:families").getByCode(code)
	end
	if not retval and paramForError then
		local codetext = nil
		if allowEtymLang and allowFamily then
			codetext = "language, etymology language or family code"
		elseif allowEtymLang then
			codetext = "language or etymology language code"
		elseif allowFamily then
			codetext = "language or family code"
		else
			codetext = "language code"
		end
		export.err(code, paramForError, codetext)
	end
	return retval
end


function export.getByName(name, errorIfInvalid)
	local byName = mw.loadData("Module:languages/by name")
	local code = byName.all and byName.all[name] or byName[name]
	
	if not code then
		if errorIfInvalid then
			error("The language name \"" .. name .. "\" is not valid. See [[Wiktionary:List of languages]].")
		else
			return nil
		end
	end
	
	return export.makeObject(code, getRawLanguageData(code))
end

function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily)
	local byName = mw.loadData("Module:languages/canonical names")
	local code = byName and byName[name]

	local retval = code and export.makeObject(code, getRawLanguageData(code)) or nil
	if not retval and allowEtymLang then
		retval = require("Module:etymology languages").getByCanonicalName(name)
	end
	if not retval and allowFamily then
		local famname = name:match("^(.*) languages$")
		famname = famname or name
		retval = require("Module:families").getByCanonicalName(famname)
	end
	if not retval and errorIfInvalid then
		local text
		if allowEtymLang and allowFamily then
			text = "language, etymology language or family name"
		elseif allowEtymLang then
			text = "language or etymology language name"
		elseif allowFamily then
			text = "language or family name"
		else
			text = "language name"
		end
		error("The " .. text .. " \"" .. name .. "\" is not valid.")
	end
	return retval
end

function export.iterateAll()
	mw.incrementExpensiveFunctionCount()
	local m_data = mw.loadData("Module:languages/alldata")
	local func, t, var = pairs(m_data)
	
	return function()
		local code, data = func(t, var)
		return export.makeObject(code, data)
	end
end

--[[	If language is an etymology language, iterates through parent languages
		until it finds a non-etymology language. ]]
function export.getNonEtymological(lang)
	while lang:getType() == "etymology language" do
		local parentCode = lang:getParentCode()
		lang = export.getByCode(parentCode)
			or require("Module:etymology languages").getByCode(parentCode)
			or require("Module:families").getByCode(parentCode)
	end
	
	return lang
end

return export