Jump to content

Module:category tree/poscatboiler/data/languages: difference between revisions

From Wiktionary, the free dictionary
Content deleted Content added
getRawData changed to getData.
Use the new "extra" parameter in lang:getData() to avoid the need to explicitly call lang:loadInExtraData().
 
Line 174: Line 174:
table.insert(ret, '<table class="wikitable language-category-info"')
table.insert(ret, '<table class="wikitable language-category-info"')
local raw_data = lang:getData("extra")
lang:loadInExtraData()
local raw_data = lang:getData()
if raw_data then
if raw_data then
local replacements = {
local replacements = {

Latest revision as of 22:47, 6 December 2024


This data submodule defines part of Wiktionary's category structure.

For an introduction to the poscatboiler system and a description of how to add or modify categories, see Module:category tree/poscatboiler/data/documentation.


local new_title = mw.title.new
local ucfirst = require("Module:string utilities").ucfirst

local raw_categories = {}
local raw_handlers = {}

local m_languages = require("Module:languages")
local m_sc_getByCode = require("Module:scripts").getByCode
local m_table = require("Module:table")

local to_json = require("Module:JSON").toJSON

local Hang = m_sc_getByCode("Hang")
local Hani = m_sc_getByCode("Hani")
local Hira = m_sc_getByCode("Hira")
local Hrkt = m_sc_getByCode("Hrkt")
local Kana = m_sc_getByCode("Kana")

-- Records a tracking hit for this module under the subpage PAGE and returns
-- whatever the tracking module returns. See
-- [[Special:WhatLinksHere/Wiktionary:Tracking/poscatboiler/languages/PAGE]].
local function track(page)
	local tracker = require("Module:debug/track")
	local key = "poscatboiler/languages/" .. page
	return tracker(key)
end

-- This handles language categories of the form e.g. [[:Category:French language]] and
-- [[:Category:British Sign Language]]; categories like [[:Category:Languages of Indonesia]]; categories like
-- [[:Category:English-based creole or pidgin languages]]; and categories like
-- [[:Category:English-based constructed languages]].


-----------------------------------------------------------------------------
--                                                                         --
--                              RAW CATEGORIES                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Root category listing every language category.
raw_categories["All languages"] = {
	topright = "{{commonscat|Languages}}\n" ..
		"[[File:Languages world map-transparent background.svg|thumb|right|250px|Rough world map of language families]]",
	description = "This category contains the categories for every language on Wiktionary.",
	additional = "Not all languages that Wiktionary recognises may have a category here yet. There are many that have not yet received any attention from editors, mainly because not all Wiktionary users know about every single language. See [[Wiktionary:List of languages]] for a full list.",
	parents = {"Fundamental"},
}

-- Category listing every extinct language's category.
raw_categories["All extinct languages"] = {
	description = "This category contains the categories for every [[extinct language]] on Wiktionary.",
	additional = "Do not confuse this category with [[:Category:Extinct languages]], which is an umbrella category for the names of extinct languages in specific other languages (e.g. {{m+|de|Langobardisch}} for the ancient [[Lombardic]] language).",
	parents = {"All languages"},
}

-- Umbrella for the per-country "Languages of X" categories.
raw_categories["Languages by country"] = {
	topright = "{{commonscat|Languages by continent}}",
	description = "Categories that group languages by country.",
	additional = "{{{umbrella_meta_msg}}}",
	parents = {"All languages"},
}

-- Languages with no known relatives; filed under both family listings.
raw_categories["Language isolates"] = {
	topright = "{{wikipedia|Language isolate}}\n" .. "{{commonscat|Language isolates}}",
	description = "Languages with no known relatives.",
	parents = {
		{name = "Languages by family", sort = "*Isolates"},
		{name = "All language families", sort = "Isolates"},
	},
}


-----------------------------------------------------------------------------
--                                                                         --
--                                RAW HANDLERS                             --
--                                                                         --
-----------------------------------------------------------------------------


-- Builds the floating "language links" box shown at the top right of a
-- language's main category page.
--
-- Parameters:
--   lang       language object; the methods used here are getCanonicalName,
--              getWikimediaLanguages, getWikipediaArticle and getCommonsCategory.
--   setwiki    optional override for the Wikipedia article; "-" displays "None."
--   setwikt    optional; only the value "-" has an effect (it suppresses the
--              list of Wiktionary editions). Any value is tracked.
--   setsister  optional override for the Commons category name (without the
--              "Category:" prefix); "-" displays "None."
--   entryname  optional override for the entry linked as the language's
--              English name; defaults to the canonical name.
--
-- Returns the wikitext of the box as a single string.
local function linkbox(lang, setwiki, setwikt, setsister, entryname)
	local canonicalName = lang:getCanonicalName()
	-- Treat a missing list like an empty one so that every later use is safe,
	-- not just the loop below.
	local wikimediaLanguages = lang:getWikimediaLanguages() or {}
	local wikipediaArticle = setwiki or lang:getWikipediaArticle()
	setsister = setsister and ucfirst(setsister) or nil
	
	if setwikt then
		track("setwikt")
		if setwikt == "-" then
			track("setwikt/hyphen")
		end
	end
	
	-- Collect one link per Wikimedia language whose code is a valid interwiki
	-- prefix.
	local links = {}
	if setwikt ~= "-" then
		for _, wikimedialang in ipairs(wikimediaLanguages) do
			local code = wikimedialang:getCode()
			local check = new_title(code .. ":")
			if check and check.isExternal then
				local name = wikimedialang:getCanonicalName()
				table.insert(links,
					(name ~= canonicalName and "(''" .. name .. "'') " or "") ..
					"'''[[:" .. code .. ":|" .. code .. ".wiktionary.org]]'''")
			end
		end
	end
	
	-- The heading's plural must agree with the links actually listed, not with
	-- the raw number of Wikimedia languages (some may have been skipped).
	local wikt_plural = links[2] and "s" or ""
	local wiktionarylinks = links[1] and table.concat(links, "<br/>") or "''None.''"
	
	if setsister then
		track("setsister")
		if setsister == "-" then
			track("setsister/hyphen")
		else
			setsister = "Category:" .. setsister
		end
	else
		setsister = lang:getCommonsCategory() or "-"
	end
	
	return table.concat{
[=[<div class="wikitable" style="float: right; clear: right; margin: 0 0 0.5em 1em; width: 300px; padding: 5px;">
<div style="text-align: center; margin-bottom: 10px; margin-top: 5px">''']=], canonicalName, [=[ language links'''</div>

{| style="font-size: 90%"
|-
| style="vertical-align: top; height: 35px; border-bottom: 1px solid lightgray;" | [[File:Wikipedia-logo.png|35px|none|Wikipedia]]
| style="border-bottom: 1px solid lightgray;" | '''English Wikipedia''' has an article on:
<div style="padding: 5px 10px">]=], (setwiki == "-" and "''None.''" or "'''[[w:" .. wikipediaArticle .. "|" .. wikipediaArticle .. "]]'''"), [=[</div>

|-
| style="vertical-align: top; height: 35px; border-bottom: 1px solid lightgray;" | [[File:Wikimedia-logo.svg|35px|none|Wikimedia Commons]]
| style="border-bottom: 1px solid lightgray;" | '''Wikimedia Commons''' has links to ]=], canonicalName, [=[-related content in sister projects:
<div style="padding: 5px 10px">]=], (setsister == "-" and "''None.''" or "'''[[commons:" .. setsister .. "|" .. setsister .. "]]'''"), [=[</div>

|-
| style="vertical-align: top; height: 35px; width: 40px; border-bottom: 1px solid lightgray;" | [[File:Wiktionary-logo-v2.svg|35px|none|Wiktionary]]
|style="border-bottom: 1px solid lightgray;" | '''Wiktionary edition''']=], wikt_plural, [=[ written in ]=], canonicalName, [=[:
<div style="padding: 5px 10px">]=], wiktionarylinks, [=[</div>

|-
| style="vertical-align: top; height: 35px; border-bottom: 1px solid lightgray;" | [[File:Open book nae 02.svg|35px|none|Entry]]
| style="border-bottom: 1px solid lightgray;" | '''Wiktionary entry''' for the language's English name:
<div style="padding: 5px 10px">''']=], require("Module:links").full_link({lang = m_languages.getByCode("en"), term = entryname or canonicalName}), [=['''</div>

|-
| style="vertical-align: top; height: 35px;" | [[File:Crystal kfind.png|35px|none|Considerations]]
|| '''Wiktionary resources''' for editors contributing to ]=], canonicalName, [=[ entries:
<div style="padding: 5px 0">
* '''[[Wiktionary:About ]=], canonicalName, [=[]]'''
* '''[[:Category:]=], canonicalName, [=[ reference templates|Reference templates]] ({{PAGESINCAT:]=], canonicalName, [=[ reference templates}})'''
* '''[[Appendix:]=], canonicalName, [=[ bibliography|Bibliography]]'''
|}
</div>]=]
}
end

-- Returns an external link, wrapped in a "plainlinks" span, that points at the
-- edit view (action=edit) of the page TITLE and is displayed as TEXT.
local function edit_link(title, text)
	local edit_url = tostring(mw.uri.fullUrl(title, { action = "edit" }))
	local link = '[' .. edit_url .. ' ' .. text .. ']'
	return '<span class="plainlinks">' .. link .. '</span>'
end

-- Should perhaps use wiki syntax.
--
-- Builds the HTML table of information about LANG that is shown on the
-- language's main category page: names, code, family, ancestors, scripts,
-- per-language modules and the Wikidata item. The language's raw data (as
-- returned by lang:getData("extra")) is additionally exposed as data-*
-- attributes on the <table> element, each value JSON-encoded.
--
-- Parameters:
--   lang  language object (presumably from [[Module:languages]]).
--
-- Returns the table markup as a single string.
local function infobox(lang)
	local ret = {}
	
	table.insert(ret, '<table class="wikitable language-category-info"')
	
	local raw_data = lang:getData("extra")
	if raw_data then
		-- Descriptive names for the positional keys of the data table.
		local replacements = {
			[1] = "canonical-name",
			[2] = "wikidata-item",
			[3] = "family",
			[4] = "scripts",
		}
		local function replacer(letter1, letter2)
			return letter1:lower() .. "-" .. letter2:lower()
		end
		-- For each key in the language data modules, returns a descriptive
		-- kebab-case version (containing ASCII lowercase words separated
		-- by hyphens).
		local function kebab_case(key)
			key = replacements[key] or key
			key = key:gsub("(%l)(%u)", replacer):gsub("(%l)_(%l)", replacer)
			return key
		end
		local compress = {compress = true}
		local function html_attribute_encode(str)
			str = to_json(str, compress)
				:gsub('"', "&quot;")
				-- & in attributes is automatically escaped.
				-- :gsub("&", "&amp;")
				:gsub("<", "&lt;")
				:gsub(">", "&gt;")
			return str
		end
		table.insert(ret, ' data-code="' .. lang:getCode() .. '"')
		for k, v in m_table.sortedPairs(raw_data) do
			table.insert(ret, " data-" .. kebab_case(k)
			.. '="'
			.. html_attribute_encode(v)
			.. '"')
		end
	end
	table.insert(ret, '>\n')
	table.insert(ret, '<tr class="language-category-data">\n<th colspan="2">'
		.. edit_link("Module:" .. m_languages.getDataModuleName(lang:getCode()),
			"Edit language data")
		.. "</th>\n</tr>\n")
	table.insert(ret, "<tr>\n<th>Canonical name</th><td>" .. lang:getCanonicalName() .. "</td>\n</tr>\n")

	-- Appends a row titled HEADING containing LIST as a bulleted list; does
	-- nothing when LIST is nil or empty.
	local function add_name_row(heading, list)
		if not list then
			return
		end
		local names = {}
		for _, name in ipairs(list) do
			table.insert(names, "<li>" .. name .. "</li>")
		end
		if #names > 0 then
			table.insert(ret, "<tr>\n<th>" .. heading .. "</th><td><ul>" .. table.concat(names, "\n") .. "</ul></td>\n</tr>\n")
		end
	end
	
	add_name_row("Other names", lang:getOtherNames())
	add_name_row("Aliases", lang:getAliases())

	local varieties = lang:getVarieties()
	if varieties then
		local names = {}
		
		for _, name in ipairs(varieties) do
			if type(name) == "string" then
				table.insert(names, "<li>" .. name .. "</li>")
			else
				-- A table entry is a group: its first element is the heading
				-- and the remaining elements are listed beneath it.
				assert(type(name) == "table")
				local first_var
				local subvars = {}
				for i, var in ipairs(name) do
					if i == 1 then
						first_var = var
					else
						table.insert(subvars, "<li>" .. var .. "</li>")
					end
				end
				if #subvars > 0 then
					table.insert(names, "<li><dl><dt>" .. first_var .. "</dt>\n<dd><ul>" .. table.concat(subvars, "\n") .. "</ul></dd></dl></li>")
				elseif first_var then
					table.insert(names, "<li>" .. first_var .. "</li>")
				end
			end
		end
		
		if #names > 0 then
			table.insert(ret, "<tr>\n<th>Varieties</th><td><ul>" .. table.concat(names, "\n") .. "</ul></td>\n</tr>\n")
		end
	end

	table.insert(ret, "<tr>\n<th>[[Wiktionary:Languages|Language code]]</th><td><code>" .. lang:getCode() .. "</code></td>\n</tr>\n")
	table.insert(ret, "<tr>\n<th>[[Wiktionary:Families|Language family]]</th>\n")
	
	local fam = lang:getFamily()
	local famCode = fam and fam:getCode()
	
	-- A handful of family codes get a fixed link rather than the family's own
	-- category link.
	if not fam then
		table.insert(ret, "<td>unclassified</td>")
	elseif famCode == "qfa-iso" then
		table.insert(ret, "<td>[[:Category:Language isolates|language isolate]]</td>")
	elseif famCode == "qfa-mix" then
		table.insert(ret, "<td>[[:Category:Mixed languages|mixed language]]</td>")
	elseif famCode == "sgn" then
		table.insert(ret, "<td>[[:Category:Sign languages|sign language]]</td>")
	elseif famCode == "crp" then
		table.insert(ret, "<td>[[:Category:Creole or pidgin languages|creole or pidgin]]</td>")
	elseif famCode == "art" then
		table.insert(ret, "<td>[[:Category:Constructed languages|constructed language]]</td>")
	else
		table.insert(ret, "<td>" .. fam:makeCategoryLink() .. "</td>")
	end
	
	table.insert(ret, "\n</tr>\n<tr>\n<th>Ancestors</th>\n")
	
	local ancestors, ancestorChain = lang:getAncestors(), lang:getAncestorChain()
	if ancestors[2] then
		-- Several direct ancestors: show them as a flat list.
		local ancestorList = {}
		
		for i, anc in ipairs(ancestors) do
			ancestorList[i] = "<li>" .. anc:makeCategoryLink() .. "</li>"
		end
		
		table.insert(ret, "<td><ul>\n" .. table.concat(ancestorList, "\n") .. "</ul></td>\n")
	elseif ancestorChain[1] then
		-- Otherwise show the chain as nested lists, one level per ancestor;
		-- every opened <ul> is closed by the loop further down.
		table.insert(ret, "<td><ul>\n")
		
		local chain = {}
		
		for i, anc in ipairs(ancestorChain) do
			chain[i] = "<li>" .. anc:makeCategoryLink() .. "</li>"
		end
		
		table.insert(ret, table.concat(chain, "\n<ul>\n"))
		
		for _, _ in ipairs(chain) do
			table.insert(ret, "</ul>")
		end
		
		table.insert(ret, "</td>\n")
	else
		table.insert(ret, "<td>unknown</td>\n")
	end
	
	table.insert(ret, "</tr>\n")
	
	local scripts = lang:getScripts()
	
	if scripts[1] then
		local script_text = {}
		
		-- Category link for the script followed by a search link for
		-- stylesheets that mention the script code.
		local function makeScriptLine(sc)
			local code = sc:getCode()
			local url = tostring(mw.uri.fullUrl('Special:Search', {
				search = 'contentmodel:css insource:"' .. code
					.. '" insource:/\\.' .. code .. '/',
				ns8 = '1'
			}))
			return sc:makeCategoryLink()
				.. ' (<span class="plainlinks" title="Search for stylesheets referencing this script">[' .. url .. ' <code>' .. code .. '</code>]</span>)'
		end
		
		-- Hrkt is listed together with its components Hira and Kana.
		local function add_Hrkt(text)
			table.insert(text, "<li>" .. makeScriptLine(Hrkt))
			table.insert(text, "<ul>")
			table.insert(text, "<li>" .. makeScriptLine(Hira) .. "</li>")
			table.insert(text, "<li>" .. makeScriptLine(Kana) .. "</li>")
			table.insert(text, "</ul>")
			table.insert(text, "</li>")
		end
		
		for _, sc in ipairs(scripts) do
			local text = {}
			local code = sc:getCode()
			
			if code == "Hrkt" then
				add_Hrkt(text)
			else
				table.insert(text, "<li>" .. makeScriptLine(sc))
				-- Jpan and Kore are likewise expanded into their components.
				if code == "Jpan" then
					table.insert(text, "<ul>")
					table.insert(text, "<li>" .. makeScriptLine(Hani) .. "</li>")
					add_Hrkt(text)
					table.insert(text, "</ul>")
				elseif code == "Kore" then
					table.insert(text, "<ul>")
					table.insert(text, "<li>" .. makeScriptLine(Hang) .. "</li>")
					table.insert(text, "<li>" .. makeScriptLine(Hani) .. "</li>")
					table.insert(text, "</ul>")
				end
				table.insert(text, "</li>")
			end
			
			table.insert(script_text, table.concat(text, "\n"))
		end
		
		table.insert(ret, "<tr>\n<th>[[Wiktionary:Scripts|Scripts]]</th>\n<td><ul>\n" .. table.concat(script_text, "\n") .. "</ul></td>\n</tr>\n")
	else
		table.insert(ret, "<tr>\n<th>[[Wiktionary:Scripts|Scripts]]</th>\n<td>not specified</td>\n</tr>\n")
	end
	
	-- Appends a row titled HEADING that links the module(s) named by SPEC.
	-- SPEC is either a module name or a table mapping script codes to module
	-- names; nothing is added when SPEC is nil or names no module at all.
	local function add_module_info(spec, heading)
		if not spec then
			return
		end
		local script_codes = lang:getScriptCodes()
		local module_info, n, add = {}, 0, false
		if type(spec) == "string" then
			table.insert(module_info,
				("[[Module:%s]]"):format(spec))
			add = true
		elseif type(spec) == "table" and m_table.size(script_codes) == 1 and type(spec[script_codes[1]]) == "string" then
			-- Only one script: show its module directly instead of a list.
			table.insert(module_info,
				("[[Module:%s]]"):format(spec[script_codes[1]]))
			add = true
		elseif type(spec) == "table" then
			table.insert(module_info, "<ul>")
			for script, data in m_table.sortedPairs(spec) do
				if m_sc_getByCode(script) then
					local script_info
					if type(data) == "string" then
						script_info = ("[[Module:%s]]"):format(data)
					else
						-- n counts the scripts that have no module.
						n = n + 1
						script_info = "(none)\n"
					end
					-- Close the list item in both cases (the "(none)" item
					-- used to be left unclosed).
					table.insert(module_info, ("<li><code>%s</code>: %s</li>"):format(script, script_info))
				end
			end
			table.insert(module_info, "</ul>")
			-- Besides the <ul> and </ul> entries there must be at least one
			-- item, and not every item may be "(none)".
			if m_table.size(module_info) > 2 and n < (m_table.size(module_info) - 2) then add = true end
		end
		
		if add then
			table.insert(ret, "<tr>\n<th>" .. heading .. "</th>\n<td>"
				.. table.concat(module_info) .. "</td>\n</tr>\n")
		end
	end
	
	-- raw_data is nil-checked above, so stay nil-safe here as well.
	local module_data = raw_data or {}
	add_module_info(module_data.generate_forms, "Form-generating<br>module")
	add_module_info(module_data.translit, "[[Wiktionary:Transliteration and romanization|Transliteration<br>module]]")
	add_module_info(module_data.display_text, "Display text<br>module")
	add_module_info(module_data.entry_name, "Entry name<br>module")
	add_module_info(module_data.sort_key, "[[sortkey|Sortkey]]<br>module")
	
	local wikidataItem = lang:getWikidataItem()
	if wikidataItem and mw.wikibase then
		local URL = mw.wikibase.getEntityUrl(wikidataItem)
		local link
		if URL then
			link = '[' .. URL .. ' ' .. wikidataItem .. ']'
		else
			link = '<span class="error">Invalid Wikidata item: <code>' .. wikidataItem .. '</code></span>'
		end
		table.insert(ret, "<tr><th>Wikidata</th><td>" .. link .. "</td></tr>")
	end
	
	table.insert(ret, "</table>")
	
	return table.concat(ret)
end

-- Wraps CONTENT in NavFrame markup (a NavHead followed by a left-aligned
-- NavContent). TITLE is the header text; when it is omitted the literal
-- placeholder "{{{title}}}" is used instead.
local function NavFrame(content, title)
	local heading = title or '{{{title}}}'
	local head = '<div class="NavHead">' .. heading .. '</div>'
	local body = '<div class="NavContent" style="text-align: left;">' .. content .. '</div>'
	return '<div class="NavFrame">' .. head .. body .. '</div>'
end


-- Generates the three text parts of a language's main category page.
--
-- Parameters:
--   lang       language object.
--   countries  list of country names. "UNKNOWN" entries are not mentioned in
--              the text; "the world" is phrased with "across" instead of "in"
--              and cannot be combined with real countries.
--   extinct    truthy if the language should be described as extinct.
--   setwiki, setwikt, setsister, entryname
--              passed through unchanged to linkbox().
--
-- Returns description, topright, additional. For the language with code "und"
-- only the description is returned (the other two values are nil).
--
-- Raises an error if "the world" is mixed with another country.
local function get_description_topright_additional(lang, countries, extinct, setwiki, setwikt, setsister, entryname)
	local nameWithLanguage = lang:getCategoryName("nocap")
	if lang:getCode() == "und" then
		local description =
			"This is the main category of the '''" .. nameWithLanguage .. "''', represented in Wiktionary by the [[Wiktionary:Languages|code]] '''" .. lang:getCode() .. "'''. " ..
			"This language contains terms in historical writing, whose meaning has not yet been determined by scholars."
		return description, nil, nil
	end
	
	local canonicalName = lang:getCanonicalName()
	
	local topright = linkbox(lang, setwiki, setwikt, setsister, entryname)

	-- Names ending in " Language" (e.g. "British Sign Language") take no
	-- article.
	local the_prefix
	if canonicalName:find(" Language$") then
		the_prefix = ""
	else
		the_prefix = "the "
	end
	local description = "This is the main category of " .. the_prefix .. "'''" .. nameWithLanguage .. "'''."

	local country_links = {}
	local prep
	for _, country in ipairs(countries) do
		local this_prep
		if country == "the world" then
			this_prep = "across"
			table.insert(country_links, country)
		elseif country ~= "UNKNOWN" then
			this_prep = "in"
			-- Keep a leading "the" outside of the link.
			local country_without_the = country:match("^the (.*)$")
			if country_without_the then
				table.insert(country_links, "the [[" .. country_without_the .. "]]")
			else
				table.insert(country_links, "[[" .. country .. "]]")
			end
		end
		if this_prep then
			-- A single preposition governs the whole list, so all entries
			-- must agree on it.
			if prep and this_prep ~= prep then
				error("Can't handle country 'the world' along with another country (clashing prepositions)")
			end
			prep = this_prep
		end
	end
	local country_desc
	if #country_links > 0 then
		local country_link_text = m_table.serialCommaJoin(country_links)
		country_desc = ("It is %s %s %s.\n\n"):format(
			extinct and "an [[extinct language]] that was formerly spoken" or "spoken", prep, country_link_text)
	elseif extinct then
		country_desc = "It is an [[extinct language]].\n\n"
	else
		country_desc = ""
	end

	local add = country_desc .. "Information about " .. canonicalName .. ":\n\n" .. infobox(lang)
	
	if lang:hasType("reconstructed") then
		add = add .. "\n\n" ..
			ucfirst(canonicalName) .. " is a reconstructed language. Its words and roots are not directly attested in any written works, but have been reconstructed through the ''comparative method'', " ..
			"which finds regular similarities between languages that cannot be explained by coincidence or word-borrowing, and extrapolates ancient forms from these similarities.\n\n" ..
			"According to our [[Wiktionary:Criteria for inclusion|criteria for inclusion]], terms in " .. canonicalName ..
			" should '''not''' be present in entries in the main namespace, but may be added to the Reconstruction: namespace."
	elseif lang:hasType("appendix-constructed") then
		add = add .. "\n\n" ..
			ucfirst(canonicalName) .. " is a constructed language that is only in sporadic use. " ..
			"According to our [[Wiktionary:Criteria for inclusion|criteria for inclusion]], terms in " .. canonicalName ..
			" should '''not''' be present in entries in the main namespace, but may be added to the Appendix: namespace. " ..
			"All terms in this language may be available at [[Appendix:" .. ucfirst(canonicalName) .. "]]."
	end
	
	local about = new_title("Wiktionary:About " .. canonicalName)
	
	-- mw.title.new returns nil for an invalid title, so check the object
	-- before reading .exists (as the other new_title callers in this module do).
	if about and about.exists then
		add = add .. "\n\n" ..
			"Please see '''[[Wiktionary:About " .. canonicalName .. "]]''' for information and special considerations for creating " .. nameWithLanguage .. " entries."
	end
	
	-- Generating the tree may fail; in that case the error is only logged and
	-- the rest of the page is still produced.
	local ok, tree_of_descendants = pcall(
		require("Module:family tree").print_children,
		lang:getCode(), {
			protolanguage_under_family = true,
			must_have_descendants = true
		})
	
	if ok then
		if tree_of_descendants then
			add = add .. NavFrame(
				tree_of_descendants,
				"Family tree")
		else
			add = add .. "\n\n" .. ucfirst(canonicalName)
				.. " has no descendants or varieties listed in Wiktionary's language data modules."
		end
	else
		mw.log("error while generating tree: " .. tostring(tree_of_descendants))
	end

	return description, topright, add
end


-- Computes the parent categories of a language's main category.
--
-- Parameters:
--   lang       language object.
--   countries  list of country names; "UNKNOWN" entries are skipped.
--   extinct    truthy to also file the language under "All extinct languages".
--
-- Returns a list of {name = ..., sort = ...} tables. The first entry is always
-- "All languages"; the others depend on the language's family, scripts,
-- transliteration support, countries and extinct status.
local function get_parents(lang, countries, extinct)
	local canonicalName = lang:getCanonicalName()
	
	local sortkey = {sort_base = canonicalName, lang = "en"}
	local ret = {{name = "All languages", sort = sortkey}}
	
	-- Appends "Category:NAME", sorted by SORT or else by the default sortkey.
	local function add_cat(name, sort)
		ret[#ret + 1] = {name = "Category:" .. name, sort = sort or sortkey}
	end
	
	-- Appends one "<Ancestor>SUFFIX" category per ancestor of the language.
	local function add_based_cats(suffix)
		for _, anc in ipairs(lang:getAncestors()) do
			-- Avoid Haitian Creole being categorised in [[:Category:Haitian Creole-based creole or pidgin languages]], as one of its ancestors is an etymology-only variety of it.
			-- Use that ancestor's ancestors instead.
			if anc:getFullCode() == lang:getCode() then
				for _, anc_extra in ipairs(anc:getAncestors()) do
					add_cat(ucfirst(anc_extra:getFullName()) .. suffix)
				end
			else
				add_cat(ucfirst(anc:getFullName()) .. suffix)
			end
		end
	end
	
	local fam = lang:getFamily()
	local famCode = fam and fam:getCode()
	
	-- Family codes that map straight onto a single fixed category.
	local fixed_family_cats = {
		["qfa-iso"] = "Language isolates",
		["qfa-mix"] = "Mixed languages",
		["sgn"] = "All sign languages",
	}
	
	-- FIXME: Some of the following categories should be added to this module.
	if not fam then
		add_cat("Unclassified languages")
	elseif fixed_family_cats[famCode] then
		add_cat(fixed_family_cats[famCode])
	elseif famCode == "crp" then
		add_cat("Creole or pidgin languages")
		add_based_cats("-based creole or pidgin languages")
	elseif famCode == "art" then
		if lang:hasType("appendix-constructed") then
			add_cat("Appendix-only constructed languages")
		else
			add_cat("Constructed languages")
		end
		add_based_cats("-based constructed languages")
	else
		add_cat(fam:getCategoryName())
		if lang:hasType("reconstructed") then
			-- Sort reconstructed languages without their "Proto-" prefix.
			local stripped = canonicalName:gsub("^Proto%-", "")
			add_cat("Reconstructed languages", {sort_base = stripped, lang = "en"})
		end
	end
	
	local function add_sc_cat(sc)
		add_cat(sc:getCategoryName() .. " languages")
	end
	
	-- Hrkt also counts as both of its component scripts.
	local function add_Hrkt()
		add_sc_cat(Hrkt)
		add_sc_cat(Hira)
		add_sc_cat(Kana)
	end
	
	for _, sc in ipairs(lang:getScripts()) do
		local code = sc:getCode()
		if code == "Hrkt" then
			add_Hrkt()
		else
			add_sc_cat(sc)
			-- Jpan and Kore are filed under their component scripts as well.
			if code == "Jpan" then
				add_sc_cat(Hani)
				add_Hrkt()
			elseif code == "Kore" then
				add_sc_cat(Hang)
				add_sc_cat(Hani)
			end
		end
	end
	
	if lang:hasTranslit() then
		add_cat("Languages with automatic transliteration")
	end
	
	local saw_country = false
	for _, country in ipairs(countries) do
		if country ~= "UNKNOWN" then
			add_cat("Languages of " .. country)
			saw_country = true
		end
	end

	if extinct then
		add_cat("All extinct languages")
	end

	if not saw_country then
		add_cat("Languages not sorted into a country category")
	end

	return ret
end


-- Returns the list of extra child categories attached to every language's
-- main category. The entries contain {{{langname}}} / {{{langcode}}}
-- placeholders rather than concrete values.
local function get_children()
	-- FIXME: We should work on the children mechanism so it isn't necessary to manually specify these.
	local labels = {
		"appendices", "entry maintenance", "lemmas", "names", "phrases",
		"rhymes", "symbols", "templates", "terms by etymology", "terms by usage",
	}

	local ret = {}
	for i = 1, #labels do
		ret[i] = {name = labels[i], is_label = true}
	end

	local others = {
		{name = "terms derived from {{{langname}}}", is_label = true, lang = false},
		{module = "topic cat", args = {code = "{{{langcode}}}", label = "all topics"}, sort = "all topics"},
		{name = "Varieties of {{{langname}}}"},
		{name = "Requests concerning {{{langname}}}"},
		{name = "Category:Rhymes:{{{langname}}}", description = "Lists of {{{langname}}} words by their rhymes."},
		{name = "Category:User {{{langcode}}}", description = "Wiktionary users categorized by fluency levels in {{{langname}}}."},
	}
	for _, child in ipairs(others) do
		ret[#ret + 1] = child
	end

	return ret
end


-- Handler for a language's main category, named either "<name> language"
-- (e.g. [[:Category:French language]]) or with the bare canonical name when
-- that name itself ends in " Language" (e.g. [[:Category:British Sign Language]]).
-- Returns nil when the category name does not correspond to a known language.
table.insert(raw_handlers, function(data)
	local category = data.category
	local lang
	local langname = category:match("^(.*) language$")
	if langname then
		lang = m_languages.getByCanonicalName(langname)
	elseif category:find(" Language$") then
		lang = m_languages.getByCanonicalName(category)
	end
	if not lang then
		return nil
	end

	local param_spec = {
		[1] = {list = true},
		setwiki = true,
		setwikt = true,
		setsister = true,
		entryname = true,
		extinct = {type = "boolean"},
	}
	local args = require("Module:parameters").process(data.args, param_spec)
	local countries = args[1]

	-- If called from inside, don't require any arguments, as they can't be known
	-- in general and aren't needed just to generate the first parent (used for
	-- breadcrumbs).
	if #countries == 0 and not data.called_from_inside then
		-- At least one country must be specified unless the language is constructed (e.g. Esperanto) or reconstructed (e.g. Proto-Indo-European).
		local fam = lang:getFamily()
		if not lang:hasType("reconstructed") and not (fam and fam:getCode() == "art") then
			error("At least one country (param 1=) must be specified for language '" .. lang:getCanonicalName() .. "' (code '" .. lang:getCode() .. "'). " ..
				"Use the value UNKNOWN if the language's location is truly unknown.")
		end
	end

	-- If called from inside the category tree system, it's called when generating
	-- parents or children, and we don't need to generate the description or additional
	-- text (which is very expensive in terms of memory because it calls [[Module:family tree]],
	-- which calls [[Module:languages/data/all]]).
	local description, topright, additional = "", "", ""
	if not data.called_from_inside then
		description, topright, additional = get_description_topright_additional(
			lang, countries, args.extinct, args.setwiki, args.setwikt, args.setsister, args.entryname
		)
	end

	local spec = {
		description = description,
		lang = lang:getCode(),
		topright = topright,
		additional = additional,
		breadcrumb = lang:getCanonicalName(),
		parents = get_parents(lang, countries, args.extinct),
		extra_children = get_children(lang),
		umbrella = false,
		can_be_empty = true,
	}
	return spec, true
end)


-- Handle categories such as [[:Category:Languages of Indonesia]].
--
-- Recognised arguments (read from data.args):
--   flagfile    name of the flag image without the "File:" prefix; "-"
--               suppresses the flag. Defaults to "Flag of <country>.svg",
--               which is used only if that file exists.
--   wp          passed through [[Module:yesno]] with "+" as the default; a
--               result of true or "+" adds {{wikipedia|<category name>}},
--               false adds nothing.
--   commonscat  handled like wp, but adds {{commonscat|...}}.
--
-- Returns the category spec followed by true, or nothing if the category name
-- does not start with "Languages of ". Raises an error if an explicitly given
-- flagfile does not exist.
table.insert(raw_handlers, function(data)
	local country = data.category:match("^Languages of (.*)$")
	if not country then
		return
	end

	local args = require("Module:parameters").process(data.args, {
		["flagfile"] = true,
		["commonscat"] = true,
		["wp"] = true,
	})
	-- Declared on its own: declaring it in the assignment above would silently
	-- initialise it with any second value returned by process().
	local topright

	-- Appends TEXT to the top-right content collected so far.
	local function add_topright(text)
		if topright then
			topright = topright .. text
		else
			topright = text
		end
	end

	-- Resolves a wp=/commonscat= value to the page name to link, or to a
	-- false value if no box should be shown.
	local function resolve_target(value)
		local target = require("Module:yesno")(value, "+")
		if target == "+" or target == true then
			target = data.category
		end
		return target
	end

	if args.flagfile ~= "-" then
		local flagfile = args.flagfile and "File:" .. args.flagfile or ("File:Flag of %s.svg"):format(country)
		local flagfile_page = new_title(flagfile)
		if flagfile_page and flagfile_page.file.exists then
			topright = ("[[%s|right|100px|border]]"):format(flagfile)
		elseif args.flagfile then
			-- Only an explicitly requested file is required to exist; a
			-- missing default flag is silently skipped.
			error(("Explicit flagfile '%s' doesn't exist"):format(flagfile))
		end
	end

	if args.wp then
		local wp = resolve_target(args.wp)
		if wp then
			add_topright(("{{wikipedia|%s}}"):format(wp))
		end
	end

	if args.commonscat then
		local commonscat = resolve_target(args.commonscat)
		if commonscat then
			add_topright(("{{commonscat|%s}}"):format(commonscat))
		end
	end

	-- A leading "the" is kept out of the link, the sort key and the name of
	-- the country's own category.
	local country_no_the = country:match("^the (.*)$")
	local base_country = country_no_the or country
	local country_link
	if country_no_the then
		country_link = ("the [[%s]]"):format(country_no_the)
	else
		country_link = ("[[%s]]"):format(country)
	end
	
	local parents = {{name = "Languages by country", sort = {sort_base = base_country, lang = "en"}}}
	local country_cat = ("Category:%s"):format(base_country)
	local country_page = new_title(country_cat)
	if country_page and country_page.exists then
		table.insert(parents, {name = country_cat, sort = "Languages"})
	end
	local description = ("Categories for languages of %s (including sublects)."):format(country_link)

	return {
		topright = topright,
		description = description,
		parents = parents,
		breadcrumb = country,
		additional = "{{{umbrella_msg}}}",
	}, true
end)


-- Handler for categories of creoles and pidgins grouped by the language they
-- developed from, e.g. [[:Category:English-based creole or pidgin languages]].
-- Returns nothing unless the part before "-based" is a known canonical
-- language name.
table.insert(raw_handlers, function(data)
	local langname = data.category:match("(.*)%-based creole or pidgin languages$")
	local lang = langname and require("Module:languages").getByCanonicalName(langname)
	if not lang then
		return
	end

	local code = lang:getCode()
	local description = "Languages which developed as a [[creole]] or [[pidgin]] from " .. lang:makeCategoryLink() .. "."
	-- The "*" prefix is part of the sort key under the general category.
	local parent = {
		name = "Creole or pidgin languages",
		sort = {sort_base = "*" .. langname, lang = "en"},
	}
	local breadcrumb = lang:getCanonicalName() .. "-based"

	return {
		lang = code,
		description = description,
		parents = {parent},
		breadcrumb = breadcrumb,
	}
end)


-- Handler for categories of constructed languages grouped by the language
-- they are based on, e.g. [[:Category:English-based constructed languages]].
-- Returns nothing unless the part before "-based" is a known canonical
-- language name.
table.insert(raw_handlers, function(data)
	local langname = data.category:match("(.*)%-based constructed languages$")
	local lang = langname and require("Module:languages").getByCanonicalName(langname)
	if not lang then
		return
	end

	local code = lang:getCode()
	local description = "Constructed languages which are based on " .. lang:makeCategoryLink() .. "."
	-- The "*" prefix is part of the sort key under the general category.
	local parent = {
		name = "Constructed languages",
		sort = {sort_base = "*" .. langname, lang = "en"},
	}
	local breadcrumb = lang:getCanonicalName() .. "-based"

	return {
		lang = code,
		description = description,
		parents = {parent},
		breadcrumb = breadcrumb,
	}
end)


return {RAW_CATEGORIES = raw_categories, RAW_HANDLERS = raw_handlers}