Module:Language/scripts/codepoints/make

Documentation for this module may be created at Module:Language/scripts/codepoints/make/doc

-- Export table: functions stored on p are what this module returns to its callers.
local p = {}

local sortedPairs = require("Module:TableTools").sortedPairs

-- Wraps text in a <syntaxhighlight lang="lua"> extension tag, issued through
-- the frame returned by mw.getCurrentFrame().
-- content: the text placed inside the tag.
-- Returns whatever frame:extensionTag returns for that tag.
local function highlight(content)
	local currentFrame = mw.getCurrentFrame()
	local tagSpec = {
		name = "syntaxhighlight",
		content = content,
		args = { lang = "lua" },
	}
	return currentFrame:extensionTag(tagSpec)
end

-- Serialises the script data table as Lua source text.
-- data.individual: table mapping a code point (number) to a script code
--   (string); entries are written in the order produced by sortedPairs.
-- data.ranges: sequence of { first, last, scriptCode } triples, written in
--   the order in which they are stored.
-- Returns the generated source text as one string.
local function dump(data)
	-- Line templates; code points are printed as 5-digit upper-case hex.
	local individualLine = [[
		[0x%05X] = "%s",
]]
	local rangeLine = [[
		{ 0x%05X, 0x%05X, "%s" },
]]

	local chunks, count = {}, 0
	local function emit(text)
		count = count + 1
		chunks[count] = text
	end

	emit [[
{
	individual = {
]]

	for codePoint, scriptCode in sortedPairs(data.individual) do
		emit(individualLine:format(codePoint, scriptCode))
	end

	emit [[
	},
	ranges = {
]]

	for _, triple in ipairs(data.ranges) do
		emit(rangeLine:format(unpack(triple)))
	end

	emit [[
	},
}]]

	return table.concat(chunks)
end

-- Builds a script-name -> script-code lookup table from alias text.
-- Only "sc ; <code> ; <name>" entries directly preceded by a newline are used.
local function parseScriptCodes(aliasText)
	local nameToCode = {}
	for code, name in aliasText:gmatch("%f[^\n]sc +; +(%a+) +; +([%a_]+)") do
		nameToCode[name] = code
	end
	return nameToCode
end

-- Parses "XXXX ; Name" and "XXXX..YYYY ; Name" entries (each directly preceded
-- by a newline) into single code points and ranges, merging consecutive
-- entries that are adjacent and belong to the same script.
-- Returns { ranges = <sequence of { first, last, code }>, individual = <map> }.
local function parseScriptRanges(scriptText, nameToCode)
	local ranges, individual = {}, {}
	local rangeCount = 0 -- index of the most recently added range
	local prevScriptCode, prevCodepoint

	for first, last, scriptName in scriptText:gmatch("%f[^\n](%x+)%.?%.?(%x*) +; +([%a_]+)") do
		local codepoint1 = tonumber(first, 16)
		-- The second capture is the empty string for single-code-point
		-- entries; map that to nil explicitly.
		local codepoint2 = last ~= "" and tonumber(last, 16) or nil
		local lastCodepoint = codepoint2 or codepoint1
		-- Unknown script names are kept, visibly marked, instead of dropped.
		-- Declared local so that nothing leaks into the global table.
		local scriptCode = nameToCode[scriptName] or scriptName .. "(???)"

		if scriptCode == prevScriptCode and prevCodepoint and codepoint1 == prevCodepoint + 1 then
			if individual[prevCodepoint] then
				-- The previous entry was a single code point: promote it to a
				-- range that also covers the current entry.
				individual[prevCodepoint] = nil
				rangeCount = rangeCount + 1
				ranges[rangeCount] = { prevCodepoint, lastCodepoint, scriptCode }
			else
				-- The previous entry is the most recent range: extend its end.
				ranges[rangeCount][2] = lastCodepoint
			end
		elseif codepoint2 then
			rangeCount = rangeCount + 1
			ranges[rangeCount] = { codepoint1, codepoint2, scriptCode }
		else
			individual[codepoint1] = scriptCode
		end

		prevCodepoint = lastCodepoint
		prevScriptCode = scriptCode
	end

	return { ranges = ranges, individual = individual }
end

-- Reads the first two HTML comments from this module's /doc page, treats the
-- first as the code-point-to-script-name list and the second as the
-- script-name-to-code list, and returns the resulting data table rendered as
-- syntax-highlighted Lua source.
-- frame: unused; present because the function is exported on the module table.
-- Raises an error if the doc page cannot be read or either comment is missing.
function p.parseUnicodeScripts(frame)
	local docTitle = "Module:Language/scripts/codepoints/make/doc"

	local title = mw.title.new(docTitle)
	local content = title and title:getContent()
	if not content then
		error("Could not read the content of " .. docTitle)
	end

	local _, firstCommentEnd, scriptData = content:find("<!%-%-(.-)%-%->")
	if not scriptData then
		error("No HTML comment with script data found in " .. docTitle)
	end

	local scriptCodeData = content:match("<!%-%-(.-)%-%->", firstCommentEnd + 1)
	if not scriptCodeData then
		error("No second HTML comment with script codes found in " .. docTitle)
	end

	local scriptTable = parseScriptRanges(scriptData, parseScriptCodes(scriptCodeData))

	-- Entries are collected in input order; order the ranges by first code point.
	table.sort(
		scriptTable.ranges,
		function(range1, range2)
			return range1[1] < range2[1]
		end)

	return highlight(dump(scriptTable))
end

-- Return the export table (carrying parseUnicodeScripts) as the module's value.
return p