Modul:scripts/findBestScript
Utsjånad
Dokumentasjon for modulen kan opprettast på Modul:scripts/findBestScript/dok
--- Pick the script whose character set covers the most characters of `text`.
--
-- @param export      table providing `getByCode(code)`; used only to obtain
--                    the "None" script when nothing matches.
-- @param text        the text to classify.
-- @param lang        not used in this function body.
-- @param scripts     sequence of script objects; each must offer a
--                    `countCharacters(text)` method and a `_code` field.
-- @param forceDetect not used in this function body.
-- @return one of the entries of `scripts`, or whatever
--         `export.getByCode("None")` returns when no entry matches.
return function (export, text, lang, scripts, forceDetect)
    -- Deferred lookup of the fallback script, so it is only fetched on the
    -- paths that actually need it.
    local function noneScript()
        return export.getByCode("None")
    end

    -- Drop named HTML entities before counting. Per the original note, the
    -- catfix function in [[Module:utilities]] tags a no-break space entity,
    -- whose name is made of Latin letters; left in place, it caused "Latn"
    -- to win whenever it was among the language's scripts.
    local cleaned = string.gsub(text, "&[a-zA-Z0-9]+;", "")

    -- Number of characters that are neither whitespace nor punctuation.
    -- Each match of the byte pattern below is one UTF-8 character (a lead
    -- byte plus its continuation bytes); counting the matches is faster
    -- than calling mw.ustring.len.
    local significant = mw.ustring.gsub(cleaned, "[%s%p]+", "")
    local _, total = string.gsub(significant, "[\1-\127\194-\244][\128-\191]*", "")
    if total == 0 then
        return noneScript()
    end

    -- Walk the candidates in order, remembering the strongest match so far.
    local leader = nil
    local leaderCount = 0
    for _, candidate in ipairs(scripts) do
        local matched = candidate:countCharacters(cleaned)

        -- "Hani" (general Han) gives way to "Hant" (traditional) or "Hans"
        -- (simplified) when one of those is already the current leader,
        -- i.e. it has matched at least one character. Otherwise "Hani"
        -- would win whenever an exclusively traditional or simplified
        -- character appears next to a shared one, because "Hani" matches
        -- both while the other two do not match the shared character.
        -- This avoids having to list every shared character in both the
        -- traditional and the simplified character sets.
        if candidate._code == "Hani" and matched >= 1 and leader then
            local leaderCode = leader._code
            if leaderCode == "Hant" or leaderCode == "Hans" then
                return leader
            end
        end

        -- A candidate covering every significant character wins at once.
        if matched >= total then
            return candidate
        end

        -- Strict comparison: on a tie the earlier candidate stays ahead.
        if matched > leaderCount then
            leader = candidate
            leaderCount = matched
        end
    end

    if leader then
        return leader
    end

    -- Nothing matched at all.
    return noneScript()
end