-- Page: 模組:Citation/CS1/Language
-- Parent pages: Module:Citation | CS1
--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------

Placeholders for helper functions that are supplied by other modules.  Each of these is nil until
set_selected_modules() assigns the real function to it.

]]

local is_set;					-- taken from the utilities module
local in_array;
local wrap_msg;
local wrap_style;

local add_prop_cat;				-- taken from the error module
local add_maint_cat;
--[[--------------------------< L O C A L _ T A B L E >-------------------------------------------------------

Locally maintained language names, keyed by lower-case language code.  fetchLocalLanguageName() reads this table
and language_parameter() consults it before asking MediaWiki for a name, so an entry here takes precedence.

The '-cham' variants of 'cja' and 'cjm' are labelled as Cham script (占文); they were previously copies of the
'-arab' labels.

NOTE(review): 'ruq' is spelled 梅格莱诺 while its script variants are spelled 梅格列诺 -- confirm which
transliteration is intended and make the four entries agree.

The commented-out 'zh-*' entries are kept for reference; they are not active.

]]

local local_table = {
	['abe'] = '西阿贝纳基语',
	['abq'] = '阿巴扎语',
	['abq-latn'] = '阿巴扎语(拉丁文字)',
	['abs'] = '安汶马来语',
	['ady-cyrl'] = '阿迪格语(西里尔文)',
	['aeb-arab'] = '突尼斯阿拉伯语(阿拉伯文字)',
	['aeb-latn'] = '突尼斯阿拉伯语(拉丁文字)',
	['alc'] = '阿拉卡卢夫语',
	['ami'] = '阿美语',
	['azb'] = '南阿塞拜疆语',
	['bat-smg'] = '萨莫吉希亚语',
	['bbc-latn'] = '多巴巴塔克语(拉丁文字)',
	['bcc'] = '南俾路支语',
	['bcl'] = '中比科尔语',
	['bdr'] = '西海岸巴瑶语',
	['be-tarask'] = '白俄罗斯语(传统正写法)',
	['be-x-old'] = '白俄罗斯语(传统正写法)',
	['bgp'] = '东俾路支语',
	['bh'] = '博杰普尔语',
	['bxr'] = '俄罗斯布里亚特语',
	['cja'] = '西部占语',
	['cja-arab'] = '西部占语(阿拉伯文字)',
	['cja-cham'] = '西部占语(占文)',						-- Cham script, not Arabic
	['cja-latn'] = '西部占语(拉丁文字)',
	['cjm'] = '东部占语',
	['cjm-arab'] = '东部占语(阿拉伯文字)',
	['cjm-cham'] = '东部占语(占文)',						-- Cham script, not Arabic
	['cjm-latn'] = '东部占语(拉丁文字)',
	['cjy'] = '晋语',
	['cjy-hans'] = '晋语(简体)',
	['cjy-hant'] = '晋语(繁体)',
	['ckt'] = '楚科奇语',
	['en-in'] = '印度英语',
	['ike-cans'] = '东加拿大语(原住民音节)',
	['ike-latn'] = '东加拿大语(拉丁文字)',
	['ruq'] = '梅格莱诺-罗马尼亚语',
	['ruq-cyrl'] = '梅格列诺-罗马尼亚语(西里尔文字)',
	['ruq-grek'] = '梅格列诺-罗马尼亚语(希腊文字)',
	['ruq-latn'] = '梅格列诺-罗马尼亚语(拉丁文字)',
	['cdo'] = '闽东语',
	['cdo-hani'] = '闽东语(汉字)',
	['ja-hani'] = '日语(汉字文字)',
	['ja-hira'] = '日语(平假名文字)',
	['ja-hrkt'] = '日语(假名文字)',
	['ja-kana'] = '日语(片假名文字)',
	['ko-kp'] = '朝鲜朝鲜语',
	['kk-cn'] = '中国哈萨克语',
	['no'] = '挪威语',
	['ojp'] = '古日语',
	['ojp-hani'] = '古日语(汉字文字)',
	['ojp-hira'] = '古日语(平假名文字)',
	['tet'] = '德顿语',
	['ug-arab'] = '维吾尔语(阿拉伯文字)',
	['ug-latn'] = '维吾尔语(拉丁文字)',
--	['zh-cn'] = '中国大陆中文',
--	['zh-hans'] = '简体中文',
--	['zh-hant'] = '繁體中文',
--	['zh-hk'] = '香港中文',
	['zh-min-nan'] = '闽南语',
--	['zh-mo'] = '澳門中文',
--	['zh-my'] = '马来西亚中文',
--	['zh-sg'] = '新加坡中文',
--	['zh-tw'] = '臺灣中文',
}
--[[--------------------------< F E T C H _ L O C A L _ L A N G U A G E _ N A M E >----------------------------

Returns the entry that local_table holds for <code>, or nil when there is no such entry.  The key is used exactly
as supplied; this function does no case folding.

]]

local function fetchLocalLanguageName (code)
	local local_name = local_table[code];
	return local_name;
end
--[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >----------------------------------------

|script-title= holds title text that is not written in a Latin-based script (Chinese, Japanese, Arabic, Hebrew,
etc.).  Such text should not be italicized and may run right-to-left, so its value is appended to Title only after
Title has received its own markup.  Supports |script-title= and |script-chapter=.

Every value is handed to wrap_style() with the 'bdi' key so that right-to-left text is isolated from the
surrounding text.  A non-empty value is first enclosed in -{R| ... }- markup.

The value may begin with a two-letter lower-case language code and a colon:
	|script-title=ja:*** ***		(where * stands for a Japanese character)
White space is accepted between the code and the colon and between the colon and the script text:
	|script-title=ja : *** ***
	|script-title=ja: *** ***
	|script-title=ja :*** ***
The pattern is anchored at the start of the value, so white space ahead of the prefix is not skipped here;
presumably the caller has already trimmed the parameter value -- TODO confirm.

Handling of the prefix:
	prefix with nothing but white space after the colon: returns the empty string
	prefix for which mw.language.fetchLanguageName() returns a name: prefix is stripped from the value, a property
		category is added ('script_with_name' for the codes listed in the function, 'script' for any other code),
		and the code is passed on as lang="..." attribute text for the <bdi> tag
	prefix for which no name is returned: value is left as written and no lang attribute is added; no error message
		is emitted for this condition

TODO: error messages when prefix is invalid ISO639-1 code; when script_value has prefix but no script;

]]

local function format_script_value (script_value)
	local lang_attr = '';													-- lang attribute text; stays empty unless the prefix is recognized

	if script_value:find ('^%l%l%s*:') then									-- value starts with two lower-case letters and a colon
		local prefix = script_value:match ('^(%l%l)%s*:%s*%S.*');				-- nil when no script text follows the colon
		if not is_set (prefix) then
			return '';														-- prefix only; nothing to render
		end

		local lang_name = mw.language.fetchLanguageName (prefix, mw.getContentLanguage():getCode());	-- name is also used for categorization
		if is_set (lang_name) then												-- prefix is a code that MediaWiki knows
			script_value = script_value:gsub ('^%l%l%s*:%s*', '');				-- drop the prefix from the rendered text

			local named_script_codes = {'ar', 'bg', 'bs', 'dv', 'el', 'fa', 'he', 'hy', 'ja', 'ka', 'ko', 'ku', 'mk', 'ps', 'ru', 'sd', 'sr', 'th', 'uk', 'ug', 'yi', 'zh'};
			if in_array (prefix, named_script_codes) then
				add_prop_cat ('script_with_name', {lang_name, prefix});
			else
				add_prop_cat ('script');
			end

			lang_attr = ' lang="' .. prefix .. '" ';							-- turn the prefix into attribute text
		end
	end

	if is_set (script_value) then
		script_value = '-{R|' .. script_value .. '}-';
	end

	local wrapped = wrap_style ('bdi', {lang_attr, script_value});				-- isolate in case the script is rtl
	return wrapped;
end
--[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------

Initially written for |title= and |script-title=.  Joins the two values with a single space after the script value
has been processed by format_script_value().

A non-empty <title> is enclosed in -{R| ... }- markup.  When <script> is not set, or format_script_value() returns
an empty string for it, the (possibly marked-up) title is returned on its own.

]]

local function script_concatenate (title, script)
	if is_set (title) then
		title = '-{R|' .. title .. '}-';
	end

	if not is_set (script) then
		return title;														-- no script value; title alone
	end

	local formatted = format_script_value (script);							-- <bdi> tags, lang attribute, categorization, etc.; empty string on error
	if not is_set (formatted) then
		return title;
	end

	return title .. ' ' .. formatted;										-- title followed by script title
end
--[[--------------------------< G E T _ I S O 6 3 9 _ C O D E >------------------------------------------------

Validates a language name given in |language= when that value is not an ISO639-1 code.

Returns:
	name, code	when the name matches a language whose code is two characters long
	name		when the name matches only languages whose codes are not two characters long
	lang		(the argument, unchanged) when nothing matches; the code is nil in this case

Because the case of the supplied name may differ from the case that MediaWiki uses, names are compared in lower
case; on a match the MediaWiki spelling is returned.

The name 'norwegian' (any case) is handled before the lookup and always yields '挪威语' with code 'no'; this is the
special case related to the remapping of code 'no' at Extension:CLDR.

mw.language.fetchLanguageNames() also lists languages that are not part of ISO639-1.  For example
|language=Samaritan Aramaic is found under code 'sam'; since that code is not two characters long only the name
is returned.

Several codes can carry the same name.  pairs() visits the list in an unspecified order, so every match is
examined and the winner is chosen deterministically: a two-character code is preferred over any other code, and
among codes of the same kind the one that sorts first wins.

Adapted from code taken from Module:Check ISO 639-1.

]]

local function get_iso639_code (lang)
	if 'norwegian' == lang:lower() then										-- special case related to Wikimedia remap of code 'no' at Extension:CLDR
		return '挪威语', 'no';
	end

	local languages = mw.language.fetchLanguageNames (mw.getContentLanguage():getCode(), 'all');	-- names known to MediaWiki
																			-- ('all' is required for North Ndebele, South Ndebele, and Ojibwa)
	local langlc = mw.ustring.lower (lang);									-- lower-case version for comparisons

	local best_code, best_name;												-- best match seen so far
	for code, name in pairs (languages) do
		if langlc == mw.ustring.lower (name) then
			local code_is_iso = 2 == code:len();
			local is_better;
			if not best_code then
				is_better = true;											-- first match
			elseif code_is_iso ~= (2 == best_code:len()) then
				is_better = code_is_iso;									-- two-character code beats any other code
			else
				is_better = code < best_code;								-- same kind: fixed tie-break, independent of traversal order
			end
			if is_better then
				best_code, best_name = code, name;
			end
		end
	end

	if not best_code then
		return lang;														-- not a known language name; original text, no code
	end
	if 2 ~= best_code:len() then
		return best_name;													-- ISO639-1 codes only; return the name without the code
	end
	return best_name, best_code;											-- MediaWiki spelling of the name and the ISO639-1 code
end
--[[--------------------------< L A N G U A G E _ P A R A M E T E R >------------------------------------------

Renders the value of |language=.  The value is a comma-separated list of language codes and/or language names,
for example |language=nb, French, th.  Returns the empty string when <lang> is not set; otherwise returns a space
followed by the result of wrap_msg ('language', <rendered list>).

For each item in the list the name is resolved in this order:
	1. local_table (via fetchLocalLanguageName), keyed by the lower-cased item;
	2. mw.language.fetchLanguageName(), but only when the item is two characters long or begins with two letters and
		a hyphen.  Codes that begin with 'zh-' are named in their own variant; all other codes are named in the
		wiki's content language;
	3. get_iso639_code(), which treats the item as a language name.

Norwegian: local_table maps code 'no' to '挪威语' and get_iso639_code() does the same for the name 'Norwegian', so
|language=no is rendered as plain Norwegian rather than as one of the variants (nb, nn) that editors generally do
not distinguish.  See Help talk:Citation Style_1#An ISO 639-1 language name test

Categorization: when a code is known, the 'foreign_lang_source' property category is added with the name and the
code, except for code 'zh' and codes that begin with 'zh-' (Chinese is not a foreign language here).  When no
code can be determined, the 'unknown_lang' maintenance category is added.

The zh tests use the pattern '^zh%-'.  The hyphen has to be escaped: an unescaped '-' is the lazy repetition
operator in Lua patterns, so '^zh-' matches every string that starts with 'z' (e.g. 'zu', 'za').  The tests are
applied to the lower-cased code so that 'ZH-TW' and 'zh-tw' are treated alike.

The rendered list joins two names with '及'; three or more names are joined with '、' except that the last name
is joined with '及'.

]]

local function language_parameter (lang)
	if not is_set (lang) then
		return '';
	end

	local language_list = {};												-- language names to be rendered
	local names_table = mw.text.split (lang, '%s*,%s*');						-- names should be a comma separated list

	for _, item in ipairs (names_table) do
		local item_lc = item:lower();
		local code;															-- the language code, when one is known
		local name = fetchLocalLanguageName (item_lc);						-- local table first

		if not is_set (name) and (item:match ('^%a%a%-') or 2 == item:len()) then	-- looks like a two-letter code, with or without a subtag
			if item_lc:match ('^zh%-') then
				name = mw.language.fetchLanguageName (item_lc, item_lc);		-- Chinese variants are named in their own variant
			else
				name = mw.language.fetchLanguageName (item_lc, mw.getContentLanguage():getCode());
			end
		end

		if is_set (name) then												-- item was a recognized code
			code = item_lc;
		else
			name, code = get_iso639_code (item);								-- treat item as a name; take the returned spelling of the name
		end

		if is_set (code) then
			if 'zh' ~= code and not code:match ('^zh%-') then					-- Chinese is not a foreign language
				add_prop_cat ('foreign_lang_source', {name, code});
			end
		else
			add_maint_cat ('unknown_lang');
		end

		table.insert (language_list, name);
	end

	local count = #language_list;
	local rendered;
	if 2 >= count then
		rendered = table.concat (language_list, '及');						-- '及' between two language names
	else
		rendered = table.concat (language_list, '、', 1, count - 1) .. '及' .. language_list[count];	-- '、' separators; '及' before the last name
	end

	return (" " .. wrap_msg ('language', rendered));
end
--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Binds this module's forward-declared helpers to functions taken from the two tables supplied by the caller:
	utilities_page_ptr	provides is_set, in_array, wrap_style, wrap_msg (the selected Module:Citation/CS1/Utilities)
	error_page_ptr		provides add_prop_cat, add_maint_cat (the selected Module:Citation/CS1/Error)

Returns nothing.

]]

local function set_selected_modules (utilities_page_ptr, error_page_ptr)
	local utilities, errors = utilities_page_ptr, error_page_ptr;

	is_set, in_array = utilities.is_set, utilities.in_array;
	wrap_style, wrap_msg = utilities.wrap_style, utilities.wrap_msg;

	add_prop_cat, add_maint_cat = errors.add_prop_cat, errors.add_maint_cat;
end
--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

The module's public interface: the three functions that other modules may call.

]]

local exports = {};

exports.set_selected_modules = set_selected_modules;
exports.language_parameter = language_parameter;
exports.script_concatenate = script_concatenate;

return exports;