Module:Citation/CS1/Identifiers: Difference between revisions
Content added Content deleted
m (1 revision imported: vndb) |
(sync from sandbox;) |
||
Line 1: | Line 1: | ||
local identifiers = {}; |
|||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- |
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- |
||
]] |
]] |
||
local is_set, in_array, set_error, select_one, add_maint_cat, substitute; |
local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities |
||
local z; -- table of tables defined in Module:Citation/CS1/Utilities |
local z; -- table of tables defined in Module:Citation/CS1/Utilities |
||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration |
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration |
||
--============================<< H E L P E R F U N C T I O N S >>============================================ |
|||
--[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >---------------------------- |
|||
as an aid to internationalizing identifier-label wikilinks, gets identifier article names from wikidata. |
|||
returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else |
|||
for identifiers that do not have q, returns nil |
|||
for wikis that do not have mw.wikibase installed, returns nil |
|||
]] |
|||
local function wikidata_article_name_get (q)
	-- Looks up, at wikidata, the title of the article that this wiki has for entity <q>.
	-- Returns ':<wiki code>:<title>' (interwiki-style link target without brackets) when
	-- a sitelink is found; otherwise returns whatever falsy value the lookup produced.
	if not is_set (q) then
		return nil;																-- no q number; nothing to look up
	end

	if q and not mw.wikibase then
		return nil;																-- q number given but mw.wikibase is not installed on this wiki
	end

	local wiki_code = cfg.this_wiki_code;										-- wikipedia subdomain; 'en' for en.wikipedia.org
	local title = mw.wikibase.getSitelink (q, wiki_code .. 'wiki');				-- nil when wikidata has no title for this wiki

	if not title then
		return title;															-- nothing found; hand back the empty result unchanged
	end

	return ':' .. wiki_code .. ':' .. title;									-- leading colon required for an interwiki-style link
end
|||
Line 22: | Line 51: | ||
local url_string = options.id; |
local url_string = options.id; |
||
local ext_link; |
local ext_link; |
||
local this_wiki_code = cfg.this_wiki_code; -- wikipedia subdomain; 'en' for en.wikipedia.org |
|||
local wd_article; -- article title from wikidata |
|||
if options.encode == true or options.encode == nil then |
if options.encode == true or options.encode == nil then |
||
url_string = mw.uri.encode( url_string ); |
url_string = mw.uri.encode( url_string ); |
||
end |
end |
||
ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)); |
ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)); |
||
if is_set(options.access) then |
if is_set(options.access) then |
||
ext_link = substitute (cfg.presentation['access-signal'], { |
ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link}); -- add the free-to-read / paywall lock |
||
end |
|||
if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive |
|||
wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it; |
|||
end |
end |
||
local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link; |
|||
return mw.ustring.format( '[[%s|%s]]%s%s', options.link, options.label, options.separator or " ", ext_link); |
|||
return table.concat ({ |
|||
make_wikilink (label_link, options.label), -- redirect, wikidata link, or locally specified link (in that order) |
|||
options.separator or ' ', |
|||
ext_link |
|||
}); |
|||
end |
end |
||
Line 43: | Line 84: | ||
local function internal_link_id(options) |
local function internal_link_id(options) |
||
local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 |
|||
return mw.ustring.format( '[[%s|%s]]%s[[%s%s%s|%s]]', |
|||
options.link, options.label, options.separator or " ", |
|||
if not (cfg.use_identifier_redirects and is_set (options.redirect)) then -- redirect has priority so if enabled and available don't fetch from wikidata because expensive |
|||
options.prefix, options.id, options.suffix or "", |
|||
wd_article = wikidata_article_name_get (options.q); -- if wikidata has an article title for this wiki, get it; |
|||
mw.text.nowiki(options.id) |
|||
end |
|||
local label_link = (cfg.use_identifier_redirects and is_set (options.redirect) and options.redirect) or wd_article or options.link; |
|||
return table.concat ( |
|||
{ |
|||
make_wikilink (label_link, options.label), -- wiki link the identifier label |
|||
options.separator or ' ', -- add the separator |
|||
make_wikilink ( |
|||
table.concat ( |
|||
{ |
|||
options.prefix, |
|||
id, -- translated to western digits |
|||
options.suffix or '' |
|||
}), |
|||
substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)}) -- bdi tags to prevent Latn script identifiers from being reversed at rtl language wikis |
|||
); -- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required? |
|||
}); |
|||
end |
|||
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ |
|||
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is |
|||
in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because |
|||
|embargo= was not set in this cite. |
|||
]] |
|||
local function is_embargoed (embargo)
	-- Returns <embargo> unchanged while the embargo date is today or later; returns ''
	-- when <embargo> is not set, cannot be parsed as a date, or has already passed.
	-- An expired embargo also adds the 'embargo' maintenance category.
	if not is_set (embargo) then
		return '';																-- |embargo= not set
	end

	local lang = mw.getContentLanguage();
	local good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo);	-- unix timestamp of the embargo date
	local good2, todays_date = pcall (lang.formatDate, lang, 'U');				-- unix timestamp of now

	if not (good1 and good2) then
		return '';																-- one or both dates could not be converted
	end

	-- formatDate() may return the timestamp in the local script, which tonumber() does not
	-- understand; fall back to the language-aware parser (same approach as is_valid_biorxiv_date())
	embargo_date = tonumber (embargo_date) or lang:parseFormattedNumber (embargo_date);
	todays_date = tonumber (todays_date) or lang:parseFormattedNumber (todays_date);

	if not (embargo_date and todays_date) then
		return '';																-- still not numbers; treat like an unparsable date instead of raising a compare-with-nil error
	end

	if embargo_date >= todays_date then											-- is embargo date in the future?
		return embargo;															-- still embargoed
	end

	add_maint_cat ('embargo');													-- embargo has expired
	return '';																	-- unset because embargo has expired
end
|||
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------ |
|||
returns true if: |
|||
2019-12-11T00:00Z <= biorxiv_date < today + 2 days |
|||
The dated form of biorxiv identifier has a start date of 2019-12-11. The unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 |
|||
biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC |
|||
today is the current date at time 00:00:00 UTC plus 48 hours |
|||
if today is 2015-01-01T00:00:00 then |
|||
adding 24 hours gives 2015-01-02T00:00:00 – one second more than today |
|||
adding 48 hours gives 2015-01-03T00:00:00 – one second more than tomorrow |
|||
This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser |
|||
apparently doesn't understand non-English date month names. This function will always return false when the date |
|||
contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that |
|||
call this function with YYYY-MM-DD format dates. |
|||
]=] |
|||
local function is_valid_biorxiv_date (biorxiv_date)
	-- Returns true when 2019-12-11T00:00Z <= biorxiv_date < (today at 00:00 + 2 days);
	-- returns false otherwise, including when either date cannot be converted to a
	-- numeric unix timestamp.
	local first_dated_ts = 1576022400;											-- {{#time:U|2019-12-11}}; start date of the dated biorxiv identifier form

	local lang_object = mw.getContentLanguage();
	local good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date);		-- convert biorxiv_date value to unix timestamp
	local good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days');	-- today midnight + 2 days is one second more than all day tomorrow

	if not (good1 and good2) then
		return false;															-- one or both failed to convert to unix timestamp
	end

	-- formatDate() returns a timestamp in the local script which tonumber() may not understand
	biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts);
	tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);

	if not (biorxiv_ts and tomorrow_ts) then
		return false;															-- neither conversion produced a number; avoid comparing nil with a number
	end

	return (first_dated_ts <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts);		-- 2019-12-11T00:00Z <= biorxiv_date < today + 2 days
end
end |
||
Line 54: | Line 179: | ||
ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. |
ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. |
||
ISBN-13 is checked in |
ISBN-13 is checked in isbn(). |
||
If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length |
If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length |
||
Line 63: | Line 188: | ||
local function is_valid_isxn (isxn_str, len)
	-- Weighted mod-11 checksum used for ISBN-10 (len = 10) and ISSN (len = 8).
	-- The first character gets weight <len>, the next <len>-1, ... the last gets 1;
	-- 'X' stands for the value 10.  Returns true when the weighted sum is a multiple
	-- of 11.  Any byte that is neither a digit nor 'X' makes the identifier invalid
	-- (returns false) rather than raising an arithmetic-on-nil error.
	local sum = 0;
	local bytes = { isxn_str:byte (1, len) };									-- table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
	local zero, nine, x = string.byte ('0'), string.byte ('9'), string.byte ('X');

	for i, v in ipairs (bytes) do												-- loop through all of the bytes and calculate the checksum
		local weight = len + 1 - i;

		if v == x then
			sum = sum + 10 * weight;											-- X represents 10 decimal
		elseif zero <= v and v <= nine then
			sum = sum + (v - zero) * weight;
		else
			return false;														-- not a digit and not X; cannot be a valid isbn-10 / issn
		end
	end

	return sum % 11 == 0;														-- true when the calculation result is zero
end
||
--[[--------------------------< IS _ V A L I D _ I S X N |
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- |
||
ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. |
ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. |
||
Line 87: | Line 212: | ||
local temp=0; |
local temp=0; |
||
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' |
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 |
||
for i, v in ipairs( isxn_str ) do |
for i, v in ipairs( isxn_str ) do |
||
temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit |
temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit |
||
Line 95: | Line 220: | ||
--[[--------------------------< |
--[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- |
||
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization) |
|||
Determines whether an ISBN string is valid |
|||
1. Remove all blanks. |
|||
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. |
|||
3. If there is a hyphen in the string: |
|||
a. Remove it. |
|||
b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out): |
|||
1. All these characters should be digits, and there should be six or less. (not done in this function) |
|||
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. |
|||
Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. |
|||
]] |
]] |
||
local function |
local function normalize_lccn (lccn) |
||
lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace |
|||
if nil ~= isbn_str:match("[^%s-0-9X]") then |
|||
return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X |
|||
end |
|||
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces |
|||
local len = isbn_str:len(); |
|||
if len ~= 10 and len ~= 13 then |
|||
return false, 'length'; -- fail if incorrect length |
|||
end |
|||
if |
if nil ~= string.find (lccn,'/') then |
||
lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it |
|||
return false, 'invalid form'; |
|||
end |
|||
return is_valid_isxn(isbn_str, 10), 'checksum'; |
|||
else |
|||
if isbn_str:match( "^%d+$" ) == nil then |
|||
return false, 'invalid character'; -- fail if isbn13 is not all digits |
|||
end |
|||
if isbn_str:match( "^97[89]%d*$" ) == nil then |
|||
return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979 |
|||
end |
|||
return is_valid_isxn_13 (isbn_str), 'checksum'; |
|||
end |
end |
||
end |
|||
local prefix |
|||
local suffix |
|||
prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix |
|||
if nil ~= suffix then -- if there was a hyphen |
|||
--[[--------------------------< I S M N >---------------------------------------------------------------------- |
|||
suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6 |
|||
lccn=prefix..suffix; -- reassemble the lccn |
|||
Determines whether an ISMN string is valid. Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the |
|||
same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf |
|||
section 2, pages 9–12. |
|||
]] |
|||
local function ismn (id) |
|||
local handler = cfg.id_handlers['ISMN']; |
|||
local text; |
|||
local valid_ismn = true; |
|||
local id_copy; |
|||
id_copy = id; -- save a copy because this testing is destructive |
|||
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn |
|||
if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790 |
|||
valid_ismn = false; |
|||
else |
|||
valid_ismn=is_valid_isxn_13 (id); -- validate ismn |
|||
end |
end |
||
-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to |
|||
-- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) |
|||
text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id_copy; -- because no place to link to yet |
|||
if false == valid_ismn then |
|||
text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the ismn is invalid |
|||
end |
|||
return |
return lccn; |
||
end |
|||
--[[--------------------------< I S S N >---------------------------------------------------------------------- |
|||
Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but |
|||
has separated the two groups of four digits with a space. When that condition occurred, the resulting link looked |
|||
like this: |
|||
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link |
|||
This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length |
|||
and makes sure that the checkdigit agrees with the calculated value. Incorrect length (8 digits), characters |
|||
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message. The |
|||
issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits. |
|||
]] |
|||
local function issn(id, e) |
|||
local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate |
|||
local handler; |
|||
local text; |
|||
local valid_issn = true; |
|||
if e then |
|||
handler = cfg.id_handlers['EISSN']; |
|||
else |
|||
handler = cfg.id_handlers['ISSN']; |
|||
end |
end |
||
--============================<< I D E N T I F I E R F U N C T I O N S >>==================================== |
|||
id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn |
|||
if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position |
|||
valid_issn=false; -- wrong length or improper character |
|||
else |
|||
valid_issn=is_valid_isxn(id, 8); -- validate issn |
|||
end |
|||
if true == valid_issn then |
|||
id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version |
|||
else |
|||
id = issn_copy; -- if not valid, use the show the invalid issn with error message |
|||
end |
|||
text = external_link_id({link = handler.link, label = handler.label, |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) |
|||
if false == valid_issn then |
|||
text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' ) -- add an error message if the issn is invalid |
|||
end |
|||
return text |
|||
end |
|||
--[[--------------------------< A M A Z O N >------------------------------------------------------------------ |
|||
Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha |
|||
characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit |
|||
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=. |
|||
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit. |
|||
]] |
|||
local function amazon(id, domain)
	-- Builds the external link for an |asin= value and appends a 'bad_asin' error
	-- message when the value fails the simple shape checks below.  An all-numeric
	-- value that passes check_isbn() gets the 'ASIN' maintenance category instead.
	local asin_error = '';

	if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
		asin_error = ' ' .. set_error ('bad_asin');								-- not a mix of 10 uppercase alpha and numeric characters
	elseif id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then							-- 10-digit numeric (or 9 digits with terminal X)
		if check_isbn( id ) then												-- asin value is an isbn10
			add_maint_cat ('ASIN');
		elseif not is_set (asin_error) then
			asin_error = ' ' .. set_error ('bad_asin');							-- asin is not isbn10
		end
	elseif not id:match("^%u[%d%u]+$") then
		asin_error = ' ' .. set_error ('bad_asin');								-- asin doesn't begin with uppercase alpha
	end

	if not is_set(domain) then
		domain = "com";															-- default top-level domain
	else
		local second_level = {													-- checked in this order; first match wins
			{"co.", {'jp', 'uk'}},												-- Japan, United Kingdom
			{"com.", {'au', 'br', 'mx'}},										-- Australia, Brazil, Mexico
		};

		for _, entry in ipairs (second_level) do
			if in_array (domain, entry[2]) then
				domain = entry[1] .. domain;
				break;
			end
		end
	end

	local handler = cfg.id_handlers['ASIN'];
	local link_text = external_link_id({
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix .. domain .. "/dp/",
		id = id,
		encode = handler.encode,
		separator = handler.separator,
	});

	return link_text .. asin_error;
end
|||
--[[--------------------------< A R X I V >-------------------------------------------------------------------- |
--[[--------------------------< A R X I V >-------------------------------------------------------------------- |
||
Line 265: | Line 264: | ||
where: |
where: |
||
<archive> is a string of alpha characters - may be hyphenated; no other punctuation |
<archive> is a string of alpha characters - may be hyphenated; no other punctuation |
||
<class> is a string of alpha characters - may be hyphenated; no other punctuation |
<class> is a string of alpha characters - may be hyphenated; no other punctuation; not the same as |class= parameter which is not supported in this form |
||
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 |
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 |
||
first digit of YY for this form can only be 9 or 0 |
first digit of YY for this form can only be 9 or 0 |
||
Line 288: | Line 287: | ||
local handler = cfg.id_handlers['ARXIV']; |
local handler = cfg.id_handlers['ARXIV']; |
||
local year, month, version; |
local year, month, version; |
||
local err_cat = |
local err_cat = false; -- assume no error message |
||
local text; |
local text; -- output text |
||
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version |
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version |
||
Line 297: | Line 296: | ||
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month |
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month |
||
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? |
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? |
||
err_cat = |
err_cat = true; -- flag for error message |
||
end |
end |
||
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version |
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version |
||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); |
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); |
||
Line 305: | Line 305: | ||
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) |
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) |
||
((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)? |
((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)? |
||
err_cat = |
err_cat = true; -- flag for error message |
||
end |
end |
||
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version |
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version |
||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); |
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); |
||
Line 312: | Line 313: | ||
month = tonumber(month); |
month = tonumber(month); |
||
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) |
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) |
||
err_cat = |
err_cat = true; -- flag for error message |
||
end |
end |
||
else |
else |
||
err_cat = |
err_cat = true; -- not a recognized format; flag for error message |
||
end |
end |
||
err_cat = err_cat and table.concat ({' ', set_error ('bad_arxiv')}) or ''; -- set error message if flag is true |
|||
text = external_link_id({link = handler.link, label = handler.label, |
|||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
||
if is_set (class) then |
if is_set (class) then |
||
if id:match ('^%d+') then |
|||
class = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'; -- external link within square brackets, not wikilink |
|||
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink |
|||
else |
|||
else |
|||
class = ''; -- empty string for concatenation |
|||
text = table.concat ({text, ' ', set_error ('class_ignored')}); |
|||
end |
|||
end |
end |
||
return text .. class; |
|||
end |
|||
return text; |
|||
--[[--------------------------< B I O R X I V >----------------------------------------------------------------- |
|||
Format bioRxiv id and do simple error checking. BiorXiv ids are exactly 6 digits. |
|||
The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI: |
|||
https://doi.org/10.1101/078733 -> 078733 |
|||
]] |
|||
local function biorxiv(id)
	-- Formats a bioRxiv identifier as an external link; appends a 'bad_biorxiv'
	-- error message when <id> is anything other than exactly six digits.
	local handler = cfg.id_handlers['BIORXIV'];

	local six_digits = id:match("^%d%d%d%d%d%d$") ~= nil;
	local error_text = six_digits and '' or (' ' .. set_error( 'bad_biorxiv'));	-- empty string when the id is well formed

	local link_options = {
		link = handler.link,
		label = handler.label,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		access = handler.access,
	};

	return external_link_id(link_options) .. error_text;
end
end |
||
--[[--------------------------< |
--[[--------------------------< B I B C O D E >-------------------------------------------------------------------- |
||
Validates (sort of) and formats a bibcode id. |
|||
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization) |
|||
1. Remove all blanks. |
|||
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. |
|||
3. If there is a hyphen in the string: |
|||
a. Remove it. |
|||
b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out): |
|||
1. All these characters should be digits, and there should be six or less. (not done in this function) |
|||
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. |
|||
Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes |
|||
Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. |
|||
]] |
|||
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters |
|||
local function normalize_lccn (lccn) |
|||
and first four digits must be a year. This function makes these tests: |
|||
lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace |
|||
length must be 19 characters |
|||
characters in position |
|||
if nil ~= string.find (lccn,'/') then |
|||
1–4 must be digits and must represent a year in the range of 1000 – next year |
|||
lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it |
|||
5 must be a letter |
|||
end |
|||
6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) |
|||
9–18 must be letter, digit, or dot |
|||
local prefix |
|||
19 must be a letter or dot |
|||
local suffix |
|||
prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix |
|||
if nil ~= suffix then -- if there was a hyphen |
|||
suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6 |
|||
lccn=prefix..suffix; -- reassemble the lccn |
|||
end |
|||
return lccn; |
|||
end |
|||
--[[--------------------------< L C C N >---------------------------------------------------------------------- |
|||
Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of |
|||
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits. |
|||
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/ |
|||
length = 8 then all digits |
|||
length = 9 then lccn[1] is lower case alpha |
|||
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits |
|||
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits |
|||
length = 12 then lccn[1] and lccn[2] are both lower case alpha |
|||
]] |
]] |
||
local function |
local function bibcode (id, access) |
||
local handler = cfg.id_handlers[' |
local handler = cfg.id_handlers['BIBCODE']; |
||
local err_type; |
|||
local err_cat = ''; -- presume that LCCN is valid |
|||
local year; |
|||
local id = lccn; -- local copy of the lccn |
|||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes) |
|||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, |
|||
local len = id:len(); -- get the length of the lccn |
|||
access=access}); |
|||
if 8 == len then |
|||
if 19 ~= id:len() then |
|||
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) |
|||
err_type = cfg.err_msg_supl.length; |
|||
err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message |
|||
else |
|||
end |
|||
year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$") -- |
|||
elseif 9 == len then -- LCCN should be adddddddd |
|||
if |
if not year then -- if nil then no pattern match |
||
err_type = cfg.err_msg_supl.value; -- so value error |
|||
else |
|||
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year |
|||
elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd |
|||
year = tonumber (year); -- convert year portion of bibcode to a number |
|||
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... |
|||
if (1000 > year) or (year > next_year) then |
|||
if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern |
|||
err_type = cfg.err_msg_supl.year; -- year out of bounds |
|||
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message |
|||
end |
|||
if id:find('&%.') then |
|||
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does its missing a letter) |
|||
end |
end |
||
end |
end |
||
elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd |
|||
if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns |
|||
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message |
|||
end |
|||
elseif 12 == len then -- LCCN should be aadddddddddd |
|||
if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern |
|||
err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message |
|||
end |
|||
else |
|||
err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message |
|||
end |
end |
||
if |
if is_set (err_type) then -- if there was an error detected |
||
text = text .. ' ' .. set_error( 'bad_bibcode', {err_type}); |
|||
end |
end |
||
return text; |
|||
return external_link_id({link = handler.link, label = handler.label, |
|||
prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat; |
|||
end |
end |
||
--[[--------------------------< |
--[[--------------------------< B I O R X I V >----------------------------------------------------------------- |
||
Format |
Format bioRxiv id and do simple error checking. Before 2019-12-11, biorXiv ids were 10.1101/ followed by exactly |
||
6 digits. After 2019-12-11, biorXiv ids retained the six-digit identifier but prefixed that with a yyyy.mm.dd. |
|||
code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable |
|||
date and suffixed with an optional version identifier. |
|||
test_limit will need to be updated periodically as more PMIDs are issued. |
|||
The bioRxiv id is the string of characters: |
|||
https://doi.org/10.1101/078733 -> 10.1101/078733 |
|||
or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits: |
|||
https://www.biorxiv.org/content/10.1101/2019.12.11.123456v2 -> 10.1101/2019.12.11.123456v2 |
|||
see https://www.biorxiv.org/about-biorxiv |
|||
]] |
]] |
||
local function |
local function biorxiv(id) |
||
local handler = cfg.id_handlers['BIORXIV']; |
|||
local test_limit = 30000000; -- update this value as PMIDs approach |
|||
local err_cat = true; -- flag; assume that there will be an error |
|||
local handler = cfg.id_handlers['PMID']; |
|||
local err_cat = ''; -- presume that PMID is valid |
|||
local patterns = { |
|||
if id:match("[^%d]") then -- if PMID has anything but digits |
|||
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) |
|||
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message |
|||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) |
|||
else -- PMID is only digits |
|||
'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) |
|||
local id_num = tonumber(id); -- convert id to a number for range testing |
|||
} |
|||
if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries |
|||
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message |
|||
end |
|||
end |
|||
for _, pattern in ipairs (patterns) do -- spin through the patterns looking for a match |
|||
return external_link_id({link = handler.link, label = handler.label, |
|||
if id:match (pattern) then |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; |
|||
local y, m, d = id:match (pattern); -- found a match, attempt to get year, month and date from the identifier |
|||
end |
|||
if m then -- m is nil when id is the six-digit form |
|||
if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then -- validate the encoded date; TODO: don't ignore leapyear and actual month lengths ({{#time:}} is a poor date validator) |
|||
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ |
|||
break; -- date fail; break out early so we don't unset the error message |
|||
end |
|||
Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is |
|||
in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because |
|||
|embargo= was not set in this cite. |
|||
]] |
|||
local function is_embargoed (embargo) |
|||
if is_set (embargo) then |
|||
local lang = mw.getContentLanguage(); |
|||
local good1, embargo_date, good2, todays_date; |
|||
good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo ); |
|||
good2, todays_date = pcall( lang.formatDate, lang, 'U' ); |
|||
if good1 and good2 then -- if embargo date and today's date are good dates |
|||
if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future? |
|||
return embargo; -- still embargoed |
|||
else |
|||
add_maint_cat ('embargo') |
|||
return ''; -- unset because embargo has expired |
|||
end |
end |
||
err_cat = nil; -- we found a match so unset the error message |
|||
break; -- and done |
|||
end |
end |
||
end -- err_cat remains set here when no match |
|||
end |
|||
return ''; -- |embargo= not set return empty string |
|||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
prefix=handler.prefix,id=id,separator=handler.separator, |
|||
encode=handler.encode, access=handler.access}) .. (err_cat and (' ' .. set_error( 'bad_biorxiv')) or ''); |
|||
end |
end |
||
--[[--------------------------< |
--[[--------------------------< C I T E S E E R X >------------------------------------------------------------ |
||
CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org). |
|||
Format a PMC, do simple error checking, and check for embargoed articles. |
|||
The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not |
|||
be linked to the article. If the embargo date is today or in the past, or if it is empty or omitted, then the |
|||
PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix. |
|||
PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation |
|||
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed () |
|||
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string. |
|||
PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less |
|||
than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued. |
|||
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure |
|||
]] |
]] |
||
local function |
local function citeseerx (id) |
||
local handler = cfg.id_handlers['CITESEERX']; |
|||
local test_limit = 6000000; -- update this value as PMCs approach |
|||
local matched; |
|||
local handler = cfg.id_handlers['PMC']; |
|||
local err_cat = ''; -- presume that PMC is valid |
|||
local id_num; |
|||
local text; |
|||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix |
|||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, |
|||
access=handler.access}); |
|||
if is_set (id_num) then |
|||
add_maint_cat ('pmc_format'); |
|||
else -- plain number without pmc prefix |
|||
id_num = id:match ('^%d+$'); -- if here id is all digits |
|||
end |
|||
if is_set (id_num) then -- id_num has a value so test it |
|||
id_num = tonumber(id_num); -- convert id_num to a number for range testing |
|||
if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries |
|||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message |
|||
else |
|||
id = tostring (id_num); -- make sure id is a string |
|||
end |
|||
else -- when id format incorrect |
|||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message |
|||
end |
|||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); |
|||
if is_set (embargo) then -- is PMC is still embargoed? |
|||
if not matched then |
|||
text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id .. err_cat; -- still embargoed so no external link |
|||
text = text .. ' ' .. set_error( 'bad_citeseerx' ); |
|||
else |
|||
text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
|||
end |
end |
||
return text; |
return text; |
||
Line 575: | Line 480: | ||
local text; |
local text; |
||
if is_set(inactive) then |
if is_set(inactive) then |
||
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date |
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date |
||
local inactive_month, good; |
|||
text = "[[" .. handler.link .. "|" .. handler.label .. "]]:" .. id; |
|||
if is_set(inactive_year) then |
|||
if is_set (inactive_year) then |
|||
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year ); |
|||
if 4 < inactive:len() then -- inactive date has more than just a year (could be anything) |
|||
local lang_obj = mw.getContentLanguage(); -- get a language object for this wiki |
|||
good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive); -- try to get the month name from the inactive date |
|||
if not good then |
|||
inactive_month = nil; -- something went wrong so make sure this is unset |
|||
end |
|||
end |
|||
else |
else |
||
inactive_year = nil; -- |doi-broken= has something but it isn't a date |
|||
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year |
|||
end |
end |
||
inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" |
|||
if is_set(inactive_year) and is_set (inactive_month) then |
|||
else |
|||
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year .. ' ' .. inactive_month); -- use inactive month in category name |
|||
text = external_link_id({link = handler.link, label = handler.label, |
|||
elseif is_set(inactive_year) then |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) |
|||
table.insert( z.error_categories, 'Pages with DOIs inactive as of ' .. inactive_year); |
|||
inactive = "" |
|||
else |
|||
table.insert( z.error_categories, 'Pages with inactive DOIs'); -- when inactive doesn't contain a recognizable date |
|||
end |
|||
inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')'; |
|||
end |
end |
||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') |
|||
cat = ' ' .. set_error( 'bad_doi' ); |
|||
local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when doi has the proper basic form |
|||
registrant_err_patterns = { -- these patterns are for code ranges that are not supported |
|||
'^[^1-3]%d%d%d%d%.%d%d*$', -- 5 digits with subcode (0xxxx, 40000+); accecpts: 10000–39999 |
|||
'^[^1-4]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 40000+); accecpts: 10000–49999 |
|||
'^[^1-9]%d%d%d%.%d%d*$', -- 4 digits with subcode (0xxx); accecpts: 1000–9999 |
|||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accecpts: 1000–9999 |
|||
'^%d%d%d%d%d%d+', -- 6 or more digits |
|||
'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) |
|||
'^5555$', -- test registrant will never resolve |
|||
'%s', -- any space character in registrant |
|||
} |
|||
if registrant then -- when doi has proper form |
|||
for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns |
|||
if registrant:match (pattern) then -- to validate registrant codes |
|||
cat = ' ' .. set_error ('bad_doi'); -- when found, mark this doi as bad |
|||
break; -- and done |
|||
end |
|||
end |
|||
else |
|||
cat = ' ' .. set_error ('bad_doi'); -- invalid directory or malformed |
|||
end |
end |
||
return text .. inactive .. cat |
|||
return text .. cat |
|||
end |
end |
||
Line 610: | Line 550: | ||
terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely |
terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely |
||
if ever used in HDLs. |
if ever used in HDLs. |
||
Query string parameters are named here: http://www.handle.net/proxy_servlet.html. query strings are not displayed |
|||
but since '?' is an allowed character in an hdl, '?' followed by one of the query parameters is the only way we |
|||
have to detect the query string so that it isn't url encoded with the rest of the identifier. |
|||
]] |
]] |
||
Line 615: | Line 559: | ||
local function hdl(id, access) |
local function hdl(id, access) |
||
local handler = cfg.id_handlers['HDL']; |
local handler = cfg.id_handlers['HDL']; |
||
local query_params = { -- list of known query parameters from http://www.handle.net/proxy_servlet.html |
|||
'noredirect', |
|||
'ignore_aliases', |
|||
'auth', |
|||
'cert', |
|||
'index', |
|||
'type', |
|||
'urlappend', |
|||
'locatt', |
|||
'action', |
|||
} |
|||
local hdl, suffix, param = id:match ('(.-)(%?(%a+).+)$'); -- look for query string |
|||
local text = external_link_id({link = handler.link, label = handler.label, |
|||
local found; |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) |
|||
if hdl then -- when there are query strings, this is the handle identifier portion |
|||
for _, q in ipairs (query_params) do -- spin through the list of query parameters |
|||
if param:match ('^' .. q) then -- if the query string begins with one of the parameters |
|||
found = true; -- announce a find |
|||
break; -- and stop looking |
|||
end |
|||
end |
|||
end |
|||
if found then |
|||
id = hdl; -- found so replace id with the handle portion; this will be url encoded, suffix will not |
|||
else |
|||
suffix = ''; -- make sure suffix is empty string for concatenation else |
|||
end |
|||
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
prefix=handler.prefix, id=id, suffix=suffix, separator=handler.separator, encode=handler.encode, access=access}) |
|||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma |
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma |
||
Line 626: | Line 599: | ||
--[[--------------------------< |
--[[--------------------------< I S B N >---------------------------------------------------------------------- |
||
Determines whether an ISBN string is valid |
|||
Formats an OpenLibrary link, and checks for associated errors. |
|||
]] |
]] |
||
local function |
local function isbn( isbn_str ) |
||
if nil ~= isbn_str:match("[^%s-0-9X]") then |
|||
local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W' |
|||
return false, cfg.err_msg_supl.char; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X |
|||
local handler = cfg.id_handlers['OL']; |
|||
end |
|||
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces |
|||
local len = isbn_str:len(); |
|||
if len ~= 10 and len ~= 13 then |
|||
return false, cfg.err_msg_supl.length; -- fail if incorrect length |
|||
end |
|||
if |
if len == 10 then |
||
if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position |
|||
return external_link_id({link=handler.link, label=handler.label, |
|||
return false, cfg.err_msg_supl.form; |
|||
prefix=handler.prefix .. 'authors/OL', |
|||
end |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
return is_valid_isxn(isbn_str, 10), cfg.err_msg_supl.check; |
|||
access = access}) |
|||
elseif ( code == "M" ) then |
|||
return external_link_id({link=handler.link, label=handler.label, |
|||
prefix=handler.prefix .. 'books/OL', |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
access = access}) |
|||
elseif ( code == "W" ) then |
|||
return external_link_id({link=handler.link, label=handler.label, |
|||
prefix=handler.prefix .. 'works/OL', |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
access = access}) |
|||
else |
else |
||
if isbn_str:match( "^%d+$" ) == nil then |
|||
return external_link_id({link=handler.link, label=handler.label, |
|||
return false, cfg.err_msg_supl.char; -- fail if isbn13 is not all digits |
|||
prefix=handler.prefix .. 'OL', |
|||
end |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
if isbn_str:match( "^97[89]%d*$" ) == nil then |
|||
access = access}) .. ' ' .. set_error( 'bad_ol' ); |
|||
return false, cfg.err_msg_supl.prefix; -- fail when isbn13 does not begin with 978 or 979 |
|||
end |
|||
if isbn_str:match ('^9790') then |
|||
return false, cfg.err_msg_supl.group; -- group identifier '0' is reserved to ismn |
|||
end |
|||
return is_valid_isxn_13 (isbn_str), cfg.err_msg_supl.check; |
|||
end |
end |
||
end |
end |
||
--[[--------------------------< M |
--[[--------------------------< A M A Z O N >------------------------------------------------------------------ |
||
Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha |
|||
characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit |
|||
'<' and/or '>' angle brackets. |
|||
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=. |
|||
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit. |
|||
This function is positioned here because it calls isbn() |
|||
]] |
]] |
||
local function |
local function asin(id, domain)
	-- Build the Amazon link for |asin=<id>; <domain> is the optional top-level-domain
	-- selector from the citation.  Returns the rendered link with an error message
	-- appended when <id> is not an acceptable asin.
	local malformed = false;

	if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
		malformed = true;														-- not exactly 10 digits / uppercase letters
	elseif id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then							-- looks like an isbn10 (9 digits then digit or X)
		if isbn( id ) then
			add_maint_cat ('ASIN');												-- a real isbn10: flag so |asin= can be replaced with |isbn=
		else
			malformed = true;													-- numeric but not a valid isbn10
		end
	elseif not id:match("^%u[%d%u]+$") then
		malformed = true;														-- mixed asin must start with an uppercase letter
	end

	local err_cat = "";
	if malformed then
		err_cat = ' ' .. set_error ('bad_asin');
	end

	-- translate the domain selector into the host-name suffix
	if not is_set(domain) then
		domain = "com";															-- default store
	elseif in_array (domain, {'jp', 'uk'}) then									-- Japan, United Kingdom
		domain = "co." .. domain;
	elseif in_array (domain, {'au', 'br', 'mx'}) then							-- Australia, Brazil, Mexico
		domain = "com." .. domain;
	end																			-- any other value is used as given

	local handler = cfg.id_handlers['ASIN'];
	local link_args = {
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix .. domain .. "/dp/",
		id = id,
		encode = handler.encode,
		separator = handler.separator,
	};

	return external_link_id (link_args) .. err_cat;
end
|||
--[[--------------------------< I S M N >---------------------------------------------------------------------- |
|||
Determines whether an ISMN string is valid. Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the |
|||
same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf |
|||
section 2, pages 9–12. |
|||
]] |
|||
local function ismn (id)
	-- Validate and render an ISMN.  <id> is the identifier as written in the citation.
	-- Returns the wikilinked label, separator and the identifier exactly as the editor
	-- wrote it, followed by an error message when the ismn does not validate.  There is
	-- not yet anywhere to link the identifier itself, so only the label is linked.
	local handler = cfg.id_handlers['ISMN'];
	local id_copy = id;															-- keep the unmodified identifier for display; the testing below is destructive
	local valid_ismn;

	id = id:gsub( "[%s-–]", "" );												-- strip spaces, hyphens, and endashes from the ismn

	if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then					-- ismn must be 13 digits and begin 9790
		valid_ismn = false;
	else
		valid_ismn = is_valid_isxn_13 (id);										-- same check-digit calculation as isbn-13
	end

	-- when there is some place to link to, switch to internal_link_id () / external_link_id ()
	-- with link, label, q, redirect, prefix, id=id_copy, separator and encode from handler

	-- wd_article was previously read without ever being assigned (an undeclared global,
	-- always nil) so the wikidata article name could never be used for the label link;
	-- fetch it here.  wikidata_article_name_get () returns nil when handler.q is not set
	-- or when mw.wikibase is not available on this wiki.
	local wd_article = wikidata_article_name_get (handler.q);
	local label_link = (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link;

	local text = table.concat (
		{
		make_wikilink (label_link, handler.label),
		handler.separator,
		id_copy
		});

	if false == valid_ismn then
		text = text .. ' ' .. set_error( 'bad_ismn' )							-- add an error message if the ismn is invalid
	end

	return text;
end
|||
--[[--------------------------< I S S N >---------------------------------------------------------------------- |
|||
Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but |
|||
has separated the two groups of four digits with a space. When that condition occurred, the resulting link looked |
|||
like this: |
|||
|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link |
|||
This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length |
|||
and makes sure that the checkdigit agrees with the calculated value. Incorrect length (8 digits), characters |
|||
other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn error message. The |
|||
issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits. |
|||
]] |
|||
local function issn(id, e)
	-- Validate and render an ISSN (or an eISSN when <e> is truthy).  A valid issn is
	-- always displayed as dddd-dddd; an invalid one is displayed exactly as entered,
	-- followed by an error message.
	local handler = cfg.id_handlers[e and 'EISSN' or 'ISSN'];
	local as_entered = id;														-- untouched copy; shown when validation fails
	local compact = id:gsub( "[%s-–]", "" );									-- spaces, hyphens, and endashes removed
	local valid_issn;

	if 8 == compact:len() and nil ~= compact:match( "^%d*X?$" ) then			-- 8 characters: digits with an optional X in the last position
		valid_issn = is_valid_isxn(compact, 8);									-- check-digit test
	else
		valid_issn = false;														-- wrong length or improper character
	end

	local display = as_entered;
	if true == valid_issn then
		display = compact:sub (1, 4) .. "-" .. compact:sub (5);					-- canonical hyphenated form
	end

	local text = external_link_id ({
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = display,
		separator = handler.separator,
		encode = handler.encode,
		});

	if false == valid_issn then
		text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' )			-- 'e' selects the eissn flavour of the message
	end

	return text
end
|||
--[[--------------------------< J F M >----------------------------------------------------------------------- |
|||
A numerical identifier in the form nn.nnnn.nn |
|||
]] |
|||
local function jfm (id)
	-- Render a JFM identifier.  Accepts the bare nn.nnnn.nn form or the same with a
	-- case-insensitive 'jfm' prefix; the prefixed form adds a maintenance category.
	-- Returns the external link, with an error message appended when the identifier
	-- does not have the nn.nnnn.nn shape.
	local handler = cfg.id_handlers['JFM'];
	local candidate = id:match ('^[Jj][Ff][Mm](.*)$');							-- text after a jfm prefix, if there is a prefix

	if is_set (candidate) then
		add_maint_cat ('jfm_format');											-- prefix present; flag for cleanup
	else
		candidate = id;															-- no jfm prefix (or nothing after it): test the value as given
	end

	local err_cat = '';
	local well_formed = candidate and candidate:match('^%d%d%.%d%d%d%d%.%d%d$');

	if well_formed then
		id = candidate;															-- link with the prefix-free identifier
	else
		err_cat = ' ' .. set_error( 'bad_jfm' );								-- malformed: link with the value as given and complain
	end

	local link_args = {
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
	};

	return external_link_id (link_args) .. err_cat;
end
|||
--[[--------------------------< L C C N >---------------------------------------------------------------------- |
|||
Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of |
|||
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits. |
|||
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/ |
|||
length = 8 then all digits |
|||
length = 9 then lccn[1] is lower case alpha |
|||
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits |
|||
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits |
|||
length = 12 then lccn[1] and lccn[2] are both lower case alpha |
|||
]] |
|||
local function lccn(lccn)
	-- Render an LCCN link.  The value is normalized only for testing; the link is
	-- built from the value as given.  Each accepted length (8-12) has its own
	-- required shape; any other length, a shape mismatch, or whitespace in the value
	-- as given appends a single error message to the returned link.
	local handler = cfg.id_handlers['LCCN'];
	local id = normalize_lccn (lccn);											-- canonical form (no whitespace, hyphens, forward slashes)
	local len = id:len();
	local well_formed;

	if 8 == len then															-- dddddddd
		well_formed = not id:match("[^%d]");
	elseif 9 == len then														-- adddddddd
		well_formed = nil ~= id:match("%l%d%d%d%d%d%d%d%d");
	elseif 10 == len then														-- dddddddddd or aadddddddd
		well_formed = not id:match("[^%d]") or nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d");
	elseif 11 == len then														-- aaadddddddd or adddddddddd
		well_formed = nil ~= (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d"));
	elseif 12 == len then														-- aadddddddddd
		well_formed = nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d%d%d");
	else
		well_formed = false;													-- wrong length
	end

	if well_formed and nil ~= lccn:find ('%s') then
		well_formed = false;													-- shape is fine but the value as given contains whitespace
	end

	local err_cat = '';
	if not well_formed then
		err_cat = ' ' .. set_error( 'bad_lccn' );
	end

	local link_args = {
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = lccn,																-- link uses the value as given, not the normalized form
		separator = handler.separator,
		encode = handler.encode,
	};

	return external_link_id (link_args) .. err_cat;
end
|||
--[[--------------------------< M R >-------------------------------------------------------------------------- |
|||
A seven digit number; if not seven digits, zero-fill leading digits to make seven digits. |
|||
]] |
|||
local function mr (id)
	-- Render an MR identifier.  Accepts one to seven digits, optionally preceded by a
	-- case-insensitive 'mr' prefix (which adds a maintenance category), and zero-fills
	-- on the left to seven digits.  Anything else is linked as given with an error
	-- message appended.
	local handler = cfg.id_handlers['MR'];
	local digits = id:match ('^[Mm][Rr](%d+)$');								-- digits following an mr prefix

	if is_set (digits) then
		add_maint_cat ('mr_format');											-- prefix present; flag for cleanup
	else
		digits = id:match ('^%d+$');											-- plain number; nil when id is not all digits
	end

	local err_cat = '';
	local count = digits and digits:len() or 0;									-- 0 means no usable digits were found

	if 0 == count or 7 < count then
		err_cat = ' ' .. set_error( 'bad_mr' );									-- not a number or too many digits
	else
		id = string.rep ('0', 7-count ) .. digits;								-- zero-fill leading digits
	end

	local link_args = {
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
	};

	return external_link_id (link_args) .. err_cat;
end
||
Line 683: | Line 893: | ||
--[[--------------------------< O C L C >---------------------------------------------------------------------- |
--[[--------------------------< O C L C >---------------------------------------------------------------------- |
||
Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html |
Validate and format an oclc id. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}} |
||
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html |
|||
]] |
]] |
||
Line 716: | Line 927: | ||
end |
end |
||
local text = external_link_id({link=handler.link, label=handler.label, |
local text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; |
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; |
||
Line 723: | Line 934: | ||
--[[--------------------------< |
--[[--------------------------< O P E N L I B R A R Y >-------------------------------------------------------- |
||
Formats an OpenLibrary link, and checks for associated errors. |
|||
Validates (sort of) and formats a bibcode id. |
|||
]] |
|||
Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes |
|||
local function openlibrary(id, access) |
|||
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters |
|||
local code; |
|||
and first four digits must be a year. This function makes these tests: |
|||
local handler = cfg.id_handlers['OL']; |
|||
length must be 19 characters |
|||
local ident; |
|||
characters in position |
|||
1–4 must be digits and must represent a year in the range of 1000 – next year |
|||
ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; remove OL prefix |
|||
5 must be a letter |
|||
6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. ) |
|||
7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) |
|||
9–18 must be letter, digit, or dot |
|||
19 must be a letter or dot |
|||
if not is_set (ident) then -- if malformed return an error |
|||
]] |
|||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
prefix=handler.prefix .. 'OL', |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
access = access}) .. ' ' .. set_error( 'bad_ol' ); |
|||
end |
|||
id = ident; -- use ident without the optional OL prefix (it has been removed) |
|||
if ( code == "A" ) then |
|||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
prefix=handler.prefix .. 'authors/OL', |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
access = access}) |
|||
end |
|||
if ( code == "M" ) then |
|||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
prefix=handler.prefix .. 'books/OL', |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
access = access}) |
|||
end |
|||
if ( code == "W" ) then |
|||
local function bibcode (id, access) |
|||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
local handler = cfg.id_handlers['BIBCODE']; |
|||
prefix=handler.prefix .. 'works/OL', |
|||
local err_type; |
|||
id=id, separator=handler.separator, encode = handler.encode, |
|||
local year; |
|||
access = access}) |
|||
end |
|||
end |
|||
local text = external_link_id({link=handler.link, label=handler.label, |
|||
--[[--------------------------< P M C >------------------------------------------------------------------------ |
|||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, |
|||
access=access}); |
|||
Format a PMC, do simple error checking, and check for embargoed articles. |
|||
The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not |
|||
be linked to the article. If the embargo date is today or in the past, or if it is empty or omitted, then the |
|||
PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix. |
|||
PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation |
|||
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed () |
|||
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string. |
|||
PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits (after an optional 'PMC' prefix) and is |
|||
in the range 1 through cfg.id_handlers['PMC'].id_limit; that limit will need to be updated periodically as more PMCs are issued. |
|||
]] |
|||
local function pmc(id, embargo) |
|||
local handler = cfg.id_handlers['PMC']; |
|||
local err_cat = ''; -- presume that PMC is valid |
|||
local id_num; |
|||
local text; |
|||
id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix |
|||
if 19 ~= id:len() then |
|||
err_type = 'length'; |
|||
if is_set (id_num) then |
|||
else |
|||
add_maint_cat ('pmc_format'); |
|||
year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") -- |
|||
else -- plain number without pmc prefix |
|||
id_num = id:match ('^%d+$'); -- if here id is all digits |
|||
end |
|||
if is_set (id_num) then -- id_num has a value so test it |
|||
id_num = tonumber(id_num); -- convert id_num to a number for range testing |
|||
if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries |
|||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message |
|||
else |
else |
||
id = tostring (id_num); -- make sure id is a string |
|||
local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year |
|||
year = tonumber (year); -- convert year portion of bibcode to a number |
|||
if (1000 > year) or (year > next_year) then |
|||
err_type = 'year'; -- year out of bounds |
|||
end |
|||
if id:find('&%.') then |
|||
err_type = 'journal'; -- journal abbreviation must not have '&.' (if it does its missing a letter) |
|||
end |
|||
end |
end |
||
else -- when id format incorrect |
|||
err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message |
|||
end |
end |
||
if is_set ( |
if is_set (embargo) then -- is PMC is still embargoed? |
||
text = table.concat ( -- still embargoed so no external link |
|||
text = text .. ' ' .. set_error( 'bad_bibcode', {err_type}); |
|||
{ |
|||
make_wikilink (handler.link, handler.label), |
|||
handler.separator, |
|||
id, |
|||
err_cat |
|||
}); |
|||
else |
|||
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, -- no embargo date or embargo has expired, ok to link to article |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
|||
end |
end |
||
return text; |
return text; |
||
Line 776: | Line 1,037: | ||
--[[--------------------------< |
--[[--------------------------< P M I D >---------------------------------------------------------------------- |
||
Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This |
|||
CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org). |
|||
code checks the PMID to see that it contains only digits and is in the range 1 through cfg.id_handlers['PMID'].id_limit; that |
|||
limit will need to be updated periodically as more PMIDs are issued. |
|||
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure |
|||
]] |
]] |
||
local function |
local function pmid(id) |
||
local handler = cfg.id_handlers[' |
local handler = cfg.id_handlers['PMID']; |
||
local err_cat = ''; -- presume that PMID is valid |
|||
local matched; |
|||
if id:match("[^%d]") then -- if PMID has anything but digits |
|||
local text = external_link_id({link=handler.link, label=handler.label, |
|||
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message |
|||
prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, |
|||
else -- PMID is only digits |
|||
access=handler.access}); |
|||
local id_num = tonumber(id); -- convert id to a number for range testing |
|||
if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries |
|||
err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message |
|||
end |
|||
end |
|||
return external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
|||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); |
|||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; |
|||
if not matched then |
|||
end |
|||
text = text .. ' ' .. set_error( 'bad_citeseerx' ); |
|||
--[[--------------------------< S 2 C I D >--------------------------------------------------------------------

Format an s2cid and do simple error checking.

S2CIDs are sequential numbers beginning at 1 and counting up.  An s2cid is accepted when it is made up of digits
only, does not begin with 0, and is not greater than cfg.id_handlers['S2CID'].id_limit; that limit will need to be
updated periodically as more S2CIDs are issued.  Anything else gets the 'bad_s2cid' error message appended to the
rendered identifier.

<id> is the identifier value as a string; <access> is passed through unchanged to external_link_id().

]]

local function s2cid (id, access)
	local handler = cfg.id_handlers['S2CID'];
	local digits = id:match ('^[1-9]%d*$');										-- nil unless id is all digits with no leading 0
	local is_bad = true;														-- presume malformed until the format test passes

	if is_set (digits) then
		is_bad = handler.id_limit < tonumber (digits);							-- well formed; bad only when above the limit
	end

	local err_cat = is_bad and (' ' .. set_error( 'bad_s2cid' )) or '';		-- error message, or empty string when S2CID is valid

	local link_id = id:gsub ('%.%a%a', '');										-- keep only gsub()'s first return value (the string)

	return external_link_id ({
		link=handler.link,
		label=handler.label,
		q=handler.q,
		redirect=handler.redirect,
		prefix=handler.prefix,
		id=link_id,
		separator=handler.separator,
		encode=handler.encode,
		access=access
		}) .. err_cat;
end
|||
--[[--------------------------< S B N >------------------------------------------------------------------------

9-digit form of isbn10; uses same check-digit validation when sbn is prefixed with an additional '0' to make 10 digits

<id> is the sbn as a string; whitespace and hyphens are permitted and are stripped before the length test.

returns two values:
	boolean: false when the sbn fails a character, length, or form test; else the result of is_valid_isxn()
	string: the supplemental error message from cfg.err_msg_supl (char, length, form, or check) that goes with
		the test that was last applied; callers are expected to use it only when the boolean is false

]]

local function sbn (id)
	if id:match ('[^%s%-0-9X]') then											-- hyphen escaped so that it cannot be read as part of a range that follows the %s class
		return false, cfg.err_msg_supl.char;									-- fail if sbn contains anything but digits, whitespace, hyphens, or the uppercase X
	end

	id = id:gsub ('[%s%-]', '');												-- strip whitespace and hyphens from the sbn; gsub()'s count return value is discarded

	if 9 ~= id:len() then
		return false, cfg.err_msg_supl.length;									-- fail if incorrect length
	end

	if not id:match ('^%d*X?$') then											-- fail if sbn has 'X' anywhere but last position
		return false, cfg.err_msg_supl.form;
	end

	return is_valid_isxn ('0' .. id, 10), cfg.err_msg_supl.check;				-- prefix sbn with '0' and validate as isbn10
end
||
Line 810: | Line 1,137: | ||
local function ssrn (id) |
local function ssrn (id) |
||
local test_limit = 3500000; -- update this value as SSRNs approach |
|||
local handler = cfg.id_handlers['SSRN']; |
local handler = cfg.id_handlers['SSRN']; |
||
local err_cat = |
local err_cat = ''; -- presume that SSRN is valid |
||
local id_num; |
local id_num; |
||
local text; |
local text; |
||
Line 820: | Line 1,146: | ||
if is_set (id_num) then -- id_num has a value so test it |
if is_set (id_num) then -- id_num has a value so test it |
||
id_num = tonumber(id_num); -- convert id_num to a number for range testing |
id_num = tonumber(id_num); -- convert id_num to a number for range testing |
||
if 100 > id_num or |
if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries |
||
err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message |
err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message |
||
end |
end |
||
Line 827: | Line 1,153: | ||
end |
end |
||
text = external_link_id({link |
text = external_link_id ({link=handler.link, label=handler.label, q=handler.q, redirect=handler.redirect, |
||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; |
||
Line 833: | Line 1,159: | ||
end |
end |
||
--[[--------------------------< U S E N E T _ I D >------------------------------------------------------------

Format a usenet message id and do simple error checking.  A message id is accepted when it has the form
'id-left@id-right' (at least one character on each side of an '@') and is not wrapped in angle brackets: its
first character is not '<' and its last character is not '>'.

The identifier is always rendered; the 'bad_usenet_id' error message is appended when either test fails.

]]

local function usenet_id (id)
	local handler = cfg.id_handlers['USENETID'];

	local rendered = external_link_id ({
		link=handler.link,
		label=handler.label,
		q=handler.q,
		redirect=handler.redirect,
		prefix=handler.prefix,
		id=id,
		separator=handler.separator,
		encode=handler.encode
		});

	local has_at = id:match('^.+@.+$');											-- something, an '@', something
	local unbracketed = id:match('^[^<].*[^>]$');								-- does not begin with '<' and does not end with '>'

	if has_at and unbracketed then
		return rendered;														-- message id passes both tests
	end

	return rendered .. ' ' .. set_error( 'bad_usenet_id' );					-- add an error message if the message id is invalid
end
|||
--[[--------------------------< Z B L >-----------------------------------------------------------------------

Format a zbl identifier and do simple error checking.

The normal form is numerical: nnnn.nnnnn where leading digits of the first quartet are optional (one to four
digits, a dot, five digits).  Format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/

The temporary form is apparently eight digits; identifiers in that form are accepted but add the 'zbl'
maintenance category.  Anything else gets the 'bad_zbl' error message appended to the rendered identifier.

]]

local function zbl (id)
	local handler = cfg.id_handlers['ZBL'];
	local err_cat;

	if id:match('^%d%d%d%d%d%d%d%d$') then										-- temporary format
		add_maint_cat ('zbl');													-- accepted, but tracked in a maint cat
		err_cat = '';
	elseif id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then							-- normal format
		err_cat = '';
	else																		-- neither format
		err_cat = ' ' .. set_error( 'bad_zbl' );								-- set an error message
	end

	local rendered = external_link_id ({
		link=handler.link,
		label=handler.label,
		q=handler.q,
		redirect=handler.redirect,
		prefix=handler.prefix,
		id=id,
		separator=handler.separator,
		encode=handler.encode
		});

	return rendered .. err_cat;
end
|||
--============================<< I N T E R F A C E F U N C T I O N S >>========================================== |
|||
--[[--------------------------< B U I L D _ I D _ L I S T >-------------------------------------------------------- |
--[[--------------------------< B U I L D _ I D _ L I S T >-------------------------------------------------------- |
||
Line 850: | Line 1,224: | ||
for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table |
for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table |
||
-- fallback to read-only cfg |
-- fallback to read-only cfg |
||
handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); |
handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) ); |
||
Line 860: | Line 1,233: | ||
elseif handler.mode ~= 'manual' then |
elseif handler.mode ~= 'manual' then |
||
error( cfg.messages['unknown_ID_mode'] ); |
error( cfg.messages['unknown_ID_mode'] ); |
||
elseif k == 'ARXIV' then |
|||
table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); |
|||
elseif k == 'ASIN' then |
|||
table.insert( new_list, {handler.label, asin( v, options.ASINTLD ) } ); |
|||
elseif k == 'BIBCODE' then |
elseif k == 'BIBCODE' then |
||
table.insert( new_list, {handler.label, bibcode( v, handler.access ) } ); |
table.insert( new_list, {handler.label, bibcode( v, handler.access ) } ); |
||
Line 868: | Line 1,245: | ||
elseif k == 'DOI' then |
elseif k == 'DOI' then |
||
table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } ); |
table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } ); |
||
elseif k == 'HDL' then |
|||
table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); |
|||
elseif k == 'ARXIV' then |
|||
table.insert( new_list, {handler.label, arxiv( v, options.Class ) } ); |
|||
elseif k == 'ASIN' then |
|||
table.insert( new_list, {handler.label, amazon( v, options.ASINTLD ) } ); |
|||
elseif k == 'LCCN' then |
|||
table.insert( new_list, {handler.label, lccn( v ) } ); |
|||
elseif k == 'OL' or k == 'OLA' then |
|||
table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } ); |
|||
elseif k == 'PMC' then |
|||
table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); |
|||
elseif k == 'PMID' then |
|||
table.insert( new_list, {handler.label, pmid( v ) } ); |
|||
elseif k == 'OCLC' then |
|||
table.insert( new_list, {handler.label, oclc( v ) } ); |
|||
elseif k == 'SSRN' then |
|||
table.insert( new_list, {handler.label, ssrn( v ) } ); |
|||
elseif k == 'ISMN' then |
|||
table.insert( new_list, {handler.label, ismn( v ) } ); |
|||
elseif k == 'ISSN' then |
|||
table.insert( new_list, {handler.label, issn( v ) } ); |
|||
elseif k == 'EISSN' then |
elseif k == 'EISSN' then |
||
table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn |
table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn |
||
elseif k == 'HDL' then |
|||
table.insert( new_list, {handler.label, hdl( v, handler.access ) } ); |
|||
elseif k == 'ISBN' then |
elseif k == 'ISBN' then |
||
local ISBN = internal_link_id( handler ); |
local ISBN = internal_link_id( handler ); |
||
local check; |
local check; |
||
local err_type = ''; |
local err_type = ''; |
||
check, err_type = |
check, err_type = isbn( v ); |
||
if not check then |
if not check then |
||
if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set |
if is_set(options.IgnoreISBN) then -- ISBN is invalid; if |ignore-isbn-error= set |
||
Line 904: | Line 1,261: | ||
end |
end |
||
elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set |
elseif is_set(options.IgnoreISBN) then -- ISBN is OK; if |ignore-isbn-error= set |
||
add_maint_cat ('ignore_isbn_err'); -- because |ignore-isbn-error= unnecessary |
|||
end |
end |
||
table.insert( new_list, {handler.label, ISBN } ); |
table.insert( new_list, {handler.label, ISBN } ); |
||
elseif k == 'ISMN' then |
|||
table.insert( new_list, {handler.label, ismn( v ) } ); |
|||
elseif k == 'ISSN' then |
|||
table.insert( new_list, {handler.label, issn( v ) } ); |
|||
elseif k == 'JFM' then |
|||
table.insert( new_list, {handler.label, jfm( v ) } ); |
|||
elseif k == 'LCCN' then |
|||
table.insert( new_list, {handler.label, lccn( v ) } ); |
|||
elseif k == 'MR' then |
|||
table.insert( new_list, {handler.label, mr( v ) } ); |
|||
elseif k == 'OCLC' then |
|||
table.insert( new_list, {handler.label, oclc( v ) } ); |
|||
elseif k == 'OL' or k == 'OLA' then |
|||
table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } ); |
|||
elseif k == 'PMC' then |
|||
table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); |
|||
elseif k == 'PMID' then |
|||
table.insert( new_list, {handler.label, pmid( v ) } ); |
|||
elseif k == 'S2CID' then |
|||
table.insert( new_list, {handler.label, s2cid( v, handler.access ) } ); |
|||
elseif k == 'SBN' then |
|||
local SBN = internal_link_id (handler); |
|||
local check; -- boolean validation result |
|||
local err_type = ''; |
|||
check, err_type = sbn (v); |
|||
if not check then |
|||
SBN = SBN .. set_error( 'bad_sbn', {err_type}, false, " ", "" ); -- display an error message |
|||
end |
|||
table.insert( new_list, {handler.label, SBN } ); |
|||
elseif k == 'SSRN' then |
|||
table.insert( new_list, {handler.label, ssrn( v ) } ); |
|||
elseif k == 'USENETID' then |
elseif k == 'USENETID' then |
||
table.insert( new_list, {handler.label, |
table.insert( new_list, {handler.label, usenet_id( v ) } ); |
||
elseif k == 'ZBL' then |
|||
table.insert( new_list, {handler.label, zbl( v ) } ); |
|||
else |
else |
||
error( cfg.messages['unknown_manual_ID'] ); |
error( cfg.messages['unknown_manual_ID'] ); |
||
Line 915: | Line 1,305: | ||
local function comp( a, b ) -- used in following table.sort() |
local function comp( a, b ) -- used in following table.sort() |
||
return a[1] < b[1]; |
return a[1]:lower() < b[1]:lower(); |
||
end |
end |
||
Line 950: | Line 1,340: | ||
Parameters which have a predefined access level (e.g. arxiv) do not use this |
Parameters which have a predefined access level (e.g. arxiv) do not use this |
||
function as they are directly rendered as free without using an additional parameter. |
function as they are directly rendered as free without using an additional parameter. |
||
access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else) |
|||
]] |
]] |
||
Line 956: | Line 1,348: | ||
local id_accesses_list = {}; |
local id_accesses_list = {}; |
||
for k, v in pairs( cfg.id_handlers ) do |
for k, v in pairs( cfg.id_handlers ) do |
||
local access_param = v.custom_access; |
local access_param = v.custom_access; -- name of identifier's access-level parameter |
||
local k_lower = string.lower(k); |
|||
if is_set(access_param) then |
if is_set(access_param) then |
||
local access_level = args[access_param]; |
local access_level = args[access_param]; -- get the assigned value if there is one |
||
if is_set(access_level) then |
if is_set (access_level) then |
||
if not in_array (access_level |
if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required |
||
table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } ); |
table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } ); |
||
access_level = nil; |
access_level = nil; -- invalid so unset |
||
end |
|||
if not is_set(id_list[k]) then |
|||
table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k_lower}, true ) } ); |
|||
end |
end |
||
if not is_set(id_list[k]) then -- identifer access-level must have a matching identifier |
|||
if is_set(access_level) then |
|||
table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k:lower()}, true ) } ); -- param name is uppercase in cfg.id_handlers (k); lowercase for error message |
|||
access_level = access_level:lower(); |
|||
end |
end |
||
id_accesses_list[k] = access_level; |
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword |
||
end |
end |
||
end |
end |
||
Line 994: | Line 1,382: | ||
add_maint_cat = utilities_page_ptr.add_maint_cat; |
add_maint_cat = utilities_page_ptr.add_maint_cat; |
||
substitute = utilities_page_ptr.substitute; |
substitute = utilities_page_ptr.substitute; |
||
make_wikilink = utilities_page_ptr.make_wikilink; |
|||
z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities |
z = utilities_page_ptr.z; -- table of tables in Module:Citation/CS1/Utilities |
||
end |
end |
||
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ |
|||
]] |
|||
return { |
return { |