Modul:UrlCheck: Unterschied zwischen den Versionen
K Keine Bearbeitungszusammenfassung |
(Wartung) |
||
| Zeile 2: | Zeile 2: | ||
uc.msg = { | uc.msg = { | ||
[0] = 'No error detected', | unknown = 'Unknown error', | ||
[1] = 'Host with non-ASCII characters', | [ 0 ] = 'No error detected', | ||
[2] = 'Host is IP address', | [ 1 ] = 'Host with non-ASCII characters', | ||
[3] = 'URL is empty', | [ 2 ] = 'Host is IP address', | ||
[4] = 'URL has more then 2048 characters', | [ 3 ] = 'URL is empty', | ||
[5] = 'URL contains control marks or spaces', | [ 4 ] = 'URL has more then 2048 characters', | ||
[6] = 'Missing or wrong protocol', | [ 5 ] = 'URL contains control marks or spaces', | ||
[7] = 'Host starts with slash / character', | [ 6 ] = 'Missing or wrong protocol', | ||
[8] = 'Host contains invalid character combinations', | [ 7 ] = 'Host starts with slash / character', | ||
[9] = 'More than one commercial ats @ in host detected', | [ 8 ] = 'Host contains invalid character combinations', | ||
[10] = 'Commercial at @ detected but user or host are missing', | [ 9 ] = 'More than one commercial ats @ in host detected', | ||
[11] = 'More than one colon : in user detected', | [ 10 ] = 'Commercial at @ detected but user or host are missing', | ||
[12] = 'Colon : detected but user or password are missing', | [ 11 ] = 'More than one colon : in user detected', | ||
[13] = 'User has more than 64 characters', | [ 12 ] = 'Colon : detected but user or password are missing', | ||
[14] = 'Host ist empty', | [ 13 ] = 'User has more than 64 characters', | ||
[15] = 'More than one colon : in host detected', | [ 14 ] = 'Host ist empty', | ||
[16] = 'Colon : detected but host or port are missing', | [ 15 ] = 'More than one colon : in host detected', | ||
[17] = 'Port out of range of 0 | [ 16 ] = 'Colon : detected but host or port are missing', | ||
[18] = 'Host has more than 253 characters', | [ 17 ] = 'Port out of range of 0 … 65535', | ||
[19] = 'Host with missing dot as domain separator', | [ 18 ] = 'Host has more than 253 characters', | ||
[20] = 'Host has dot but no top-level domain', | [ 19 ] = 'Host with missing dot as domain separator', | ||
[21] = 'Unknown top-level domain', | [ 20 ] = 'Host has dot but no top-level domain', | ||
[22] = 'Invalid host structure', | [ 21 ] = 'Unknown top-level domain', | ||
[ 22 ] = 'Invalid host structure', | |||
} | } | ||
function uc.ip4( address ) | function uc.ip4( address ) | ||
local parts = { address:match( '(%d+)%.(%d+)%.(%d+)%.(%d+)' ) }, value | |||
local parts = { address:match( | |||
if #parts == 4 then | if #parts == 4 then | ||
for _,value in pairs( parts ) do | for _, value in pairs( parts ) do | ||
if tonumber( value ) < 0 or tonumber( value ) > 255 | if tonumber( value ) < 0 or tonumber( value ) > 255 then | ||
return 0 | |||
end | |||
end | end | ||
return 1 -- ok | return 1 -- ok | ||
end | end | ||
return 0 | |||
end | end | ||
function uc.isUrl( url ) | function uc.isUrl( url ) | ||
-- return codes 0 through 2 reserved | -- return codes 0 through 2 reserved | ||
if | if not url or type( url ) ~= 'string' then | ||
return 3 | |||
end | |||
local s = mw.text.trim( url ) | local s = mw.text.trim( url ), count | ||
if s == '' then return 3 | if s == '' then | ||
return 3 | |||
elseif #s > 2048 then -- limitation because of search engines or IE | |||
return 4 | |||
elseif s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then | |||
return 5 | |||
end | |||
-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource | -- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#ressource | ||
-- protocol | -- protocol | ||
s, count = s:gsub( '^https?://', '' ) | |||
s,count = s:gsub( '^ | if count == 0 then -- missing or wrong protocol | ||
if count == 0 then | return 6 | ||
-- | |||
end | end | ||
local user = '' | local user = '', at | ||
local password = '' | local password = '' | ||
local host = '' | local host = '' | ||
local port = '' | local port = '' | ||
local | local aPath = '' | ||
local topLevel = '' | local topLevel = '' | ||
-- split path from host | -- split path from host | ||
at = s:find( '/' ) | at = s:find( '/' ) | ||
if at | if at then | ||
aPath = s:sub( at + 1, #s ) | |||
s = s:sub( 1, at - 1 ) | s = s:sub( 1, at - 1 ) | ||
if not s then return 7 end | if not s then | ||
return 7 | |||
end | |||
end | end | ||
| Zeile 83: | Zeile 84: | ||
-- split at '/', last part: t == mw.uri.encode( t ) ? | -- split at '/', last part: t == mw.uri.encode( t ) ? | ||
if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' ) | if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' ) or s:find( '%-%.' ) | ||
or s:find( '%.%-' ) or s:find( '%./' ) or s:find( '/%.' ) then | |||
return 8 | |||
end | |||
-- user and password | -- user and password | ||
_,count = s:gsub( '@', '@' ) | _, count = s:gsub( '@', '@' ) | ||
if count > 1 then return 9 | if count > 1 then | ||
return 9 | |||
elseif count == 1 then | |||
at = s:find( '@' ) | at = s:find( '@' ) | ||
user = s:sub( 1, at - 1 ) | user = s:sub( 1, at - 1 ) | ||
host = s:sub( at + 1, #s ) | host = s:sub( at + 1, #s ) | ||
if not user or not s then return 10 end | if not user or not s then | ||
return 10 | |||
end | |||
_,count = user:gsub( ':', ':' ) | _,count = user:gsub( ':', ':' ) | ||
if count > 1 then return 11 | if count > 1 then | ||
return 11 | |||
elseif count == 1 then | |||
at = user:find( ':' ) | at = user:find( ':' ) | ||
password = user:sub( at + 1, #user ) | password = user:sub( at + 1, #user ) | ||
user = user:sub( 1, at - 1 ) | user = user:sub( 1, at - 1 ) | ||
if not user or not password then return 12 | if not user or not password then | ||
return 12 | |||
elseif #user > 64 then | |||
return 13 | |||
end | |||
end | end | ||
else | else | ||
host = s | host = s | ||
end | end | ||
if host == '' then return 14 end | if host == '' then | ||
return 14 | |||
end | |||
-- host and port | -- host and port | ||
_,count = host:gsub( ':', ':' ) | _, count = host:gsub( ':', ':' ) | ||
if count > 1 then return 15 | if count > 1 then | ||
return 15 | |||
elseif count == 1 then | |||
at = host:find( ':' ) | at = host:find( ':' ) | ||
port = host:sub( at + 1, #host ) | port = host:sub( at + 1, #host ) | ||
host = host:sub( 1, at - 1 ) | host = host:sub( 1, at - 1 ) | ||
if not host or not port then return 16 | if not host or not port then | ||
return 16 | |||
elseif not port:match( '^[1-9]%d*$' ) or tonumber( port ) > 65535 then | |||
return 17 | |||
end | |||
end | end | ||
-- handle host part | -- handle host part | ||
if #host > 253 then return 18 end | if #host > 253 then | ||
return 18 | |||
end | |||
-- getting top-level domain | -- getting top-level domain | ||
at = host:match( '^.*()%.' ) -- find last dot | at = host:match( '^.*()%.' ) -- find last dot | ||
if at | if not at then | ||
return 19 | |||
end | |||
topLevel = host:sub( at + 1, #host ) | topLevel = host:sub( at + 1, #host ) | ||
if not topLevel then return 20 end | if not topLevel then | ||
return 20 | |||
end | |||
-- future: check of top-level domain | -- future: check of top-level domain | ||
if uc.ip4( host ) == 1 then | if uc.ip4( host ) == 1 then -- is ip4 address | ||
return 2 | |||
elseif not mw.ustring.match( host, '^[%w%.%-]+%.%a%a+$' ) then | |||
return 22 | |||
elseif not host:match( '^[%w%.%-]+%.%a%a+$' ) then | |||
return 1 -- matches domain only in UTF 8 mode | return 1 -- matches domain only in UTF 8 mode | ||
end | end | ||
| Zeile 149: | Zeile 171: | ||
local result = uc.isUrl( args.url ) | local result = uc.isUrl( args.url ) | ||
if args.show:lower() == 'msg' then | if args.show:lower() == 'msg' then | ||
if uc.msg[result] | if uc.msg[ result ] then | ||
return | return uc.msg[ result ] | ||
else | else | ||
return uc.msg | return uc.msg.unknown | ||
end | end | ||
end | end | ||
return result | |||
end | end | ||
return uc | return uc | ||
Version vom 16. Juli 2019, 17:30 Uhr
Die Dokumentation für dieses Modul kann unter Modul:UrlCheck/doc erstellt werden
-- Module table; the functions below are attached to it and it is returned
-- at the end of the file.
local uc = {}

-- Human-readable texts for the numeric codes returned by uc.isUrl.
-- uc.checkUrl looks a code up here and falls back to `unknown` when the
-- code has no entry.
uc.msg = {
	unknown = 'Unknown error',
	[ 0 ]  = 'No error detected',
	[ 1 ]  = 'Host with non-ASCII characters',
	[ 2 ]  = 'Host is IP address',
	[ 3 ]  = 'URL is empty',
	[ 4 ]  = 'URL has more than 2048 characters',
	[ 5 ]  = 'URL contains control marks or spaces',
	[ 6 ]  = 'Missing or wrong protocol',
	[ 7 ]  = 'Host starts with slash / character',
	[ 8 ]  = 'Host contains invalid character combinations',
	[ 9 ]  = 'More than one commercial at @ in host detected',
	[ 10 ] = 'Commercial at @ detected but user or host are missing',
	[ 11 ] = 'More than one colon : in user detected',
	[ 12 ] = 'Colon : detected but user or password are missing',
	[ 13 ] = 'User has more than 64 characters',
	[ 14 ] = 'Host is empty',
	[ 15 ] = 'More than one colon : in host detected',
	[ 16 ] = 'Colon : detected but host or port are missing',
	[ 17 ] = 'Port out of range of 0 … 65535',
	[ 18 ] = 'Host has more than 253 characters',
	[ 19 ] = 'Host with missing dot as domain separator',
	[ 20 ] = 'Host has dot but no top-level domain',
	[ 21 ] = 'Unknown top-level domain',
	[ 22 ] = 'Invalid host structure',
}
--- Tells whether a string is a dotted-quad IPv4 address.
-- The whole string has to consist of four decimal numbers separated by
-- dots, each of them not greater than 255.
-- @param address  value to test; anything that is not a string yields 0
-- @return 1 if address is an IPv4 address, 0 otherwise
function uc.ip4( address )
	if type( address ) ~= 'string' then
		return 0
	end
	-- Anchored at both ends: an unanchored pattern would also accept strings
	-- that merely contain a dotted quad, e.g. '1.2.3.4.5' or 'a1.2.3.4.org'.
	local octets = { address:match( '^(%d+)%.(%d+)%.(%d+)%.(%d+)$' ) }
	if #octets ~= 4 then
		return 0
	end
	for _, octet in ipairs( octets ) do
		-- %d+ never captures a sign, so only the upper bound can be violated
		if tonumber( octet ) > 255 then
			return 0
		end
	end
	return 1 -- ok
end
--- Checks an http(s) URL and reports the first problem that is found.
-- The URL is taken apart step by step: protocol, path/query/fragment,
-- user and password, host and port, top-level domain.
-- @param url  the URL to check; a value that is not a string counts as empty
-- @return number code: 0 = no error, 1 = host contains non-ASCII characters,
--         2 = host is an IPv4 address, 3 and above = error; the matching
--         texts are stored in uc.msg
function uc.isUrl( url )
	-- return codes 0 through 2 reserved
	if type( url ) ~= 'string' then
		return 3
	end

	-- Declared local on purpose: without this the assignments below would
	-- create the global variables count, at and _.
	local count, at, _

	local s = mw.text.trim( url )
	if s == '' then
		return 3
	elseif #s > 2048 then -- limitation because of search engines or IE
		return 4
	elseif s:find( '%s' ) or s:find( '%c' ) or s:match( '^%.' ) then
		return 5
	end

	-- https://max:muster@www.example.com:8080/index.html?p1=A&p2=B#resource

	-- protocol
	s, count = s:gsub( '^https?://', '' )
	if count == 0 then -- missing or wrong protocol
		return 6
	end

	-- Cut off everything behind the authority part. It ends at the first
	-- slash, question mark or number sign, so a query or fragment that
	-- follows the host directly must not be treated as part of the host.
	at = s:find( '[/?#]' )
	if at then
		local delimiter = s:sub( at, at )
		-- future: add path check
		-- split at '/', last part: t == mw.uri.encode( t ) ?
		s = s:sub( 1, at - 1 )
		-- string.sub never returns nil, an empty result means "nothing
		-- in front of the slash"
		if s == '' and delimiter == '/' then
			return 7
		end
	end

	-- s does not contain a slash any more, so only dot, hyphen and @
	-- combinations have to be looked for
	if s:find( '%.%.' ) or s:find( '%.@' ) or s:find( '@[%.%-]' )
		or s:find( '%-%.' ) or s:find( '%.%-' ) then
		return 8
	end

	-- user and password
	local user, password, host
	_, count = s:gsub( '@', '@' )
	if count > 1 then
		return 9
	elseif count == 1 then
		at = s:find( '@', 1, true )
		user = s:sub( 1, at - 1 )
		host = s:sub( at + 1 )
		if user == '' or host == '' then
			return 10
		end
		_, count = user:gsub( ':', ':' )
		if count > 1 then
			return 11
		elseif count == 1 then
			at = user:find( ':', 1, true )
			password = user:sub( at + 1 )
			user = user:sub( 1, at - 1 )
			if user == '' or password == '' then
				return 12
			end
		end
		-- the length limit applies with and without a password
		if #user > 64 then
			return 13
		end
	else
		host = s
	end
	if host == '' then
		return 14
	end

	-- host and port
	_, count = host:gsub( ':', ':' )
	if count > 1 then
		return 15
	elseif count == 1 then
		at = host:find( ':', 1, true )
		local port = host:sub( at + 1 )
		host = host:sub( 1, at - 1 )
		if host == '' or port == '' then
			return 16
		elseif not port:match( '^[1-9]%d*$' ) or tonumber( port ) > 65535 then
			return 17
		end
	end

	-- handle host part
	if #host > 253 then
		return 18
	end

	-- getting top-level domain
	at = host:match( '^.*()%.' ) -- position of the last dot
	if not at then
		return 19
	end
	local topLevel = host:sub( at + 1 )
	if topLevel == '' then -- host ends with a dot
		return 20
	end
	-- future: check of top-level domain

	local domainPattern = '^[%w%.%-]+%.%a%a+$'
	if uc.ip4( host ) == 1 then -- is ip4 address
		return 2
	elseif not mw.ustring.match( host, domainPattern ) then
		return 22
	elseif not host:match( domainPattern ) then
		return 1 -- matches domain only in UTF 8 mode
	end

	return 0
end
--- Entry point for #invoke: checks the URL passed in the frame arguments.
-- Reads the arguments `url` and `show` from frame.args; missing ones are
-- replaced by an empty string.
-- @param frame  frame object whose args table carries `url` and `show`
-- @return the message text belonging to the result code if `show` equals
--         'msg' (case-insensitive), otherwise the numeric code of uc.isUrl
function uc.checkUrl( frame )
	local params = frame.args
	params.url = params.url or ''
	params.show = params.show or ''

	local code = uc.isUrl( params.url )
	if params.show:lower() ~= 'msg' then
		return code
	end
	-- codes without an entry in the message table get the generic text
	return uc.msg[ code ] or uc.msg.unknown
end
-- hand the module table to the caller that loads this module
return uc