MatchesRfc2396🔗
Check that
text
matches the URI pattern defined in RFC 2396.
The definition has been taken from: https://datatracker.ietf.org/doc/html/rfc2396
Note that RFC 2396 alone is not enough for specifying xs:anyURI
for
XSD version 1.0, since XSD 1.0 defines xs:anyURI by RFC 2396 as amended by
RFC 2732.
Code
alphanum = '[a-zA-Z0-9]'
mark = (
"[\\-_.!~*'()]"
)
unreserved = (
f'({alphanum}|{mark})'
)
hex = (
'([0-9]|[aA]|[bB]|[cC]|[dD]|[eE]|[fF]|[aA]|[bB]|[cC]|[dD]|[eE]|[fF])'
)
escaped = (
f'%{hex}{hex}'
)
pchar = (
f'({unreserved}|{escaped}|[:@&=+$,])'
)
param = (
f'({pchar})*'
)
segment = (
f'({pchar})*(;{param})*'
)
pathSegments = (
f'{segment}(/{segment})*'
)
absPath = (
f'/{pathSegments}'
)
scheme = (
'[a-zA-Z][a-zA-Z0-9+\\-.]*'
)
userinfo = (
f'({unreserved}|{escaped}|[;:&=+$,])*'
)
domainlabel = (
f'({alphanum}|{alphanum}({alphanum}|-)*{alphanum})'
)
toplabel = (
f'([a-zA-Z]|[a-zA-Z]({alphanum}|-)*{alphanum})'
)
hostname = (
f'({domainlabel}\\.)*{toplabel}(\\.)?'
)
ipv4address = (
'[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+'
)
host = (
f'({hostname}|{ipv4address})'
)
port = '[0-9]*'
hostport = (
f'{host}(:{port})?'
)
server = (
f'(({userinfo}@)?{hostport})?'
)
regName = (
f'({unreserved}|{escaped}|[$,;:@&=+])+'
)
authority = (
f'({server}|{regName})'
)
netPath = (
f'//{authority}({absPath})?'
)
reserved = (
'[;/?:@&=+$,]'
)
uric = (
f'({reserved}|{unreserved}|{escaped})'
)
query = (
f'({uric})*'
)
hierPart = (
f'({netPath}|{absPath})(\\?{query})?'
)
uricNoSlash = (
f'({unreserved}|{escaped}|[;?:@&=+$,])'
)
opaquePart = (
f'{uricNoSlash}({uric})*'
)
absoluteuri = (
f'{scheme}:({hierPart}|{opaquePart})'
)
fragment = (
f'({uric})*'
)
relSegment = (
f'({unreserved}|{escaped}|[;@&=+$,])+'
)
relPath = (
f'{relSegment}({absPath})?'
)
relativeuri = (
f'({netPath}|{absPath}|{relPath})(\\?{query})?'
)
uriReference = (
f'^({absoluteuri}|{relativeuri})?(\\#{fragment})?$'
)
return match(
uriReference,
text
) is not None