MatchesBcp47🔗
Check that `text` is a valid BCP 47 language tag.
Code
alphanum = '[a-zA-Z0-9]'
singleton = (
'[0-9A-WY-Za-wy-z]'
)
extension = (
f'{singleton}(-({alphanum}){{2,8}})+'
)
extlang = (
'[a-zA-Z]{3}(-[a-zA-Z]{3}){,2}'
)
irregular = (
'(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)'
)
regular = (
'(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)'
)
grandfathered = (
f'({irregular}|{regular})'
)
language = (
f'([a-zA-Z]{{2,3}}(-{extlang})?|[a-zA-Z]{{4}}|[a-zA-Z]{{5,8}})'
)
script = '[a-zA-Z]{4}'
region = (
'([a-zA-Z]{2}|[0-9]{3})'
)
variant = (
f'(({alphanum}){{5,8}}|[0-9]({alphanum}){{3}})'
)
privateuse = (
f'[xX](-({alphanum}){{1,8}})+'
)
langtag = (
f'{language}(-{script})?(-{region})?(-{variant})*(-{extension})*(-{privateuse})?'
)
languageTag = (
f'({langtag}|{privateuse}|{grandfathered})'
)
pattern = (
f'^{languageTag}$'
)
return match(
pattern,
text
) is not None