lector.types.regex#

Common regex patterns used in multiple modules.

lector.types.regex.RE_FRATIONAL_SECONDS: str = '(?P<frac>\\.\\d+)'[source]#

Allows for timezone after fractional seconds, capturing part to be replaced.

lector.types.regex.RE_INT_SIGN = '^(?P<sign>[+-])?(?P<num>[0-9]+)$'[source]#

Capture optional sign and numeric parts in integer strings.

lector.types.regex.RE_IS_FLOAT = '^[-]?[0-9]*[.]?[0-9]*(?:[e][+-]?[0-9]+)?$'[source]#

Strings matching float representations convertible by Arrow. Allows ints too, but those should have been inferred before trying floats.

lector.types.regex.RE_IS_INT = '^\\+?\\-?[0-9]+$'[source]#

Strings matching int representations we’re able to parse.

lector.types.regex.RE_LIST_CLEAN: str = '^[\\[\\{\\(\\|<]|[\\]\\}\\)\\|>]$|\\r?\\n'[source]#

Remove all parenthesis-like characters from start and end as well as line breaks.

lector.types.regex.RE_LIST_LIKE: str = '^[\\(\\[\\|][\\s\\S]*[\\)\\]\\|]$'[source]#

Lists start and end with parenthesis-like characters.

lector.types.regex.RE_TRAILING_DECIMALS: str = '\\.(\\d+)$'[source]#

Strictly trailing, i.e. nothing after the decimals.

lector.types.regex.RE_TZ_OFFSET: str = '(?P<tz>Z|[+-]\\d{2}:?\\d{2}$)'[source]#

Detect a literal Z or a 4-digit timezone offset, with or without colon.

lector.types.regex.RE_URL = '^(http://www\\.|https://www\\.|http://|https://)?(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\.)+[A-Z...'[source]#