lector.csv.abc

Fast and robust parser for potentially messy CSV data.

Classes

CleanTextBuffer

Remove null bytes on the fly.

Format

Holds all parameters needed to successfully read a CSV file.

Reader

Base class for CSV readers.

Functions

is_empty(buffer)

Check if a binary or text buffer is empty (from current position onwards).

Attributes

FileLike

PreambleRegistry

exception lector.csv.abc.EmptyFileError[source]#

Bases: Exception

Raised when a binary file read() returns 0 bytes.

class lector.csv.abc.CleanTextBuffer[source]#

Bases: io.TextIOWrapper

Remove null bytes on the fly.

read(*args)[source]#

Read at most n characters from stream.

Read from underlying buffer until we have n characters or we hit EOF. If n is negative or omitted, read until EOF.

readline(*args)[source]#

Read until newline or EOF.

Returns an empty string if EOF is hit immediately.

class lector.csv.abc.Format[source]#

Holds all parameters needed to successfully read a CSV file.

columns: list[str] | None[source]#
dialect: lector.csv.dialects.Dialect | None[source]#
encoding: str | None = 'utf-8'[source]#
preamble: int | None = 0[source]#
__rich__()[source]#
Return type:

rich.table.Table

class lector.csv.abc.Reader(fp, encoding=None, dialect=None, preamble=None, log=True)[source]#

Bases: abc.ABC

Base class for CSV readers.

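Parameters:

fp –

encoding – defaults to None

dialect – defaults to None

preamble – defaults to None

log – defaults to True
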
__call__[source]#
analyze()[source]#

Infer all parameters required for reading a csv file.

decode(fp)[source]#

Make sure we have a text buffer.

Parameters:

fp (FileLike) –

Return type:

TextIO

classmethod detect_columns(buffer, dialect)[source]#

Extract column names from buffer pointing at header row.

Parameters:

buffer –

dialect –

Return type:

list[str]

detect_dialect(buffer)[source]#

Detect separator, quote character etc.

Parameters:

buffer (TextIO) –

Return type:

dict

detect_preamble(buffer)[source]#

Detect the number of junk lines at the start of the file.

Parameters:

buffer (TextIO) –

Return type:

int

abstract parse(*args, **kwds)[source]#

Parse the file pointer or text buffer. Args are forwarded to read().

Return type:

Any

read(*args, **kwds)[source]#
Return type:

Any

lector.csv.abc.is_empty(buffer)[source]#

Check if a binary or text buffer is empty (from current position onwards).

Parameters:

buffer (IO) –

Return type:

bool

lector.csv.abc.FileLike[source]#
lector.csv.abc.PreambleRegistry[source]#