Skip to content

CLN: Added static types #33126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 3, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 26 additions & 37 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -279,73 +279,61 @@ cdef class TextReader:

cdef public:
int64_t leading_cols, table_width, skipfooter, buffer_lines
object allow_leading_cols
object delimiter, converters, delim_whitespace
bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory
bint delim_whitespace
object delimiter, converters
object na_values
object memory_map
object header, orig_header, names, header_start, header_end
object index_col
object low_memory
object skiprows
object dtype
object encoding
object compression
object mangle_dupe_cols
object usecols
list dtype_cast_order
set unnamed_cols
set noconvert

def __cinit__(self, source,
delimiter=b',',

header=0,
header_start=0,
header_end=0,
index_col=None,
names=None,

memory_map=False,
bint memory_map=False,
tokenize_chunksize=DEFAULT_CHUNKSIZE,
delim_whitespace=False,

bint delim_whitespace=False,
compression=None,

converters=None,

skipinitialspace=False,
bint skipinitialspace=False,
escapechar=None,
doublequote=True,
bint doublequote=True,
quotechar=b'"',
quoting=0,
lineterminator=None,

encoding=None,

comment=None,
decimal=b'.',
thousands=None,

dtype=None,
usecols=None,
error_bad_lines=True,
warn_bad_lines=True,

na_filter=True,
bint error_bad_lines=True,
bint warn_bad_lines=True,
bint na_filter=True,
na_values=None,
na_fvalues=None,
keep_default_na=True,

bint keep_default_na=True,
true_values=None,
false_values=None,
allow_leading_cols=True,
low_memory=False,
bint allow_leading_cols=True,
bint low_memory=False,
skiprows=None,
skipfooter=0,
verbose=False,
mangle_dupe_cols=True,
bint verbose=False,
bint mangle_dupe_cols=True,
float_precision=None,
skip_blank_lines=True):
bint skip_blank_lines=True):

# set encoding for native Python and C library
if encoding is not None:
Expand Down Expand Up @@ -591,6 +579,9 @@ cdef class TextReader:
self.parser.quotechar = ord(quote_char)

cdef _make_skiprow_set(self):
cdef:
int64_t i
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For anything that is used to index Python objects you should opt for Py_ssize_t

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. My reason for declaring this as int64_t is that below we have the loop:

            for i in self.skiprows:
                parser_add_skiprow(self.parser, i)

This calls parser_add_skiprow, which is defined here:

int parser_add_skiprow(parser_t *self, int64_t row)

Since i is passed in the place of row, I tried to match the type to avoid (potential) casting.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that should also be a Py_ssize_t. I don't know what effort it would require to update that, but it is OK to split that off into a separate PR too.


if isinstance(self.skiprows, (int, np.integer)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using util.is_integer here.

parser_set_skipfirstnrows(self.parser, self.skiprows)
elif not callable(self.skiprows):
Expand Down Expand Up @@ -683,15 +674,14 @@ cdef class TextReader:
# header is now a list of lists, so field_count should use header[0]

cdef:
Py_ssize_t i, start, field_count, passed_count, unnamed_count
Py_ssize_t i, start, field_count, passed_count, unnamed_count, level
char *word
object name, old_name
uint64_t hr, data_line = 0
char *errors = "strict"
StringPath path = _string_path(self.c_encoding)

header = []
unnamed_cols = set()
list header = []
set unnamed_cols = set()

if self.parser.header_start >= 0:

Expand Down Expand Up @@ -847,7 +837,7 @@ cdef class TextReader:
cdef _read_low_memory(self, rows):
cdef:
size_t rows_read = 0
chunks = []
list chunks = []

if rows is None:
while True:
Expand Down Expand Up @@ -2038,12 +2028,11 @@ def _concatenate_chunks(list chunks):
cdef:
list names = list(chunks[0].keys())
object name
list warning_columns
list warning_columns = []
object warning_names
object common_type

result = {}
warning_columns = list()
for name in names:
arrs = [chunk.pop(name) for chunk in chunks]
# Check each arr for consistent types.
Expand Down Expand Up @@ -2147,7 +2136,7 @@ def _maybe_encode(values):


def sanitize_objects(ndarray[object] values, set na_values,
convert_empty=True):
bint convert_empty=True):
"""
Convert specified values, including the given set na_values and empty
strings if convert_empty is True, to np.nan.
Expand All @@ -2156,7 +2145,7 @@ def sanitize_objects(ndarray[object] values, set na_values,
----------
values : ndarray[object]
na_values : set
convert_empty : bool (default True)
convert_empty : bool, default True
"""
cdef:
Py_ssize_t i, n
Expand Down