Skip to content

Some code cleanups #31792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@
import pandas.core.missing as missing
from pandas.core.nanops import nanpercentile

from pandas.io.formats.printing import pprint_thing


class Block(PandasObject):
"""
Expand Down Expand Up @@ -159,7 +157,8 @@ def _check_ndim(self, values, ndim):

@property
def _holder(self):
"""The array-like that can hold the underlying values.
"""
The array-like that can hold the underlying values.

None for 'Block', overridden by subclasses that don't
use an ndarray.
Expand Down Expand Up @@ -284,16 +283,11 @@ def __repr__(self) -> str:
# don't want to print out all of the items here
name = type(self).__name__
if self._is_single_block:

result = f"{name}: {len(self)} dtype: {self.dtype}"

else:

shape = " x ".join(pprint_thing(s) for s in self.shape)
result = (
f"{name}: {pprint_thing(self.mgr_locs.indexer)}, "
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT the pprint_thing is just a python2 thing.

@jbrockmendel can you please confirm or explain what it is?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes — it shouldn’t be needed now that we’re py3 only. Worth checking though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you please confirm or explain what it is?

I think it might matter if you were dealing with nested objects, but for these i think it is just a leftover py2 thing

f"{shape}, dtype: {self.dtype}"
)
shape = " x ".join(str(s) for s in self.shape)
result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}"

return result

Expand All @@ -319,10 +313,7 @@ def getitem_block(self, slicer, new_mgr_locs=None):
As of now, only supports slices that preserve dimensionality.
"""
if new_mgr_locs is None:
if isinstance(slicer, tuple):
axis0_slicer = slicer[0]
else:
axis0_slicer = slicer
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i tend to like this version because i can see in coverage output whether both cases are reached

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel I can see your point; I like the one-liner version because it makes less noise (IMO).

Anyway I am fine with reverting this one and also others.

Can we open a discussion for it (in a separate issue)? That way we can put it in the pandas code style guide.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel are you sticking on this one? +/- 0 on this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not a deal breaker

axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
new_mgr_locs = self.mgr_locs[axis0_slicer]

new_values = self._slice(slicer)
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,9 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
missing_arr.fill(fill_value)
return missing_arr

if not self.indexers:
if not self.block._can_consolidate:
# preserve these for validation in concat_compat
return self.block.values
if (not self.indexers) and (not self.block._can_consolidate):
# preserve these for validation in concat_compat
return self.block.values

if self.block.is_bool and not self.block.is_categorical:
# External code requested filling/upcasting, bool values must
Expand Down Expand Up @@ -372,7 +371,7 @@ def _get_empty_dtype_and_na(join_units):
raise AssertionError(msg)


def is_uniform_join_units(join_units):
def is_uniform_join_units(join_units) -> bool:
"""
Check if the join units consist of blocks of uniform type that can
be concatenated using Block.concat_same_type instead of the generic
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def comp(s, regex=False):
)
return _compare_or_regex_search(values, s, regex)

masks = [comp(s, regex) for i, s in enumerate(src_list)]
masks = [comp(s, regex) for s in src_list]

result_blocks = []
src_len = len(src_list) - 1
Expand Down Expand Up @@ -755,10 +755,7 @@ def copy(self, deep=True):
# hit in e.g. tests.io.json.test_pandas

def copy_func(ax):
if deep == "all":
return ax.copy(deep=True)
else:
return ax.view()
return ax.copy(deep=True) if deep == "all" else ax.view()

new_axes = [copy_func(ax) for ax in self.axes]
else:
Expand Down
7 changes: 3 additions & 4 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1492,11 +1492,10 @@ def extract(r):
# level, then our header was too long.
for n in range(len(columns[0])):
if all(ensure_str(col[n]) in self.unnamed_cols for col in columns):
header = ",".join(str(x) for x in self.header)
raise ParserError(
"Passed header=[{header}] are too many rows for this "
"multi_index of columns".format(
header=",".join(str(x) for x in self.header)
)
f"Passed header=[{header}] are too many rows "
"for this multi_index of columns"
)

# Clean the column names (if we have an index_col).
Expand Down
7 changes: 3 additions & 4 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3085,9 +3085,8 @@ def write(self, obj, **kwargs):

self.attrs.ndim = data.ndim
for i, ax in enumerate(data.axes):
if i == 0:
if not ax.is_unique:
raise ValueError("Columns index has to be unique for fixed format")
if i == 0 and (not ax.is_unique):
raise ValueError("Columns index has to be unique for fixed format")
self.write_index(f"axis{i}", ax)

# Supporting mixed-type DataFrame objects...nontrivial
Expand Down Expand Up @@ -4216,7 +4215,7 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False):
chunksize = 100000

rows = np.empty(min(chunksize, nrows), dtype=self.dtype)
chunks = int(nrows / chunksize) + 1
chunks = nrows // chunksize + 1
for i in range(chunks):
start_i = i * chunksize
end_i = min((i + 1) * chunksize, nrows)
Expand Down