Skip to content

Commit 3ab37e6

Browse files
authored
Merge branch 'master' into bug_issue16770
2 parents df5bfcf + 9e55af2 commit 3ab37e6

33 files changed

+367
-78
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
- [ ] closes #xxxx
22
- [ ] tests added / passed
3-
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff``
3+
- [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.)
44
- [ ] whatsnew entry

.travis.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,61 +35,61 @@ matrix:
3535
language: generic
3636
env:
3737
- JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network"
38-
- os: linux
38+
- dist: trusty
3939
env:
4040
- JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8"
4141
addons:
4242
apt:
4343
packages:
4444
- language-pack-zh-hans
45-
- os: linux
45+
- dist: trusty
4646
env:
4747
- JOB="2.7" TEST_ARGS="--skip-slow" LINT=true
4848
addons:
4949
apt:
5050
packages:
5151
- python-gtk2
52-
- os: linux
52+
- dist: trusty
5353
env:
5454
- JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true
5555
addons:
5656
apt:
5757
packages:
5858
- xsel
59-
- os: linux
59+
- dist: trusty
6060
env:
6161
- JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true
6262
# In allow_failures
63-
- os: linux
63+
- dist: trusty
6464
env:
6565
- JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network"
6666
# In allow_failures
67-
- os: linux
67+
- dist: trusty
6868
env:
6969
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
7070
# In allow_failures
71-
- os: linux
71+
- dist: trusty
7272
env:
7373
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
7474
# In allow_failures
75-
- os: linux
75+
- dist: trusty
7676
env:
7777
- JOB="3.6_DOC" DOC=true
7878
addons:
7979
apt:
8080
packages:
8181
- xsel
8282
allow_failures:
83-
- os: linux
83+
- dist: trusty
8484
env:
8585
- JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network"
86-
- os: linux
86+
- dist: trusty
8787
env:
8888
- JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
89-
- os: linux
89+
- dist: trusty
9090
env:
9191
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
92-
- os: linux
92+
- dist: trusty
9393
env:
9494
- JOB="3.6_DOC" DOC=true
9595

ci/requirements-2.7.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 27"
66

7-
conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-2.7_BUILD_TEST.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 27 BUILD_TEST"
66

7-
conda install -n pandas -c conda-forge pyarrow dask jemalloc=4.4.0
7+
conda install -n pandas -c conda-forge pyarrow dask

ci/requirements-3.5.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 35"
66

7-
conda install -n pandas -c conda-forge feather-format jemalloc=4.4.0
7+
conda install -n pandas -c conda-forge feather-format

ci/requirements-3.6.run

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ html5lib
1414
jinja2
1515
sqlalchemy
1616
pymysql
17-
jemalloc=4.4.0
1817
feather-format
1918
# psycopg2 (not avail on defaults ATM)
2019
beautifulsoup4

ci/requirements-3.6_DOC.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ echo "[install DOC_BUILD deps]"
66

77
pip install pandas-gbq
88

9-
conda install -n pandas -c conda-forge feather-format nbsphinx pandoc jemalloc=4.4.0
9+
conda install -n pandas -c conda-forge feather-format nbsphinx pandoc
1010

1111
conda install -n pandas -c r r rpy2 --yes

doc/source/contributing.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,12 @@ run this slightly modified command::
525525

526526
git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8
527527

528+
Note that on Windows, ``grep``, ``xargs``, and other tools are likely
529+
unavailable. However, the following command has been shown to work on smaller commits in the
530+
standard Windows command line::
531+
532+
git diff master -u -- "*.py" | flake8 --diff
533+
528534
Backwards Compatibility
529535
~~~~~~~~~~~~~~~~~~~~~~~
530536

doc/source/whatsnew/v0.20.3.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,18 @@ Performance Improvements
3737
Bug Fixes
3838
~~~~~~~~~
3939
- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
40+
- Fixed issue with :meth:`DataFrame.style` where element ids were not unique (:issue:`16780`)
41+
- Fixed a pytest marker that caused downstream packages' test suites to fail (:issue:`16680`)
42+
- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
43+
- Fixed bug where computing the rolling covariance of a MultiIndexed ``DataFrame`` improperly raised a ``ValueError`` (:issue:`16789`)
4044
- Handle reindexing an empty categorical index rather than throwing (:issue:`16770`)
4145

42-
43-
4446
Conversion
4547
^^^^^^^^^^
4648

4749
- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
48-
- Bug in Series construction when passing a Series with ``dtype='category'`` (:issue:`16524`).
50+
- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`).
51+
- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg (:issue:`16717`).
4952

5053
Indexing
5154
^^^^^^^^
@@ -55,8 +58,9 @@ Indexing
5558
I/O
5659
^^^
5760

58-
- Bug in :func:`read_csv`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
59-
- Bug in :func:`read_hdf`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
61+
- Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
62+
- Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
63+
- Bug in :func:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold regardless of the value provided (:issue:`16707`)
6064

6165
Plotting
6266
^^^^^^^^
@@ -78,6 +82,8 @@ Sparse
7882
Reshaping
7983
^^^^^^^^^
8084

85+
- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).
86+
- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`)
8187

8288

8389
Numeric

doc/source/whatsnew/v0.21.0.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ Performance Improvements
9292
Bug Fixes
9393
~~~~~~~~~
9494

95+
- Fixed a regression in 0.20: :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
96+
9597
Conversion
9698
^^^^^^^^^^
9799

@@ -133,6 +135,7 @@ Reshaping
133135

134136
Numeric
135137
^^^^^^^
138+
- Bug in ``.clip()`` when ``axis=1`` and a list-like ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`)
136139

137140

138141
Categorical

pandas/_libs/src/reduce.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ cdef class SeriesGrouper:
419419
cdef inline _extract_result(object res):
420420
""" extract the result object, it might be a 0-dim ndarray
421421
or a len-1 0-dim, or a scalar """
422-
if hasattr(res, 'values'):
422+
if hasattr(res, 'values') and isinstance(res.values, np.ndarray):
423423
res = res.values
424424
if not np.isscalar(res):
425425
if isinstance(res, np.ndarray):

pandas/core/generic.py

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from pandas.compat.numpy import function as nv
5353
from pandas.compat import (map, zip, lzip, lrange, string_types,
5454
isidentifier, set_function_name, cPickle as pkl)
55+
from pandas.core.ops import _align_method_FRAME
5556
import pandas.core.nanops as nanops
5657
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
5758
from pandas.util._validators import validate_bool_kwarg
@@ -1538,7 +1539,7 @@ def to_xarray(self):
15381539
15391540
`to_latex`-specific options:
15401541
1541-
bold_rows : boolean, default True
1542+
bold_rows : boolean, default False
15421543
Make the row labels bold in the output
15431544
column_format : str, default None
15441545
The columns format as specified in `LaTeX table format
@@ -1587,7 +1588,7 @@ def to_xarray(self):
15871588
@Appender(_shared_docs['to_latex'] % _shared_doc_kwargs)
15881589
def to_latex(self, buf=None, columns=None, col_space=None, header=True,
15891590
index=True, na_rep='NaN', formatters=None, float_format=None,
1590-
sparsify=None, index_names=True, bold_rows=True,
1591+
sparsify=None, index_names=True, bold_rows=False,
15911592
column_format=None, longtable=None, escape=None,
15921593
encoding=None, decimal='.', multicolumn=None,
15931594
multicolumn_format=None, multirow=None):
@@ -3507,12 +3508,12 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
35073508
-------
35083509
casted : type of caller
35093510
"""
3510-
if isinstance(dtype, collections.Mapping):
3511+
if is_dict_like(dtype):
35113512
if self.ndim == 1: # i.e. Series
3512-
if len(dtype) > 1 or list(dtype.keys())[0] != self.name:
3513+
if len(dtype) > 1 or self.name not in dtype:
35133514
raise KeyError('Only the Series name can be used for '
35143515
'the key in Series dtype mappings.')
3515-
new_type = list(dtype.values())[0]
3516+
new_type = dtype[self.name]
35163517
return self.astype(new_type, copy, errors, **kwargs)
35173518
elif self.ndim > 2:
35183519
raise NotImplementedError(
@@ -4413,6 +4414,34 @@ def _clip_with_scalar(self, lower, upper, inplace=False):
44134414
else:
44144415
return result
44154416

4417+
def _clip_with_one_bound(self, threshold, method, axis, inplace):
4418+
4419+
inplace = validate_bool_kwarg(inplace, 'inplace')
4420+
if axis is not None:
4421+
axis = self._get_axis_number(axis)
4422+
4423+
if np.any(isnull(threshold)):
4424+
raise ValueError("Cannot use an NA value as a clip threshold")
4425+
4426+
# method is self.le for upper bound and self.ge for lower bound
4427+
if is_scalar(threshold) and is_number(threshold):
4428+
if method.__name__ == 'le':
4429+
return self._clip_with_scalar(None, threshold, inplace=inplace)
4430+
return self._clip_with_scalar(threshold, None, inplace=inplace)
4431+
4432+
subset = method(threshold, axis=axis) | isnull(self)
4433+
4434+
# GH #15390
4435+
# In order for where method to work, the threshold must
4436+
# be transformed to NDFrame from other array like structure.
4437+
if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
4438+
if isinstance(self, ABCSeries):
4439+
threshold = pd.Series(threshold, index=self.index)
4440+
else:
4441+
threshold = _align_method_FRAME(self, np.asarray(threshold),
4442+
axis)
4443+
return self.where(subset, threshold, axis=axis, inplace=inplace)
4444+
44164445
def clip(self, lower=None, upper=None, axis=None, inplace=False,
44174446
*args, **kwargs):
44184447
"""
@@ -4515,16 +4544,8 @@ def clip_upper(self, threshold, axis=None, inplace=False):
45154544
-------
45164545
clipped : same type as input
45174546
"""
4518-
if np.any(isnull(threshold)):
4519-
raise ValueError("Cannot use an NA value as a clip threshold")
4520-
4521-
if is_scalar(threshold) and is_number(threshold):
4522-
return self._clip_with_scalar(None, threshold, inplace=inplace)
4523-
4524-
inplace = validate_bool_kwarg(inplace, 'inplace')
4525-
4526-
subset = self.le(threshold, axis=axis) | isnull(self)
4527-
return self.where(subset, threshold, axis=axis, inplace=inplace)
4547+
return self._clip_with_one_bound(threshold, method=self.le,
4548+
axis=axis, inplace=inplace)
45284549

45294550
def clip_lower(self, threshold, axis=None, inplace=False):
45304551
"""
@@ -4547,16 +4568,8 @@ def clip_lower(self, threshold, axis=None, inplace=False):
45474568
-------
45484569
clipped : same type as input
45494570
"""
4550-
if np.any(isnull(threshold)):
4551-
raise ValueError("Cannot use an NA value as a clip threshold")
4552-
4553-
if is_scalar(threshold) and is_number(threshold):
4554-
return self._clip_with_scalar(threshold, None, inplace=inplace)
4555-
4556-
inplace = validate_bool_kwarg(inplace, 'inplace')
4557-
4558-
subset = self.ge(threshold, axis=axis) | isnull(self)
4559-
return self.where(subset, threshold, axis=axis, inplace=inplace)
4571+
return self._clip_with_one_bound(threshold, method=self.ge,
4572+
axis=axis, inplace=inplace)
45604573

45614574
def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
45624575
group_keys=True, squeeze=False, **kwargs):

pandas/core/indexes/category.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,9 @@ def take(self, indices, axis=0, allow_fill=True,
565565
na_value=-1)
566566
return self._create_from_codes(taken)
567567

568+
def is_dtype_equal(self, other):
569+
return self._data.is_dtype_equal(other)
570+
568571
take_nd = take
569572

570573
def map(self, mapper):

pandas/core/reshape/merge.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,13 +1440,14 @@ def _factorize_keys(lk, rk, sort=True):
14401440
lk = lk.values
14411441
rk = rk.values
14421442

1443-
# if we exactly match in categories, allow us to use codes
1443+
# if we exactly match in categories, allow us to factorize on codes
14441444
if (is_categorical_dtype(lk) and
14451445
is_categorical_dtype(rk) and
14461446
lk.is_dtype_equal(rk)):
1447-
return lk.codes, rk.codes, len(lk.categories)
1448-
1449-
if is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
1447+
klass = libhashtable.Int64Factorizer
1448+
lk = _ensure_int64(lk.codes)
1449+
rk = _ensure_int64(rk.codes)
1450+
elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
14501451
klass = libhashtable.Int64Factorizer
14511452
lk = _ensure_int64(com._values_from_object(lk))
14521453
rk = _ensure_int64(com._values_from_object(rk))

pandas/core/window.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1948,7 +1948,7 @@ def dataframe_from_int_dict(data, frame_template):
19481948
result.columns = Index(result.columns).set_names(
19491949
arg2.columns.name)
19501950
result.index = result.index.set_names(
1951-
[arg1.index.name, arg1.columns.name])
1951+
arg1.index.names + arg1.columns.names)
19521952

19531953
return result
19541954

pandas/io/formats/format.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,7 @@ def __init__(self, formatter, column_format=None, longtable=False,
845845
multicolumn=False, multicolumn_format=None, multirow=False):
846846
self.fmt = formatter
847847
self.frame = self.fmt.frame
848+
self.bold_rows = self.fmt.kwds.get('bold_rows', False)
848849
self.column_format = column_format
849850
self.longtable = longtable
850851
self.multicolumn = multicolumn
@@ -943,6 +944,11 @@ def get_col_type(dtype):
943944
if x else '{}') for x in row]
944945
else:
945946
crow = [x if x else '{}' for x in row]
947+
if self.bold_rows and self.fmt.index:
948+
# bold row labels
949+
crow = ['\\textbf{%s}' % x
950+
if j < ilevels and x.strip() not in ['', '{}'] else x
951+
for j, x in enumerate(crow)]
946952
if i < clevels and self.fmt.header and self.multicolumn:
947953
# sum up columns to multicolumns
948954
crow = self._format_multicolumn(crow, ilevels)

pandas/io/formats/style.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,13 +281,14 @@ def format_attr(pair):
281281
for r, idx in enumerate(self.data.index):
282282
row_es = []
283283
for c, value in enumerate(rlabels[r]):
284+
rid = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r]
284285
es = {
285286
"type": "th",
286287
"is_visible": _is_visible(r, c, idx_lengths),
287288
"value": value,
288289
"display_value": value,
289-
"class": " ".join([ROW_HEADING_CLASS, "level%s" % c,
290-
"row%s" % r]),
290+
"id": "_".join(rid[1:]),
291+
"class": " ".join(rid)
291292
}
292293
rowspan = idx_lengths.get((c, r), 0)
293294
if rowspan > 1:

pandas/io/pytables.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,8 +2591,8 @@ def read_index_node(self, node, start=None, stop=None):
25912591
if 'name' in node._v_attrs:
25922592
name = _ensure_str(node._v_attrs.name)
25932593

2594-
index_class = self._alias_to_class(getattr(node._v_attrs,
2595-
'index_class', ''))
2594+
index_class = self._alias_to_class(_ensure_decoded(
2595+
getattr(node._v_attrs, 'index_class', '')))
25962596
factory = self._get_index_factory(index_class)
25972597

25982598
kwargs = {}

0 commit comments

Comments
 (0)