Skip to content

Commit 72bf4ef

Browse files
authored
Merge pull request #34 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents d74e655 + 106ed85 commit 72bf4ef

File tree

14 files changed

+87
-78
lines changed

14 files changed

+87
-78
lines changed

doc/redirects.csv

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ developer,development/developer
4646
extending,development/extending
4747
internals,development/internals
4848

49-
# api
49+
# api moved function
50+
reference/api/pandas.io.json.json_normalize,pandas.json_normalize
51+
52+
# api rename
5053
api,reference/index
5154
generated/pandas.api.extensions.ExtensionArray.argsort,../reference/api/pandas.api.extensions.ExtensionArray.argsort
5255
generated/pandas.api.extensions.ExtensionArray.astype,../reference/api/pandas.api.extensions.ExtensionArray.astype

doc/source/whatsnew/v1.0.1.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717

1818
- Fixed regression in :class:`DataFrame` setting values with a slice (e.g. ``df[-4:] = 1``) indexing by label instead of position (:issue:`31469`)
1919
- Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`)
20+
- Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`)
2021
- Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`)
2122
- Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`)
2223
- Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ Datetimelike
109109
- :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` instead of ``TypeError`` (:issue:`30950`)
110110
- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`)
111111
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with a tz-aware index (:issue:`26683`)
112+
- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`)
112113

113114
Timedelta
114115
^^^^^^^^^

pandas/_libs/tslibs/period.pyx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ PyDateTime_IMPORT
2222
from pandas._libs.tslibs.np_datetime cimport (
2323
npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct,
2424
pandas_datetime_to_datetimestruct, check_dts_bounds,
25-
NPY_DATETIMEUNIT, NPY_FR_D)
25+
NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us)
2626

2727
cdef extern from "src/datetime/np_datetime.h":
2828
int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
@@ -1169,7 +1169,12 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
11691169
if ordinal == NPY_NAT:
11701170
return NPY_NAT
11711171

1172-
get_date_info(ordinal, freq, &dts)
1172+
if freq == 11000:
1173+
# Microsecond, avoid get_date_info to prevent floating point errors
1174+
pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts)
1175+
else:
1176+
get_date_info(ordinal, freq, &dts)
1177+
11731178
check_dts_bounds(&dts)
11741179
return dtstruct_to_dt64(&dts)
11751180

pandas/core/indexing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,8 @@ def _setitem_with_indexer(self, indexer, value):
893893

894894
# we can directly set the series here
895895
# as we select a slice indexer on the mi
896-
idx = index._convert_slice_indexer(idx)
896+
if isinstance(idx, slice):
897+
idx = index._convert_slice_indexer(idx)
897898
obj._consolidate_inplace()
898899
obj = obj.copy()
899900
obj._data = obj._data.setitem(indexer=tuple([idx]), value=value)

pandas/core/reshape/concat.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ def __init__(
352352
for obj in objs:
353353
if not isinstance(obj, (Series, DataFrame)):
354354
msg = (
355-
"cannot concatenate object of type '{typ}'; "
356-
"only Series and DataFrame objs are valid".format(typ=type(obj))
355+
f"cannot concatenate object of type '{type(obj)}'; "
356+
"only Series and DataFrame objs are valid"
357357
)
358358
raise TypeError(msg)
359359

@@ -403,8 +403,7 @@ def __init__(
403403
self._is_series = isinstance(sample, ABCSeries)
404404
if not 0 <= axis <= sample.ndim:
405405
raise AssertionError(
406-
"axis must be between 0 and {ndim}, input was "
407-
"{axis}".format(ndim=sample.ndim, axis=axis)
406+
f"axis must be between 0 and {sample.ndim}, input was {axis}"
408407
)
409408

410409
# if we have mixed ndims, then convert to highest ndim
@@ -622,11 +621,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
622621
try:
623622
i = level.get_loc(key)
624623
except KeyError:
625-
raise ValueError(
626-
"Key {key!s} not in level {level!s}".format(
627-
key=key, level=level
628-
)
629-
)
624+
raise ValueError(f"Key {key} not in level {level}")
630625

631626
to_concat.append(np.repeat(i, len(index)))
632627
codes_list.append(np.concatenate(to_concat))
@@ -677,11 +672,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
677672

678673
mask = mapped == -1
679674
if mask.any():
680-
raise ValueError(
681-
"Values not found in passed level: {hlevel!s}".format(
682-
hlevel=hlevel[mask]
683-
)
684-
)
675+
raise ValueError(f"Values not found in passed level: {hlevel[mask]!s}")
685676

686677
new_codes.append(np.repeat(mapped, n))
687678

pandas/core/reshape/melt.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,7 @@ def melt(
8888
if len(frame.columns.names) == len(set(frame.columns.names)):
8989
var_name = frame.columns.names
9090
else:
91-
var_name = [
92-
"variable_{i}".format(i=i) for i in range(len(frame.columns.names))
93-
]
91+
var_name = [f"variable_{i}" for i in range(len(frame.columns.names))]
9492
else:
9593
var_name = [
9694
frame.columns.name if frame.columns.name is not None else "variable"
@@ -417,9 +415,7 @@ def wide_to_long(
417415
"""
418416

419417
def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]:
420-
regex = r"^{stub}{sep}{suffix}$".format(
421-
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
422-
)
418+
regex = fr"^{re.escape(stub)}{re.escape(sep)}{suffix}$"
423419
pattern = re.compile(regex)
424420
return [col for col in df.columns if pattern.match(col)]
425421

pandas/core/reshape/merge.py

Lines changed: 23 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -611,8 +611,9 @@ def __init__(
611611
if _left.columns.nlevels != _right.columns.nlevels:
612612
msg = (
613613
"merging between different levels can give an unintended "
614-
"result ({left} levels on the left, {right} on the right)"
615-
).format(left=_left.columns.nlevels, right=_right.columns.nlevels)
614+
f"result ({left.columns.nlevels} levels on the left,"
615+
f"{right.columns.nlevels} on the right)"
616+
)
616617
warnings.warn(msg, UserWarning)
617618

618619
self._validate_specification()
@@ -679,7 +680,7 @@ def _indicator_pre_merge(
679680
if i in columns:
680681
raise ValueError(
681682
"Cannot use `indicator=True` option when "
682-
"data contains a column named {name}".format(name=i)
683+
f"data contains a column named {i}"
683684
)
684685
if self.indicator_name in columns:
685686
raise ValueError(
@@ -831,7 +832,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
831832
else:
832833
result.index = Index(key_col, name=name)
833834
else:
834-
result.insert(i, name or "key_{i}".format(i=i), key_col)
835+
result.insert(i, name or f"key_{i}", key_col)
835836

836837
def _get_join_indexers(self):
837838
""" return the join indexers """
@@ -1185,13 +1186,10 @@ def _validate_specification(self):
11851186
if len(common_cols) == 0:
11861187
raise MergeError(
11871188
"No common columns to perform merge on. "
1188-
"Merge options: left_on={lon}, right_on={ron}, "
1189-
"left_index={lidx}, right_index={ridx}".format(
1190-
lon=self.left_on,
1191-
ron=self.right_on,
1192-
lidx=self.left_index,
1193-
ridx=self.right_index,
1194-
)
1189+
f"Merge options: left_on={self.left_on}, "
1190+
f"right_on={self.right_on}, "
1191+
f"left_index={self.left_index}, "
1192+
f"right_index={self.right_index}"
11951193
)
11961194
if not common_cols.is_unique:
11971195
raise MergeError(f"Data columns not unique: {repr(common_cols)}")
@@ -1486,12 +1484,12 @@ def get_result(self):
14861484

14871485

14881486
def _asof_function(direction: str):
1489-
name = "asof_join_{dir}".format(dir=direction)
1487+
name = f"asof_join_{direction}"
14901488
return getattr(libjoin, name, None)
14911489

14921490

14931491
def _asof_by_function(direction: str):
1494-
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction)
1492+
name = f"asof_join_{direction}_on_X_by_Y"
14951493
return getattr(libjoin, name, None)
14961494

14971495

@@ -1601,9 +1599,7 @@ def _validate_specification(self):
16011599

16021600
# check 'direction' is valid
16031601
if self.direction not in ["backward", "forward", "nearest"]:
1604-
raise MergeError(
1605-
"direction invalid: {direction}".format(direction=self.direction)
1606-
)
1602+
raise MergeError(f"direction invalid: {self.direction}")
16071603

16081604
@property
16091605
def _asof_key(self):
@@ -1628,17 +1624,13 @@ def _get_merge_keys(self):
16281624
# later with a ValueError, so we don't *need* to check
16291625
# for them here.
16301626
msg = (
1631-
"incompatible merge keys [{i}] {lkdtype} and "
1632-
"{rkdtype}, both sides category, but not equal ones".format(
1633-
i=i, lkdtype=repr(lk.dtype), rkdtype=repr(rk.dtype)
1634-
)
1627+
f"incompatible merge keys [{i}] {repr(lk.dtype)} and "
1628+
f"{repr(rk.dtype)}, both sides category, but not equal ones"
16351629
)
16361630
else:
16371631
msg = (
1638-
"incompatible merge keys [{i}] {lkdtype} and "
1639-
"{rkdtype}, must be the same type".format(
1640-
i=i, lkdtype=repr(lk.dtype), rkdtype=repr(rk.dtype)
1641-
)
1632+
f"incompatible merge keys [{i}] {repr(lk.dtype)} and "
1633+
f"{repr(rk.dtype)}, must be the same type"
16421634
)
16431635
raise MergeError(msg)
16441636

@@ -1651,10 +1643,8 @@ def _get_merge_keys(self):
16511643
lt = left_join_keys[-1]
16521644

16531645
msg = (
1654-
"incompatible tolerance {tolerance}, must be compat "
1655-
"with type {lkdtype}".format(
1656-
tolerance=type(self.tolerance), lkdtype=repr(lt.dtype)
1657-
)
1646+
f"incompatible tolerance {self.tolerance}, must be compat "
1647+
f"with type {repr(lk.dtype)}"
16581648
)
16591649

16601650
if needs_i8_conversion(lt):
@@ -1680,8 +1670,11 @@ def _get_merge_keys(self):
16801670

16811671
# validate allow_exact_matches
16821672
if not is_bool(self.allow_exact_matches):
1683-
msg = "allow_exact_matches must be boolean, passed {passed}"
1684-
raise MergeError(msg.format(passed=self.allow_exact_matches))
1673+
msg = (
1674+
"allow_exact_matches must be boolean, "
1675+
f"passed {self.allow_exact_matches}"
1676+
)
1677+
raise MergeError(msg)
16851678

16861679
return left_join_keys, right_join_keys, join_names
16871680

pandas/core/reshape/pivot.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def _add_margins(
200200
if not isinstance(margins_name, str):
201201
raise ValueError("margins_name argument must be a string")
202202

203-
msg = 'Conflicting name "{name}" in margins'.format(name=margins_name)
203+
msg = f'Conflicting name "{margins_name}" in margins'
204204
for level in table.index.names:
205205
if margins_name in table.index.get_level_values(level):
206206
raise ValueError(msg)
@@ -650,9 +650,7 @@ def _normalize(table, normalize, margins: bool, margins_name="All"):
650650
if (margins_name not in table.iloc[-1, :].name) | (
651651
margins_name != table.iloc[:, -1].name
652652
):
653-
raise ValueError(
654-
"{mname} not in pivoted DataFrame".format(mname=margins_name)
655-
)
653+
raise ValueError(f"{margins_name} not in pivoted DataFrame")
656654
column_margin = table.iloc[:-1, -1]
657655
index_margin = table.iloc[-1, :-1]
658656

@@ -702,7 +700,7 @@ def _get_names(arrs, names, prefix: str = "row"):
702700
if isinstance(arr, ABCSeries) and arr.name is not None:
703701
names.append(arr.name)
704702
else:
705-
names.append("{prefix}_{i}".format(prefix=prefix, i=i))
703+
names.append(f"{prefix}_{i}")
706704
else:
707705
if len(names) != len(arrs):
708706
raise AssertionError("arrays and names must have the same length")

pandas/core/reshape/reshape.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -873,15 +873,13 @@ def get_dummies(
873873

874874
# validate prefixes and separator to avoid silently dropping cols
875875
def check_len(item, name):
876-
len_msg = (
877-
"Length of '{name}' ({len_item}) did not match the "
878-
"length of the columns being encoded ({len_enc})."
879-
)
880876

881877
if is_list_like(item):
882878
if not len(item) == data_to_encode.shape[1]:
883-
len_msg = len_msg.format(
884-
name=name, len_item=len(item), len_enc=data_to_encode.shape[1]
879+
len_msg = (
880+
f"Length of '{name}' ({len(item)}) did not match the "
881+
"length of the columns being encoded "
882+
f"({data_to_encode.shape[1]})."
885883
)
886884
raise ValueError(len_msg)
887885

@@ -990,8 +988,7 @@ def get_empty_frame(data) -> DataFrame:
990988

991989
# PY2 embedded unicode, gh-22084
992990
def _make_col_name(prefix, prefix_sep, level) -> str:
993-
fstr = "{prefix}{prefix_sep}{level}"
994-
return fstr.format(prefix=prefix, prefix_sep=prefix_sep, level=level)
991+
return f"{prefix}{prefix_sep}{level}"
995992

996993
dummy_cols = [_make_col_name(prefix, prefix_sep, level) for level in levels]
997994

pandas/tests/indexing/multiindex/test_setitem.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,16 @@ def test_astype_assignment_with_dups(self):
414414
df["A"] = df["A"].astype(np.float64)
415415
tm.assert_index_equal(df.index, index)
416416

417+
def test_setitem_nonmonotonic(self):
418+
# https://github.com/pandas-dev/pandas/issues/31449
419+
index = pd.MultiIndex.from_tuples(
420+
[("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
421+
)
422+
df = pd.DataFrame(data=[0, 1, 2], index=index, columns=["e"])
423+
df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
424+
expected = pd.DataFrame({"e": [99, 1, 100]}, index=index)
425+
tm.assert_frame_equal(df, expected)
426+
417427

418428
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
419429
# this works because we are modifying the underlying array

pandas/tests/io/test_pickle.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,7 @@ def compare_element(result, expected, typ, version=None):
6060
assert result == expected
6161
assert result.freq == expected.freq
6262
else:
63-
comparator = getattr(
64-
tm, "assert_{typ}_equal".format(typ=typ), tm.assert_almost_equal
65-
)
63+
comparator = getattr(tm, f"assert_{typ}_equal", tm.assert_almost_equal)
6664
comparator(result, expected)
6765

6866

@@ -77,7 +75,7 @@ def compare(data, vf, version):
7775

7876
# use a specific comparator
7977
# if available
80-
comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt)
78+
comparator = f"compare_{typ}_{dt}"
8179

8280
comparator = m.get(comparator, m["compare_element"])
8381
comparator(result, expected, typ, version)
@@ -234,7 +232,7 @@ def test_legacy_sparse_warning(datapath):
234232

235233
@pytest.fixture
236234
def get_random_path():
237-
return "__{}__.pickle".format(tm.rands(10))
235+
return f"__{tm.rands(10)}__.pickle"
238236

239237

240238
class TestCompression:
@@ -262,7 +260,7 @@ def compress_file(self, src_path, dest_path, compression):
262260
elif compression == "xz":
263261
f = _get_lzma_file(lzma)(dest_path, "w")
264262
else:
265-
msg = "Unrecognized compression type: {}".format(compression)
263+
msg = f"Unrecognized compression type: {compression}"
266264
raise ValueError(msg)
267265

268266
if compression != "zip":

pandas/tests/reshape/merge/test_merge.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,10 +371,8 @@ def test_no_overlap_more_informative_error(self):
371371

372372
msg = (
373373
"No common columns to perform merge on. "
374-
"Merge options: left_on={lon}, right_on={ron}, "
375-
"left_index={lidx}, right_index={ridx}".format(
376-
lon=None, ron=None, lidx=False, ridx=False
377-
)
374+
f"Merge options: left_on={None}, right_on={None}, "
375+
f"left_index={False}, right_index={False}"
378376
)
379377

380378
with pytest.raises(MergeError, match=msg):

0 commit comments

Comments (0)