pandas-dev
diff --git a/.github/workflows/python-dev.yml
Lines changed: 3 additions & 3 deletions b/.github/workflows/python-dev.yml
Lines changed: 3 additions & 3 deletions
diff --git a/README.md
Lines changed: 0 additions & 2 deletions b/README.md
Lines changed: 0 additions & 2 deletions
diff --git a/doc/source/whatsnew/v1.6.0.rst
Lines changed: 2 additions & 0 deletions b/doc/source/whatsnew/v1.6.0.rst
Lines changed: 2 additions & 0 deletions
diff --git a/pandas/_libs/tslibs/timedeltas.pyx
Lines changed: 15 additions & 5 deletions b/pandas/_libs/tslibs/timedeltas.pyx
Lines changed: 15 additions & 5 deletions
diff --git a/pandas/core/arrays/datetimelike.py
Lines changed: 2 additions & 1 deletion b/pandas/core/arrays/datetimelike.py
Lines changed: 2 additions & 1 deletion
diff --git a/pandas/core/arrays/masked.py
Lines changed: 10 additions & 2 deletions b/pandas/core/arrays/masked.py
Lines changed: 10 additions & 2 deletions
diff --git a/pandas/core/arrays/timedeltas.py
Lines changed: 16 additions & 2 deletions b/pandas/core/arrays/timedeltas.py
Lines changed: 16 additions & 2 deletions
diff --git a/pandas/core/construction.py
Lines changed: 3 additions & 2 deletions b/pandas/core/construction.py
Lines changed: 3 additions & 2 deletions
diff --git a/pandas/core/dtypes/astype.py
Lines changed: 8 additions & 0 deletions b/pandas/core/dtypes/astype.py
Lines changed: 8 additions & 0 deletions
diff --git a/pandas/core/dtypes/cast.py
Lines changed: 1 addition & 6 deletions b/pandas/core/dtypes/cast.py
Lines changed: 1 addition & 6 deletions
diff --git a/pandas/core/generic.py
Lines changed: 1 addition & 1 deletion b/pandas/core/generic.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/groupby/categorical.py
Lines changed: 14 additions & 14 deletions b/pandas/core/groupby/categorical.py
Lines changed: 14 additions & 14 deletions
diff --git a/pandas/core/groupby/grouper.py
Lines changed: 6 additions & 4 deletions b/pandas/core/groupby/grouper.py
Lines changed: 6 additions & 4 deletions
diff --git a/pandas/tests/arithmetic/test_numeric.py
Lines changed: 19 additions & 0 deletions b/pandas/tests/arithmetic/test_numeric.py
Lines changed: 19 additions & 0 deletions
diff --git a/pandas/tests/dtypes/cast/test_promote.py
Lines changed: 6 additions & 0 deletions b/pandas/tests/dtypes/cast/test_promote.py
Lines changed: 6 additions & 0 deletions
diff --git a/pandas/tests/frame/methods/test_astype.py
Lines changed: 9 additions & 2 deletions b/pandas/tests/frame/methods/test_astype.py
Lines changed: 9 additions & 2 deletions
@@ -54,7 +54,7 @@ jobs:
         os: [ubuntu-latest, macOS-latest, windows-latest]
 
     name: actions-311-dev
-    timeout-minutes: 80
+    timeout-minutes: 120
 
     concurrency:
       #https://github.community/t/concurrecy-not-work-for-push/183068/7
@@ -75,7 +75,7 @@ jobs:
       run: |
         python --version
         python -m pip install --upgrade pip setuptools wheel
-        python -m pip install git+https://github.com/numpy/numpy.git
+        python -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
         python -m pip install git+https://github.com/nedbat/coveragepy.git
         python -m pip install python-dateutil pytz cython hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
         python -m pip list
@@ -84,7 +84,7 @@ jobs:
     - name: Build Pandas
       run: |
         python setup.py build_ext -q -j1
-        python -m pip install -e . --no-build-isolation --no-use-pep517
+        python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
 
     - name: Build Version
       run: |
 
@@ -128,8 +128,6 @@ or for installing in [development mode](https://pip.pypa.io/en/latest/cli/pip_in
 python -m pip install -e . --no-build-isolation --no-use-pep517
 ```
 
-If you have `make`, you can also use `make develop` to run the same command.
-
 or alternatively
 
 ```sh
 
@@ -119,6 +119,7 @@ Other API changes
 - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`)
 - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
 - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
+- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -153,6 +154,7 @@ Performance improvements
 - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
 - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
 - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
+- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_160.bug_fixes:
 
@@ -341,8 +341,9 @@ cdef convert_to_timedelta64(object ts, str unit):
     elif isinstance(ts, _Timedelta):
         # already in the proper format
         if ts._reso != NPY_FR_ns:
-            raise NotImplementedError
-        ts = np.timedelta64(ts.value, "ns")
+            ts = ts._as_unit("ns").asm8
+        else:
+            ts = np.timedelta64(ts.value, "ns")
     elif is_timedelta64_object(ts):
         ts = ensure_td64ns(ts)
     elif is_integer_object(ts):
@@ -1706,7 +1707,13 @@ class Timedelta(_Timedelta):
                 value = parse_timedelta_string(value)
             value = np.timedelta64(value)
         elif PyDelta_Check(value):
-            value = convert_to_timedelta64(value, 'ns')
+            # pytimedelta object -> microsecond resolution
+            new_value = delta_to_nanoseconds(
+                value, reso=NPY_DATETIMEUNIT.NPY_FR_us
+            )
+            return cls._from_value_and_reso(
+                new_value, reso=NPY_DATETIMEUNIT.NPY_FR_us
+            )
         elif is_timedelta64_object(value):
             # Retain the resolution if possible, otherwise cast to the nearest
             #  supported resolution.
@@ -1720,7 +1727,7 @@ class Timedelta(_Timedelta):
             if reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
                 try:
                     new_value = convert_reso(
-                        get_timedelta64_value(value),
+                        new_value,
                         reso,
                         new_reso,
                         round_ok=True,
@@ -1730,7 +1737,10 @@ class Timedelta(_Timedelta):
             return cls._from_value_and_reso(new_value, reso=new_reso)
 
         elif is_tick_object(value):
-            value = np.timedelta64(value.nanos, 'ns')
+            new_reso = get_supported_reso(value._reso)
+            new_value = delta_to_nanoseconds(value, reso=new_reso)
+            return cls._from_value_and_reso(new_value, reso=new_reso)
+
         elif is_integer_object(value) or is_float_object(value):
             # unit=None is de-facto 'ns'
             unit = parse_timedelta_unit(unit)
 
@@ -1275,7 +1275,8 @@ def _add_timedeltalike_scalar(self, other):
 
         # PeriodArray overrides, so we only get here with DTA/TDA
         self = cast("DatetimeArray | TimedeltaArray", self)
-        other = Timedelta(other)._as_unit(self._unit)
+        other = Timedelta(other)
+        self, other = self._ensure_matching_resos(other)
         return self._add_timedeltalike(other)
 
     def _add_timedelta_arraylike(self, other: TimedeltaArray):
 
@@ -17,6 +17,10 @@
     lib,
     missing as libmissing,
 )
+from pandas._libs.tslibs import (
+    get_unit_from_dtype,
+    is_supported_unit,
+)
 from pandas._typing import (
     ArrayLike,
     AstypeArg,
@@ -750,12 +754,16 @@ def _maybe_mask_result(self, result, mask):
 
             return BooleanArray(result, mask, copy=False)
 
-        elif result.dtype == "timedelta64[ns]":
+        elif (
+            isinstance(result.dtype, np.dtype)
+            and result.dtype.kind == "m"
+            and is_supported_unit(get_unit_from_dtype(result.dtype))
+        ):
             # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
             from pandas.core.arrays import TimedeltaArray
 
             if not isinstance(result, TimedeltaArray):
-                result = TimedeltaArray._simple_new(result)
+                result = TimedeltaArray._simple_new(result, dtype=result.dtype)
 
             result[mask] = result.dtype.type("NaT")
             return result
 
@@ -20,7 +20,9 @@
     Tick,
     Timedelta,
     astype_overflowsafe,
+    get_unit_from_dtype,
     iNaT,
+    is_supported_unit,
     periods_per_second,
     to_offset,
 )
@@ -257,10 +259,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
             )
 
         if start is not None:
-            start = Timedelta(start)
+            start = Timedelta(start)._as_unit("ns")
 
         if end is not None:
-            end = Timedelta(end)
+            end = Timedelta(end)._as_unit("ns")
 
         left_closed, right_closed = validate_endpoints(closed)
 
@@ -308,6 +310,18 @@ def astype(self, dtype, copy: bool = True):
         dtype = pandas_dtype(dtype)
 
         if dtype.kind == "m":
+            if dtype == self.dtype:
+                if copy:
+                    return self.copy()
+                return self
+
+            if is_supported_unit(get_unit_from_dtype(dtype)):
+                # unit conversion e.g. timedelta64[s]
+                res_values = astype_overflowsafe(self._ndarray, dtype, copy=False)
+                return type(self)._simple_new(
+                    res_values, dtype=res_values.dtype, freq=self.freq
+                )
+
             return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy)
 
         return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
 
@@ -45,7 +45,6 @@
     maybe_convert_platform,
     maybe_infer_to_datetimelike,
     maybe_upcast,
-    sanitize_to_nanoseconds,
 )
 from pandas.core.dtypes.common import (
     is_datetime64_ns_dtype,
@@ -782,7 +781,9 @@ def _try_cast(
         if is_ndarray:
             arr = cast(np.ndarray, arr)
             if arr.dtype != object:
-                return sanitize_to_nanoseconds(arr, copy=copy)
+                if copy:
+                    return arr.copy()
+                return arr
 
             out = maybe_infer_to_datetimelike(arr)
             if out is arr and copy:
 
@@ -136,6 +136,14 @@ def astype_nansafe(
             return arr.view(dtype)
 
         elif dtype.kind == "m":
+            # TODO(2.0): change to use the same logic as TDA.astype, i.e.
+            #  giving the requested dtype for supported units (s, ms, us, ns)
+            #  and doing the old convert-to-float behavior otherwise.
+            if is_supported_unit(get_unit_from_dtype(arr.dtype)):
+                from pandas.core.construction import ensure_wrapped_if_datetimelike
+
+                arr = ensure_wrapped_if_datetimelike(arr)
+                return arr.astype(dtype, copy=copy)
             return astype_td64_unit_conversion(arr, dtype, copy=copy)
 
         raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
 
@@ -1423,12 +1423,7 @@ def maybe_cast_to_datetime(
             return astype_nansafe(value, dtype)  # type: ignore[arg-type]
 
     elif isinstance(value, np.ndarray):
-        if value.dtype.kind in ["M", "m"]:
-            # catch a datetime/timedelta that is not of ns variety
-            # and no coercion specified
-            value = sanitize_to_nanoseconds(value)
-
-        elif value.dtype == _dtype_obj:
+        if value.dtype == _dtype_obj:
             value = maybe_infer_to_datetimelike(value)
 
     elif isinstance(value, list):
 
@@ -5878,7 +5878,7 @@ def pipe(
 
         If you have a function that takes the data as (say) the second
         argument, pass a tuple indicating which keyword expects the
-        data. For example, suppose ``f`` takes its data as ``arg2``:
+        data. For example, suppose ``func`` takes its data as ``arg2``:
 
         >>> (df.pipe(h)
         ...    .pipe(g, arg1=a)
 
@@ -75,21 +75,21 @@ def recode_for_groupby(
         return c, None
 
     # sort=False should order groups in as-encountered order (GH-8868)
-    cat = c.unique()
 
-    # See GH-38140 for block below
-    # exclude nan from indexer for categories
-    take_codes = cat.codes[cat.codes != -1]
-    if cat.ordered:
-        take_codes = np.sort(take_codes)
-    cat = cat.set_categories(cat.categories.take(take_codes))
-
-    # But for groupby to work, all categories should be present,
-    # including those missing from the data (GH-13179), which .unique()
-    # above dropped
-    cat = cat.add_categories(c.categories[~c.categories.isin(cat.categories)])
-
-    return c.reorder_categories(cat.categories), None
+    # xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
+    all_codes = np.arange(c.categories.nunique(), dtype=np.int8)
+    # GH 38140: exclude nan from indexer for categories
+    unique_notnan_codes = unique1d(c.codes[c.codes != -1])
+    if c.ordered:
+        unique_notnan_codes = np.sort(unique_notnan_codes)
+    if len(all_codes) > len(unique_notnan_codes):
+        # GH 13179: All categories need to be present, even if missing from the data
+        missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
+        take_codes = np.concatenate((unique_notnan_codes, missing_codes))
+    else:
+        take_codes = unique_notnan_codes
+
+    return Categorical(c, c.unique().categories.take(take_codes)), None
 
 
 def recode_from_groupby(
 
@@ -26,7 +26,6 @@
 from pandas.util._decorators import cache_readonly
 from pandas.util._exceptions import find_stack_level
 
-from pandas.core.dtypes.cast import sanitize_to_nanoseconds
 from pandas.core.dtypes.common import (
     is_categorical_dtype,
     is_list_like,
@@ -558,9 +557,12 @@ def __init__(
                 raise AssertionError(errmsg)
 
         if isinstance(self.grouping_vector, np.ndarray):
-            # if we have a date/time-like grouper, make sure that we have
-            # Timestamps like
-            self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector)
+            if self.grouping_vector.dtype.kind in ["m", "M"]:
+                # if we have a date/time-like grouper, make sure that we have
+                # Timestamps like
+                # TODO 2022-10-08 we only have one test that gets here and
+                #  values are already in nanoseconds in that case.
+                self.grouping_vector = Series(self.grouping_vector).to_numpy()
 
     def __repr__(self) -> str:
         return f"Grouping({self.name})"
 
@@ -4,6 +4,7 @@
 from __future__ import annotations
 
 from collections import abc
+from datetime import timedelta
 from decimal import Decimal
 import operator
 from typing import Any
@@ -27,6 +28,7 @@
     Int64Index,
     UInt64Index,
 )
+from pandas.core.arrays import TimedeltaArray
 from pandas.core.computation import expressions as expr
 from pandas.tests.arithmetic.common import (
     assert_invalid_addsub_type,
@@ -209,6 +211,11 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box_with_array):
             tda = expected._data
             dtype = scalar_td.dtype
             expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)
+        elif type(scalar_td) is timedelta and box not in [Index, Series]:
+            # TODO(2.0): once TDA.astype converts to m8, just do expected.astype
+            tda = expected._data
+            dtype = np.dtype("m8[us]")
+            expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)
 
         index = tm.box_expected(index, box)
         expected = tm.box_expected(expected, box)
@@ -240,6 +247,13 @@ def test_numeric_arr_mul_tdscalar_numexpr_path(
         obj = tm.box_expected(arr, box, transpose=False)
 
         expected = arr_i8.view("timedelta64[D]").astype("timedelta64[ns]")
+        if type(scalar_td) is timedelta and box is array:
+            # TODO(2.0): this shouldn't depend on 'box'
+            expected = expected.astype("timedelta64[us]")
+            # TODO(2.0): won't be necessary to construct TimedeltaArray
+            #  explicitly.
+            expected = TimedeltaArray._simple_new(expected, dtype=expected.dtype)
+
         expected = tm.box_expected(expected, box, transpose=False)
 
         result = obj * scalar_td
@@ -262,6 +276,11 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
                 # i.e. resolution is lower -> use lowest supported resolution
                 dtype = np.dtype("m8[s]")
             expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)
+        elif type(three_days) is timedelta and box not in [Index, Series]:
+            # TODO(2.0): just use expected.astype
+            tda = expected._data
+            dtype = np.dtype("m8[us]")
+            expected = type(tda)._simple_new(tda._ndarray.astype(dtype), dtype=dtype)
 
         index = tm.box_expected(index, box)
         expected = tm.box_expected(expected, box)
 
@@ -480,6 +480,12 @@ def test_maybe_promote_any_with_timedelta64(
                 "Timedelta scalar"
             )
             request.node.add_marker(mark)
+        elif type(fill_value) is datetime.timedelta:
+            mark = pytest.mark.xfail(
+                reason="maybe_promote not yet updated to handle non-nano "
+                "Timedelta scalar"
+            )
+            request.node.add_marker(mark)
     else:
         expected_dtype = np.dtype(object)
         exp_val_for_scalar = fill_value
 
@@ -480,12 +480,19 @@ def test_astype_to_timedelta_unit_ns(self, unit):
     @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"])
     def test_astype_to_timedelta_unit(self, unit):
         # coerce to float
-        # GH#19223
+        # GH#19223 until 2.0 used to coerce to float
         dtype = f"m8[{unit}]"
         arr = np.array([[1, 2, 3]], dtype=dtype)
         df = DataFrame(arr)
         result = df.astype(dtype)
-        expected = DataFrame(df.values.astype(dtype).astype(float))
+
+        if unit in ["m", "h", "D"]:
+            # We don't support these, so we use the old logic to convert to float
+            expected = DataFrame(df.values.astype(dtype).astype(float))
+        else:
+            tda = pd.core.arrays.TimedeltaArray._simple_new(arr, dtype=arr.dtype)
+            expected = DataFrame(tda)
+            assert (expected.dtypes == dtype).all()
 
         tm.assert_frame_equal(result, expected)