pandas-dev
diff --git a/ci/code_checks.sh
Lines changed: 0 additions & 34 deletions b/ci/code_checks.sh
Lines changed: 0 additions & 34 deletions
diff --git a/doc/source/whatsnew/v2.3.0.rst
Lines changed: 3 additions & 0 deletions b/doc/source/whatsnew/v2.3.0.rst
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/_libs/tslibs/offsets.pyx
Lines changed: 3 additions & 3 deletions b/pandas/_libs/tslibs/offsets.pyx
Lines changed: 3 additions & 3 deletions
diff --git a/pandas/conftest.py
Lines changed: 3 additions & 0 deletions b/pandas/conftest.py
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/core/arrays/categorical.py
Lines changed: 1 addition & 1 deletion b/pandas/core/arrays/categorical.py
Lines changed: 1 addition & 1 deletion
diff --git a/pandas/core/arrays/datetimes.py
Lines changed: 1 addition & 6 deletions b/pandas/core/arrays/datetimes.py
Lines changed: 1 addition & 6 deletions
diff --git a/pandas/core/frame.py
Lines changed: 5 additions & 5 deletions b/pandas/core/frame.py
Lines changed: 5 additions & 5 deletions
diff --git a/pandas/core/indexes/base.py
Lines changed: 27 additions & 26 deletions b/pandas/core/indexes/base.py
Lines changed: 27 additions & 26 deletions
diff --git a/pandas/core/resample.py
Lines changed: 2 additions & 2 deletions b/pandas/core/resample.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas/core/shared_docs.py
Lines changed: 2 additions & 2 deletions b/pandas/core/shared_docs.py
Lines changed: 2 additions & 2 deletions
diff --git a/pandas/core/strings/accessor.py
Lines changed: 5 additions & 5 deletions b/pandas/core/strings/accessor.py
Lines changed: 5 additions & 5 deletions
diff --git a/pandas/errors/__init__.py
Lines changed: 9 additions & 9 deletions b/pandas/errors/__init__.py
Lines changed: 9 additions & 9 deletions
diff --git a/pandas/plotting/_core.py
Lines changed: 2 additions & 2 deletions b/pandas/plotting/_core.py
Lines changed: 2 additions & 2 deletions
@@ -71,21 +71,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 
     MSG='Partially validate docstrings (EX03)' ;  echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX03 --ignore_functions \
-        pandas.Series.dt.day_name \
-        pandas.Series.str.len \
-        pandas.Series.cat.set_categories \
-        pandas.Series.plot.bar \
-        pandas.Series.plot.hist \
         pandas.Series.plot.line \
         pandas.Series.to_sql \
         pandas.Series.to_latex \
-        pandas.errors.CategoricalConversionWarning \
-        pandas.errors.ChainedAssignmentError \
-        pandas.errors.ClosedFileError \
         pandas.errors.DatabaseError \
         pandas.errors.IndexingError \
         pandas.errors.InvalidColumnName \
-        pandas.errors.NumExprClobberingError \
         pandas.errors.PossibleDataLossError \
         pandas.errors.PossiblePrecisionLoss \
         pandas.errors.SettingWithCopyError \
@@ -106,7 +97,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.DataFrame.to_sql \
         pandas.read_stata \
         pandas.core.resample.Resampler.pipe \
-        pandas.core.resample.Resampler.fillna \
         pandas.core.resample.Resampler.interpolate \
         pandas.plotting.scatter_matrix \
         pandas.pivot \
@@ -115,26 +105,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.Index.rename \
         pandas.Index.droplevel \
         pandas.Index.isin \
-        pandas.CategoricalIndex.set_categories \
         pandas.MultiIndex.names \
         pandas.MultiIndex.droplevel \
         pandas.IndexSlice \
-        pandas.DatetimeIndex.month_name \
-        pandas.DatetimeIndex.day_name \
-        pandas.core.window.rolling.Rolling.corr \
         pandas.Grouper \
-        pandas.core.groupby.SeriesGroupBy.apply \
-        pandas.core.groupby.DataFrameGroupBy.apply \
-        pandas.core.groupby.SeriesGroupBy.transform \
-        pandas.core.groupby.SeriesGroupBy.pipe \
-        pandas.core.groupby.DataFrameGroupBy.pipe \
-        pandas.core.groupby.DataFrameGroupBy.describe \
-        pandas.core.groupby.DataFrameGroupBy.idxmax \
-        pandas.core.groupby.DataFrameGroupBy.idxmin \
-        pandas.core.groupby.DataFrameGroupBy.value_counts \
-        pandas.core.groupby.SeriesGroupBy.describe \
-        pandas.core.groupby.DataFrameGroupBy.boxplot \
-        pandas.core.groupby.DataFrameGroupBy.hist \
         pandas.io.formats.style.Styler.map \
         pandas.io.formats.style.Styler.apply_index \
         pandas.io.formats.style.Styler.map_index \
@@ -152,20 +126,12 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.io.formats.style.Styler.text_gradient \
         pandas.DataFrame.values \
         pandas.DataFrame.groupby \
-        pandas.DataFrame.skew \
-        pandas.DataFrame.var \
         pandas.DataFrame.idxmax \
         pandas.DataFrame.idxmin \
-        pandas.DataFrame.last \
         pandas.DataFrame.pivot \
         pandas.DataFrame.sort_values \
-        pandas.DataFrame.tz_convert \
-        pandas.DataFrame.tz_localize \
-        pandas.DataFrame.plot.bar \
         pandas.DataFrame.plot.hexbin \
-        pandas.DataFrame.plot.hist \
         pandas.DataFrame.plot.line \
-        pandas.DataFrame.hist \
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi
 
@@ -101,6 +101,8 @@ Deprecations
 
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
+- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
 - Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
 -
 
@@ -119,6 +121,7 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
+- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
 - Bug in :func:`date_range` where a timestamp out of the valid range would be produced with a negative ``freq`` parameter (:issue:`56147`)
 -
 
 
@@ -4860,15 +4860,15 @@ cpdef to_offset(freq, bint is_period=False):
 
             tups = zip(split[0::4], split[1::4], split[2::4])
             for n, (sep, stride, name) in enumerate(tups):
-                if is_period is False and name in c_OFFSET_DEPR_FREQSTR:
+                if is_period is False and name.upper() in c_OFFSET_DEPR_FREQSTR:
                     warnings.warn(
                         f"\'{name}\' is deprecated and will be removed "
                         f"in a future version, please use "
-                        f"\'{c_OFFSET_DEPR_FREQSTR.get(name)}\' instead.",
+                        f"\'{c_OFFSET_DEPR_FREQSTR.get(name.upper())}\' instead.",
                         FutureWarning,
                         stacklevel=find_stack_level(),
                     )
-                    name = c_OFFSET_DEPR_FREQSTR[name]
+                    name = c_OFFSET_DEPR_FREQSTR[name.upper()]
                 if is_period is True and name in c_REVERSE_OFFSET_DEPR_FREQSTR:
                     if name.startswith("Y"):
                         raise ValueError(
 
@@ -1973,4 +1973,7 @@ def warsaw(request) -> str:
 
 @pytest.fixture
 def arrow_string_storage():
+    """
+    Fixture that lists possible PyArrow values for StringDtype storage field.
+    """
     return ("pyarrow", "pyarrow_numpy")
@@ -1082,7 +1082,7 @@ def set_categories(
         For :class:`pandas.Series`:
 
         >>> raw_cat = pd.Categorical(['a', 'b', 'c', 'A'],
-        ...                           categories=['a', 'b', 'c'], ordered=True)
+        ...                          categories=['a', 'b', 'c'], ordered=True)
         >>> ser = pd.Series(raw_cat)
         >>> ser
         0   a
 
@@ -1365,7 +1365,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]:
         >>> idx
         DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                       dtype='datetime64[ns]', freq='D')
-        >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
+        >>> idx.day_name(locale='pt_BR.utf8')  # doctest: +SKIP
         Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
         """
         values = self._local_timestamps()
@@ -2780,11 +2780,6 @@ def _generate_range(
         else:
             start = offset.rollback(start)  # type: ignore[assignment]
 
-    elif end and not offset.is_on_offset(end):
-        # Incompatible types in assignment (expression has type "datetime",
-        # variable has type "Optional[Timestamp]")
-        end = offset.rollback(end)  # type: ignore[assignment]
-
     # Unsupported operand types for < ("Timestamp" and "None")
     if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
         end = None
 
@@ -9223,11 +9223,11 @@ def groupby(
         You could also assign a list of column names or a list of index names.
 
         >>> df = pd.DataFrame({
-        ...        "lev1": [1, 1, 1, 2, 2, 2],
-        ...        "lev2": [1, 1, 2, 1, 1, 2],
-        ...        "lev3": [1, 2, 1, 2, 1, 2],
-        ...        "lev4": [1, 2, 3, 4, 5, 6],
-        ...        "values": [0, 1, 2, 3, 4, 5]})
+        ...                   "lev1": [1, 1, 1, 2, 2, 2],
+        ...                   "lev2": [1, 1, 2, 1, 1, 2],
+        ...                   "lev3": [1, 2, 1, 2, 1, 2],
+        ...                   "lev4": [1, 2, 3, 4, 5, 6],
+        ...                   "values": [0, 1, 2, 3, 4, 5]})
         >>> df
             lev1 lev2 lev3 lev4 values
         0   1    1    1    1    0
 
@@ -4809,11 +4809,18 @@ def _join_non_unique(
         left_idx, right_idx = get_join_indexers_non_unique(
             self._values, other._values, how=how, sort=sort
         )
-        mask = left_idx == -1
 
-        join_idx = self.take(left_idx)
-        right = other.take(right_idx)
-        join_index = join_idx.putmask(mask, right)
+        if how == "right":
+            join_index = other.take(right_idx)
+        else:
+            join_index = self.take(left_idx)
+
+        if how == "outer":
+            mask = left_idx == -1
+            if mask.any():
+                right = other.take(right_idx)
+                join_index = join_index.putmask(mask, right)
+
         if isinstance(join_index, ABCMultiIndex) and how == "outer":
             # test_join_index_levels
             join_index = join_index._sort_levels_monotonic()
@@ -4989,35 +4996,29 @@ def _join_monotonic(
         ridx: npt.NDArray[np.intp] | None
         lidx: npt.NDArray[np.intp] | None
 
-        if self.is_unique and other.is_unique:
-            # We can perform much better than the general case
-            if how == "left":
+        if how == "left":
+            if other.is_unique:
+                # We can perform much better than the general case
                 join_index = self
                 lidx = None
                 ridx = self._left_indexer_unique(other)
-            elif how == "right":
+            else:
+                join_array, lidx, ridx = self._left_indexer(other)
+                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
+        elif how == "right":
+            if self.is_unique:
+                # We can perform much better than the general case
                 join_index = other
                 lidx = other._left_indexer_unique(self)
                 ridx = None
-            elif how == "inner":
-                join_array, lidx, ridx = self._inner_indexer(other)
-                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
-            elif how == "outer":
-                join_array, lidx, ridx = self._outer_indexer(other)
-                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
-        else:
-            if how == "left":
-                join_array, lidx, ridx = self._left_indexer(other)
-            elif how == "right":
+            else:
                 join_array, ridx, lidx = other._left_indexer(self)
-            elif how == "inner":
-                join_array, lidx, ridx = self._inner_indexer(other)
-            elif how == "outer":
-                join_array, lidx, ridx = self._outer_indexer(other)
-
-            assert lidx is not None
-            assert ridx is not None
-
+                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
+        elif how == "inner":
+            join_array, lidx, ridx = self._inner_indexer(other)
+            join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
+        elif how == "outer":
+            join_array, lidx, ridx = self._outer_indexer(other)
             join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
 
         lidx = None if lidx is None else ensure_platform_int(lidx)
 
@@ -1039,15 +1039,15 @@ def interpolate(
         2023-03-01 07:00:04    3
         Freq: s, dtype: int64
 
-        Upsample the dataframe to 0.5Hz by providing the period time of 2s.
+        Downsample the series to 0.5Hz by providing the period time of 2s.
 
         >>> series.resample("2s").interpolate("linear")
         2023-03-01 07:00:00    1
         2023-03-01 07:00:02    2
         2023-03-01 07:00:04    3
         Freq: 2s, dtype: int64
 
-        Downsample the dataframe to 2Hz by providing the period time of 500ms.
+        Upsample the series to 2Hz by providing the period time of 500ms.
 
         >>> series.resample("500ms").interpolate("linear")
         2023-03-01 07:00:00.000    1.0
 
@@ -839,7 +839,7 @@
     Consider a dataset containing food consumption in Argentina.
 
     >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48],
-    ...                     'co2_emissions': [37.2, 19.66, 1712]}},
+    ...                   'co2_emissions': [37.2, 19.66, 1712]}},
     ...                   index=['Pork', 'Wheat Products', 'Beef'])
 
     >>> df
@@ -904,7 +904,7 @@
     Consider a dataset containing food consumption in Argentina.
 
     >>> df = pd.DataFrame({{'consumption': [10.51, 103.11, 55.48],
-    ...                     'co2_emissions': [37.2, 19.66, 1712]}},
+    ...                   'co2_emissions': [37.2, 19.66, 1712]}},
     ...                   index=['Pork', 'Wheat Products', 'Beef'])
 
     >>> df
 
@@ -3055,11 +3055,11 @@ def len(self):
         number of entries for dictionaries, lists or tuples.
 
         >>> s = pd.Series(['dog',
-        ...                 '',
-        ...                 5,
-        ...                 {'foo' : 'bar'},
-        ...                 [2, 3, 5, 7],
-        ...                 ('one', 'two', 'three')])
+        ...                '',
+        ...                5,
+        ...                {'foo' : 'bar'},
+        ...                [2, 3, 5, 7],
+        ...                ('one', 'two', 'three')])
         >>> s
         0                  dog
         1
 
@@ -469,7 +469,7 @@ class ChainedAssignmentError(Warning):
     --------
     >>> pd.options.mode.copy_on_write = True
     >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
-    >>> df["A"][0:3] = 10 # doctest: +SKIP
+    >>> df["A"][0:3] = 10  # doctest: +SKIP
     ... # ChainedAssignmentError: ...
     >>> pd.options.mode.copy_on_write = False
     """
@@ -561,10 +561,10 @@ class NumExprClobberingError(NameError):
     Examples
     --------
     >>> df = pd.DataFrame({'abs': [1, 1, 1]})
-    >>> df.query("abs > 2") # doctest: +SKIP
+    >>> df.query("abs > 2")  # doctest: +SKIP
     ... # NumExprClobberingError: Variables in expression "(abs) > (2)" overlap...
     >>> sin, a = 1, 2
-    >>> pd.eval("sin + a", engine='numexpr') # doctest: +SKIP
+    >>> pd.eval("sin + a", engine='numexpr')  # doctest: +SKIP
     ... # NumExprClobberingError: Variables in expression "(sin) + (a)" overlap...
     """
 
@@ -677,9 +677,9 @@ class ClosedFileError(Exception):
 
     Examples
     --------
-    >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
-    >>> store.close() # doctest: +SKIP
-    >>> store.keys() # doctest: +SKIP
+    >>> store = pd.HDFStore('my-store', 'a')  # doctest: +SKIP
+    >>> store.close()  # doctest: +SKIP
+    >>> store.keys()  # doctest: +SKIP
     ... # ClosedFileError: my-store file is not open!
     """
 
@@ -773,9 +773,9 @@ class CategoricalConversionWarning(Warning):
     Examples
     --------
     >>> from pandas.io.stata import StataReader
-    >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP
-    ...   for i, block in enumerate(reader):
-    ...      print(i, block)
+    >>> with StataReader('dta_file', chunksize=2) as reader:  # doctest: +SKIP
+    ...     for i, block in enumerate(reader):
+    ...         print(i, block)
     ... # CategoricalConversionWarning: One or more series with value labels...
     """
 
 
@@ -1114,7 +1114,7 @@ def line(
         .. plot::
             :context: close-figs
 
-            >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
+            >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
             >>> ax = df.plot.bar(x='lab', y='val', rot=0)
 
         Plot a whole dataframe to a bar plot. Each column is assigned a
@@ -1195,7 +1195,7 @@ def bar(  # pylint: disable=disallowed-name
         """
         See Also
         --------
-        DataFrame.plot.bar: Vertical bar plot.
+        DataFrame.plot.bar : Vertical bar plot.
         DataFrame.plot : Make plots of DataFrame using matplotlib.
         matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.
Original file line number	Diff line number	Diff line change
`@@ -101,6 +101,8 @@ Deprecations`
`101`	`101`
`102`	`102`	`Performance improvements`
`103`	`103`	`~~~~~~~~~~~~~~~~~~~~~~~~`
	`104`	+- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
	`105`	+- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
`104`	`106`	- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
`105`	`107`	`-`
`106`	`108`
`@@ -119,6 +121,7 @@ Categorical`
`119`	`121`
`120`	`122`	`Datetimelike`
`121`	`123`	`^^^^^^^^^^^^`
	`124`	+- Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
`122`	`125`	- Bug in :func:`date_range` where a timestamp out of the valid range would be produced with a negative ``freq`` parameter (:issue:`56147`)
`123`	`126`	`-`
`124`	`127`