Skip to content

Commit a2b65e1

Browse files
committed
Merge remote-tracking branch 'upstream/main' into read-csv-from-directory
2 parents 2a66b92 + 25e6462 commit a2b65e1

File tree

9 files changed

+51
-52
lines changed

9 files changed

+51
-52
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.11.8
22+
rev: v0.11.12
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -74,7 +74,7 @@ repos:
7474
hooks:
7575
- id: isort
7676
- repo: https://github.com/asottile/pyupgrade
77-
rev: v3.19.1
77+
rev: v3.20.0
7878
hooks:
7979
- id: pyupgrade
8080
args: [--py310-plus]
@@ -95,14 +95,14 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v20.1.3
98+
rev: v20.1.5
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include
102102
args: [-i]
103103
types_or: [c, c++]
104104
- repo: https://github.com/trim21/pre-commit-mirror-meson
105-
rev: v1.8.0
105+
rev: v1.8.1
106106
hooks:
107107
- id: meson-fmt
108108
args: ['--inplace']

doc/cheatsheet/Pandas_Cheat_Sheet.pdf

28.6 KB
Binary file not shown.
-1 Bytes
Binary file not shown.

doc/source/user_guide/indexing.rst

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,16 +1461,33 @@ Looking up values by index/column labels
14611461

14621462
Sometimes you want to extract a set of values given a sequence of row labels
14631463
and column labels; this can be achieved by ``pandas.factorize`` and NumPy indexing.
1464-
For instance:
14651464

1466-
.. ipython:: python
1465+
For heterogeneous column types, we subset columns to avoid unnecessary NumPy conversions:
1466+
1467+
.. code-block:: python
1468+
1469+
def pd_lookup_het(df, row_labels, col_labels):
1470+
rows = df.index.get_indexer(row_labels)
1471+
cols = df.columns.get_indexer(col_labels)
1472+
sub = df.take(np.unique(cols), axis=1)
1473+
sub = sub.take(np.unique(rows), axis=0)
1474+
rows = sub.index.get_indexer(row_labels)
1475+
values = sub.melt()["value"]
1476+
cols = sub.columns.get_indexer(col_labels)
1477+
flat_index = rows + cols * len(sub)
1478+
result = values[flat_index]
1479+
return result
1480+
1481+
For homogeneous column types, it is fastest to skip column subsetting and go directly to NumPy:
1482+
1483+
.. code-block:: python
14671484
1468-
df = pd.DataFrame({'col': ["A", "A", "B", "B"],
1469-
'A': [80, 23, np.nan, 22],
1470-
'B': [80, 55, 76, 67]})
1471-
df
1472-
idx, cols = pd.factorize(df['col'])
1473-
df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
1485+
def pd_lookup_hom(df, row_labels, col_labels):
1486+
rows = df.index.get_indexer(row_labels)
1487+
df = df.loc[:, sorted(set(col_labels))]
1488+
cols = df.columns.get_indexer(col_labels)
1489+
result = df.to_numpy()[rows, cols]
1490+
return result
14741491
14751492
Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
14761493
which was deprecated in version 1.2.0 and removed in version 2.0.0.

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,7 @@ Other
891891
- Bug in :meth:`DataFrame.query` which raised an exception when querying integer column names using backticks. (:issue:`60494`)
892892
- Bug in :meth:`DataFrame.shift` where passing a ``freq`` on a DataFrame with no columns did not shift the index correctly. (:issue:`60102`)
893893
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning :class:`RangeIndex` columns (:issue:`57293`)
894+
- Bug in :meth:`DataFrame.sort_values` where sorting by a column explicitly named ``None`` raised a ``KeyError`` instead of sorting by the column as expected. (:issue:`61512`)
894895
- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
895896
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
896897
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)

pandas/core/generic.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,11 +1645,7 @@ def _is_label_reference(self, key: Level, axis: Axis = 0) -> bool:
16451645
axis_int = self._get_axis_number(axis)
16461646
other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)
16471647

1648-
return (
1649-
key is not None
1650-
and is_hashable(key)
1651-
and any(key in self.axes[ax] for ax in other_axes)
1652-
)
1648+
return is_hashable(key) and any(key in self.axes[ax] for ax in other_axes)
16531649

16541650
@final
16551651
def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool:

pandas/plotting/_misc.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,15 @@ def autocorrelation_plot(series: Series, ax: Axes | None = None, **kwargs) -> Ax
633633
"""
634634
Autocorrelation plot for time series.
635635
636+
This method generates an autocorrelation plot for a given time series,
637+
which helps to identify any periodic structure or correlation within the
638+
data across various lags. It shows the correlation of a time series with a
639+
delayed copy of itself as a function of delay. Autocorrelation plots are useful for
640+
checking randomness in a data set. If the data are random, the autocorrelations
641+
should be near zero for any and all time-lag separations. If the data are not
642+
random, then one or more of the autocorrelations will be significantly
643+
non-zero.
644+
636645
Parameters
637646
----------
638647
series : Series

pandas/tests/arrays/integer/test_arithmetic.py

Lines changed: 4 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -178,25 +178,9 @@ def test_error_invalid_values(data, all_arithmetic_operators):
178178
ops = getattr(s, op)
179179

180180
# invalid scalars
181-
msg = "|".join(
182-
[
183-
r"can only perform ops with numeric values",
184-
r"IntegerArray cannot perform the operation mod",
185-
r"unsupported operand type",
186-
r"can only concatenate str \(not \"int\"\) to str",
187-
"not all arguments converted during string",
188-
"ufunc '.*' not supported for the input types, and the inputs could not",
189-
"ufunc '.*' did not contain a loop with signature matching types",
190-
"Addition/subtraction of integers and integer-arrays with Timestamp",
191-
"has no kernel",
192-
"not implemented",
193-
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
194-
"not supported for dtype",
195-
]
196-
)
197-
with pytest.raises(TypeError, match=msg):
181+
with tm.external_error_raised(TypeError):
198182
ops("foo")
199-
with pytest.raises(TypeError, match=msg):
183+
with tm.external_error_raised(TypeError):
200184
ops(pd.Timestamp("20180101"))
201185

202186
# invalid array-likes
@@ -214,25 +198,10 @@ def test_error_invalid_values(data, all_arithmetic_operators):
214198
# more-correct than np.nan here.
215199
tm.assert_series_equal(res, expected)
216200
else:
217-
with pytest.raises(TypeError, match=msg):
201+
with tm.external_error_raised(TypeError):
218202
ops(str_ser)
219203

220-
msg = "|".join(
221-
[
222-
"can only perform ops with numeric values",
223-
"cannot perform .* with this index type: DatetimeArray",
224-
"Addition/subtraction of integers and integer-arrays "
225-
"with DatetimeArray is no longer supported. *",
226-
"unsupported operand type",
227-
r"can only concatenate str \(not \"int\"\) to str",
228-
"not all arguments converted during string",
229-
"cannot subtract DatetimeArray from ndarray",
230-
"has no kernel",
231-
"not implemented",
232-
"not supported for dtype",
233-
]
234-
)
235-
with pytest.raises(TypeError, match=msg):
204+
with tm.external_error_raised(TypeError):
236205
ops(pd.Series(pd.date_range("20180101", periods=len(s))))
237206

238207

pandas/tests/frame/methods/test_sort_values.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,13 @@ def test_sort_values_no_op_reset_index(self):
630630
expected = DataFrame({"A": [10, 20], "B": [1, 5]})
631631
tm.assert_frame_equal(result, expected)
632632

633+
def test_sort_by_column_named_none(self):
634+
# GH#61512
635+
df = DataFrame([[3, 1], [2, 2]], columns=[None, "C1"])
636+
result = df.sort_values(by=None)
637+
expected = DataFrame([[2, 2], [3, 1]], columns=[None, "C1"], index=[1, 0])
638+
tm.assert_frame_equal(result, expected)
639+
633640

634641
class TestDataFrameSortKey: # test key sorting (issue 27237)
635642
def test_sort_values_inplace_key(self, sort_by_key):

0 commit comments

Comments
 (0)