-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN/TST: normalize test_frame_apply #40113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,62 +38,65 @@ def int_frame_const_col(): | |
def test_apply(float_frame): | ||
with np.errstate(all="ignore"): | ||
# ufunc | ||
applied = float_frame.apply(np.sqrt) | ||
tm.assert_series_equal(np.sqrt(float_frame["A"]), applied["A"]) | ||
result = np.sqrt(float_frame["A"]) | ||
expected = float_frame.apply(np.sqrt)["A"] | ||
tm.assert_series_equal(result, expected) | ||
|
||
# aggregator | ||
applied = float_frame.apply(np.mean) | ||
assert applied["A"] == np.mean(float_frame["A"]) | ||
result = float_frame.apply(np.mean)["A"] | ||
expected = np.mean(float_frame["A"]) | ||
assert result == expected | ||
|
||
d = float_frame.index[0] | ||
applied = float_frame.apply(np.mean, axis=1) | ||
assert applied[d] == np.mean(float_frame.xs(d)) | ||
assert applied.index is float_frame.index # want this | ||
result = float_frame.apply(np.mean, axis=1) | ||
expected = np.mean(float_frame.xs(d)) | ||
assert result[d] == expected | ||
assert result.index is float_frame.index | ||
|
||
# GH 9573 | ||
df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) | ||
result = df.apply(lambda ts: ts.astype("category")) | ||
|
||
# invalid axis | ||
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) | ||
msg = "No axis named 2 for object type DataFrame" | ||
with pytest.raises(ValueError, match=msg): | ||
df.apply(lambda x: x, 2) | ||
|
||
# GH 9573 | ||
df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) | ||
df = df.apply(lambda ts: ts.astype("category")) | ||
|
||
assert df.shape == (4, 2) | ||
assert isinstance(df["c0"].dtype, CategoricalDtype) | ||
assert isinstance(df["c1"].dtype, CategoricalDtype) | ||
assert result.shape == (4, 2) | ||
assert isinstance(result["c0"].dtype, CategoricalDtype) | ||
assert isinstance(result["c1"].dtype, CategoricalDtype) | ||
|
||
|
||
def test_apply_axis1_with_ea(): | ||
# GH#36785 | ||
df = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) | ||
result = df.apply(lambda x: x, axis=1) | ||
tm.assert_frame_equal(result, df) | ||
# GH 36785 | ||
expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) | ||
result = expected.apply(lambda x: x, axis=1) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_apply_mixed_datetimelike(): | ||
# mixed datetimelike | ||
# GH 7778 | ||
df = DataFrame( | ||
expected = DataFrame( | ||
{ | ||
"A": date_range("20130101", periods=3), | ||
"B": pd.to_timedelta(np.arange(3), unit="s"), | ||
} | ||
) | ||
result = df.apply(lambda x: x, axis=1) | ||
tm.assert_frame_equal(result, df) | ||
result = expected.apply(lambda x: x, axis=1) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_apply_empty(float_frame): | ||
# empty | ||
empty_frame = DataFrame() | ||
|
||
applied = empty_frame.apply(np.sqrt) | ||
assert applied.empty | ||
result = empty_frame.apply(np.sqrt) | ||
assert result.empty | ||
|
||
applied = empty_frame.apply(np.mean) | ||
assert applied.empty | ||
result = empty_frame.apply(np.mean) | ||
assert result.empty | ||
|
||
no_rows = float_frame[:0] | ||
result = no_rows.apply(lambda x: x.mean()) | ||
|
@@ -108,7 +111,7 @@ def test_apply_empty(float_frame): | |
# GH 2476 | ||
expected = DataFrame(index=["a"]) | ||
result = expected.apply(lambda x: x["a"], axis=1) | ||
tm.assert_frame_equal(expected, result) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_apply_with_reduce_empty(): | ||
|
@@ -285,14 +288,13 @@ def _assert_raw(x): | |
float_frame.apply(_assert_raw, raw=True) | ||
float_frame.apply(_assert_raw, axis=1, raw=True) | ||
|
||
result0 = float_frame.apply(np.mean, raw=True) | ||
result1 = float_frame.apply(np.mean, axis=1, raw=True) | ||
|
||
expected0 = float_frame.apply(lambda x: x.values.mean()) | ||
expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1) | ||
result = float_frame.apply(np.mean, raw=True) | ||
expected = float_frame.apply(lambda x: x.values.mean()) | ||
tm.assert_series_equal(result, expected) | ||
|
||
tm.assert_series_equal(result0, expected0) | ||
tm.assert_series_equal(result1, expected1) | ||
result = float_frame.apply(np.mean, axis=1, raw=True) | ||
expected = float_frame.apply(lambda x: x.values.mean(), axis=1) | ||
tm.assert_series_equal(result, expected) | ||
|
||
# no reduction | ||
result = float_frame.apply(lambda x: x * 2, raw=True) | ||
|
@@ -306,8 +308,9 @@ def _assert_raw(x): | |
|
||
def test_apply_axis1(float_frame): | ||
d = float_frame.index[0] | ||
tapplied = float_frame.apply(np.mean, axis=1) | ||
assert tapplied[d] == np.mean(float_frame.xs(d)) | ||
result = float_frame.apply(np.mean, axis=1)[d] | ||
expected = np.mean(float_frame.xs(d)) | ||
assert result == expected | ||
|
||
|
||
def test_apply_mixed_dtype_corner(): | ||
|
@@ -401,27 +404,25 @@ def test_apply_reduce_to_dict(): | |
# GH 25196 37544 | ||
data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"]) | ||
|
||
result0 = data.apply(dict, axis=0) | ||
expected0 = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) | ||
tm.assert_series_equal(result0, expected0) | ||
result = data.apply(dict, axis=0) | ||
expected = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) | ||
tm.assert_series_equal(result, expected) | ||
|
||
result1 = data.apply(dict, axis=1) | ||
expected1 = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) | ||
tm.assert_series_equal(result1, expected1) | ||
result = data.apply(dict, axis=1) | ||
expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
||
def test_apply_differently_indexed(): | ||
df = DataFrame(np.random.randn(20, 10)) | ||
|
||
result0 = df.apply(Series.describe, axis=0) | ||
expected0 = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) | ||
tm.assert_frame_equal(result0, expected0) | ||
result = df.apply(Series.describe, axis=0) | ||
expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
result1 = df.apply(Series.describe, axis=1) | ||
expected1 = DataFrame( | ||
{i: v.describe() for i, v in df.T.items()}, columns=df.index | ||
).T | ||
tm.assert_frame_equal(result1, expected1) | ||
result = df.apply(Series.describe, axis=1) | ||
expected = DataFrame({i: v.describe() for i, v in df.T.items()}, columns=df.index).T | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_apply_modify_traceback(): | ||
|
@@ -525,7 +526,7 @@ def f(r): | |
|
||
|
||
def test_apply_convert_objects(): | ||
data = DataFrame( | ||
expected = DataFrame( | ||
{ | ||
"A": [ | ||
"foo", | ||
|
@@ -572,8 +573,8 @@ def test_apply_convert_objects(): | |
} | ||
) | ||
|
||
result = data.apply(lambda x: x, axis=1) | ||
tm.assert_frame_equal(result._convert(datetime=True), data) | ||
result = expected.apply(lambda x: x, axis=1)._convert(datetime=True) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_apply_attach_name(float_frame): | ||
|
@@ -635,17 +636,17 @@ def test_applymap(float_frame): | |
float_frame.applymap(type) | ||
|
||
# GH 465: function returning tuples | ||
result = float_frame.applymap(lambda x: (x, x)) | ||
assert isinstance(result["A"][0], tuple) | ||
result = float_frame.applymap(lambda x: (x, x))["A"][0] | ||
assert isinstance(result, tuple) | ||
|
||
# GH 2909: object conversion to float in constructor? | ||
df = DataFrame(data=[1, "a"]) | ||
result = df.applymap(lambda x: x) | ||
assert result.dtypes[0] == object | ||
result = df.applymap(lambda x: x).dtypes[0] | ||
assert result == object | ||
|
||
df = DataFrame(data=[1.0, "a"]) | ||
result = df.applymap(lambda x: x) | ||
assert result.dtypes[0] == object | ||
result = df.applymap(lambda x: x).dtypes[0] | ||
assert result == object | ||
|
||
# GH 2786 | ||
df = DataFrame(np.random.random((3, 4))) | ||
|
@@ -672,10 +673,10 @@ def test_applymap(float_frame): | |
DataFrame(index=list("ABC")), | ||
DataFrame({"A": [], "B": [], "C": []}), | ||
] | ||
for frame in empty_frames: | ||
for expected in empty_frames: | ||
for func in [round, lambda x: x]: | ||
result = frame.applymap(func) | ||
tm.assert_frame_equal(result, frame) | ||
result = expected.applymap(func) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_applymap_na_ignore(float_frame): | ||
|
@@ -743,7 +744,8 @@ def test_frame_apply_dont_convert_datetime64(): | |
df = df.applymap(lambda x: x + BDay()) | ||
df = df.applymap(lambda x: x + BDay()) | ||
|
||
assert df.x1.dtype == "M8[ns]" | ||
result = df.x1.dtype | ||
assert result == "M8[ns]" | ||
|
||
|
||
def test_apply_non_numpy_dtype(): | ||
|
@@ -786,16 +788,18 @@ def apply_list(row): | |
|
||
|
||
def test_apply_noreduction_tzaware_object(): | ||
# https://github.com/pandas-dev/pandas/issues/31505 | ||
df = DataFrame({"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]") | ||
result = df.apply(lambda x: x) | ||
tm.assert_frame_equal(result, df) | ||
result = df.apply(lambda x: x.copy()) | ||
tm.assert_frame_equal(result, df) | ||
# GH 31505 | ||
expected = DataFrame( | ||
{"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" | ||
) | ||
result = expected.apply(lambda x: x) | ||
tm.assert_frame_equal(result, expected) | ||
result = expected.apply(lambda x: x.copy()) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_apply_function_runs_once(): | ||
# https://github.com/pandas-dev/pandas/issues/30815 | ||
# GH 30815 | ||
|
||
df = DataFrame({"a": [1, 2, 3]}) | ||
names = [] # Save row names function is applied to | ||
|
@@ -815,7 +819,7 @@ def non_reducing_function(row): | |
|
||
|
||
def test_apply_raw_function_runs_once(): | ||
# https://github.com/pandas-dev/pandas/issues/34506 | ||
# GH 34506 | ||
|
||
df = DataFrame({"a": [1, 2, 3]}) | ||
values = [] # Save row values function is applied to | ||
|
@@ -885,11 +889,11 @@ def test_infer_row_shape(): | |
# GH 17437 | ||
# if row shape is changing, infer it | ||
df = DataFrame(np.random.rand(10, 2)) | ||
result = df.apply(np.fft.fft, axis=0) | ||
assert result.shape == (10, 2) | ||
result = df.apply(np.fft.fft, axis=0).shape | ||
assert result == (10, 2) | ||
|
||
result = df.apply(np.fft.rfft, axis=0) | ||
assert result.shape == (6, 2) | ||
result = df.apply(np.fft.rfft, axis=0).shape | ||
assert result == (6, 2) | ||
|
||
|
||
def test_with_dictlike_columns(): | ||
|
@@ -1604,7 +1608,7 @@ def test_apply_dtype(col): | |
|
||
|
||
def test_apply_mutating(): | ||
# GH#35462 case where applied func pins a new BlockManager to a row | ||
# GH 35462 case where applied func pins a new BlockManager to a row | ||
df = DataFrame({"a": range(100), "b": range(100, 200)}) | ||
|
||
def func(row): | ||
|
@@ -1623,7 +1627,7 @@ def func(row): | |
|
||
|
||
def test_apply_empty_list_reduce(): | ||
# GH#35683 get columns correct | ||
# GH 35683 get columns correct | ||
df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"]) | ||
|
||
result = df.apply(lambda x: [], result_type="reduce") | ||
|
@@ -1643,7 +1647,7 @@ def test_apply_no_suffix_index(): | |
|
||
|
||
def test_apply_raw_returns_string(): | ||
# https://github.com/pandas-dev/pandas/issues/35940 | ||
# GH 35940 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I personally don't find this an improvement. I know we are inconsistent about it, but if there is a full link, it's much easier to go to the issue. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is how we do it for every other comment about the issue number. If we want to require a full link, it's possible, but it would need a pre-commit hook. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't do that for "every other comment". We currently use the full link in 400 cases. Not every detail needs to be controlled with a pre-commit hook. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And how many without a link? I don't know, but I would say it's way higher. We have to have a standard and then enforce it. We simply cannot track stylistic things any other way or involve personal preference here (once we agree on a standard). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche - Thanks for the feedback here, I didn't realize there wasn't a consensus. I actually agree on the preference for a link (but also more strongly prefer consistency). I've reverted these changes. Assuming we can get a consensus, if things were made consistent and we updated the dev docs, it seems to me that consistency would become the norm and not require enforcement after a short amount of time. If it doesn't turn out to be the case and becomes a hassle, we could then implement a pre-commit hook. |
||
df = DataFrame({"A": ["aa", "bbb"]}) | ||
result = df.apply(lambda x: x[0], axis=1, raw=True) | ||
expected = Series(["aa", "bbb"]) | ||
|
Uh oh!
There was an error while loading. Please reload this page.