-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
TST add test for dtype consistency with pd replace #23305 #35234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
e15488e
9b699b5
65b040b
22e0779
4ef1a53
3deef23
0fce9e6
d440c52
5569894
e22a632
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1420,3 +1420,83 @@ def test_replace_period_ignore_float(self): | |
result = df.replace(1.0, 0.0) | ||
expected = pd.DataFrame({"Per": [pd.Period("2020-01")] * 3}) | ||
tm.assert_frame_equal(expected, result) | ||
|
||
def test_replace_value_category_type(self): | ||
""" | ||
Test to ensure category dtypes are maintained | ||
after replace with direct values | ||
""" | ||
|
||
# create input data | ||
input_dict = { | ||
"col1": [1, 2, 3, 4], | ||
"col2": ["a", "b", "c", "d"], | ||
"col3": [1.5, 2.5, 3.5, 4.5], | ||
"col4": ["cat1", "cat2", "cat3", "cat4"], | ||
"col5": ["obj1", "obj2", "obj3", "obj4"], | ||
} | ||
# explicitly cast columns as category and order them | ||
input_df = pd.DataFrame(data=input_dict).astype( | ||
{"col2": "category", "col4": "category"} | ||
) | ||
input_df["col2"] = input_df["col2"].cat.reorder_categories( | ||
["a", "b", "c", "d"], ordered=True | ||
) | ||
input_df["col4"] = input_df["col4"].cat.reorder_categories( | ||
["cat1", "cat2", "cat3", "cat4"], ordered=True | ||
) | ||
|
||
# create expected dataframe | ||
expected_dict = { | ||
"col1": [1, 2, 3, 4], | ||
"col2": ["a", "b", "c", "z"], | ||
"col3": [1.5, 2.5, 3.5, 4.5], | ||
"col4": ["cat1", "catX", "cat3", "cat4"], | ||
"col5": ["obj9", "obj2", "obj3", "obj4"], | ||
} | ||
# explicitly cast columns as category and order them | ||
expected_df = pd.DataFrame(data=expected_dict).astype( | ||
{"col2": "category", "col4": "category"} | ||
) | ||
expected_df["col2"] = expected_df["col2"].cat.reorder_categories( | ||
["a", "b", "c", "z"], ordered=True | ||
) | ||
expected_df["col4"] = expected_df["col4"].cat.reorder_categories( | ||
["cat1", "catX", "cat3", "cat4"], ordered=True | ||
) | ||
|
||
# replace values in input dataframe | ||
input_df = input_df.replace("d", "z") | ||
input_df = input_df.replace("obj1", "obj9") | ||
input_df = input_df.replace("cat2", "catX") | ||
|
||
tm.assert_frame_equal(input_df, expected_df) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to the comments below: please name these `result` and `expected`. |
||
|
||
@pytest.mark.xfail(
    reason="category dtype gets changed to object type after replace, see #35268",
    strict=True,
)
def test_replace_dict_category_type(self, input_category_df, expected_category_df):
    """
    Test to ensure category dtypes are maintained
    after replace with dict values.

    Every column of a one-row frame is cast to category, then all values
    are replaced in a single ``DataFrame.replace`` call that takes a
    ``{old: new}`` dict. The outcome is compared against a frame built
    directly from the new values and cast to category the same way.
    """
    # NOTE(review): the ``input_category_df`` / ``expected_category_df``
    # arguments are not used anywhere in this body. Confirm that fixtures
    # with these names exist (or drop the arguments); otherwise the xfail
    # mark may be hiding a fixture error rather than the dtype issue.

    # create input dataframe
    input_dict = {"col1": ["a"], "col2": ["obj1"], "col3": ["cat1"]}
    # explicitly cast columns as category
    input_df = pd.DataFrame(data=input_dict).astype(
        {"col1": "category", "col2": "category", "col3": "category"}
    )

    # create expected dataframe
    expected_dict = {"col1": ["z"], "col2": ["obj9"], "col3": ["catX"]}
    # explicitly cast columns as category
    expected = pd.DataFrame(data=expected_dict).astype(
        {"col1": "category", "col2": "category", "col3": "category"}
    )

    # replace values in input dataframe using a dict
    result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})

    tm.assert_frame_equal(result, expected)
Uh oh!
There was an error while loading. Please reload this page.