Skip to content

TST: de-xfail pyarrow parser tests #56035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 56 additions & 16 deletions pandas/tests/io/parser/test_comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@
from pandas import DataFrame
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ValueError: The 'comment' option is not supported
@pytest.mark.parametrize("na_values", [None, ["NaN"]])
def test_comment(all_parsers, na_values):
parser = all_parsers
Expand All @@ -24,11 +21,15 @@ def test_comment(all_parsers, na_values):
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", na_values=na_values)
return
result = parser.read_csv(StringIO(data), comment="#", na_values=na_values)
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
@pytest.mark.parametrize(
"read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}]
)
Expand All @@ -43,15 +44,25 @@ def test_line_comment(all_parsers, read_kwargs, request):
if read_kwargs.get("delim_whitespace"):
data = data.replace(",", " ")
elif read_kwargs.get("lineterminator"):
if parser.engine != "c":
mark = pytest.mark.xfail(
reason="Custom terminator not supported with Python engine"
)
request.applymarker(mark)

data = data.replace("\n", read_kwargs.get("lineterminator"))

read_kwargs["comment"] = "#"
if parser.engine == "pyarrow":
if "lineterminator" in read_kwargs:
msg = (
"The 'lineterminator' option is not supported with the 'pyarrow' engine"
)
else:
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **read_kwargs)
return
elif parser.engine == "python" and read_kwargs.get("lineterminator"):
msg = r"Custom line terminators not supported in python parser \(yet\)"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), **read_kwargs)
return

result = parser.read_csv(StringIO(data), **read_kwargs)

expected = DataFrame(
Expand All @@ -60,7 +71,6 @@ def test_line_comment(all_parsers, read_kwargs, request):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
def test_comment_skiprows(all_parsers):
parser = all_parsers
data = """# empty
Expand All @@ -75,11 +85,16 @@ def test_comment_skiprows(all_parsers):
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", skiprows=4)
return

result = parser.read_csv(StringIO(data), comment="#", skiprows=4)
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
def test_comment_header(all_parsers):
parser = all_parsers
data = """# empty
Expand All @@ -93,11 +108,15 @@ def test_comment_header(all_parsers):
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", header=1)
return
result = parser.read_csv(StringIO(data), comment="#", header=1)
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
def test_comment_skiprows_header(all_parsers):
parser = all_parsers
data = """# empty
Expand All @@ -115,15 +134,28 @@ def test_comment_skiprows_header(all_parsers):
expected = DataFrame(
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
return

result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"])
def test_custom_comment_char(all_parsers, comment_char):
parser = all_parsers
data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"

if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data.replace("#", comment_char)), comment=comment_char
)
return
result = parser.read_csv(
StringIO(data.replace("#", comment_char)), comment=comment_char
)
Expand All @@ -132,7 +164,6 @@ def test_custom_comment_char(all_parsers, comment_char):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
@pytest.mark.parametrize("header", ["infer", None])
def test_comment_first_line(all_parsers, header):
# see gh-4623
Expand All @@ -144,11 +175,15 @@ def test_comment_first_line(all_parsers, header):
else:
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])

if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", header=header)
return
result = parser.read_csv(StringIO(data), comment="#", header=header)
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'comment' option is not supported
def test_comment_char_in_default_value(all_parsers, request):
# GH#34002
if all_parsers.engine == "c":
Expand All @@ -164,6 +199,11 @@ def test_comment_char_in_default_value(all_parsers, request):
"4,5#,6,10\n"
"7,8,#N/A,11\n"
)
if parser.engine == "pyarrow":
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
return
result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
expected = DataFrame(
{
Expand Down
76 changes: 64 additions & 12 deletions pandas/tests/io/parser/test_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,21 @@
)
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow # ValueError: The 'converters' option is not supported
def test_converters_type_must_be_dict(all_parsers):
    """Check that a non-dict ``converters`` argument is rejected.

    With the pyarrow engine the ``converters`` option is refused outright
    with a ``ValueError``; the other engines raise a ``TypeError`` about
    the converters type.
    """
    csv_text = "index,A,B,C,D\nfoo,2,3,4,5\n"

    # Pick the expected failure up front so read_csv is spelled only once.
    if all_parsers.engine == "pyarrow":
        exc_type = ValueError
        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
    else:
        exc_type = TypeError
        msg = "Type converters.+"

    with pytest.raises(exc_type, match=msg):
        all_parsers.read_csv(StringIO(csv_text), converters=0)


@xfail_pyarrow # ValueError: The 'converters' option is not supported
@pytest.mark.parametrize("column", [3, "D"])
@pytest.mark.parametrize(
"converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer.
Expand All @@ -41,6 +41,12 @@ def test_converters(all_parsers, column, converter):
b,3,4,01/02/2009
c,4,5,01/03/2009
"""
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), converters={column: converter})
return

result = parser.read_csv(StringIO(data), converters={column: converter})

expected = parser.read_csv(StringIO(data))
Expand All @@ -49,21 +55,26 @@ def test_converters(all_parsers, column, converter):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'converters' option is not supported
def test_converters_no_implicit_conv(all_parsers):
    """Check that a converted column is not implicitly cast afterwards.

    Column 0 is passed through a strip-only converter, so its values are
    expected to come back as the original strings. With the pyarrow
    engine the ``converters`` option is rejected with a ``ValueError``.
    """
    # see gh-2184
    raw = "000102,1.2,A\n001245,2,B"
    read_kwargs = {"header": None, "converters": {0: lambda val: val.strip()}}

    if all_parsers.engine == "pyarrow":
        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
        with pytest.raises(ValueError, match=msg):
            all_parsers.read_csv(StringIO(raw), **read_kwargs)
        return

    frame = all_parsers.read_csv(StringIO(raw), **read_kwargs)

    # Column 0 should not be cast to numeric and should remain as object.
    rows = [["000102", 1.2, "A"], ["001245", 2, "B"]]
    tm.assert_frame_equal(frame, DataFrame(rows))


@xfail_pyarrow # ValueError: The 'converters' option is not supported
def test_converters_euro_decimal_format(all_parsers):
# see gh-583
converters = {}
Expand All @@ -77,6 +88,12 @@ def test_converters_euro_decimal_format(all_parsers):
"Number3"
] = lambda x: float(x.replace(",", "."))

if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), sep=";", converters=converters)
return

result = parser.read_csv(StringIO(data), sep=";", converters=converters)
expected = DataFrame(
[
Expand All @@ -89,7 +106,6 @@ def test_converters_euro_decimal_format(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: The 'converters' option is not supported
def test_converters_corner_with_nans(all_parsers):
parser = all_parsers
data = """id,score,days
Expand Down Expand Up @@ -146,6 +162,16 @@ def convert_score(x):
results = []

for day_converter in [convert_days, convert_days_sentinel]:
if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
converters={"score": convert_score, "days": day_converter},
na_values=["", None],
)
continue

result = parser.read_csv(
StringIO(data),
converters={"score": convert_score, "days": day_converter},
Expand All @@ -154,16 +180,24 @@ def convert_score(x):
assert pd.isna(result["days"][1])
results.append(result)

tm.assert_frame_equal(results[0], results[1])
if parser.engine != "pyarrow":
tm.assert_frame_equal(results[0], results[1])


@xfail_pyarrow # ValueError: The 'converters' option is not supported
@pytest.mark.parametrize("conv_f", [lambda x: x, str])
def test_converter_index_col_bug(all_parsers, conv_f):
# see gh-1835, GH#40589
parser = all_parsers
data = "A;B\n1;2\n3;4"

if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
)
return

rs = parser.read_csv(
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
)
Expand All @@ -172,24 +206,42 @@ def test_converter_index_col_bug(all_parsers, conv_f):
tm.assert_frame_equal(rs, xp)


@xfail_pyarrow # ValueError: The 'converters' option is not supported
def test_converter_identity_object(all_parsers):
    """Check that an identity converter leaves column values as strings.

    Column "A" goes through a converter that returns its input unchanged,
    so it is expected to hold the raw strings while column "B" is parsed
    as integers. With the pyarrow engine the ``converters`` option is
    rejected with a ``ValueError``.
    """
    # GH#40589
    csv_text = "A,B\n1,2\n3,4"
    identity = {"A": lambda value: value}

    if all_parsers.engine != "pyarrow":
        observed = all_parsers.read_csv(StringIO(csv_text), converters=identity)
        wanted = DataFrame({"A": ["1", "3"], "B": [2, 4]})
        tm.assert_frame_equal(observed, wanted)
    else:
        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
        with pytest.raises(ValueError, match=msg):
            all_parsers.read_csv(StringIO(csv_text), converters=identity)


@xfail_pyarrow # ValueError: The 'converters' option is not supported
def test_converter_multi_index(all_parsers):
# GH 42446
parser = all_parsers
data = "A,B,B\nX,Y,Z\n1,2,3"

if parser.engine == "pyarrow":
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
header=list(range(2)),
converters={
("A", "X"): np.int32,
("B", "Y"): np.int32,
("B", "Z"): np.float32,
},
)
return

result = parser.read_csv(
StringIO(data),
header=list(range(2)),
Expand Down