Skip to content

Commit b2b27af

Browse files
authored
TST: de-xfail pyarrow parser tests (#56035)
1 parent 1230529 commit b2b27af

File tree

2 files changed

+120
-28
lines changed

2 files changed

+120
-28
lines changed

pandas/tests/io/parser/test_comment.py

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,7 @@
1010
from pandas import DataFrame
1111
import pandas._testing as tm
1212

13-
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
1413

15-
16-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
1714
@pytest.mark.parametrize("na_values", [None, ["NaN"]])
1815
def test_comment(all_parsers, na_values):
1916
parser = all_parsers
@@ -24,11 +21,15 @@ def test_comment(all_parsers, na_values):
2421
expected = DataFrame(
2522
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
2623
)
24+
if parser.engine == "pyarrow":
25+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
26+
with pytest.raises(ValueError, match=msg):
27+
parser.read_csv(StringIO(data), comment="#", na_values=na_values)
28+
return
2729
result = parser.read_csv(StringIO(data), comment="#", na_values=na_values)
2830
tm.assert_frame_equal(result, expected)
2931

3032

31-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
3233
@pytest.mark.parametrize(
3334
"read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}]
3435
)
@@ -43,15 +44,25 @@ def test_line_comment(all_parsers, read_kwargs, request):
4344
if read_kwargs.get("delim_whitespace"):
4445
data = data.replace(",", " ")
4546
elif read_kwargs.get("lineterminator"):
46-
if parser.engine != "c":
47-
mark = pytest.mark.xfail(
48-
reason="Custom terminator not supported with Python engine"
49-
)
50-
request.applymarker(mark)
51-
5247
data = data.replace("\n", read_kwargs.get("lineterminator"))
5348

5449
read_kwargs["comment"] = "#"
50+
if parser.engine == "pyarrow":
51+
if "lineterminator" in read_kwargs:
52+
msg = (
53+
"The 'lineterminator' option is not supported with the 'pyarrow' engine"
54+
)
55+
else:
56+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
57+
with pytest.raises(ValueError, match=msg):
58+
parser.read_csv(StringIO(data), **read_kwargs)
59+
return
60+
elif parser.engine == "python" and read_kwargs.get("lineterminator"):
61+
msg = r"Custom line terminators not supported in python parser \(yet\)"
62+
with pytest.raises(ValueError, match=msg):
63+
parser.read_csv(StringIO(data), **read_kwargs)
64+
return
65+
5566
result = parser.read_csv(StringIO(data), **read_kwargs)
5667

5768
expected = DataFrame(
@@ -60,7 +71,6 @@ def test_line_comment(all_parsers, read_kwargs, request):
6071
tm.assert_frame_equal(result, expected)
6172

6273

63-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
6474
def test_comment_skiprows(all_parsers):
6575
parser = all_parsers
6676
data = """# empty
@@ -75,11 +85,16 @@ def test_comment_skiprows(all_parsers):
7585
expected = DataFrame(
7686
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
7787
)
88+
if parser.engine == "pyarrow":
89+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
90+
with pytest.raises(ValueError, match=msg):
91+
parser.read_csv(StringIO(data), comment="#", skiprows=4)
92+
return
93+
7894
result = parser.read_csv(StringIO(data), comment="#", skiprows=4)
7995
tm.assert_frame_equal(result, expected)
8096

8197

82-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
8398
def test_comment_header(all_parsers):
8499
parser = all_parsers
85100
data = """# empty
@@ -93,11 +108,15 @@ def test_comment_header(all_parsers):
93108
expected = DataFrame(
94109
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
95110
)
111+
if parser.engine == "pyarrow":
112+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
113+
with pytest.raises(ValueError, match=msg):
114+
parser.read_csv(StringIO(data), comment="#", header=1)
115+
return
96116
result = parser.read_csv(StringIO(data), comment="#", header=1)
97117
tm.assert_frame_equal(result, expected)
98118

99119

100-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
101120
def test_comment_skiprows_header(all_parsers):
102121
parser = all_parsers
103122
data = """# empty
@@ -115,15 +134,28 @@ def test_comment_skiprows_header(all_parsers):
115134
expected = DataFrame(
116135
[[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
117136
)
137+
if parser.engine == "pyarrow":
138+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
139+
with pytest.raises(ValueError, match=msg):
140+
parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
141+
return
142+
118143
result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
119144
tm.assert_frame_equal(result, expected)
120145

121146

122-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
123147
@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"])
124148
def test_custom_comment_char(all_parsers, comment_char):
125149
parser = all_parsers
126150
data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
151+
152+
if parser.engine == "pyarrow":
153+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
154+
with pytest.raises(ValueError, match=msg):
155+
parser.read_csv(
156+
StringIO(data.replace("#", comment_char)), comment=comment_char
157+
)
158+
return
127159
result = parser.read_csv(
128160
StringIO(data.replace("#", comment_char)), comment=comment_char
129161
)
@@ -132,7 +164,6 @@ def test_custom_comment_char(all_parsers, comment_char):
132164
tm.assert_frame_equal(result, expected)
133165

134166

135-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
136167
@pytest.mark.parametrize("header", ["infer", None])
137168
def test_comment_first_line(all_parsers, header):
138169
# see gh-4623
@@ -144,11 +175,15 @@ def test_comment_first_line(all_parsers, header):
144175
else:
145176
expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
146177

178+
if parser.engine == "pyarrow":
179+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
180+
with pytest.raises(ValueError, match=msg):
181+
parser.read_csv(StringIO(data), comment="#", header=header)
182+
return
147183
result = parser.read_csv(StringIO(data), comment="#", header=header)
148184
tm.assert_frame_equal(result, expected)
149185

150186

151-
@xfail_pyarrow # ValueError: The 'comment' option is not supported
152187
def test_comment_char_in_default_value(all_parsers, request):
153188
# GH#34002
154189
if all_parsers.engine == "c":
@@ -164,6 +199,11 @@ def test_comment_char_in_default_value(all_parsers, request):
164199
"4,5#,6,10\n"
165200
"7,8,#N/A,11\n"
166201
)
202+
if parser.engine == "pyarrow":
203+
msg = "The 'comment' option is not supported with the 'pyarrow' engine"
204+
with pytest.raises(ValueError, match=msg):
205+
parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
206+
return
167207
result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
168208
expected = DataFrame(
169209
{

pandas/tests/io/parser/test_converters.py

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,21 @@
1515
)
1616
import pandas._testing as tm
1717

18-
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
1918

20-
21-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
2219
def test_converters_type_must_be_dict(all_parsers):
2320
parser = all_parsers
2421
data = """index,A,B,C,D
2522
foo,2,3,4,5
2623
"""
27-
24+
if parser.engine == "pyarrow":
25+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
26+
with pytest.raises(ValueError, match=msg):
27+
parser.read_csv(StringIO(data), converters=0)
28+
return
2829
with pytest.raises(TypeError, match="Type converters.+"):
2930
parser.read_csv(StringIO(data), converters=0)
3031

3132

32-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
3333
@pytest.mark.parametrize("column", [3, "D"])
3434
@pytest.mark.parametrize(
3535
"converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer.
@@ -41,6 +41,12 @@ def test_converters(all_parsers, column, converter):
4141
b,3,4,01/02/2009
4242
c,4,5,01/03/2009
4343
"""
44+
if parser.engine == "pyarrow":
45+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
46+
with pytest.raises(ValueError, match=msg):
47+
parser.read_csv(StringIO(data), converters={column: converter})
48+
return
49+
4450
result = parser.read_csv(StringIO(data), converters={column: converter})
4551

4652
expected = parser.read_csv(StringIO(data))
@@ -49,21 +55,26 @@ def test_converters(all_parsers, column, converter):
4955
tm.assert_frame_equal(result, expected)
5056

5157

52-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
5358
def test_converters_no_implicit_conv(all_parsers):
5459
# see gh-2184
5560
parser = all_parsers
5661
data = """000102,1.2,A\n001245,2,B"""
5762

5863
converters = {0: lambda x: x.strip()}
64+
65+
if parser.engine == "pyarrow":
66+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
67+
with pytest.raises(ValueError, match=msg):
68+
parser.read_csv(StringIO(data), header=None, converters=converters)
69+
return
70+
5971
result = parser.read_csv(StringIO(data), header=None, converters=converters)
6072

6173
# Column 0 should not be cast to numeric and should remain as object.
6274
expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
6375
tm.assert_frame_equal(result, expected)
6476

6577

66-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
6778
def test_converters_euro_decimal_format(all_parsers):
6879
# see gh-583
6980
converters = {}
@@ -77,6 +88,12 @@ def test_converters_euro_decimal_format(all_parsers):
7788
"Number3"
7889
] = lambda x: float(x.replace(",", "."))
7990

91+
if parser.engine == "pyarrow":
92+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
93+
with pytest.raises(ValueError, match=msg):
94+
parser.read_csv(StringIO(data), sep=";", converters=converters)
95+
return
96+
8097
result = parser.read_csv(StringIO(data), sep=";", converters=converters)
8198
expected = DataFrame(
8299
[
@@ -89,7 +106,6 @@ def test_converters_euro_decimal_format(all_parsers):
89106
tm.assert_frame_equal(result, expected)
90107

91108

92-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
93109
def test_converters_corner_with_nans(all_parsers):
94110
parser = all_parsers
95111
data = """id,score,days
@@ -146,6 +162,16 @@ def convert_score(x):
146162
results = []
147163

148164
for day_converter in [convert_days, convert_days_sentinel]:
165+
if parser.engine == "pyarrow":
166+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
167+
with pytest.raises(ValueError, match=msg):
168+
parser.read_csv(
169+
StringIO(data),
170+
converters={"score": convert_score, "days": day_converter},
171+
na_values=["", None],
172+
)
173+
continue
174+
149175
result = parser.read_csv(
150176
StringIO(data),
151177
converters={"score": convert_score, "days": day_converter},
@@ -154,16 +180,24 @@ def convert_score(x):
154180
assert pd.isna(result["days"][1])
155181
results.append(result)
156182

157-
tm.assert_frame_equal(results[0], results[1])
183+
if parser.engine != "pyarrow":
184+
tm.assert_frame_equal(results[0], results[1])
158185

159186

160-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
161187
@pytest.mark.parametrize("conv_f", [lambda x: x, str])
162188
def test_converter_index_col_bug(all_parsers, conv_f):
163189
# see gh-1835, GH#40589
164190
parser = all_parsers
165191
data = "A;B\n1;2\n3;4"
166192

193+
if parser.engine == "pyarrow":
194+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
195+
with pytest.raises(ValueError, match=msg):
196+
parser.read_csv(
197+
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
198+
)
199+
return
200+
167201
rs = parser.read_csv(
168202
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
169203
)
@@ -172,24 +206,42 @@ def test_converter_index_col_bug(all_parsers, conv_f):
172206
tm.assert_frame_equal(rs, xp)
173207

174208

175-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
176209
def test_converter_identity_object(all_parsers):
177210
# GH#40589
178211
parser = all_parsers
179212
data = "A,B\n1,2\n3,4"
180213

214+
if parser.engine == "pyarrow":
215+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
216+
with pytest.raises(ValueError, match=msg):
217+
parser.read_csv(StringIO(data), converters={"A": lambda x: x})
218+
return
219+
181220
rs = parser.read_csv(StringIO(data), converters={"A": lambda x: x})
182221

183222
xp = DataFrame({"A": ["1", "3"], "B": [2, 4]})
184223
tm.assert_frame_equal(rs, xp)
185224

186225

187-
@xfail_pyarrow # ValueError: The 'converters' option is not supported
188226
def test_converter_multi_index(all_parsers):
189227
# GH 42446
190228
parser = all_parsers
191229
data = "A,B,B\nX,Y,Z\n1,2,3"
192230

231+
if parser.engine == "pyarrow":
232+
msg = "The 'converters' option is not supported with the 'pyarrow' engine"
233+
with pytest.raises(ValueError, match=msg):
234+
parser.read_csv(
235+
StringIO(data),
236+
header=list(range(2)),
237+
converters={
238+
("A", "X"): np.int32,
239+
("B", "Y"): np.int32,
240+
("B", "Z"): np.float32,
241+
},
242+
)
243+
return
244+
193245
result = parser.read_csv(
194246
StringIO(data),
195247
header=list(range(2)),

0 commit comments

Comments
 (0)