TST: de-xfail pyarrow parser tests (#56035)

jbrockmendel · web-flow · commit b2b27afc8a04 · 2023-11-17T18:36:27.000-08:00
diff --git a/pandas/tests/io/parser/test_comment.py b/pandas/tests/io/parser/test_comment.py
@@ -10,10 +10,7 @@
 from pandas import DataFrame
 import pandas._testing as tm
 
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
-
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 @pytest.mark.parametrize("na_values", [None, ["NaN"]])
 def test_comment(all_parsers, na_values):
     parser = all_parsers
@@ -24,11 +21,15 @@ def test_comment(all_parsers, na_values):
     expected = DataFrame(
         [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
     )
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), comment="#", na_values=na_values)
+        return
     result = parser.read_csv(StringIO(data), comment="#", na_values=na_values)
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 @pytest.mark.parametrize(
     "read_kwargs", [{}, {"lineterminator": "*"}, {"delim_whitespace": True}]
 )
@@ -43,15 +44,25 @@ def test_line_comment(all_parsers, read_kwargs, request):
     if read_kwargs.get("delim_whitespace"):
         data = data.replace(",", " ")
     elif read_kwargs.get("lineterminator"):
-        if parser.engine != "c":
-            mark = pytest.mark.xfail(
-                reason="Custom terminator not supported with Python engine"
-            )
-            request.applymarker(mark)
-
         data = data.replace("\n", read_kwargs.get("lineterminator"))
 
     read_kwargs["comment"] = "#"
+    if parser.engine == "pyarrow":
+        if "lineterminator" in read_kwargs:
+            msg = (
+                "The 'lineterminator' option is not supported with the 'pyarrow' engine"
+            )
+        else:
+            msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **read_kwargs)
+        return
+    elif parser.engine == "python" and read_kwargs.get("lineterminator"):
+        msg = r"Custom line terminators not supported in python parser \(yet\)"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **read_kwargs)
+        return
+
     result = parser.read_csv(StringIO(data), **read_kwargs)
 
     expected = DataFrame(
@@ -60,7 +71,6 @@ def test_line_comment(all_parsers, read_kwargs, request):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 def test_comment_skiprows(all_parsers):
     parser = all_parsers
     data = """# empty
@@ -75,11 +85,16 @@ def test_comment_skiprows(all_parsers):
     expected = DataFrame(
         [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
     )
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), comment="#", skiprows=4)
+        return
+
     result = parser.read_csv(StringIO(data), comment="#", skiprows=4)
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 def test_comment_header(all_parsers):
     parser = all_parsers
     data = """# empty
@@ -93,11 +108,15 @@ def test_comment_header(all_parsers):
     expected = DataFrame(
         [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
     )
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), comment="#", header=1)
+        return
     result = parser.read_csv(StringIO(data), comment="#", header=1)
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 def test_comment_skiprows_header(all_parsers):
     parser = all_parsers
     data = """# empty
@@ -115,15 +134,28 @@ def test_comment_skiprows_header(all_parsers):
     expected = DataFrame(
         [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]
     )
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
+        return
+
     result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1)
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 @pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"])
 def test_custom_comment_char(all_parsers, comment_char):
     parser = all_parsers
     data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo"
+
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data.replace("#", comment_char)), comment=comment_char
+            )
+        return
     result = parser.read_csv(
         StringIO(data.replace("#", comment_char)), comment=comment_char
     )
@@ -132,7 +164,6 @@ def test_custom_comment_char(all_parsers, comment_char):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 @pytest.mark.parametrize("header", ["infer", None])
 def test_comment_first_line(all_parsers, header):
     # see gh-4623
@@ -144,11 +175,15 @@ def test_comment_first_line(all_parsers, header):
     else:
         expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"])
 
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), comment="#", header=header)
+        return
     result = parser.read_csv(StringIO(data), comment="#", header=header)
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'comment' option is not supported
 def test_comment_char_in_default_value(all_parsers, request):
     # GH#34002
     if all_parsers.engine == "c":
@@ -164,6 +199,11 @@ def test_comment_char_in_default_value(all_parsers, request):
         "4,5#,6,10\n"
         "7,8,#N/A,11\n"
     )
+    if parser.engine == "pyarrow":
+        msg = "The 'comment' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
+        return
     result = parser.read_csv(StringIO(data), comment="#", na_values="#N/A")
     expected = DataFrame(
         {
diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py
@@ -15,21 +15,21 @@
 )
 import pandas._testing as tm
 
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
-
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 def test_converters_type_must_be_dict(all_parsers):
     parser = all_parsers
     data = """index,A,B,C,D
 foo,2,3,4,5
 """
-
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), converters=0)
+        return
     with pytest.raises(TypeError, match="Type converters.+"):
         parser.read_csv(StringIO(data), converters=0)
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 @pytest.mark.parametrize("column", [3, "D"])
 @pytest.mark.parametrize(
     "converter", [parse, lambda x: int(x.split("/")[2])]  # Produce integer.
@@ -41,6 +41,12 @@ def test_converters(all_parsers, column, converter):
 b,3,4,01/02/2009
 c,4,5,01/03/2009
 """
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), converters={column: converter})
+        return
+
     result = parser.read_csv(StringIO(data), converters={column: converter})
 
     expected = parser.read_csv(StringIO(data))
@@ -49,21 +55,26 @@ def test_converters(all_parsers, column, converter):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 def test_converters_no_implicit_conv(all_parsers):
     # see gh-2184
     parser = all_parsers
     data = """000102,1.2,A\n001245,2,B"""
 
     converters = {0: lambda x: x.strip()}
+
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), header=None, converters=converters)
+        return
+
     result = parser.read_csv(StringIO(data), header=None, converters=converters)
 
     # Column 0 should not be casted to numeric and should remain as object.
     expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]])
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 def test_converters_euro_decimal_format(all_parsers):
     # see gh-583
     converters = {}
@@ -77,6 +88,12 @@ def test_converters_euro_decimal_format(all_parsers):
         "Number3"
     ] = lambda x: float(x.replace(",", "."))
 
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), sep=";", converters=converters)
+        return
+
     result = parser.read_csv(StringIO(data), sep=";", converters=converters)
     expected = DataFrame(
         [
@@ -89,7 +106,6 @@ def test_converters_euro_decimal_format(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 def test_converters_corner_with_nans(all_parsers):
     parser = all_parsers
     data = """id,score,days
@@ -146,6 +162,16 @@ def convert_score(x):
     results = []
 
     for day_converter in [convert_days, convert_days_sentinel]:
+        if parser.engine == "pyarrow":
+            msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+            with pytest.raises(ValueError, match=msg):
+                parser.read_csv(
+                    StringIO(data),
+                    converters={"score": convert_score, "days": day_converter},
+                    na_values=["", None],
+                )
+            continue
+
         result = parser.read_csv(
             StringIO(data),
             converters={"score": convert_score, "days": day_converter},
@@ -154,16 +180,24 @@ def convert_score(x):
         assert pd.isna(result["days"][1])
         results.append(result)
 
-    tm.assert_frame_equal(results[0], results[1])
+    if parser.engine != "pyarrow":
+        tm.assert_frame_equal(results[0], results[1])
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 @pytest.mark.parametrize("conv_f", [lambda x: x, str])
 def test_converter_index_col_bug(all_parsers, conv_f):
     # see gh-1835 , GH#40589
     parser = all_parsers
     data = "A;B\n1;2\n3;4"
 
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
+            )
+        return
+
     rs = parser.read_csv(
         StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
     )
@@ -172,24 +206,42 @@ def test_converter_index_col_bug(all_parsers, conv_f):
     tm.assert_frame_equal(rs, xp)
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 def test_converter_identity_object(all_parsers):
     # GH#40589
     parser = all_parsers
     data = "A,B\n1,2\n3,4"
 
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), converters={"A": lambda x: x})
+        return
+
     rs = parser.read_csv(StringIO(data), converters={"A": lambda x: x})
 
     xp = DataFrame({"A": ["1", "3"], "B": [2, 4]})
     tm.assert_frame_equal(rs, xp)
 
 
-@xfail_pyarrow  # ValueError: The 'converters' option is not supported
 def test_converter_multi_index(all_parsers):
     # GH 42446
     parser = all_parsers
     data = "A,B,B\nX,Y,Z\n1,2,3"
 
+    if parser.engine == "pyarrow":
+        msg = "The 'converters' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data),
+                header=list(range(2)),
+                converters={
+                    ("A", "X"): np.int32,
+                    ("B", "Y"): np.int32,
+                    ("B", "Z"): np.float32,
+                },
+            )
+        return
+
     result = parser.read_csv(
         StringIO(data),
         header=list(range(2)),