pandas-dev · jreback · Dec 29, 2021 · Dec 23, 2021 · Dec 23, 2021 · Dec 28, 2021
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -77,52 +77,6 @@
 )
 from pandas.io.date_converters import generic_parser
 
-parser_defaults = {
-    "delimiter": None,
-    "escapechar": None,
-    "quotechar": '"',
-    "quoting": csv.QUOTE_MINIMAL,
-    "doublequote": True,
-    "skipinitialspace": False,
-    "lineterminator": None,
-    "header": "infer",
-    "index_col": None,
-    "names": None,
-    "prefix": None,
-    "skiprows": None,
-    "skipfooter": 0,
-    "nrows": None,
-    "na_values": None,
-    "keep_default_na": True,
-    "true_values": None,
-    "false_values": None,
-    "converters": None,
-    "dtype": None,
-    "cache_dates": True,
-    "thousands": None,
-    "comment": None,
-    "decimal": ".",
-    # 'engine': 'c',
-    "parse_dates": False,
-    "keep_date_col": False,
-    "dayfirst": False,
-    "date_parser": None,
-    "usecols": None,
-    # 'iterator': False,
-    "chunksize": None,
-    "verbose": False,
-    "encoding": None,
-    "squeeze": None,
-    "compression": None,
-    "mangle_dupe_cols": True,
-    "infer_datetime_format": False,
-    "skip_blank_lines": True,
-    "encoding_errors": "strict",
-    "on_bad_lines": "error",
-    "error_bad_lines": None,
-    "warn_bad_lines": None,
-}
-
 
 class ParserBase:
     class BadLineHandleMethod(Enum):
@@ -1178,6 +1132,53 @@ def converter(*date_cols):
     return converter
 
 
+parser_defaults = {  # defined after ParserBase: "on_bad_lines" uses its enum
+    "delimiter": None,
+    "escapechar": None,
+    "quotechar": '"',
+    "quoting": csv.QUOTE_MINIMAL,
+    "doublequote": True,
+    "skipinitialspace": False,
+    "lineterminator": None,
+    "header": "infer",
+    "index_col": None,
+    "names": None,
+    "prefix": None,
+    "skiprows": None,
+    "skipfooter": 0,
+    "nrows": None,
+    "na_values": None,
+    "keep_default_na": True,
+    "true_values": None,
+    "false_values": None,
+    "converters": None,
+    "dtype": None,
+    "cache_dates": True,
+    "thousands": None,
+    "comment": None,
+    "decimal": ".",
+    # 'engine': 'c',
+    "parse_dates": False,
+    "keep_date_col": False,
+    "dayfirst": False,
+    "date_parser": None,
+    "usecols": None,
+    # 'iterator': False,
+    "chunksize": None,
+    "verbose": False,
+    "encoding": None,
+    "squeeze": None,
+    "compression": None,
+    "mangle_dupe_cols": True,
+    "infer_datetime_format": False,
+    "skip_blank_lines": True,
+    "encoding_errors": "strict",
+    "on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,
+    "error_bad_lines": None,
+    "warn_bad_lines": None,
+}
+
+
 def _process_date_conversion(
     data_dict,
     converter: Callable,

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -434,10 +434,7 @@
     "dialect",
     "warn_bad_lines",
     "error_bad_lines",
-    # TODO(1.4)
-    # This doesn't error properly ATM, fix for release
-    # but not blocker for initial PR
-    # "on_bad_lines",
+    "on_bad_lines",
     "delim_whitespace",
     "quoting",
     "lineterminator",
@@ -932,7 +929,13 @@ def _get_options_with_defaults(self, engine):
                 engine == "pyarrow"
                 and argname in _pyarrow_unsupported
                 and value != default
+                and value != getattr(value, "value", default)
             ):
+                if argname == "on_bad_lines" and kwds.get("error_bad_lines"):
+                    argname = "error_bad_lines"
+                elif argname == "on_bad_lines" and kwds.get("warn_bad_lines"):
+                    argname = "warn_bad_lines"
+
                 raise ValueError(
                     f"The {repr(argname)} option is not supported with the "
                     f"'pyarrow' engine"

diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
@@ -135,6 +135,9 @@ def test_pyarrow_engine(self):
         1,2,3,4,"""
 
         for default in pa_unsupported:
+            if default == "on_bad_lines":
+                continue
+
             msg = (
                 f"The {repr(default)} option is not "
                 f"supported with the 'pyarrow' engine"
@@ -147,3 +150,20 @@ def test_pyarrow_engine(self):
                 kwargs[default] = True
             with pytest.raises(ValueError, match=msg):
                 read_csv(StringIO(data), engine="pyarrow", **kwargs)
+
+    @pytest.mark.parametrize(
+        "kwds",
+        [{"on_bad_lines": "warn"}, {"error_bad_lines": True}, {"warn_bad_lines": True}],
+    )
+    def test_pyarrow_bad_lines_fails(self, pyarrow_parser_only, kwds):
+        # GH# (number missing): pyarrow engine must reject all bad-lines options
+        data = """a,b,c
+    1,2,3
+    """
+        parser = pyarrow_parser_only
+        msg = (
+            f"The '{list(kwds.keys())[0]}' option is not supported "
+            f"with the 'pyarrow' engine"
+        )
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(StringIO(data), **kwds)