Skip to content

TST: GH30999 Add match=msg to all but two pytest.raises in tests/io #38724

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ def test_read_xlrd_book(read_ext, frame):
# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
path = datapath("io", "data", "excel", f"test1{read_ext}")
msg = "No sheet named <'invalid_sheet_name'>"
with ExcelFile(path, engine="xlrd") as excel:
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, sheet_name="asdf")
with pytest.raises(xlrd.XLRDError, match=msg):
pd.read_excel(excel, sheet_name="invalid_sheet_name")


def test_excel_file_warning_with_xlsx_file(datapath):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/parser/test_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,13 @@ def test_parse_public_s3_bucket_nrows_python(self, tips_df, s3so):
tm.assert_frame_equal(tips_df.iloc[:10], df)

def test_read_s3_fails(self, s3so):
with pytest.raises(IOError):
msg = "The specified bucket does not exist"
with pytest.raises(IOError, match=msg):
read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)

# Receive a permission error when trying to read a private bucket.
# It's irrelevant here that this isn't actually a table.
with pytest.raises(IOError):
with pytest.raises(IOError, match=msg):
read_csv("s3://cant_get_it/file.csv")

def test_write_s3_csv_fails(self, tips_df, s3so):
Expand Down
29 changes: 10 additions & 19 deletions pandas/tests/io/parser/test_python_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,19 +249,15 @@ def test_multi_char_sep_quotes(python_parser_only, quoting):
parser = python_parser_only

data = 'a,,b\n1,,a\n2,,"2,,b"'
msg = "ignored when a multi-char delimiter is used"

def fail_read():
if quoting == csv.QUOTE_NONE:
msg = "Expected 2 fields in line 3, saw 3"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)

if quoting == csv.QUOTE_NONE:
# We expect no match, so there should be an assertion
# error out of the inner context manager.
with pytest.raises(AssertionError):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this was some really old testing code.

fail_read()
else:
fail_read()
msg = "ignored when a multi-char delimiter is used"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), quoting=quoting, **kwargs)


def test_none_delimiter(python_parser_only, capsys):
Expand All @@ -286,20 +282,15 @@ def test_none_delimiter(python_parser_only, capsys):
@pytest.mark.parametrize("skipfooter", [0, 1])
def test_skipfooter_bad_row(python_parser_only, data, skipfooter):
# see gh-13879 and gh-15910
msg = "parsing errors in the skipped footer rows"
parser = python_parser_only

def fail_read():
if skipfooter:
msg = "parsing errors in the skipped footer rows"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)

if skipfooter:
fail_read()
else:
# We expect no match, so there should be an assertion
# error out of the inner context manager.
with pytest.raises(AssertionError):
fail_read()
msg = "unexpected end of data|expected after"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), skipfooter=skipfooter)


def test_malformed_skipfooter(python_parser_only):
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,16 @@ def test_inconsistent_number_of_rows(datapath):
def test_zero_variables(datapath):
# Check if the SAS file has zero variables (PR #18184)
fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")
with pytest.raises(EmptyDataError):
with pytest.raises(EmptyDataError, match="No columns to parse from file"):
pd.read_sas(fname)


def test_corrupt_read(datapath):
# We don't really care about the exact failure, the important thing is
# that the resource should be cleaned up afterwards (BUG #35566)
fname = datapath("io", "sas", "data", "corrupt.sas7bdat")
with pytest.raises(AttributeError):
msg = "'SAS7BDATReader' object has no attribute 'row_count'"
with pytest.raises(AttributeError, match=msg):
pd.read_sas(fname)


Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,8 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
df.to_csv(path, compression=compression_, encoding=encoding)

# reading should fail (otherwise we wouldn't need the warning)
with pytest.raises(Exception):
msg = r"UTF-\d+ stream does not start with BOM"
with pytest.raises(UnicodeError, match=msg):
pd.read_csv(path, compression=compression_, encoding=encoding)


Expand Down
35 changes: 25 additions & 10 deletions pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,19 @@
@filter_sparse
@pytest.mark.single
class TestFeather:
def check_error_on_write(self, df, exc):
def check_error_on_write(self, df, exc, err_msg):
# check that we are raising the exception
# on writing

with pytest.raises(exc):
with pytest.raises(exc, match=err_msg):
with tm.ensure_clean() as path:
to_feather(df, path)

def check_external_error_on_write(self, df):
# check that we are raising the exception
# on writing

with tm.external_error_raised(Exception):
with tm.ensure_clean() as path:
to_feather(df, path)

Expand All @@ -42,14 +50,15 @@ def check_round_trip(self, df, expected=None, write_kwargs={}, **read_kwargs):

def test_error(self):

msg = "feather only support IO with DataFrames"
for obj in [
pd.Series([1, 2, 3]),
1,
"foo",
pd.Timestamp("20130101"),
np.array([1, 2, 3]),
]:
self.check_error_on_write(obj, ValueError)
self.check_error_on_write(obj, ValueError, msg)

def test_basic(self):

Expand Down Expand Up @@ -95,12 +104,13 @@ def test_duplicate_columns(self):
# https://github.com/wesm/feather/issues/53
# not currently able to handle duplicate columns
df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
self.check_error_on_write(df, ValueError)
self.check_external_error_on_write(df)

def test_stringify_columns(self):

df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy()
self.check_error_on_write(df, ValueError)
msg = "feather must have string column names"
self.check_error_on_write(df, ValueError, msg)

def test_read_columns(self):
# GH 24025
Expand All @@ -125,8 +135,7 @@ def test_unsupported_other(self):

# mixed python objects
df = pd.DataFrame({"a": ["a", 1, 2.0]})
# Some versions raise ValueError, others raise ArrowInvalid.
self.check_error_on_write(df, Exception)
self.check_external_error_on_write(df)

def test_rw_use_threads(self):
df = pd.DataFrame({"A": np.arange(100000)})
Expand All @@ -138,6 +147,10 @@ def test_write_with_index(self):
df = pd.DataFrame({"A": [1, 2, 3]})
self.check_round_trip(df)

msg = (
r"feather does not support serializing .* for the index; "
r"you can \.reset_index\(\) to make the index into column\(s\)"
)
# non-default index
for index in [
[2, 3, 4],
Expand All @@ -148,17 +161,19 @@ def test_write_with_index(self):
]:

df.index = index
self.check_error_on_write(df, ValueError)
self.check_error_on_write(df, ValueError, msg)

# index with meta-data
df.index = [0, 1, 2]
df.index.name = "foo"
self.check_error_on_write(df, ValueError)
msg = "feather does not serialize index meta-data on a default index"
self.check_error_on_write(df, ValueError, msg)

# column multi-index
df.index = [0, 1, 2]
df.columns = pd.MultiIndex.from_tuples([("a", 1)])
self.check_error_on_write(df, ValueError)
msg = "feather must have string column names"
self.check_error_on_write(df, ValueError, msg)

def test_path_pathlib(self):
df = tm.makeDataFrame().reset_index()
Expand Down
14 changes: 6 additions & 8 deletions pandas/tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,16 @@ def test_reasonable_error(monkeypatch, cleared_fs):
from fsspec.registry import known_implementations

registry.target.clear()
with pytest.raises(ValueError) as e:
with pytest.raises(ValueError, match="nosuchprotocol"):
read_csv("nosuchprotocol://test/test.csv")
assert "nosuchprotocol" in str(e.value)
err_mgs = "test error messgae"
err_msg = "test error message"
monkeypatch.setitem(
known_implementations,
"couldexist",
{"class": "unimportable.CouldExist", "err": err_mgs},
{"class": "unimportable.CouldExist", "err": err_msg},
)
with pytest.raises(ImportError) as e:
with pytest.raises(ImportError, match=err_msg):
read_csv("couldexist://test/test.csv")
assert err_mgs in str(e.value)


def test_to_csv(cleared_fs):
Expand Down Expand Up @@ -225,9 +223,9 @@ def test_s3_parquet(s3_resource, s3so):

@td.skip_if_installed("fsspec")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extra skip here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback I fixed this and merged master; CI is green except for Travis CI.

def test_not_present_exception():
with pytest.raises(ImportError) as e:
msg = "Missing optional dependency 'fsspec'|fsspec library is required"
with pytest.raises(ImportError, match=msg):
read_csv("memory://test/test.csv")
assert "fsspec library is required" in str(e.value)


@td.skip_if_no("pyarrow")
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pandas as pd
from pandas import DataFrame
import pandas._testing as tm

api_exceptions = pytest.importorskip("google.api_core.exceptions")
bigquery = pytest.importorskip("google.cloud.bigquery")
Expand Down Expand Up @@ -195,7 +196,7 @@ def test_roundtrip(self, gbq_dataset):
"if_exists, expected_num_rows, expectation",
[
("append", 300, does_not_raise()),
("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)),
("fail", 200, tm.external_error_raised(pandas_gbq.gbq.TableCreationError)),
("replace", 100, does_not_raise()),
],
)
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/io/test_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,5 @@ def open(self, path, mode="r", *args):

@td.skip_if_installed("gcsfs")
def test_gcs_not_present_exception():
with pytest.raises(ImportError) as e:
with tm.external_error_raised(ImportError):
read_csv("gs://test/test.csv")
assert "gcsfs library is required" in str(e.value)
20 changes: 13 additions & 7 deletions pandas/tests/io/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,17 +302,18 @@ def test_file_like(self):

@tm.network
def test_bad_url_protocol(self):
with pytest.raises(URLError):
with pytest.raises(URLError, match="urlopen error unknown url type: git"):
self.read_html("git://github.com", match=".*Water.*")

@tm.network
@pytest.mark.slow
def test_invalid_url(self):
try:
with pytest.raises(URLError):
self.read_html("http://www.a23950sdfa908sd.com", match=".*Water.*")
except ValueError as e:
assert "No tables found" in str(e)
msg = (
"Name or service not known|Temporary failure in name resolution|"
"No tables found"
)
with pytest.raises((URLError, ValueError), match=msg):
self.read_html("http://www.a23950sdfa908sd.com", match=".*Water.*")

@pytest.mark.slow
def test_file_url(self):
Expand Down Expand Up @@ -949,8 +950,13 @@ def test_decimal_rows(self):

def test_bool_header_arg(self):
# GH 6114
msg = re.escape(
"Passing a bool to header is invalid. Use header=None for no header or "
"header=int or list-like of ints to specify the row(s) making up the "
"column names"
)
for arg in [True, False]:
with pytest.raises(TypeError):
with pytest.raises(TypeError, match=msg):
self.read_html(self.spam_data, header=arg)

def test_converters(self):
Expand Down
Loading