|
29 | 29 | import pandas.io.date_converters as conv
|
30 | 30 | import pandas.io.parsers as parsers
|
31 | 31 |
|
# Fallback datetime handed to ``du_parse`` through its ``default=`` argument
_DEFAULT_DATETIME = datetime(1, 1, 1)

# Hypothesis strategy that draws the random dates used by the property test.
# The lower bound is 1900-01-01: according to the original note, "%y" on
# Windows does not cope with earlier years.
gen_random_datetime = st.dates(min_value=date(1900, 1, 1),
                               max_value=date(9999, 12, 31))

| 39 | + |
32 | 40 |
|
33 | 41 | def test_separator_date_conflict(all_parsers):
|
34 | 42 | # Regression test for gh-4678
|
@@ -854,77 +862,79 @@ def test_parse_timezone(all_parsers):
|
854 | 862 | tm.assert_frame_equal(result, expected)
|
855 | 863 |
|
856 | 864 |
|
857 |
| -@pytest.mark.parametrize("datestring", [ |
@pytest.mark.parametrize("date_string", [
    "32/32/2019",
    "02/30/2019",
    "13/13/2019",
    "13/2019",
    "a3/11/2018",
    "10/11/2o17"
])
def test_invalid_parse_delimited_date(all_parsers, date_string):
    """Invalid delimited dates must come back as unchanged object values."""
    # With parse_dates=[0] requested, the column is still expected to hold
    # the raw input string with dtype object.
    unparsed = DataFrame({0: [date_string]}, dtype="object")
    parsed = all_parsers.read_csv(StringIO(date_string),
                                  header=None, parse_dates=[0])
    tm.assert_frame_equal(parsed, unparsed)
|
871 | 879 |
|
872 | 880 |
|
873 |
| -@pytest.mark.parametrize("date_format, delimiters", [ |
874 |
| - ("%m %d %Y", " -.\\/"), |
875 |
| - ("%m %Y", " -\\/") |
876 |
| -]) |
877 |
| -def test_parse_delimited_date(all_parsers, date_format, delimiters): |
878 |
| - parser = all_parsers |
879 |
| - date = datetime(2019, 4, 1) |
880 |
| - data = '\n'.join(date.strftime(date_format.replace(' ', delim)) |
881 |
| - for delim in delimiters) |
882 |
| - expected = DataFrame({0: [date] * len(delimiters)}, dtype="datetime64[ns]") |
883 |
| - result = parser.read_csv(StringIO(data), header=None, parse_dates=[0]) |
884 |
| - tm.assert_frame_equal(result, expected) |
885 |
| - |
886 |
| - |
887 |
| -@pytest.mark.parametrize("datestring,dayfirst,expected", [ |
888 |
| - # DD/MM/YYYY; month > 12 thus replacement |
889 |
| - ("13/02/2019", False, datetime(2019, 2, 13)), |
890 |
| - ("13/02/2019", True, datetime(2019, 2, 13)), |
891 |
| - ("02/13/2019", False, datetime(2019, 2, 13)), |
892 |
| - ("02/13/2019", True, datetime(2019, 2, 13)), |
893 |
| - # DD/MM/YYYY; dayfirst==True thus replacement |
894 |
| - ("04/02/2019", True, datetime(2019, 2, 4)) |
@pytest.mark.parametrize("date_string,dayfirst,expected", [
    # first field is 13, which cannot be a month, so it is expected to be
    # read as the day for either ``dayfirst`` setting
    ("13\\02\\2019", False, datetime(2019, 2, 13)),
    ("13\\02\\2019", True, datetime(2019, 2, 13)),
    # second field is 13, so the month-first reading is expected for either
    # ``dayfirst`` setting (no swap)
    ("02\\13\\2019", False, datetime(2019, 2, 13)),
    ("02\\13\\2019", True, datetime(2019, 2, 13)),
    # both fields could be a month; dayfirst=True makes the first one the day
    ("04\\02\\2019", True, datetime(2019, 2, 4))
])
def test_parse_delimited_date_swap(all_parsers, date_string,
                                   dayfirst, expected):
    """Check day/month resolution of delimited dates read via ``read_csv``."""
    source = StringIO(date_string)
    expected_frame = DataFrame({0: [expected]}, dtype="datetime64[ns]")
    parsed = all_parsers.read_csv(source, header=None,
                                  dayfirst=dayfirst, parse_dates=[0])
    tm.assert_frame_equal(parsed, expected_frame)
|
903 | 898 |
|
904 | 899 |
|
905 |
| -gen_random_datetime = st.dates( |
906 |
| - min_value=date(1000, 1, 1), |
907 |
| - max_value=date(9999, 12, 31) |
908 |
| -) |
909 |
| -_DEFAULT_DATETIME = datetime(1, 1, 1) |
| 900 | +def _helper_hypothesis_delimited_date(call, date_string, **kwargs): |
| 901 | + msg, result = None, None |
| 902 | + try: |
| 903 | + result = call(date_string, **kwargs) |
| 904 | + except ValueError as er: |
| 905 | + msg = str(er) |
| 906 | + pass |
| 907 | + return msg, result |
910 | 908 |
|
911 | 909 |
|
@given(gen_random_datetime)
@pytest.mark.parametrize("delimiter", list(" -./"))
@pytest.mark.parametrize("dayfirst", [True, False])
@pytest.mark.parametrize("date_format", [
    "%d %m %Y",
    "%m %d %Y",
    "%m %Y",
    "%Y %m %d",
    "%y %m %d",
    "%Y%m%d",
    "%y%m%d",
])
def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, date):
    """Check that ``parse_datetime_string`` and ``du_parse`` agree.

    A generated date is formatted with ``date_format`` (spaces replaced by
    ``delimiter``) and fed to both parsers. They must either return equal
    results or fail with the same ``ValueError`` message.
    """
    if date_format == "%m %Y" and delimiter == ".":
        # parse_datetime_string cannot reliably tell whether e.g. %m.%Y
        # is a float or a date, thus we skip it
        pytest.skip("%m.%Y is ambiguous between a float and a date")
    date_string = date.strftime(date_format.replace(' ', delimiter))

    # Each helper call yields (error message or None, parsed value or None).
    except_out_dateutil, result = _helper_hypothesis_delimited_date(
        parse_datetime_string, date_string,
        dayfirst=dayfirst)
    except_in_dateutil, expected = _helper_hypothesis_delimited_date(
        du_parse, date_string,
        default=_DEFAULT_DATETIME,
        dayfirst=dayfirst, yearfirst=False)

    # Both the failure messages and the parsed values have to match.
    assert except_out_dateutil == except_in_dateutil
    assert result == expected
|
0 commit comments