fix and replace dead econdb tests

StochasticDoge · StochasticDoge · commit 3eda392d98de · 2022-10-03T17:20:55.000+02:00
-test more important aspects of the return data format
-pick more stable example sets and series to test
diff --git a/pandas_datareader/tests/test_econdb.py b/pandas_datareader/tests/test_econdb.py
@@ -1,13 +1,16 @@
 import numpy as np
 import pandas as pd
-from pandas import testing as tm
 import pytest
 
 from pandas_datareader import data as web
 
 pytestmark = pytest.mark.stable
 
 
+def assert_equal(x, y):
+    assert np.isclose(x, y, rtol=1e-2)
+
+
 class TestEcondb(object):
     def test_infer_start_end_from_symbols(self):
         df = web.DataReader(
@@ -23,88 +26,61 @@ def test_infer_start_end_from_symbols(self):
         assert df.index[0].year == 2010
         assert df.index[-1].year == 2018
 
-    @pytest.mark.xfail(reason="Dataset does not exist on Econdb")
-    def test_get_cdh_e_fos(self):
-        # EUROSTAT
-        # Employed doctorate holders in non managerial and non professional
-        # occupations by fields of science (%)
-        df = web.DataReader(
-            "dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL",
-            "econdb",
-            start=pd.Timestamp("2005-01-01"),
-            end=pd.Timestamp("2010-01-01"),
-        )
-        assert isinstance(df, pd.DataFrame)
-        assert df.shape == (2, 4)
-
-        # the levels and not returned consistently for econdb
-        names = list(df.columns.names)
-        levels = [lvl.values.tolist() for lvl in list(df.columns.levels)]
-
-        exp_col = pd.MultiIndex.from_product(levels, names=names)
-        exp_idx = pd.DatetimeIndex(["2006-01-01", "2009-01-01"], name="TIME_PERIOD")
-
-        values = np.array([[25.49, np.nan, 39.05, np.nan], [20.38, 25.1, 27.77, 38.1]])
-        expected = pd.DataFrame(values, index=exp_idx, columns=exp_col)
-        tm.assert_frame_equal(df, expected)
-
-    def test_get_tourism(self):
-        # OECD
-        # TOURISM_INBOUND
+    tickers = [f'{sec}{geo}' for sec in ['RGDP', 'CPI', 'URATE']
+               for geo in ['US', 'UK', 'ES', 'AR']]
 
+    @pytest.mark.parametrize("ticker", tickers)
+    def test_fetch_single_ticker_series(self, ticker):
         df = web.DataReader(
-            "dataset=OE_TOURISM_INBOUND&COUNTRY=JPN,USA&VARIABLE=INB_ARRIVALS_TOTAL",
-            "econdb",
-            start=pd.Timestamp("2008-01-01"),
-            end=pd.Timestamp("2012-01-01"),
-        )
-        df = df.astype(float)
-        jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
-        us = np.array(
-            [175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
+            f"ticker={ticker}", "econdb",
+            start=pd.Timestamp("2010-01-01"),
+            end=pd.Timestamp("2013-01-27"),
         )
-        index = pd.date_range("2008-01-01", "2012-01-01", freq="AS", name="TIME_PERIOD")
-
-        # check the values coming back are equal
-        np.testing.assert_array_equal(df.values[:, 0], jp)
-        np.testing.assert_array_equal(df.values[:, 1], us)
+        assert df.shape[1] == 1
+        assert isinstance(df.index, pd.DatetimeIndex)
 
-        # sometimes the country and variable columns are swapped
-        df = df.swaplevel(2, 1, axis=1)
-        for label, values in [("Japan", jp), ("United States", us)]:
-            expected = pd.Series(
-                values, index=index, name="Total international arrivals"
-            )
-            expected.index.freq = None
-            tm.assert_series_equal(
-                df[label]["Tourism demand surveys"]["Total international arrivals"],
-                expected,
-            )
-
-    def test_bls(self):
-        # BLS
-        # CPI
+    def test_single_nonticker_series(self):
         df = web.DataReader(
             "ticker=BLS_CU.CUSR0000SA0.M.US",
             "econdb",
             start=pd.Timestamp("2010-01-01"),
             end=pd.Timestamp("2013-01-27"),
         )
+        assert df.shape[1] == 1
+        assert isinstance(df.index, pd.DatetimeIndex)
+        assert_equal(df.loc["2010-05-01"][0], 217.3)
 
-        assert df.loc["2010-05-01"][0] == 217.3
+    def test_filtered_dataset(self):
+        df = web.DataReader(
+            '&'.join([
+                'dataset=PRC_HICP_MIDX',
+                'v=Geopolitical entity (reporting)',
+                'h=TIME',
+                'from=2022-03-01',
+                'to=2022-09-01',
+                'COICOP=[CP00]',
+                'FREQ=[M]',
+                'GEO=[ES,AT,CZ,IT,CH]',
+                'UNIT=[I15]'
+            ]),
+            'econdb'
+        )
+        assert df.shape[1] == 5
+        assert isinstance(df.index, pd.DatetimeIndex)
 
     def test_australia_gdp(self):
         df = web.DataReader(
-            "dataset=ABS_GDP&to=2019-09-01&from=1959-09-01&h=TIME&v=Indicator", "econdb"
-        )
-        assert (
-            df.loc[
-                "2017-10-01",
-                (
-                    "GDP per capita: Current prices - National Accounts",
-                    "Seasonally Adjusted",
-                    "AUD",
-                ),
-            ]
-            == 18329
+            '&'.join([
+                'dataset=ABS_GDP',
+                '4=[7]',
+                '6=[11]',
+                '16=[1267]',
+                'v=TIME',
+                'h=Indicator',
+                'from=2019-10-01',
+                'to=2022-06-01',
+                'GEO=[13]'
+            ]),
+            'econdb'
         )
+        assert_equal(df.squeeze().loc["2020-10-01"], 508603)