Skip to content

Commit 3eda392

Browse files
fix and replace dead econdb tests
- test more important aspects of the return data format
- pick more stable example sets and series to test
1 parent ab26ad2 commit 3eda392

File tree

1 file changed

+47
-71
lines changed

1 file changed

+47
-71
lines changed
Lines changed: 47 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import numpy as np
22
import pandas as pd
3-
from pandas import testing as tm
43
import pytest
54

65
from pandas_datareader import data as web
76

87
pytestmark = pytest.mark.stable
98

109

10+
def assert_equal(x, y):
11+
assert np.isclose(x, y, rtol=1e-2)
12+
13+
1114
class TestEcondb(object):
1215
def test_infer_start_end_from_symbols(self):
1316
df = web.DataReader(
@@ -23,88 +26,61 @@ def test_infer_start_end_from_symbols(self):
2326
assert df.index[0].year == 2010
2427
assert df.index[-1].year == 2018
2528

26-
@pytest.mark.xfail(reason="Dataset does not exist on Econdb")
27-
def test_get_cdh_e_fos(self):
28-
# EUROSTAT
29-
# Employed doctorate holders in non managerial and non professional
30-
# occupations by fields of science (%)
31-
df = web.DataReader(
32-
"dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL",
33-
"econdb",
34-
start=pd.Timestamp("2005-01-01"),
35-
end=pd.Timestamp("2010-01-01"),
36-
)
37-
assert isinstance(df, pd.DataFrame)
38-
assert df.shape == (2, 4)
39-
40-
# the levels are not returned consistently for econdb
41-
names = list(df.columns.names)
42-
levels = [lvl.values.tolist() for lvl in list(df.columns.levels)]
43-
44-
exp_col = pd.MultiIndex.from_product(levels, names=names)
45-
exp_idx = pd.DatetimeIndex(["2006-01-01", "2009-01-01"], name="TIME_PERIOD")
46-
47-
values = np.array([[25.49, np.nan, 39.05, np.nan], [20.38, 25.1, 27.77, 38.1]])
48-
expected = pd.DataFrame(values, index=exp_idx, columns=exp_col)
49-
tm.assert_frame_equal(df, expected)
50-
51-
def test_get_tourism(self):
52-
# OECD
53-
# TOURISM_INBOUND
29+
tickers = [f'{sec}{geo}' for sec in ['RGDP', 'CPI', 'URATE']
30+
for geo in ['US', 'UK', 'ES', 'AR']]
5431

32+
@pytest.mark.parametrize("ticker", tickers)
33+
def test_fetch_single_ticker_series(self, ticker):
5534
df = web.DataReader(
56-
"dataset=OE_TOURISM_INBOUND&COUNTRY=JPN,USA&VARIABLE=INB_ARRIVALS_TOTAL",
57-
"econdb",
58-
start=pd.Timestamp("2008-01-01"),
59-
end=pd.Timestamp("2012-01-01"),
60-
)
61-
df = df.astype(float)
62-
jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
63-
us = np.array(
64-
[175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
35+
f"ticker={ticker}", "econdb",
36+
start=pd.Timestamp("2010-01-01"),
37+
end=pd.Timestamp("2013-01-27"),
6538
)
66-
index = pd.date_range("2008-01-01", "2012-01-01", freq="AS", name="TIME_PERIOD")
67-
68-
# check the values coming back are equal
69-
np.testing.assert_array_equal(df.values[:, 0], jp)
70-
np.testing.assert_array_equal(df.values[:, 1], us)
39+
assert df.shape[1] == 1
40+
assert isinstance(df.index, pd.DatetimeIndex)
7141

72-
# sometimes the country and variable columns are swapped
73-
df = df.swaplevel(2, 1, axis=1)
74-
for label, values in [("Japan", jp), ("United States", us)]:
75-
expected = pd.Series(
76-
values, index=index, name="Total international arrivals"
77-
)
78-
expected.index.freq = None
79-
tm.assert_series_equal(
80-
df[label]["Tourism demand surveys"]["Total international arrivals"],
81-
expected,
82-
)
83-
84-
def test_bls(self):
85-
# BLS
86-
# CPI
42+
def test_single_nonticker_series(self):
8743
df = web.DataReader(
8844
"ticker=BLS_CU.CUSR0000SA0.M.US",
8945
"econdb",
9046
start=pd.Timestamp("2010-01-01"),
9147
end=pd.Timestamp("2013-01-27"),
9248
)
49+
assert df.shape[1] == 1
50+
assert isinstance(df.index, pd.DatetimeIndex)
51+
assert_equal(df.loc["2010-05-01"][0], 217.3)
9352

94-
assert df.loc["2010-05-01"][0] == 217.3
53+
def test_filtered_dataset(self):
54+
df = web.DataReader(
55+
'&'.join([
56+
'dataset=PRC_HICP_MIDX',
57+
'v=Geopolitical entity (reporting)',
58+
'h=TIME',
59+
'from=2022-03-01',
60+
'to=2022-09-01',
61+
'COICOP=[CP00]',
62+
'FREQ=[M]',
63+
'GEO=[ES,AT,CZ,IT,CH]',
64+
'UNIT=[I15]'
65+
]),
66+
'econdb'
67+
)
68+
assert df.shape[1] == 5
69+
assert isinstance(df.index, pd.DatetimeIndex)
9570

9671
def test_australia_gdp(self):
9772
df = web.DataReader(
98-
"dataset=ABS_GDP&to=2019-09-01&from=1959-09-01&h=TIME&v=Indicator", "econdb"
99-
)
100-
assert (
101-
df.loc[
102-
"2017-10-01",
103-
(
104-
"GDP per capita: Current prices - National Accounts",
105-
"Seasonally Adjusted",
106-
"AUD",
107-
),
108-
]
109-
== 18329
73+
'&'.join([
74+
'dataset=ABS_GDP',
75+
'4=[7]',
76+
'6=[11]',
77+
'16=[1267]',
78+
'v=TIME',
79+
'h=Indicator',
80+
'from=2019-10-01',
81+
'to=2022-06-01',
82+
'GEO=[13]'
83+
]),
84+
'econdb'
11085
)
86+
assert_equal(df.squeeze().loc["2020-10-01"], 508603)

0 commit comments

Comments
 (0)