1
1
import numpy as np
2
2
import pandas as pd
3
- from pandas import testing as tm
4
3
import pytest
5
4
6
5
from pandas_datareader import data as web
7
6
8
7
pytestmark = pytest .mark .stable
9
8
10
9
10
def assert_equal(x, y):
    """Assert that ``x`` is numerically close to ``y``.

    The comparison uses ``np.isclose`` with ``rtol=1e-2`` (and NumPy's
    default absolute tolerance), so ``y`` is the reference value the
    relative tolerance is scaled by.

    Parameters
    ----------
    x : scalar or array-like
        Observed value(s).
    y : scalar or array-like
        Expected value(s).

    Raises
    ------
    AssertionError
        If any element of ``x`` is not close to the matching element of ``y``.
    """
    # np.isclose returns an array for array inputs; reduce it with np.all so
    # the assertion is well-defined for both scalars and arrays.
    close = np.isclose(x, y, rtol=1e-2)
    assert np.all(close), f"{x!r} is not close to {y!r} (rtol=1e-2)"
12
+
13
+
11
14
class TestEcondb (object ):
12
15
def test_infer_start_end_from_symbols (self ):
13
16
df = web .DataReader (
@@ -23,88 +26,61 @@ def test_infer_start_end_from_symbols(self):
23
26
assert df .index [0 ].year == 2010
24
27
assert df .index [- 1 ].year == 2018
25
28
26
- @pytest .mark .xfail (reason = "Dataset does not exist on Econdb" )
27
- def test_get_cdh_e_fos (self ):
28
- # EUROSTAT
29
- # Employed doctorate holders in non managerial and non professional
30
- # occupations by fields of science (%)
31
- df = web .DataReader (
32
- "dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL" ,
33
- "econdb" ,
34
- start = pd .Timestamp ("2005-01-01" ),
35
- end = pd .Timestamp ("2010-01-01" ),
36
- )
37
- assert isinstance (df , pd .DataFrame )
38
- assert df .shape == (2 , 4 )
39
-
40
- # the levels and not returned consistently for econdb
41
- names = list (df .columns .names )
42
- levels = [lvl .values .tolist () for lvl in list (df .columns .levels )]
43
-
44
- exp_col = pd .MultiIndex .from_product (levels , names = names )
45
- exp_idx = pd .DatetimeIndex (["2006-01-01" , "2009-01-01" ], name = "TIME_PERIOD" )
46
-
47
- values = np .array ([[25.49 , np .nan , 39.05 , np .nan ], [20.38 , 25.1 , 27.77 , 38.1 ]])
48
- expected = pd .DataFrame (values , index = exp_idx , columns = exp_col )
49
- tm .assert_frame_equal (df , expected )
50
-
51
- def test_get_tourism (self ):
52
- # OECD
53
- # TOURISM_INBOUND
29
# Ticker symbols used to parametrize the single-ticker test: every
# combination of a series prefix and a geography suffix, joined with no
# separator (e.g. "RGDPUS").
tickers = [
    f'{sec}{geo}'
    for sec in ['RGDP', 'CPI', 'URATE']
    for geo in ['US', 'UK', 'ES', 'AR']
]
54
31
32
@pytest.mark.parametrize("ticker", tickers)
def test_fetch_single_ticker_series(self, ticker):
    """Fetch one Econdb series by ticker and check the frame's layout.

    For each parametrized ``ticker`` the reader is queried with
    ``ticker=<symbol>`` over 2010-01-01 .. 2013-01-27; the result must
    have exactly one column and a ``DatetimeIndex``.
    """
    df = web.DataReader(
        # No whitespace after the symbol: the query string is sent as-is.
        f"ticker={ticker}",
        "econdb",
        start=pd.Timestamp("2010-01-01"),
        end=pd.Timestamp("2013-01-27"),
    )
    assert df.shape[1] == 1
    assert isinstance(df.index, pd.DatetimeIndex)
71
41
72
- # sometimes the country and variable columns are swapped
73
- df = df .swaplevel (2 , 1 , axis = 1 )
74
- for label , values in [("Japan" , jp ), ("United States" , us )]:
75
- expected = pd .Series (
76
- values , index = index , name = "Total international arrivals"
77
- )
78
- expected .index .freq = None
79
- tm .assert_series_equal (
80
- df [label ]["Tourism demand surveys" ]["Total international arrivals" ],
81
- expected ,
82
- )
83
-
84
- def test_bls (self ):
85
- # BLS
86
- # CPI
42
def test_single_nonticker_series(self):
    """Fetch the ``BLS_CU.CUSR0000SA0.M.US`` series and spot-check a value.

    The result must be a single-column frame on a ``DatetimeIndex``, and
    the 2010-05-01 observation must be close to 217.3.
    """
    df = web.DataReader(
        "ticker=BLS_CU.CUSR0000SA0.M.US",
        "econdb",
        start=pd.Timestamp("2010-01-01"),
        end=pd.Timestamp("2013-01-27"),
    )
    assert df.shape[1] == 1
    assert isinstance(df.index, pd.DatetimeIndex)
    # Use .iloc for the positional lookup: the selected row is labelled by
    # column name, and integer keys via [] on such a Series are deprecated.
    assert_equal(df.loc["2010-05-01"].iloc[0], 217.3)
93
52
94
- assert df .loc ["2010-05-01" ][0 ] == 217.3
53
def test_filtered_dataset(self):
    """Query the ``PRC_HICP_MIDX`` dataset with explicit filters.

    The query parameters are joined with ``&`` and passed to the
    ``econdb`` reader; the result must have five columns (presumably one
    per code in the ``GEO`` filter) and a ``DatetimeIndex``.
    """
    query_parts = [
        'dataset=PRC_HICP_MIDX',
        'v=Geopolitical entity (reporting)',
        'h=TIME',
        'from=2022-03-01',
        'to=2022-09-01',
        'COICOP=[CP00]',
        'FREQ=[M]',
        'GEO=[ES,AT,CZ,IT,CH]',
        'UNIT=[I15]',
    ]
    query = '&'.join(query_parts)
    df = web.DataReader(query, 'econdb')

    n_columns = df.shape[1]
    assert n_columns == 5
    assert isinstance(df.index, pd.DatetimeIndex)
95
70
96
71
def test_australia_gdp(self):
    """Query a filtered slice of the ``ABS_GDP`` dataset and check one value.

    The query parameters are joined with ``&`` and passed to the
    ``econdb`` reader; after squeezing the frame, the 2020-10-01
    observation must be close to 508603.
    """
    query_parts = [
        'dataset=ABS_GDP',
        '4=[7]',
        '6=[11]',
        '16=[1267]',
        'v=TIME',
        'h=Indicator',
        'from=2019-10-01',
        'to=2022-06-01',
        'GEO=[13]',
    ]
    query = '&'.join(query_parts)
    df = web.DataReader(query, 'econdb')

    # squeeze() collapses the frame so .loc can address the date directly.
    series = df.squeeze()
    assert_equal(series.loc["2020-10-01"], 508603)
0 commit comments