Commit 4c10022

Merge pull request #1 from VapingDoge/econ-fix
Econdb fixes and improvements
2 parents fdade48 + 555e42f commit 4c10022

6 files changed: +173 -91 lines
docs/source/remote_data.rst

Lines changed: 55 additions & 10 deletions
@@ -290,7 +290,7 @@ performances through the top-level function ``get_sector_performance_av``.
 
 
 
-.. _remote_data.enigma:
+.. _remote_data.econdb:
 
 Econdb
 ======
@@ -305,11 +305,26 @@ for United States, is as simple as taking the ticker segment from the URL path
 
 .. code-block:: ipython
 
-    import os
-    import pandas_datareader.data as web
+    In [1]: import os
 
-    f = web.DataReader('ticker=RGDPUS', 'econdb')
-    f.head()
+    In [2]: import pandas_datareader as pdr
+
+    In [3]: f = pdr.get_data_econdb('ticker=RGDPUS')
+    In [4]: f.head()
+    Out[4]:
+    TableName                                                       T10106
+    SeriesCode                                                      A191RX
+    Table                 Table 1.1.6. Real Gross Domestic Product, Ch...
+    Series description                             Gross domestic product
+    CL_UNIT                                                          Level
+    CL_FREQ                                                              Q
+    Note                  Table 1.1.6. Real Gross Domestic Product, Ch...
+    TIME_PERIOD
+    2018-01-01    18437128
+    2018-04-01    18565696
+    2018-07-01    18699748
+    2018-10-01    18733740
+    2019-01-01    18835412
 
 The code snippet for exporting the whole dataset, or its filtered down subset,
 can be generated by using the Export -> Pandas Python3 functionality
@@ -318,18 +333,48 @@ such as the Eurostat's `GDP and main components <https://www.econdb.com/dataset/
 
 .. code-block:: ipython
 
-    import os
-    import pandas_datareader.data as web
+    In [1]: import os
 
-    df = web.DataReader('dataset=NAMQ_10_GDP&v=Geopolitical entity (reporting)&h=TIME&from=2018-05-01&to=2021-01-01&GEO=[AL,AT,BE,BA,BG,HR,CY,CZ,DK,EE,EA19,FI,FR,DE,EL,HU,IS,IE,IT,XK,LV,LT,LU,MT,ME,NL,MK,NO,PL,PT,RO,RS,SK,SI,ES,SE,CH,TR,UK]&NA_ITEM=[B1GQ]&S_ADJ=[SCA]&UNIT=[CLV10_MNAC]', 'econdb')
-    df.columns
+    In [2]: import pandas_datareader as pdr
+
+    In [3]: df = pdr.get_data_econdb('dataset=NAMQ_10_GDP&v=Geopolitical entity (reporting)'
+                                     '&h=TIME&from=2018-05-01&to=2021-01-01'
+                                     '&GEO=[UK,ES,IT,DE,FR,CH,AT]&NA_ITEM=[B1GQ]'
+                                     '&S_ADJ=[SCA]&UNIT=[CLV10_MNAC]')
+    In [4]: df.head()
+    Out[4]:
+    Frequency                                                                    Quarterly  ...
+    Unit of measure      Chain linked volumes (2010), million units of national currency    ...
+    Seasonal adjustment                              Seasonally and calendar adjusted data  ...
+    National accounts indicator (ESA10)           Gross domestic product at market prices   ...
+    Geopolitical entity (reporting)                                                Austria  ...  Switzerland
+    TIME_PERIOD                                                                             ...
+    2018-07-01                                                                       83427  ...       181338
+    2018-10-01                                                                       84268  ...       181767
+    2019-01-01                                                                       84919  ...       182039
+    2019-04-01                                                                       84476  ...       182848
+    2019-07-01                                                                       84822  ...       183866
+
+In both cases, metadata for the requested Econdb series or dataset
+is in the ``MultiIndex`` columns of the returned ``DataFrame``,
+and can be conveniently converted to a ``dict`` as demonstrated below
+
+.. code-block:: ipython
+
+    In [5]: meta = df.columns.to_frame().iloc[0].to_dict()  # first column, positionally
+    Out[5]: meta
+    {'Frequency': 'Quarterly',
+     'Unit of measure': 'Chain linked volumes (2010), million units of national currency',
+     'Seasonal adjustment': 'Seasonally and calendar adjusted data',
+     'National accounts indicator (ESA10)': 'Gross domestic product at market prices',
+     'Geopolitical entity (reporting)': 'Austria'}
 
 Datasets can be located through Econdb's `search <https://www.econdb.com/search>`__
 engine, or discovered by exploring the `tree <https://www.econdb.com/tree/>`__
 of available statistical sources.
 
 
-.. _remote_data.econdb:
+.. _remote_data.enigma:
 
 Enigma
 ======
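A side note on the API shown in the docs above (not part of the diff): the new top-level helper and the older ``DataReader`` string form used by the previous docs are interchangeable, since both construct an ``EcondbReader`` under the hood, as the ``pandas_datareader/data.py`` change below shows. A minimal sketch of the equivalence, reusing the ``RGDPUS`` ticker from the docs example:

    import pandas_datareader as pdr
    import pandas_datareader.data as web

    # Equivalent ways to fetch the same series from Econdb.
    f1 = pdr.get_data_econdb("ticker=RGDPUS")
    f2 = web.DataReader("ticker=RGDPUS", "econdb")
    print(f1.head())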

pandas_datareader/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,7 @@
     get_components_yahoo,
     get_dailysummary_iex,
     get_data_alphavantage,
+    get_data_econdb,
     get_data_enigma,
     get_data_famafrench,
     get_data_fred,
@@ -38,6 +39,7 @@
 __all__ = [
     "__version__",
     "get_components_yahoo",
+    "get_data_econdb",
     "get_data_enigma",
     "get_data_famafrench",
     "get_data_yahoo",

pandas_datareader/compat/__init__.py

Lines changed: 5 additions & 6 deletions
@@ -1,5 +1,4 @@
-from distutils.version import LooseVersion
-from functools import reduce
+from packaging import version
 from io import StringIO
 from urllib.error import HTTPError
 
@@ -8,11 +7,11 @@
 from pandas.io import common as com
 from pandas.testing import assert_frame_equal
 
-PANDAS_VERSION = LooseVersion(pd.__version__)
+PANDAS_VERSION = version.parse(pd.__version__)
 
-PANDAS_0210 = PANDAS_VERSION >= LooseVersion("0.21.0")
-PANDAS_0220 = PANDAS_VERSION >= LooseVersion("0.22.0")
-PANDAS_0230 = PANDAS_VERSION >= LooseVersion("0.23.0")
+PANDAS_0210 = PANDAS_VERSION >= version.parse("0.21.0")
+PANDAS_0220 = PANDAS_VERSION >= version.parse("0.22.0")
+PANDAS_0230 = PANDAS_VERSION >= version.parse("0.23.0")
 
 
 __all__ = [
     "HTTPError",

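The compat change swaps the deprecated ``distutils.version.LooseVersion`` for ``packaging.version``, which implements PEP 440 ordering. A minimal sketch of the resulting idiom (illustrative only, not part of the diff):

    import pandas as pd
    from packaging import version

    PANDAS_VERSION = version.parse(pd.__version__)

    # Version objects compare by PEP 440 rules, so the feature gates
    # read the same as they did with the old LooseVersion checks.
    if PANDAS_VERSION >= version.parse("0.23.0"):
        print("pandas 0.23.0 or newer")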
pandas_datareader/data.py

Lines changed: 5 additions & 0 deletions
@@ -41,6 +41,7 @@
 
 __all__ = [
     "get_components_yahoo",
+    "get_data_econdb",
     "get_data_enigma",
     "get_data_famafrench",
     "get_data_fred",
@@ -80,6 +81,10 @@ def get_data_yahoo(*args, **kwargs):
     return YahooDailyReader(*args, **kwargs).read()
 
 
+def get_data_econdb(*args, **kwargs):
+    return EcondbReader(*args, **kwargs).read()
+
+
 def get_data_enigma(*args, **kwargs):
     return EnigmaReader(*args, **kwargs).read()
 
pandas_datareader/econdb.py

Lines changed: 52 additions & 5 deletions
@@ -4,18 +4,65 @@
 
 
 class EcondbReader(_BaseReader):
-    """Get data for the given name from Econdb."""
+    """
+    Returns DataFrame of historical stock prices from symbol, over date
+    range, start to end.
+
+    .. versionadded:: 0.5.0
+
+    Parameters
+    ----------
+    symbols : string
+        Can be in two different formats:
+        1. 'ticker=<code>' for fetching a single series,
+           where <code> is CPIUS for, e.g. the series at
+           https://www.econdb.com/series/CPIUS/
+        2. 'dataset=<dataset>&<params>' for fetching full
+           or filtered subset of a dataset, like the one at
+           https://www.econdb.com/dataset/ABS_GDP. After choosing the desired filters,
+           the correctly formatted query string can be easily generated
+           from that dataset's page by using the Export function, and choosing Pandas Python3.
+    start : string, int, date, datetime, Timestamp
+        Starting date. Parses many different kind of date
+        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
+    end : string, int, date, datetime, Timestamp
+        Ending date
+    retry_count : int, default 3
+        Number of times to retry query request.
+    pause : int, default 0.1
+        Time, in seconds, to pause between consecutive queries of chunks. If
+        single value given for symbol, represents the pause between retries.
+    session : Session, default None
+        requests.sessions.Session instance to be used
+    """
 
     _URL = "https://www.econdb.com/api/series/"
     _format = None
     _show = "labels"
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(
+        self,
+        symbols,
+        start=None,
+        end=None,
+        retry_count=3,
+        pause=0.1,
+        session=None,
+        freq=None,
+    ):
+        super(EcondbReader, self).__init__(
+            symbols=symbols,
+            start=start,
+            end=end,
+            retry_count=retry_count,
+            pause=pause,
+            session=session,
+            freq=freq,
+        )
         params = dict(s.split("=") for s in self.symbols.split("&"))
-        if "from" in params and not kwargs.get("start"):
+        if "from" in params and not start:
            self.start = pd.to_datetime(params["from"], format="%Y-%m-%d")
-        if "to" in params and not kwargs.get("end"):
+        if "to" in params and not end:
             self.end = pd.to_datetime(params["to"], format="%Y-%m-%d")
 
     @property
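As the new docstring describes, ``symbols`` accepts either a ``ticker=<code>`` string or a ``dataset=<dataset>&<params>`` query string; a ``from``/``to`` pair in the query string also seeds ``start``/``end`` when those are not passed explicitly. A minimal sketch of both forms through the public API (the ticker and query string are the examples already used in the docstring and docs; requires network access to Econdb):

    import pandas_datareader.data as web

    # Form 1: a single series identified by its Econdb ticker.
    cpi = web.DataReader("ticker=CPIUS", "econdb")

    # Form 2: a dataset query string, typically copied from the dataset page
    # via Export -> Pandas Python3.
    gdp = web.DataReader(
        "dataset=NAMQ_10_GDP&v=Geopolitical entity (reporting)"
        "&h=TIME&from=2018-05-01&to=2021-01-01"
        "&GEO=[UK,ES,IT,DE,FR,CH,AT]&NA_ITEM=[B1GQ]"
        "&S_ADJ=[SCA]&UNIT=[CLV10_MNAC]",
        "econdb",
    )
    print(cpi.head())
    print(gdp.head())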
Lines changed: 54 additions & 70 deletions
@@ -1,13 +1,16 @@
 import numpy as np
 import pandas as pd
-from pandas import testing as tm
 import pytest
 
 from pandas_datareader import data as web
 
 pytestmark = pytest.mark.stable
 
 
+def assert_equal(x, y):
+    assert np.isclose(x, y, rtol=1e-2)
+
+
 class TestEcondb(object):
     def test_infer_start_end_from_symbols(self):
         df = web.DataReader(
@@ -23,88 +26,69 @@ def test_infer_start_end_from_symbols(self):
         assert df.index[0].year == 2010
         assert df.index[-1].year == 2018
 
-    @pytest.mark.xfail(reason="Dataset does not exist on Econdb")
-    def test_get_cdh_e_fos(self):
-        # EUROSTAT
-        # Employed doctorate holders in non managerial and non professional
-        # occupations by fields of science (%)
-        df = web.DataReader(
-            "dataset=CDH_E_FOS&GEO=NO,PL,PT,RU&FOS07=FOS1&Y_GRAD=TOTAL",
-            "econdb",
-            start=pd.Timestamp("2005-01-01"),
-            end=pd.Timestamp("2010-01-01"),
-        )
-        assert isinstance(df, pd.DataFrame)
-        assert df.shape == (2, 4)
-
-        # the levels and not returned consistently for econdb
-        names = list(df.columns.names)
-        levels = [lvl.values.tolist() for lvl in list(df.columns.levels)]
-
-        exp_col = pd.MultiIndex.from_product(levels, names=names)
-        exp_idx = pd.DatetimeIndex(["2006-01-01", "2009-01-01"], name="TIME_PERIOD")
-
-        values = np.array([[25.49, np.nan, 39.05, np.nan], [20.38, 25.1, 27.77, 38.1]])
-        expected = pd.DataFrame(values, index=exp_idx, columns=exp_col)
-        tm.assert_frame_equal(df, expected)
-
-    def test_get_tourism(self):
-        # OECD
-        # TOURISM_INBOUND
+    tickers = [
+        f"{sec}{geo}"
+        for sec in ["RGDP", "CPI", "URATE"]
+        for geo in ["US", "UK", "ES", "AR"]
+    ]
 
+    @pytest.mark.parametrize("ticker", tickers)
+    def test_fetch_single_ticker_series(self, ticker):
         df = web.DataReader(
-            "dataset=OE_TOURISM_INBOUND&COUNTRY=JPN,USA&VARIABLE=INB_ARRIVALS_TOTAL",
+            f"ticker={ticker}",
             "econdb",
-            start=pd.Timestamp("2008-01-01"),
-            end=pd.Timestamp("2012-01-01"),
-        )
-        df = df.astype(float)
-        jp = np.array([8351000, 6790000, 8611000, 6219000, 8368000], dtype=float)
-        us = np.array(
-            [175702304, 160507424, 164079728, 167600272, 171320416], dtype=float
+            start=pd.Timestamp("2010-01-01"),
+            end=pd.Timestamp("2013-01-27"),
         )
-        index = pd.date_range("2008-01-01", "2012-01-01", freq="AS", name="TIME_PERIOD")
-
-        # check the values coming back are equal
-        np.testing.assert_array_equal(df.values[:, 0], jp)
-        np.testing.assert_array_equal(df.values[:, 1], us)
-
-        # sometimes the country and variable columns are swapped
-        df = df.swaplevel(2, 1, axis=1)
-        for label, values in [("Japan", jp), ("United States", us)]:
-            expected = pd.Series(
-                values, index=index, name="Total international arrivals"
-            )
-            expected.index.freq = None
-            tm.assert_series_equal(
-                df[label]["Tourism demand surveys"]["Total international arrivals"],
-                expected,
-            )
+        assert df.shape[1] == 1
+        assert isinstance(df.index, pd.DatetimeIndex)
 
-    def test_bls(self):
-        # BLS
-        # CPI
+    def test_single_nonticker_series(self):
         df = web.DataReader(
             "ticker=BLS_CU.CUSR0000SA0.M.US",
             "econdb",
             start=pd.Timestamp("2010-01-01"),
             end=pd.Timestamp("2013-01-27"),
         )
+        assert df.shape[1] == 1
+        assert isinstance(df.index, pd.DatetimeIndex)
+        assert_equal(df.loc["2010-05-01"][0], 217.3)
 
-        assert df.loc["2010-05-01"][0] == 217.3
+    def test_filtered_dataset(self):
+        df = web.DataReader(
+            "&".join(
+                [
+                    "dataset=PRC_HICP_MIDX",
+                    "v=Geopolitical entity (reporting)",
+                    "h=TIME",
+                    "from=2022-03-01",
+                    "to=2022-09-01",
+                    "COICOP=[CP00]",
+                    "FREQ=[M]",
+                    "GEO=[ES,AT,CZ,IT,CH]",
+                    "UNIT=[I15]",
+                ]
+            ),
+            "econdb",
+        )
+        assert df.shape[1] == 5
+        assert isinstance(df.index, pd.DatetimeIndex)
 
     def test_australia_gdp(self):
         df = web.DataReader(
-            "dataset=ABS_GDP&to=2019-09-01&from=1959-09-01&h=TIME&v=Indicator", "econdb"
-        )
-        assert (
-            df.loc[
-                "2017-10-01",
-                (
-                    "GDP per capita: Current prices - National Accounts",
-                    "Seasonally Adjusted",
-                    "AUD",
-                ),
-            ]
-            == 18329
+            "&".join(
+                [
+                    "dataset=ABS_GDP",
+                    "4=[7]",
+                    "6=[11]",
+                    "16=[1267]",
+                    "v=TIME",
+                    "h=Indicator",
+                    "from=2019-10-01",
+                    "to=2022-06-01",
+                    "GEO=[13]",
+                ]
+            ),
+            "econdb",
         )
+        assert_equal(df.squeeze().loc["2020-10-01"], 508603)
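The new ``assert_equal`` helper replaces exact comparisons against live Econdb values with a 1% relative tolerance (``np.isclose(..., rtol=1e-2)``), so small upstream revisions to a series do not break the suite. A quick standalone illustration of that tolerance (the numbers below are made up for the example):

    import numpy as np

    def assert_equal(x, y):
        assert np.isclose(x, y, rtol=1e-2)

    assert_equal(217.3, 217.9)    # within 1% of 217.9 -> passes
    # assert_equal(217.3, 250.0)  # far outside the tolerance -> AssertionError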
