Skip to content

Fixing econdb tests #650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Aug 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ docs/build
.idea
*.iml
*~
env/
.pytest_cache/
.vscode/
20 changes: 11 additions & 9 deletions pandas_datareader/econdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,20 @@ def read(self):
results = requests.get(self.url).json()['results']
df = pd.DataFrame({'dates': []}).set_index('dates')

if self._show == 'labels':
def show_func(x): return x.split(':')[1]
elif self._show == 'codes':
def show_func(x): return x.split(':')[0]

for entry in results:
head = entry['additional_metadata']
series = (pd.DataFrame(entry['data'])[['dates', 'values']]
.set_index('dates'))
if self._show == 'labels':
def show_func(x): return x.split(':')[1]
elif self._show == 'codes':
def show_func(x): return x.split(':')[0]

series.columns = pd.MultiIndex.from_tuples(
[[show_func(x) for x in head.values()]],
names=[show_func(x) for x in head.keys()])

head = entry['additional_metadata']
if head != "": # this additional metadata is not blank
series.columns = pd.MultiIndex.from_tuples(
[[show_func(x) for x in head.values()]],
names=[show_func(x) for x in head.keys()])

if not df.empty:
df = df.join(series, how='outer')
Expand Down
2 changes: 1 addition & 1 deletion pandas_datareader/nasdaq_trader.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ def get_nasdaq_symbols(retry_count=3, timeout=30, pause=None):
retry_count = -1
except RemoteDataError:
# retry on any exception
retry_count -= 1
if retry_count <= 0:
raise
else:
retry_count -= 1
time.sleep(pause)

return _ticker_cache
16 changes: 9 additions & 7 deletions pandas_datareader/tests/test_econdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,11 @@ def test_get_cdh_e_fos(self):
assert isinstance(df, pd.DataFrame)
assert df.shape == (2, 4)

df = df['Natural sciences']['Annual'][
['Norway', 'Poland', 'Portugal', 'Russia']]
# the levels and not returned consistently for econdb
names = list(df.columns.names)
levels = [lvl.values.tolist() for lvl in list(df.columns.levels)]

exp_col = pd.MultiIndex.from_product(
[['Norway', 'Poland', 'Portugal', 'Russia'],
['Percentage'], ['Total']],
names=['Geopolitical entity (reporting)', 'Unit of measure',
'Year of graduation'])
exp_col = pd.MultiIndex.from_product(levels, names=names)
exp_idx = pd.DatetimeIndex(['2006-01-01', '2009-01-01'],
name='TIME_PERIOD')

Expand All @@ -49,6 +46,11 @@ def test_get_tourism(self):
171320408], dtype=float)
index = pd.date_range('2008-01-01', '2012-01-01', freq='AS',
name='TIME_PERIOD')

# sometimes the country and variable columns are swapped
lvl1 = df.columns.levels[0][0]
if lvl1 == "Total international arrivals":
df = df.swaplevel(0, 1, axis=1)
for label, values in [('Japan', jp), ('United States', us)]:
expected = pd.Series(values, index=index,
name='Total international arrivals')
Expand Down
32 changes: 15 additions & 17 deletions pandas_datareader/tests/test_eurostat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,25 @@

class TestEurostat(object):

def test_get_cdh_e_fos(self):
# Employed doctorate holders in non managerial and non professional
# occupations by fields of science (%)
df = web.DataReader('cdh_e_fos', 'eurostat',
start=pd.Timestamp('2005-01-01'),
def test_get_ert_h_eur_a(self):
# Former euro area national currencies vs. euro/ECU
# annual data (ert_h_eur_a)
df = web.DataReader('ert_h_eur_a', 'eurostat',
start=pd.Timestamp('2009-01-01'),
end=pd.Timestamp('2010-01-01'))

assert isinstance(df, pd.DataFrame)
assert df.shape == (2, 336)

df = df['Percentage']['Total']['Natural sciences']
df = df[['Norway', 'Poland', 'Portugal', 'Russia']]
header = df.columns.levels[0][0]
currencies = ["Italian lira", "Lithuanian litas"]
df = df[header]
df = df["Average"][currencies]

exp_col = pd.MultiIndex.from_product([['Norway', 'Poland', 'Portugal',
'Russia'], ['Annual']],
names=['GEO', 'FREQ'])
exp_idx = pd.DatetimeIndex(['2006-01-01', '2009-01-01'],
exp_col = pd.MultiIndex.from_product([currencies, ['Annual']],
names=['CURRENCY', 'FREQ'])
exp_idx = pd.DatetimeIndex(['2009-01-01', '2010-01-01'],
name='TIME_PERIOD')

values = np.array([[25.49, np.nan, 39.05, np.nan],
[20.38, 25.1, 27.77, 38.1]])
values = np.array([[1936.27, 3.4528],
[1936.27, 3.4528]])
expected = pd.DataFrame(values, index=exp_idx, columns=exp_col)
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -72,7 +70,7 @@ def test_get_nrg_pc_202(self):
end=pd.Timestamp('2013-01-01'))

name = ('All taxes and levies included',
'Gigajoules (gross calorific value = GCV)',
'Gigajoules (gross calorific value - GCV)',
'Euro',
'Band D1 : Consumption < 20 GJ',
'Natural gas', 'Denmark', 'Semi-annual')
Expand Down
2 changes: 1 addition & 1 deletion pandas_datareader/tests/test_fred.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_fred_multi(self): # pragma: no cover

received = web.DataReader(names, "fred", start, end).head(1)

expected = DataFrame([[217.488, 99.68746, 220.633]], columns=names,
expected = DataFrame([[217.488, 91.712409, 220.633]], columns=names,
index=[pd.Timestamp('2010-01-01 00:00:00')])
expected.index.rename('DATE', inplace=True)
tm.assert_frame_equal(received, expected, check_less_precise=True)
Expand Down
3 changes: 3 additions & 0 deletions pandas_datareader/tests/test_nasdaq.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import pandas_datareader.data as web
from pandas_datareader._utils import RemoteDataError
from pandas_datareader._testing import skip_on_exception


class TestNasdaqSymbols(object):

@skip_on_exception(RemoteDataError)
def test_get_symbols(self):
symbols = web.DataReader('symbols', 'nasdaq')
assert 'IBM' in symbols.index
2 changes: 2 additions & 0 deletions pandas_datareader/tests/test_tiingo.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def test_tiingo_no_api_key(symbols):
TiingoMetaDataReader(symbols=symbols)


@pytest.mark.skipif(pd.__version__ == "0.19.2",
reason='pandas 0.19.2 does not like this file format')
def test_tiingo_stock_symbols():
sym = get_tiingo_symbols()
assert isinstance(sym, pd.DataFrame)
2 changes: 1 addition & 1 deletion pandas_datareader/tests/test_wb.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_wdi_download(self):
('Mexico', '2004'): 7000.0,
('Mexico', '2003'): 7000.0,
('United States', '2004'): 42000.0,
('United States', '2003'): 40000.0}}
('United States', '2003'): 39000.0}}
expected = pd.DataFrame(expected)
expected = expected.sort_index()

Expand Down
18 changes: 10 additions & 8 deletions pandas_datareader/tests/yahoo/test_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def test_get_data_adjust_price(self):
assert 'Adj Close' not in goog_adj.columns
assert (goog['Open'] * goog_adj['Adj_Ratio']).equals(goog_adj['Open'])

@skip_on_exception(RemoteDataError)
@pytest.mark.xfail(reason="Yahoo are returning an extra day 31st Dec 2012")
def test_get_data_interval(self):
# daily interval data
pan = web.get_data_yahoo('XOM', '2013-01-01',
Expand Down Expand Up @@ -231,7 +231,7 @@ def test_yahoo_reader_class(self):
r = YahooDailyReader('GOOG')
df = r.read()

assert df.Volume.loc['JAN-02-2015'] == 1447600
assert df.Volume.loc['JAN-02-2015'] == 1447500

session = requests.Session()

Expand All @@ -241,6 +241,7 @@ def test_yahoo_reader_class(self):
def test_yahoo_DataReader(self):
start = datetime(2010, 1, 1)
end = datetime(2015, 5, 9)
# yahoo will adjust for dividends by default
result = web.DataReader('AAPL', 'yahoo-actions', start, end)

exp_idx = pd.DatetimeIndex(['2015-05-07', '2015-02-05',
Expand All @@ -257,26 +258,27 @@ def test_yahoo_DataReader(self):
'DIVIDEND', 'DIVIDEND', 'DIVIDEND',
'DIVIDEND', 'DIVIDEND'],
'value': [0.52, 0.47, 0.47, 0.47, 0.14285714,
3.29, 3.05, 3.05, 3.05,
3.05, 2.65, 2.65, 2.65]},
0.47, 0.43571, 0.43571, 0.43571,
0.43571, 0.37857, 0.37857, 0.37857]},
index=exp_idx)
exp.index.name = 'Date'
tm.assert_frame_equal(result.reindex_like(exp).round(2), exp.round(2))

# where dividends are not adjusted for splits
result = web.get_data_yahoo_actions('AAPL', start, end,
adjust_dividends=True)
adjust_dividends=False)

exp = pd.DataFrame({'action': ['DIVIDEND', 'DIVIDEND', 'DIVIDEND',
'DIVIDEND', 'SPLIT', 'DIVIDEND',
'DIVIDEND', 'DIVIDEND',
'DIVIDEND', 'DIVIDEND', 'DIVIDEND',
'DIVIDEND', 'DIVIDEND'],
'value': [0.52, 0.47, 0.47, 0.47, 0.14285714,
0.47, 0.43571, 0.43571, 0.43571,
0.43571, 0.37857, 0.37857, 0.37857]},
3.29, 3.05, 3.05, 3.05,
3.05, 2.65, 2.65, 2.65]},
index=exp_idx)
exp.index.name = 'Date'
tm.assert_frame_equal(result.reindex_like(exp).round(5), exp.round(5))
tm.assert_frame_equal(result.reindex_like(exp).round(4), exp.round(4))

# test cases with "1/0" split ratio in actions -
# no split, just chnage symbol from POT to NTR
Expand Down
10 changes: 5 additions & 5 deletions pandas_datareader/yahoo/daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ class YahooDailyReader(_DailyBaseReader):
'm' for monthly.
get_actions : bool, default False
If True, adds Dividend and Split columns to dataframe.
adjust_dividends: bool, default false
adjust_dividends: bool, default true
If True, adjusts dividends for splits.
"""

def __init__(self, symbols=None, start=None, end=None, retry_count=3,
pause=0.1, session=None, adjust_price=False,
ret_index=False, chunksize=1, interval='d',
get_actions=False, adjust_dividends=False):
get_actions=False, adjust_dividends=True):
super(YahooDailyReader, self).__init__(symbols=symbols,
start=start, end=end,
retry_count=retry_count,
Expand Down Expand Up @@ -187,11 +187,11 @@ def split_ratio(row):
splits['Splits'] = splits['SplitRatio']
prices = prices.join(splits['Splits'], how='outer')

if 'DIVIDEND' in types and self.adjust_dividends:
# Adjust dividends to deal with splits
if 'DIVIDEND' in types and not self.adjust_dividends:
# dividends are adjusted automatically by Yahoo
adj = prices['Splits'].sort_index(ascending=False).fillna(
1).cumprod()
prices['Dividends'] = prices['Dividends'] * adj
prices['Dividends'] = prices['Dividends'] / adj

return prices

Expand Down