|
| 1 | +from datetime import datetime |
| 2 | +import requests |
| 3 | + |
| 4 | +import numpy as np |
| 5 | +import pandas as pd |
| 6 | +from pandas import DataFrame |
| 7 | +from requests.exceptions import ConnectionError |
| 8 | +import pytest |
| 9 | +import pandas.util.testing as tm |
| 10 | + |
| 11 | +import pandas_datareader.data as web |
| 12 | +from pandas_datareader.data import YahooDailyReader |
| 13 | +from pandas_datareader._utils import RemoteDataError |
| 14 | +from pandas_datareader._testing import skip_on_exception |
| 15 | + |
# Reason string handed to pytest.xfail by the quote tests when a
# requests ConnectionError is raised while fetching data.
XFAIL_REASON = 'Known connection failures on Yahoo when testing!'
| 17 | + |
| 18 | + |
class TestYahoo(object):
    """Tests for the Yahoo entry points of ``pandas_datareader.data``.

    Tests decorated with ``skip_on_exception(RemoteDataError)`` hand
    ``RemoteDataError`` to that helper; the quote tests instead turn a
    ``ConnectionError`` into an expected failure via ``pytest.xfail``.
    """

    @classmethod
    def setup_class(cls):
        """Skip the whole class when ``lxml`` cannot be imported."""
        pytest.importorskip("lxml")

    @skip_on_exception(RemoteDataError)
    def test_yahoo(self):
        """Check the last 'Close' for ticker 'F' rounds to 13.68."""
        # Asserts that yahoo is minimally working
        start = datetime(2010, 1, 1)
        end = datetime(2013, 1, 25)

        assert round(web.DataReader('F', 'yahoo', start, end)['Close'][-1],
                     2) == 13.68

    def test_yahoo_fails(self):
        """Requesting a non-existent ticker must raise."""
        start = datetime(2010, 1, 1)
        end = datetime(2013, 1, 27)

        with pytest.raises(Exception):
            web.DataReader('NON EXISTENT TICKER', 'yahoo', start, end)

    def test_get_quote_series(self):
        """Quotes requested via a Series agree with a single-symbol quote."""
        stringlist = ['GOOG', 'AAPL']
        fields = ['exchange', 'sharesOutstanding', 'epsForward']
        try:
            AAPL = web.get_quote_yahoo('AAPL')
            df = web.get_quote_yahoo(pd.Series(stringlist))
        except ConnectionError:
            # pytest.xfail raises, so the assertions below are not reached.
            pytest.xfail(reason=XFAIL_REASON)
        tm.assert_series_equal(AAPL.iloc[0][fields], df.loc['AAPL'][fields])
        assert sorted(stringlist) == sorted(df.index.values)

    def test_get_quote_string(self):
        """A quote requested by plain string has a non-null 'marketCap'."""
        try:
            df = web.get_quote_yahoo('GOOG')
        except ConnectionError:
            pytest.xfail(reason=XFAIL_REASON)

        assert not pd.isnull(df['marketCap'][0])

    def test_get_quote_stringlist(self):
        """A quote requested by list is indexed by the requested symbols."""
        stringlist = ['GOOG', 'AAPL']
        try:
            df = web.get_quote_yahoo(stringlist)
        except ConnectionError:
            pytest.xfail(reason=XFAIL_REASON)
        assert sorted(stringlist) == sorted(df.index.values)

    def test_get_quote_comma_name(self):
        """A 'longName' containing a comma is returned intact."""
        try:
            df = web.get_quote_yahoo(['RGLD'])
        except ConnectionError:
            pytest.xfail(reason=XFAIL_REASON)
        assert df['longName'][0] == 'Royal Gold, Inc.'

    @pytest.mark.skip('Unreliable test, receive partial '
                      'components back for dow_jones')
    def test_get_components_dow_jones(self):  # pragma: no cover
        """Components of '^DJI' come back as a 30-row DataFrame."""
        df = web.get_components_yahoo('^DJI')  # Dow Jones
        assert isinstance(df, pd.DataFrame)
        assert len(df) == 30

    @pytest.mark.skip('Unreliable test, receive partial '
                      'components back for dax')
    def test_get_components_dax(self):  # pragma: no cover
        """Components of '^GDAXI' have 30 rows and list adidas as ADS.DE."""
        df = web.get_components_yahoo('^GDAXI')  # DAX
        assert isinstance(df, pd.DataFrame)

        assert len(df) == 30
        assert df[df.name.str.contains('adidas', case=False)].index == 'ADS.DE'

    @pytest.mark.skip('Unreliable test, receive partial '
                      'components back for nasdaq_100')
    def test_get_components_nasdaq_100(self):  # pragma: no cover
        """Components of '^NDX' contain known tickers or a placeholder row."""
        # As of 7/12/13, the conditional will
        # return false because the link is invalid

        df = web.get_components_yahoo('^NDX')  # NASDAQ-100
        assert isinstance(df, pd.DataFrame)

        if len(df) > 1:
            # Usual culprits, should be around for a while
            assert 'AAPL' in df.index
            assert 'GOOG' in df.index
            assert 'AMZN' in df.index
        else:
            expected = DataFrame({'exchange': 'N/A', 'name': '@^NDX'},
                                 index=['@^NDX'])
            tm.assert_frame_equal(df, expected)

    @skip_on_exception(RemoteDataError)
    def test_get_data_single_symbol(self):
        """Fetching a single symbol completes without raising."""
        # single symbol
        # http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d
        # just test that we succeed
        web.get_data_yahoo('GOOG')

    @skip_on_exception(RemoteDataError)
    def test_data_with_no_actions(self):
        """Fetching 'TSLA' completes without raising."""
        web.get_data_yahoo('TSLA')

    @skip_on_exception(RemoteDataError)
    def test_get_data_adjust_price(self):
        """``adjust_price=True`` drops 'Adj Close' and scales 'Open'."""
        goog = web.get_data_yahoo('GOOG')
        goog_adj = web.get_data_yahoo('GOOG', adjust_price=True)
        assert 'Adj Close' not in goog_adj.columns
        assert (goog['Open'] * goog_adj['Adj_Ratio']).equals(goog_adj['Open'])

    @skip_on_exception(RemoteDataError)
    def test_get_data_interval(self):
        """Row counts match for 'd', 'w', 'm'; a bad interval raises."""
        # daily interval data
        pan = web.get_data_yahoo('XOM', '2013-01-01',
                                 '2013-12-31', interval='d')
        assert len(pan) == 252

        # weekly interval data
        pan = web.get_data_yahoo('XOM', '2013-01-01',
                                 '2013-12-31', interval='w')
        assert len(pan) == 53

        # monthly interval data
        pan = web.get_data_yahoo('XOM', '2012-12-31',
                                 '2013-12-31', interval='m')
        assert len(pan) == 12

        # test fail on invalid interval
        with pytest.raises(ValueError):
            web.get_data_yahoo('XOM', interval='NOT VALID')

    @skip_on_exception(RemoteDataError)
    def test_get_data_multiple_symbols(self):
        """Fetching a list of symbols completes without raising."""
        # just test that we succeed
        sl = ['AAPL', 'AMZN', 'GOOG']
        web.get_data_yahoo(sl, '2012')

    @pytest.mark.parametrize('adj_pr', [True, False])
    @skip_on_exception(RemoteDataError)
    def test_get_data_null_as_missing_data(self, adj_pr):
        """Price columns are floating point with and without adjustment.

        Parameters
        ----------
        adj_pr : bool
            Forwarded as ``adjust_price``; also selects whether
            'Adj_Ratio' or 'Adj Close' is the extra column checked.
        """
        result = web.get_data_yahoo('SRCE', '20160626', '20160705',
                                    adjust_price=adj_pr)
        # sanity checking
        floats = ['Open', 'High', 'Low', 'Close']
        if adj_pr:
            floats.append('Adj_Ratio')
        else:
            floats.append('Adj Close')

        # Check every column's dtype individually; comparing a dtype to the
        # abstract np.floating with == depends on deprecated NumPy coercion.
        assert all(np.issubdtype(dtype, np.floating)
                   for dtype in result[floats].dtypes)

    @skip_on_exception(RemoteDataError)
    def test_get_data_multiple_symbols_two_dates(self):
        """A three-symbol request yields float closes and a 4x3 'Open' slice."""
        data = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
                                  'JAN-31-12')
        result = data.Close.loc['01-18-12'].T
        assert result.size == 3

        # sanity checking
        # Go through the underlying ndarray so the check does not depend on
        # how the date selection above is shaped.
        assert np.issubdtype(result.values.dtype, np.floating)

        # Only the shape of ``expected`` is compared below, not its values.
        expected = np.array([[18.99, 28.4, 25.18],
                             [18.58, 28.31, 25.13],
                             [19.03, 28.16, 25.52],
                             [18.81, 28.82, 25.87]])
        df = data.Open
        result = df[(df.index >= 'Jan-15-12') & (df.index <= 'Jan-20-12')]
        assert expected.shape == result.shape

    def test_get_date_ret_index(self):
        """``ret_index=True`` adds 'Ret_Index', starting at 1.0 for INTC."""
        pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987',
                                 ret_index=True)
        assert hasattr(pan, 'Ret_Index')

        # 'Ret_Index' is guaranteed by the assert above; only the per-symbol
        # attribute still needs guarding.
        if hasattr(pan.Ret_Index, 'INTC'):
            tstamp = pan.Ret_Index.INTC.first_valid_index()
            result = pan.Ret_Index.loc[tstamp, 'INTC']
            assert result == 1.0

        # sanity checking
        assert np.issubdtype(pan.values.dtype, np.floating)

    def test_get_data_yahoo_actions(self):
        """AAPL action counts and sample values, raw and dividend-adjusted."""
        start = datetime(1990, 1, 1)
        end = datetime(2018, 4, 5)

        actions = web.get_data_yahoo_actions('AAPL', start, end,
                                             adjust_dividends=False)

        assert sum(actions['action'] == 'DIVIDEND') == 47
        assert sum(actions['action'] == 'SPLIT') == 3

        assert actions.loc['2005-02-28', 'action'][0] == 'SPLIT'
        assert actions.loc['2005-02-28', 'value'][0] == 1/2.0

        assert actions.loc['1995-11-21', 'action'][0] == 'DIVIDEND'
        assert round(actions.loc['1995-11-21', 'value'][0], 3) == 0.120

        actions = web.get_data_yahoo_actions('AAPL', start, end,
                                             adjust_dividends=True)

        assert actions.loc['1995-11-21', 'action'][0] == 'DIVIDEND'
        assert round(actions.loc['1995-11-21', 'value'][0], 4) == 0.0043

    def test_get_data_yahoo_actions_invalid_symbol(self):
        """Requesting actions for an unknown ticker raises ``IOError``."""
        start = datetime(1990, 1, 1)
        end = datetime(2000, 4, 5)

        with pytest.raises(IOError):
            web.get_data_yahoo_actions('UNKNOWN TICKER', start, end)

    @skip_on_exception(RemoteDataError)
    def test_yahoo_reader_class(self):
        """``YahooDailyReader`` reads GOOG and keeps the session it is given."""
        r = YahooDailyReader('GOOG')
        df = r.read()

        assert df.Volume.loc['JAN-02-2015'] == 1447600

        session = requests.Session()

        r = YahooDailyReader('GOOG', session=session)
        assert r.session is session

    def test_yahoo_DataReader(self):
        """'yahoo-actions' for AAPL matches expected raw and adjusted frames."""
        start = datetime(2010, 1, 1)
        end = datetime(2015, 5, 9)
        result = web.DataReader('AAPL', 'yahoo-actions', start, end,
                                adjust_dividends=False)

        exp_idx = pd.DatetimeIndex(['2015-05-07', '2015-02-05',
                                    '2014-11-06', '2014-08-07',
                                    '2014-06-09', '2014-05-08',
                                    '2014-02-06', '2013-11-06',
                                    '2013-08-08', '2013-05-09',
                                    '2013-02-07', '2012-11-07',
                                    '2012-08-09'])

        exp = pd.DataFrame({'action': ['DIVIDEND', 'DIVIDEND', 'DIVIDEND',
                                       'DIVIDEND', 'SPLIT', 'DIVIDEND',
                                       'DIVIDEND', 'DIVIDEND',
                                       'DIVIDEND', 'DIVIDEND', 'DIVIDEND',
                                       'DIVIDEND', 'DIVIDEND'],
                            'value': [0.52, 0.47, 0.47, 0.47, 0.14285714,
                                      3.29, 3.05, 3.05, 3.05,
                                      3.05, 2.65, 2.65, 2.65]},
                           index=exp_idx)
        exp.index.name = 'Date'
        tm.assert_frame_equal(result.reindex_like(exp).round(2), exp.round(2))

        result = web.get_data_yahoo_actions('AAPL', start, end,
                                            adjust_dividends=True)

        exp = pd.DataFrame({'action': ['DIVIDEND', 'DIVIDEND', 'DIVIDEND',
                                       'DIVIDEND', 'SPLIT', 'DIVIDEND',
                                       'DIVIDEND', 'DIVIDEND',
                                       'DIVIDEND', 'DIVIDEND', 'DIVIDEND',
                                       'DIVIDEND', 'DIVIDEND'],
                            'value': [0.52, 0.47, 0.47, 0.47, 0.14285714,
                                      0.47, 0.43571, 0.43571, 0.43571,
                                      0.43571, 0.37857, 0.37857, 0.37857]},
                           index=exp_idx)
        exp.index.name = 'Date'
        tm.assert_frame_equal(result.reindex_like(exp).round(5), exp.round(5))

    @skip_on_exception(RemoteDataError)
    def test_yahoo_DataReader_multi(self):
        """'yahoo-actions' for a list of symbols returns a dict."""
        start = datetime(2010, 1, 1)
        end = datetime(2015, 5, 9)
        result = web.DataReader(['AAPL', 'F'], 'yahoo-actions', start, end)
        assert isinstance(result, dict)
0 commit comments