Skip to content

Commit 389b022

Browse files
committed
Merge pull request #9072 from alexamici/master
ENH: add interval kwarg to get_data_yahoo issue #9071
2 parents f481283 + f88f15b commit 389b022

File tree

3 files changed

+39
-12
lines changed

3 files changed

+39
-12
lines changed

doc/source/whatsnew/v0.16.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Enhancements
6161
.. _whatsnew_0160.enhancements:
6262

6363
- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`)
64+
- Added an ``interval`` keyword to ``get_data_yahoo`` for selecting the time interval: daily, weekly, monthly, or dividend data (:issue:`9071`)
6465

6566
Performance
6667
~~~~~~~~~~~

pandas/io/data.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def _retry_read_url(url, retry_count, pause, name):
180180
_HISTORICAL_YAHOO_URL = 'http://ichart.finance.yahoo.com/table.csv?'
181181

182182

183-
def _get_hist_yahoo(sym, start, end, retry_count, pause):
183+
def _get_hist_yahoo(sym, start, end, interval, retry_count, pause):
184184
"""
185185
Get historical data for the given name from yahoo.
186186
Date format is datetime
@@ -195,15 +195,15 @@ def _get_hist_yahoo(sym, start, end, retry_count, pause):
195195
'&d=%s' % (end.month - 1) +
196196
'&e=%s' % end.day +
197197
'&f=%s' % end.year +
198-
'&g=d' +
198+
'&g=%s' % interval +
199199
'&ignore=.csv')
200200
return _retry_read_url(url, retry_count, pause, 'Yahoo!')
201201

202202

203203
_HISTORICAL_GOOGLE_URL = 'http://www.google.com/finance/historical?'
204204

205205

206-
def _get_hist_google(sym, start, end, retry_count, pause):
206+
def _get_hist_google(sym, start, end, interval, retry_count, pause):
207207
"""
208208
Get historical data for the given name from google.
209209
Date format is datetime
@@ -314,14 +314,14 @@ def get_components_yahoo(idx_sym):
314314
return idx_df
315315

316316

317-
def _dl_mult_symbols(symbols, start, end, chunksize, retry_count, pause,
317+
def _dl_mult_symbols(symbols, start, end, interval, chunksize, retry_count, pause,
318318
method):
319319
stocks = {}
320320
failed = []
321321
for sym_group in _in_chunks(symbols, chunksize):
322322
for sym in sym_group:
323323
try:
324-
stocks[sym] = method(sym, start, end, retry_count, pause)
324+
stocks[sym] = method(sym, start, end, interval, retry_count, pause)
325325
except IOError:
326326
warnings.warn('Failed to read symbol: {0!r}, replacing with '
327327
'NaN.'.format(sym), SymbolWarning)
@@ -343,20 +343,20 @@ def _dl_mult_symbols(symbols, start, end, chunksize, retry_count, pause,
343343
_source_functions = {'google': _get_hist_google, 'yahoo': _get_hist_yahoo}
344344

345345

346-
def _get_data_from(symbols, start, end, retry_count, pause, adjust_price,
346+
def _get_data_from(symbols, start, end, interval, retry_count, pause, adjust_price,
347347
ret_index, chunksize, source):
348348

349349
src_fn = _source_functions[source]
350350

351351
# If a single symbol, (e.g., 'GOOG')
352352
if isinstance(symbols, (compat.string_types, int)):
353-
hist_data = src_fn(symbols, start, end, retry_count, pause)
353+
hist_data = src_fn(symbols, start, end, interval, retry_count, pause)
354354
# Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
355355
elif isinstance(symbols, DataFrame):
356-
hist_data = _dl_mult_symbols(symbols.index, start, end, chunksize,
356+
hist_data = _dl_mult_symbols(symbols.index, start, end, interval, chunksize,
357357
retry_count, pause, src_fn)
358358
else:
359-
hist_data = _dl_mult_symbols(symbols, start, end, chunksize,
359+
hist_data = _dl_mult_symbols(symbols, start, end, interval, chunksize,
360360
retry_count, pause, src_fn)
361361
if source.lower() == 'yahoo':
362362
if ret_index:
@@ -369,7 +369,7 @@ def _get_data_from(symbols, start, end, retry_count, pause, adjust_price,
369369

370370
def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3,
371371
pause=0.001, adjust_price=False, ret_index=False,
372-
chunksize=25):
372+
chunksize=25, interval='d'):
373373
"""
374374
Returns DataFrame/Panel of historical stock prices from symbols, over date
375375
range, start to end. To avoid being penalized by Yahoo! Finance servers,
@@ -398,12 +398,17 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3,
398398
If True, includes a simple return index 'Ret_Index' in hist_data.
399399
chunksize : int, default 25
400400
Number of symbols to download consecutively before initiating pause.
401+
interval : string, default 'd'
402+
Time interval code, valid values are 'd' for daily, 'w' for weekly,
403+
'm' for monthly and 'v' for dividend.
401404
402405
Returns
403406
-------
404407
hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
405408
"""
406-
return _get_data_from(symbols, start, end, retry_count, pause,
409+
if interval not in ['d', 'w', 'm', 'v']:
410+
raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")
411+
return _get_data_from(symbols, start, end, interval, retry_count, pause,
407412
adjust_price, ret_index, chunksize, 'yahoo')
408413

409414

@@ -437,7 +442,7 @@ def get_data_google(symbols=None, start=None, end=None, retry_count=3,
437442
-------
438443
hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
439444
"""
440-
return _get_data_from(symbols, start, end, retry_count, pause,
445+
return _get_data_from(symbols, start, end, None, retry_count, pause,
441446
adjust_price, ret_index, chunksize, 'google')
442447

443448

pandas/io/tests/test_data.py

+21
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,27 @@ def test_get_data_single_symbol(self):
213213
# just test that we succeed
214214
web.get_data_yahoo('GOOG')
215215

216+
@network
217+
def test_get_data_interval(self):
218+
# daily interval data
219+
pan = web.get_data_yahoo('XOM', '2013-01-01', '2013-12-31', interval='d')
220+
self.assertEqual(len(pan), 252)
221+
222+
# weekly interval data
223+
pan = web.get_data_yahoo('XOM', '2013-01-01', '2013-12-31', interval='w')
224+
self.assertEqual(len(pan), 53)
225+
226+
# monthly interval data
227+
pan = web.get_data_yahoo('XOM', '2013-01-01', '2013-12-31', interval='m')
228+
self.assertEqual(len(pan), 12)
229+
230+
# dividend data
231+
pan = web.get_data_yahoo('XOM', '2013-01-01', '2013-12-31', interval='v')
232+
self.assertEqual(len(pan), 4)
233+
234+
# test fail on invalid interval
235+
self.assertRaises(ValueError, web.get_data_yahoo, 'XOM', interval='NOT VALID')
236+
216237
@network
217238
def test_get_data_multiple_symbols(self):
218239
# just test that we succeed

0 commit comments

Comments
 (0)