Skip to content

Commit 5979a6c

Browse files
Merge pull request pydata#45 from brotchie/master
ENH: Extended DataReader to fetch Dividends and Stock Splits from Yahoo!
2 parents 4062317 + 0d69076 commit 5979a6c

File tree

3 files changed

+118
-1
lines changed

3 files changed

+118
-1
lines changed

docs/source/remote_data.rst

+14
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ It should be noted, that various sources support different kinds of data, so not
3636
Yahoo! Finance
3737
==============
3838

39+
Historical stock prices from Yahoo! Finance.
40+
3941
.. ipython:: python
4042
4143
import pandas_datareader.data as web
@@ -45,6 +47,18 @@ Yahoo! Finance
4547
f = web.DataReader("F", 'yahoo', start, end)
4648
f.ix['2010-01-04']
4749
50+
Historical corporate actions (Dividends and Stock Splits) with ex-dates from Yahoo! Finance.
51+
52+
.. ipython:: python
53+
54+
import pandas_datareader.data as web
55+
import datetime
56+
57+
start = datetime.datetime(2010, 1, 1)
58+
end = datetime.datetime(2015, 5, 9)
59+
60+
web.DataReader('AAPL', 'yahoo-actions', start, end)
61+
4862
.. _remote_data.yahoo_options:
4963

5064
Yahoo! Finance Options

pandas_datareader/data.py

+83-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import tempfile
88
import datetime as dt
99
import time
10+
import csv
1011

1112
from collections import defaultdict
1213

@@ -45,7 +46,7 @@ def DataReader(name, data_source=None, start=None, end=None,
4546
the name of the dataset. Some data sources (yahoo, google, fred) will
4647
accept a list of names.
4748
data_source: str
48-
the data source ("yahoo", "google", "fred", or "ff")
49+
the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff")
4950
start : {datetime, None}
5051
left boundary for range (defaults to 1/1/2010)
5152
end : {datetime, None}
@@ -57,6 +58,9 @@ def DataReader(name, data_source=None, start=None, end=None,
5758
# Data from Yahoo! Finance
5859
gs = DataReader("GS", "yahoo")
5960
61+
# Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! Finance
62+
gs = DataReader("GS", "yahoo-actions")
63+
6064
# Data from Google Finance
6165
aapl = DataReader("AAPL", "google")
6266
@@ -75,6 +79,9 @@ def DataReader(name, data_source=None, start=None, end=None,
7579
return get_data_yahoo(symbols=name, start=start, end=end,
7680
adjust_price=False, chunksize=25,
7781
retry_count=retry_count, pause=pause)
82+
elif data_source == "yahoo-actions":
83+
return get_data_yahoo_actions(symbol=name, start=start, end=end,
84+
retry_count=retry_count, pause=pause)
7885
elif data_source == "google":
7986
return get_data_google(symbols=name, start=start, end=end,
8087
adjust_price=False, chunksize=25,
@@ -423,6 +430,81 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3,
423430
return _get_data_from(symbols, start, end, interval, retry_count, pause,
424431
adjust_price, ret_index, chunksize, 'yahoo')
425432

433+
_HISTORICAL_YAHOO_ACTIONS_URL = 'http://ichart.finance.yahoo.com/x?'
434+
435+
def get_data_yahoo_actions(symbol, start=None, end=None, retry_count=3,
                           pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) for a symbol, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.

    Parameters
    ----------
    symbol : string
        A single stock symbol (ticker).
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : float, default 0.001
        Time, in seconds, to sleep before each query attempt.

    Returns
    -------
    DataFrame
        Indexed by ex-date, with an 'action' column ('DIVIDEND' or 'SPLIT')
        and a 'value' column (the dividend amount, or the split ratio
        expressed as a fraction). Both columns are present even when no
        actions were found in the date range.

    Raises
    ------
    IOError
        If all ``retry_count`` attempts fail with a network error.
    """

    start, end = _sanitize_dates(start, end)
    # Yahoo! expects zero-based months in the 'a' and 'd' query parameters.
    url = (_HISTORICAL_YAHOO_ACTIONS_URL + 's=%s' % symbol +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v')

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            # Network-level failure: fall through and try again.
            pass
        else:
            return _parse_yahoo_actions(lines)

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))


def _parse_yahoo_actions(raw):
    """Turn the raw CSV payload from Yahoo! into the actions DataFrame."""
    actions_index = []
    actions_entries = []

    for line in csv.reader(StringIO(bytes_to_str(raw))):
        # Ignore lines that aren't dividends or splits (Yahoo! adds a
        # bunch of irrelevant fields).
        if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
            continue

        action, date, value = line
        if action == 'DIVIDEND':
            amount = float(value)
        elif ':' in value:
            # Convert the split ratio to a fraction. For example a
            # 4:1 split expressed as a fraction is 1/4 = 0.25.
            denominator, numerator = value.split(':', 1)
            amount = float(numerator) / float(denominator)
        else:
            # A SPLIT row without a ratio carries no usable value.
            continue

        actions_index.append(to_datetime(date))
        actions_entries.append({'action': action, 'value': amount})

    # Naming the columns explicitly keeps 'action' and 'value' available
    # even when there were no matching rows at all.
    return DataFrame(actions_entries, index=actions_index,
                     columns=['action', 'value'])
507+
426508

427509
def get_data_google(symbols=None, start=None, end=None, retry_count=3,
428510
pause=0.001, adjust_price=False, ret_index=False,

pandas_datareader/tests/test_data.py

+21
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,27 @@ def test_get_date_ret_index(self):
265265
# sanity checking
266266
assert np.issubdtype(pan.values.dtype, np.floating)
267267

268+
def test_get_data_yahoo_actions(self):
    """Spot-check the dividends and splits returned for BHP.AX."""
    start = datetime(1990, 1, 1)
    end = datetime(2000, 4, 5)

    actions = web.get_data_yahoo_actions('BHP.AX', start, end)

    self.assertEqual(sum(actions['action'] == 'DIVIDEND'), 20)
    self.assertEqual(sum(actions['action'] == 'SPLIT'), 1)

    # The values are floats parsed (and, for splits, divided) at runtime,
    # so compare them approximately rather than bit-for-bit.
    self.assertEqual(actions.ix['1995-05-11']['action'][0], 'SPLIT')
    self.assertAlmostEqual(actions.ix['1995-05-11']['value'][0], 1/1.1)

    self.assertEqual(actions.ix['1993-05-10']['action'][0], 'DIVIDEND')
    self.assertAlmostEqual(actions.ix['1993-05-10']['value'][0], 0.3)
282+
283+
def test_get_data_yahoo_actions_invalid_symbol(self):
    """An unknown ticker must surface as an IOError."""
    date_range = (datetime(1990, 1, 1), datetime(2000, 4, 5))

    with self.assertRaises(IOError):
        web.get_data_yahoo_actions('UNKNOWN TICKER', *date_range)
288+
268289

269290
class TestYahooOptions(tm.TestCase):
270291
@classmethod

0 commit comments

Comments
 (0)