7
7
import tempfile
8
8
import datetime as dt
9
9
import time
10
+ import csv
10
11
11
12
from collections import defaultdict
12
13
@@ -45,7 +46,7 @@ def DataReader(name, data_source=None, start=None, end=None,
45
46
the name of the dataset. Some data sources (yahoo, google, fred) will
46
47
accept a list of names.
47
48
data_source: str
48
- the data source ("yahoo", "google", "fred", or "ff")
49
+ the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff")
49
50
start : {datetime, None}
50
51
left boundary for range (defaults to 1/1/2010)
51
52
end : {datetime, None}
@@ -57,6 +58,9 @@ def DataReader(name, data_source=None, start=None, end=None,
57
58
# Data from Yahoo! Finance
58
59
gs = DataReader("GS", "yahoo")
59
60
61
+ # Corporate Actions (Dividend and Split Data) from Yahoo! Finance
62
+ gs = DataReader("GS", "yahoo-actions")
63
+
60
64
# Data from Google Finance
61
65
aapl = DataReader("AAPL", "google")
62
66
@@ -75,6 +79,9 @@ def DataReader(name, data_source=None, start=None, end=None,
75
79
return get_data_yahoo (symbols = name , start = start , end = end ,
76
80
adjust_price = False , chunksize = 25 ,
77
81
retry_count = retry_count , pause = pause )
82
+ elif data_source == "yahoo-actions" :
83
+ return get_data_yahoo_actions (symbol = name , start = start , end = end ,
84
+ retry_count = retry_count , pause = pause )
78
85
elif data_source == "google" :
79
86
return get_data_google (symbols = name , start = start , end = end ,
80
87
adjust_price = False , chunksize = 25 ,
@@ -423,6 +430,80 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3,
423
430
return _get_data_from (symbols , start , end , interval , retry_count , pause ,
424
431
adjust_price , ret_index , chunksize , 'yahoo' )
425
432
433
_HISTORICAL_YAHOO_ACTIONS_URL = 'http://ichart.finance.yahoo.com/x?'


def _parse_yahoo_actions(text):
    """Turn the text of a Yahoo! corporate-actions response into a DataFrame.

    Only three-field CSV rows whose first field is 'DIVIDEND' or 'SPLIT' are
    kept; every other row is ignored.
    """
    actions_index = []
    actions_entries = []

    for row in csv.reader(StringIO(text)):
        # Ignore lines that aren't dividends or splits (Yahoo adds a bunch
        # of irrelevant fields).
        if len(row) != 3 or row[0] not in ('DIVIDEND', 'SPLIT'):
            continue

        action, date, value = row
        if action == 'DIVIDEND':
            amount = float(value)
        elif ':' in value:
            # Convert the split ratio to a fraction. For example a 4:1
            # split expressed as a fraction is 1/4 = 0.25.
            denominator, numerator = value.split(':', 1)
            amount = float(numerator) / float(denominator)
        else:
            # A SPLIT row without a ratio carries nothing usable.
            continue

        actions_index.append(to_datetime(date))
        actions_entries.append({'action': action, 'value': amount})

    # Naming the columns keeps the schema stable when no action was found,
    # so callers can always select 'action' and 'value'.
    return DataFrame(actions_entries, index=actions_index,
                     columns=['action', 'value'])


def get_data_yahoo_actions(symbol, start=None, end=None, retry_count=3,
                           pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) for a symbol, over date range, start to end.

    Parameters
    ----------
    symbol : string
        A single stock symbol (ticker).
    start : string, optional
        Starting date, timestamp. Handed to ``_sanitize_dates``, which
        chooses the default when None (documented elsewhere in this module
        as 1/1/2010 -- confirm against ``_sanitize_dates``).
    end : string, optional
        Ending date, timestamp. Same handling as the starting date.
    retry_count : int, default 3
        Number of times to attempt the query request.
    pause : float, default 0.001
        Time, in seconds, slept before every attempt (including the first).

    Returns
    -------
    DataFrame
        Indexed by action date, with columns 'action' ('DIVIDEND' or
        'SPLIT') and 'value' (dividend amount, or the split ratio expressed
        as a fraction, e.g. 0.25 for a 4:1 split). The columns are present
        even when no action is found.

    Raises
    ------
    IOError
        If every attempt fails with one of ``_network_error_classes``.
    """
    start, end = _sanitize_dates(start, end)

    # Months are sent zero-based, hence the ``- 1``. ``g=v`` presumably
    # selects the dividend/split feed -- confirm against the Yahoo! API.
    url = (_HISTORICAL_YAHOO_ACTIONS_URL + 's=%s' % symbol +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v')

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            # Network failure: fall through and use up another attempt.
            pass
        else:
            return _parse_yahoo_actions(bytes_to_str(lines))

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))
506
+
426
507
427
508
def get_data_google (symbols = None , start = None , end = None , retry_count = 3 ,
428
509
pause = 0.001 , adjust_price = False , ret_index = False ,
0 commit comments