7
7
import tempfile
8
8
import datetime as dt
9
9
import time
10
+ import csv
10
11
11
12
from collections import defaultdict
12
13
@@ -45,7 +46,7 @@ def DataReader(name, data_source=None, start=None, end=None,
45
46
the name of the dataset. Some data sources (yahoo, google, fred) will
46
47
accept a list of names.
47
48
data_source: str
48
- the data source ("yahoo", "google", "fred", or "ff")
49
+ the data source ("yahoo", "yahoo-actions", "google", "fred", or "ff")
49
50
start : {datetime, None}
50
51
left boundary for range (defaults to 1/1/2010)
51
52
end : {datetime, None}
@@ -57,6 +58,9 @@ def DataReader(name, data_source=None, start=None, end=None,
57
58
# Data from Yahoo! Finance
58
59
gs = DataReader("GS", "yahoo")
59
60
61
+ # Corporate Actions (Dividend and Split Data) with ex-dates from Yahoo! Finance
62
+ gs = DataReader("GS", "yahoo-actions")
63
+
60
64
# Data from Google Finance
61
65
aapl = DataReader("AAPL", "google")
62
66
@@ -75,6 +79,9 @@ def DataReader(name, data_source=None, start=None, end=None,
75
79
return get_data_yahoo (symbols = name , start = start , end = end ,
76
80
adjust_price = False , chunksize = 25 ,
77
81
retry_count = retry_count , pause = pause )
82
+ elif data_source == "yahoo-actions" :
83
+ return get_data_yahoo_actions (symbol = name , start = start , end = end ,
84
+ retry_count = retry_count , pause = pause )
78
85
elif data_source == "google" :
79
86
return get_data_google (symbols = name , start = start , end = end ,
80
87
adjust_price = False , chunksize = 25 ,
@@ -423,6 +430,81 @@ def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3,
423
430
return _get_data_from (symbols , start , end , interval , retry_count , pause ,
424
431
adjust_price , ret_index , chunksize , 'yahoo' )
425
432
433
_HISTORICAL_YAHOO_ACTIONS_URL = 'http://ichart.finance.yahoo.com/x?'


def _parse_yahoo_actions(text):
    """
    Convert the CSV text of a Yahoo! corporate-actions response into a
    DataFrame with 'action' and 'value' columns, indexed by action date.

    Only three-field rows whose first field is 'DIVIDEND' or 'SPLIT' are
    used; every other row is skipped, as are 'SPLIT' rows whose value does
    not contain a ':'.
    """
    dates = []
    records = []

    for row in csv.reader(StringIO(text)):
        # Yahoo adds a bunch of irrelevant fields; anything that is not a
        # three-field record cannot be a dividend or split row.
        if len(row) != 3:
            continue

        action, date, value = row
        if action == 'DIVIDEND':
            amount = float(value)
        elif action == 'SPLIT' and ':' in value:
            # Convert the split ratio to a fraction. For example a
            # 4:1 split expressed as a fraction is 1/4 = 0.25.
            denominator, numerator = value.split(':', 1)
            amount = float(numerator) / float(denominator)
        else:
            continue

        dates.append(to_datetime(date))
        records.append({'action': action, 'value': amount})

    # Naming the columns explicitly keeps the frame's layout the same even
    # when no dividend or split rows were found.
    return DataFrame(records, index=dates, columns=['action', 'value'])


def get_data_yahoo_actions(symbol, start=None, end=None, retry_count=3,
                           pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) from symbols, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.

    Parameters
    ----------
    symbol : string
        A single stock symbol (ticker).
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to attempt the query request.
    pause : float, default 0.001
        Time, in seconds, slept before each attempt.

    Returns
    -------
    DataFrame
        One row per dividend or split, indexed by date, with columns
        'action' ('DIVIDEND' or 'SPLIT') and 'value' (the dividend amount,
        or the split ratio expressed as a fraction).

    Raises
    ------
    IOError
        If none of the `retry_count` attempts completes without a network
        error.
    """
    start, end = _sanitize_dates(start, end)

    # Months are sent zero-based in the query string.
    url = (_HISTORICAL_YAHOO_ACTIONS_URL +
           's=%s&a=%s&b=%s&c=%s&d=%s&e=%s&f=%s&g=v' % (
               symbol,
               start.month - 1, start.day, start.year,
               end.month - 1, end.day, end.year))

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            # Network failure: fall through to the next attempt.
            continue

        return _parse_yahoo_actions(bytes_to_str(lines))

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))
507
+
426
508
427
509
def get_data_google (symbols = None , start = None , end = None , retry_count = 3 ,
428
510
pause = 0.001 , adjust_price = False , ret_index = False ,
0 commit comments