3
3
4
4
5
5
"""
6
+ import warnings
6
7
7
8
import numpy as np
8
9
import datetime as dt
13
14
from zipfile import ZipFile
14
15
from pandas .util .py3compat import StringIO , BytesIO , bytes_to_str
15
16
16
- from pandas import DataFrame , read_csv , concat
17
+ from pandas import Panel , DataFrame , Series , read_csv , concat
17
18
from pandas .io .parsers import TextParser
18
19
19
20
@@ -54,7 +55,8 @@ def DataReader(name, data_source=None, start=None, end=None,
54
55
start , end = _sanitize_dates (start , end )
55
56
56
57
if (data_source == "yahoo" ):
57
- return get_data_yahoo (name = name , start = start , end = end ,
58
+ return get_data_yahoo (symbols = name , start = start , end = end ,
59
+ adjust_price = False , chunk = 25 ,
58
60
retry_count = retry_count , pause = pause )
59
61
elif (data_source == "fred" ):
60
62
return get_data_fred (name = name , start = start , end = end )
@@ -73,14 +75,27 @@ def _sanitize_dates(start, end):
73
75
return start , end
74
76
75
77
78
def _in_chunks(seq, size):
    """
    Return sequence in 'chunks' of size defined by size

    Parameters
    ----------
    seq : sequence
        Any object supporting ``len()`` and slicing (list, str, Series, ...).
    size : int
        Maximum number of elements per chunk; the last chunk may be shorter.

    Returns
    -------
    generator
        Yields ``seq[pos:pos + size]`` for pos = 0, size, 2 * size, ...

    Raises
    ------
    ValueError
        If ``size`` is 0 (raised by ``range`` when the generator is built).
    """
    # ``range`` instead of the Python 2-only ``xrange`` so the helper also
    # runs on Python 3; the number of chunk offsets is small either way.
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
83
+
84
+
76
85
def get_quote_yahoo (symbols ):
77
86
"""
78
87
Get current yahoo quote
79
88
80
89
Returns a DataFrame
81
90
"""
82
- if not isinstance (symbols , list ):
83
- raise TypeError ("symbols must be a list" )
91
+ if isinstance (symbols , str ):
92
+ sym_list = symbols
93
+ elif not isinstance (symbols , Series ):
94
+ symbols = Series (symbols )
95
+ sym_list = str .join ('+' , symbols )
96
+ else :
97
+ sym_list = str .join ('+' , symbols )
98
+
84
99
# for codes see: http://www.gummy-stuff.org/Yahoo-data.htm
85
100
codes = {'symbol' : 's' , 'last' : 'l1' , 'change_pct' : 'p2' , 'PE' : 'r' ,
86
101
'time' : 't1' , 'short_ratio' : 's7' }
@@ -90,7 +105,7 @@ def get_quote_yahoo(symbols):
90
105
data = dict (zip (codes .keys (), [[] for i in range (len (codes ))]))
91
106
92
107
urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
93
- str . join ( '+' , symbols ) , request )
108
+ sym_list , request )
94
109
95
110
try :
96
111
lines = urllib2 .urlopen (urlStr ).readlines ()
@@ -117,19 +132,20 @@ def get_quote_yahoo(symbols):
117
132
return DataFrame (data , index = idx )
118
133
119
134
120
- def get_data_yahoo (name = None , start = None , end = None , retry_count = 3 , pause = 0 ):
135
+ def _get_hist_yahoo (name = None , start = None , end = None , retry_count = 3 ,
136
+ pause = 0 ):
121
137
"""
122
138
Get historical data for the given name from yahoo.
123
139
Date format is datetime
124
140
125
141
Returns a DataFrame.
126
142
"""
127
- start , end = _sanitize_dates (start , end )
128
-
129
143
if (name is None ):
130
- print "Need to provide a name"
144
+ warnings . warn ( "Need to provide a name." )
131
145
return None
132
146
147
+ start , end = _sanitize_dates (start , end )
148
+
133
149
yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
134
150
135
151
url = yahoo_URL + 's=%s' % name + \
@@ -162,6 +178,154 @@ def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
162
178
"return a 200 for url %s" % (pause , url ))
163
179
164
180
181
def _adjust_prices(hist_data, price_list=('Open', 'High', 'Low', 'Close')):
    """
    Return modified DataFrame or Panel with adjusted prices based on
    'Adj Close' price. Adds 'Adj_Ratio' column.

    Parameters
    ----------
    hist_data : DataFrame or Panel
        Must contain 'Adj Close' and 'Close' plus every item named in
        ``price_list``. It is not modified; a copy is returned.
    price_list : sequence of str, default ('Open', 'High', 'Low', 'Close')
        Items to multiply by the adjustment ratio.

    Returns
    -------
    Copy of ``hist_data`` with the listed items scaled by
    'Adj Close' / 'Close', an added 'Adj_Ratio' item and 'Adj Close' removed.
    """
    # Immutable tuple default: a list default would be shared across calls.
    adj_ratio = hist_data['Adj Close'] / hist_data['Close']

    data = hist_data.copy()
    for item in price_list:
        # Read from the untouched input so each item is scaled exactly once.
        data[item] = hist_data[item] * adj_ratio
    data['Adj_Ratio'] = adj_ratio
    del data['Adj Close']
    return data
194
+
195
+
196
def _calc_return_index(price_df):
    """
    Return a returns index from a input price df or series.

    Parameters
    ----------
    price_df : DataFrame or Series of prices

    Returns
    -------
    Same type as the input: the cumulative product of (1 + period-over-period
    percentage change), with the first row set to 1 as the base of the index.
    """
    ret_index = price_df.pct_change().add(1).cumprod()
    # The first row has no prior period, so pct_change leaves it NaN; anchor
    # it at 1. Use positional ``iloc`` rather than ``ix``: on an integer
    # index ``ix[0]`` is a *label* lookup and may hit the wrong row or add one.
    if len(ret_index) > 0:
        ret_index.iloc[0] = 1
    return ret_index
204
+
205
+
206
def get_components_yahoo(idx_sym='^DJI'):
    """
    Returns DataFrame containing list of component information for index
    represented in idx_sym from yahoo. Includes component symbol
    (ticker), exchange, and name.

    Parameters
    ----------
    idx_sym : str
        Index symbol, default '^DJI' (Dow Jones Industrial Average)
        Examples:
        '^NYA' (NYSE Composite)
        '^IXIC' (NASDAQ Composite)

        See: http://finance.yahoo.com/indices for other index symbols

    Returns
    -------
    idx_df : DataFrame
        Indexed by 'ticker', with columns 'name' and 'exchange'.
    """
    stats = 'snx'
    # URL of form:
    # http://download.finance.yahoo.com/d/quotes.csv?s=@%5EIXIC&f=snxl1d1t1c1ohgv
    url = 'http://download.finance.yahoo.com/d/quotes.csv?s={0}&f={1}' \
          '&e=.csv&h={2}'

    idx_mod = idx_sym.replace('^', '@%5E')

    idx_df = DataFrame()
    mask = [True]
    comp_idx = 1

    # LOOP across component index structure,
    # break when no new components are found
    while (True in mask):
        urlStr = url.format(idx_mod, stats, comp_idx)

        # Close the connection even if reading fails.
        response = urllib.urlopen(urlStr)
        try:
            raw = response.read()
        finally:
            response.close()

        # Rows are split on '"\r\n"' and fields on '","'; the outermost
        # quotes are stripped first so no field keeps a stray '"'.
        lines = raw.strip().strip('"').split('"\r\n"')
        lines = [line.strip().split('","') for line in lines]

        temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange'])
        temp_df = temp_df.drop_duplicates()
        temp_df = temp_df.set_index('ticker')
        # True for tickers not collected by an earlier request.
        mask = ~temp_df.index.isin(idx_df.index)

        # The next request starts 50 entries further on.
        comp_idx = comp_idx + 50
        idx_df = idx_df.append(temp_df[mask])

    return idx_df
257
+
258
+
259
def get_data_yahoo(symbols=None, start=None, end=None, adjust_price=False,
                   ret_index=False, chunk=25, pause=0, **kwargs):
    """
    Returns DataFrame/Panel of historical stock prices from symbols, over date
    range, start to end. To avoid being penalized by Yahoo! Finance servers,
    pauses between downloading 'chunks' of symbols can be specified.

    Parameters
    ----------
    symbols : string, list-like object (list, tuple, Series), DataFrame
        Single stock symbol (ticker), list-like object of symbols or
        DataFrame with index containing stock symbols
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    adjust_price : bool, default False
        Adjust all prices in hist_data ('Open', 'High', 'Low', 'Close') via
        'Adj Close' price. Adds 'Adj_Ratio' column and drops 'Adj Close'.
    ret_index: bool, default False
        Include a simple return index 'Ret_Index' in hist_data.
    chunk : int, default 25
        Number of symbols to download consecutively before initiating pause.
    pause : int, default 0
        Time, in seconds, to pause between consecutive chunks.
    **kwargs: additional arguments to pass to _get_hist_yahoo

    Returns
    -------
    hist_data : DataFrame (str) or Panel (list-like object, DataFrame)
    """
    def dl_mult_symbols(symbols):
        # Download each symbol; one bad symbol must not abort the batch.
        stocks = {}
        for sym_group in _in_chunks(symbols, chunk):
            for sym in sym_group:
                try:
                    stocks[sym] = _get_hist_yahoo(name=sym, start=start,
                                                  end=end, **kwargs)
                except Exception:
                    # ``Exception`` rather than a bare except so that
                    # KeyboardInterrupt/SystemExit still stop the download.
                    # %-formatting keeps the warning from raising itself
                    # when ``sym`` is not a string.
                    warnings.warn('Error with sym: %s... skipping.' % (sym,))

            # Pause once per chunk, not once per symbol.
            time.sleep(pause)

        return Panel(stocks).swapaxes('items', 'minor')

    # If a scalar (single symbol, e.g. 'GOOG')
    if isinstance(symbols, (str, int)):
        hist_data = _get_hist_yahoo(symbols, start=start, end=end, **kwargs)
    # Multiple symbols
    elif isinstance(symbols, DataFrame):
        hist_data = dl_mult_symbols(Series(symbols.index))
    else:  # Guess a Series
        try:
            hist_data = dl_mult_symbols(symbols)
        except TypeError:
            # Input could not be chunked as given; retry wrapped in a Series.
            hist_data = dl_mult_symbols(Series(symbols))

    # Compute the return index first: it needs 'Adj Close', which the price
    # adjustment below removes.
    if ret_index:
        hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
    if adjust_price:
        hist_data = _adjust_prices(hist_data)

    return hist_data
327
+
328
+
165
329
def get_data_fred (name = None , start = dt .datetime (2010 , 1 , 1 ),
166
330
end = dt .datetime .today ()):
167
331
"""
0 commit comments