 import numpy as np
 import datetime as dt
 import urllib
-import urllib2
 import time
-import warnings
+from contextlib import closing
+from urllib2 import urlopen
 
 from zipfile import ZipFile
 from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str
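Every hunk below makes essentially the same change: a bare `urllib2.urlopen(...)` call is wrapped in `contextlib.closing`, so the HTTP response is closed even when an exception interrupts the read. A minimal sketch of the pattern, with a placeholder URL:

    from contextlib import closing
    from urllib2 import urlopen  # Python 2; urllib.request.urlopen on Python 3

    # closing() turns any object with a close() method into a context
    # manager, so the response socket is released even if read() raises.
    with closing(urlopen('http://example.com/data.csv')) as resp:
        raw = resp.read()

Python 2's `urlopen` responses do not support the `with` statement on their own, which is why `closing` is needed here rather than `with urlopen(...) as resp`.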
@@ -109,10 +109,11 @@ def get_quote_yahoo(symbols):
 
     data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
 
-    urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
-        sym_list, request)
+    url_str = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (sym_list,
+                                                                   request)
 
-    lines = urllib2.urlopen(urlStr).readlines()
+    with closing(urlopen(url_str)) as url:
+        lines = url.readlines()
 
     for line in lines:
         fields = line.decode('utf-8').strip().split(',')
@@ -151,29 +152,29 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
 
     yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
 
-    url = yahoo_URL + 's=%s' % sym + \
-        '&a=%s' % (start.month - 1) + \
-        '&b=%s' % start.day + \
-        '&c=%s' % start.year + \
-        '&d=%s' % (end.month - 1) + \
-        '&e=%s' % end.day + \
-        '&f=%s' % end.year + \
-        '&g=d' + \
-        '&ignore=.csv'
-
-    for _ in range(retry_count):
-        resp = urllib2.urlopen(url)
-        if resp.code == 200:
-            lines = resp.read()
-            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
-                          parse_dates=True)[::-1]
-
-            # Yahoo! Finance sometimes does this awesome thing where they
-            # return 2 rows for the most recent business day
-            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
-                rs = rs[:-1]
-
-            return rs
+    url = (yahoo_URL + 's=%s' % sym +
+           '&a=%s' % (start.month - 1) +
+           '&b=%s' % start.day +
+           '&c=%s' % start.year +
+           '&d=%s' % (end.month - 1) +
+           '&e=%s' % end.day +
+           '&f=%s' % end.year +
+           '&g=d' +
+           '&ignore=.csv')
+
+    for _ in xrange(retry_count):
+        with closing(urlopen(url)) as resp:
+            if resp.code == 200:
+                lines = resp.read()
+                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+                              parse_dates=True)[::-1]
+
+                # Yahoo! Finance sometimes does this awesome thing where they
+                # return 2 rows for the most recent business day
+                if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
+                    rs = rs[:-1]
+
+                return rs
 
         time.sleep(pause)
 
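The fetcher tries the URL up to `retry_count` times, sleeping `pause` seconds between attempts and returning as soon as a 200 response parses (`xrange` is Python 2's lazy `range`); the same shape recurs in `_get_hist_google` below. A stripped-down sketch of that retry shape, with a hypothetical helper name:

    import time
    from contextlib import closing
    from urllib2 import urlopen

    def fetch_with_retry(url, retry_count=3, pause=0.001):
        # Hypothetical helper mirroring the loop above: retry on
        # non-200 responses, sleeping between attempts.
        for _ in range(retry_count):
            with closing(urlopen(url)) as resp:
                if resp.code == 200:
                    return resp.read()
            time.sleep(pause)
        raise IOError('no data fetched for %s after %d tries' % (url, retry_count))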
@@ -198,17 +199,19 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
     google_URL = 'http://www.google.com/finance/historical?'
 
     # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
-    url = google_URL + urllib.urlencode({"q": sym, \
-                                         "startdate": start.strftime('%b %d, %Y'), \
-                                         "enddate": end.strftime('%b %d, %Y'), "output": "csv"})
-    for _ in range(retry_count):
-        resp = urllib2.urlopen(url)
-        if resp.code == 200:
-            lines = resp.read()
-            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
-                          parse_dates=True)[::-1]
-
-            return rs
+    url = google_URL + urllib.urlencode({"q": sym,
+                                         "startdate": start.strftime('%b %d, '
+                                                                     '%Y'),
+                                         "enddate": end.strftime('%b %d, %Y'),
+                                         "output": "csv"})
+    for _ in xrange(retry_count):
+        with closing(urlopen(url)) as resp:
+            if resp.code == 200:
+                lines = resp.read()
+                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
+                              parse_dates=True)[::-1]
+
+                return rs
 
         time.sleep(pause)
 
@@ -280,19 +283,19 @@ def get_components_yahoo(idx_sym):
           '&e=.csv&h={2}'
 
     idx_mod = idx_sym.replace('^', '@%5E')
-    urlStr = url.format(idx_mod, stats, 1)
+    url_str = url.format(idx_mod, stats, 1)
 
     idx_df = DataFrame()
     mask = [True]
     comp_idx = 1
 
-    #LOOP across component index structure,
-    #break when no new components are found
-    while (True in mask):
-        urlStr = url.format(idx_mod, stats, comp_idx)
-        lines = (urllib.urlopen(urlStr).read().decode('utf-8').strip().
-                 strip('"').split('"\r\n"'))
-
+    # LOOP across component index structure,
+    # break when no new components are found
+    while True in mask:
+        url_str = url.format(idx_mod, stats, comp_idx)
+        with closing(urlopen(url_str)) as resp:
+            raw = resp.read()
+        lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
         lines = [line.strip().split('","') for line in lines]
 
         temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange'])
@@ -468,11 +471,11 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
 
     fred_URL = "http://research.stlouisfed.org/fred2/series/"
 
-    url = fred_URL + '%s' % name + \
-        '/downloaddata/%s' % name + '.csv'
-    data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True,
-                    header=None, skiprows=1, names=["DATE", name],
-                    na_values='.')
+    url = fred_URL + '%s' % name + '/downloaddata/%s' % name + '.csv'
+    with closing(urlopen(url)) as resp:
+        data = read_csv(resp, index_col=0, parse_dates=True,
+                        header=None, skiprows=1, names=["DATE", name],
+                        na_values='.')
     try:
         return data.truncate(start, end)
     except KeyError:
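`read_csv` accepts any open file-like object, so the response can be passed in directly; the one catch is that the read has to finish before the `with` block exits and the socket closes. A minimal sketch, with a placeholder URL:

    from contextlib import closing
    from urllib2 import urlopen
    from pandas import read_csv

    # The DataFrame must be built while resp is still open.
    with closing(urlopen('http://example.com/series.csv')) as resp:
        data = read_csv(resp, index_col=0, parse_dates=True)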
@@ -489,9 +492,9 @@ def get_data_famafrench(name, start=None, end=None):
     # path of zip files
     zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
 
-    url = urllib.urlopen(zipFileURL + name + ".zip")
-    zipfile = ZipFile(StringIO(url.read()))
-    data = zipfile.open(name + ".txt").readlines()
+    with closing(urlopen(zipFileURL + name + ".zip")) as url:
+        with closing(ZipFile(StringIO(url.read()))) as zf:
+            data = zf.read(name + ".txt").splitlines()
 
     file_edges = np.where(np.array([len(d) for d in data]) == 2)[0]
 
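Two details in this hunk beyond the `closing` wrappers: `ZipFile.read(name)` returns the member's bytes directly, so no inner file handle is left open (the old `ZipFile.open(name).readlines()` left one to the garbage collector), and `closing` also covers older Pythons where `ZipFile` is not usable as a context manager on its own. A sketch of the nested pattern, with a placeholder URL and member name:

    from contextlib import closing
    from urllib2 import urlopen
    from StringIO import StringIO
    from zipfile import ZipFile

    # Download a zip into memory, then read one member as a list of lines.
    with closing(urlopen('http://example.com/archive.zip')) as resp:
        with closing(ZipFile(StringIO(resp.read()))) as zf:
            lines = zf.read('member.txt').splitlines()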
@@ -638,7 +641,7 @@ def get_options_data(self, month=None, year=None, expiry=None):
         url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
                   '+Options')
 
-        parsed = parse(urllib2.urlopen(url))
+        parsed = parse(url)
         doc = parsed.getroot()
         tables = doc.findall('.//table')
         calls = tables[9]
@@ -709,7 +712,7 @@ def get_call_data(self, month=None, year=None, expiry=None):
         url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
                   '+Options')
 
-        parsed = parse(urllib2.urlopen(url))
+        parsed = parse(url)
         doc = parsed.getroot()
         tables = doc.findall('.//table')
         calls = tables[9]
@@ -777,7 +780,7 @@ def get_put_data(self, month=None, year=None, expiry=None):
         url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
                   '+Options')
 
-        parsed = parse(urllib2.urlopen(url))
+        parsed = parse(url)
         doc = parsed.getroot()
         tables = doc.findall('.//table')
         puts = tables[13]
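The three options hunks take a different route: rather than wrapping `urlopen`, they drop it. `parse` here (presumably lxml.html's `parse`, as used elsewhere in this module) accepts a filename, file object, or URL, so handing it the URL string lets lxml open and close the connection itself. A minimal sketch, with a placeholder ticker:

    from lxml.html import parse

    # lxml fetches the URL and manages the connection internally.
    doc = parse('http://finance.yahoo.com/q/op?s=AAPL+Options').getroot()
    tables = doc.findall('.//table')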