Skip to content

Commit f09a03c

Browse files
committed
Merge pull request #3985 from cpcloud/network-socket-errno-thing
BUG/TST: catch socket.error in py2/3.2 and ConnectionError in py3.3
2 parents 3b28ece + 8fffb1b commit f09a03c

File tree

3 files changed

+65
-58
lines changed

3 files changed

+65
-58
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ pandas 0.12
283283
- Fixed flattening of columns when renaming MultiIndex columns DataFrame (:issue:`4004`)
284284
- Fix ``Series.clip`` for datetime series. NA/NaN threshold values will now throw ValueError (:issue:`3996`)
285285
- Fixed insertion issue into DataFrame, after rename (:issue:`4032`)
286+
- Fixed testing issue where too many sockets were open thus leading to a
287+
connection reset issue (:issue:`3982`, :issue:`3985`)
286288

287289

288290
pandas 0.11.0

doc/source/v0.12.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,8 @@ Bug Fixes
422422
explicitly checking a website as a proxy for seeing if there is network
423423
connectivity. Plus, new ``optional_args`` decorator factory for decorators.
424424
(:issue:`3910`, :issue:`3914`)
425+
- Fixed testing issue where too many sockets were open thus leading to a
426+
connection reset issue (:issue:`3982`, :issue:`3985`)
425427

426428
See the :ref:`full release notes
427429
<release>` or issue tracker

pandas/io/data.py

+61-58
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
import numpy as np
99
import datetime as dt
1010
import urllib
11-
import urllib2
1211
import time
13-
import warnings
12+
from contextlib import closing
13+
from urllib2 import urlopen
1414

1515
from zipfile import ZipFile
1616
from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str
@@ -109,10 +109,11 @@ def get_quote_yahoo(symbols):
109109

110110
data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
111111

112-
urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
113-
sym_list, request)
112+
url_str = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (sym_list,
113+
request)
114114

115-
lines = urllib2.urlopen(urlStr).readlines()
115+
with closing(urlopen(url_str)) as url:
116+
lines = url.readlines()
116117

117118
for line in lines:
118119
fields = line.decode('utf-8').strip().split(',')
@@ -151,29 +152,29 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
151152

152153
yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
153154

154-
url = yahoo_URL + 's=%s' % sym + \
155-
'&a=%s' % (start.month - 1) + \
156-
'&b=%s' % start.day + \
157-
'&c=%s' % start.year + \
158-
'&d=%s' % (end.month - 1) + \
159-
'&e=%s' % end.day + \
160-
'&f=%s' % end.year + \
161-
'&g=d' + \
162-
'&ignore=.csv'
163-
164-
for _ in range(retry_count):
165-
resp = urllib2.urlopen(url)
166-
if resp.code == 200:
167-
lines = resp.read()
168-
rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
169-
parse_dates=True)[::-1]
170-
171-
# Yahoo! Finance sometimes does this awesome thing where they
172-
# return 2 rows for the most recent business day
173-
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
174-
rs = rs[:-1]
175-
176-
return rs
155+
url = (yahoo_URL + 's=%s' % sym +
156+
'&a=%s' % (start.month - 1) +
157+
'&b=%s' % start.day +
158+
'&c=%s' % start.year +
159+
'&d=%s' % (end.month - 1) +
160+
'&e=%s' % end.day +
161+
'&f=%s' % end.year +
162+
'&g=d' +
163+
'&ignore=.csv')
164+
165+
for _ in xrange(retry_count):
166+
with closing(urlopen(url)) as resp:
167+
if resp.code == 200:
168+
lines = resp.read()
169+
rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
170+
parse_dates=True)[::-1]
171+
172+
# Yahoo! Finance sometimes does this awesome thing where they
173+
# return 2 rows for the most recent business day
174+
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
175+
rs = rs[:-1]
176+
177+
return rs
177178

178179
time.sleep(pause)
179180

@@ -198,17 +199,19 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
198199
google_URL = 'http://www.google.com/finance/historical?'
199200

200201
# www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
201-
url = google_URL + urllib.urlencode({"q": sym, \
202-
"startdate": start.strftime('%b %d, %Y'), \
203-
"enddate": end.strftime('%b %d, %Y'), "output": "csv" })
204-
for _ in range(retry_count):
205-
resp = urllib2.urlopen(url)
206-
if resp.code == 200:
207-
lines = resp.read()
208-
rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
209-
parse_dates=True)[::-1]
210-
211-
return rs
202+
url = google_URL + urllib.urlencode({"q": sym,
203+
"startdate": start.strftime('%b %d, '
204+
'%Y'),
205+
"enddate": end.strftime('%b %d, %Y'),
206+
"output": "csv"})
207+
for _ in xrange(retry_count):
208+
with closing(urlopen(url)) as resp:
209+
if resp.code == 200:
210+
lines = resp.read()
211+
rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
212+
parse_dates=True)[::-1]
213+
214+
return rs
212215

213216
time.sleep(pause)
214217

@@ -280,19 +283,19 @@ def get_components_yahoo(idx_sym):
280283
'&e=.csv&h={2}'
281284

282285
idx_mod = idx_sym.replace('^', '@%5E')
283-
urlStr = url.format(idx_mod, stats, 1)
286+
url_str = url.format(idx_mod, stats, 1)
284287

285288
idx_df = DataFrame()
286289
mask = [True]
287290
comp_idx = 1
288291

289-
#LOOP across component index structure,
290-
#break when no new components are found
291-
while (True in mask):
292-
urlStr = url.format(idx_mod, stats, comp_idx)
293-
lines = (urllib.urlopen(urlStr).read().decode('utf-8').strip().
294-
strip('"').split('"\r\n"'))
295-
292+
# LOOP across component index structure,
293+
# break when no new components are found
294+
while True in mask:
295+
url_str = url.format(idx_mod, stats, comp_idx)
296+
with closing(urlopen(url_str)) as resp:
297+
raw = resp.read()
298+
lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"')
296299
lines = [line.strip().split('","') for line in lines]
297300

298301
temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange'])
@@ -468,11 +471,11 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
468471

469472
fred_URL = "http://research.stlouisfed.org/fred2/series/"
470473

471-
url = fred_URL + '%s' % name + \
472-
'/downloaddata/%s' % name + '.csv'
473-
data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True,
474-
header=None, skiprows=1, names=["DATE", name],
475-
na_values='.')
474+
url = fred_URL + '%s' % name + '/downloaddata/%s' % name + '.csv'
475+
with closing(urlopen(url)) as resp:
476+
data = read_csv(resp, index_col=0, parse_dates=True,
477+
header=None, skiprows=1, names=["DATE", name],
478+
na_values='.')
476479
try:
477480
return data.truncate(start, end)
478481
except KeyError:
@@ -489,9 +492,9 @@ def get_data_famafrench(name, start=None, end=None):
489492
# path of zip files
490493
zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
491494

492-
url = urllib.urlopen(zipFileURL + name + ".zip")
493-
zipfile = ZipFile(StringIO(url.read()))
494-
data = zipfile.open(name + ".txt").readlines()
495+
with closing(urlopen(zipFileURL + name + ".zip")) as url:
496+
with closing(ZipFile(StringIO(url.read()))) as zf:
497+
data = zf.read(name + ".txt").splitlines()
495498

496499
file_edges = np.where(np.array([len(d) for d in data]) == 2)[0]
497500

@@ -638,7 +641,7 @@ def get_options_data(self, month=None, year=None, expiry=None):
638641
url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
639642
'+Options')
640643

641-
parsed = parse(urllib2.urlopen(url))
644+
parsed = parse(url)
642645
doc = parsed.getroot()
643646
tables = doc.findall('.//table')
644647
calls = tables[9]
@@ -709,7 +712,7 @@ def get_call_data(self, month=None, year=None, expiry=None):
709712
url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
710713
'+Options')
711714

712-
parsed = parse(urllib2.urlopen(url))
715+
parsed = parse(url)
713716
doc = parsed.getroot()
714717
tables = doc.findall('.//table')
715718
calls = tables[9]
@@ -777,7 +780,7 @@ def get_put_data(self, month=None, year=None, expiry=None):
777780
url = str('http://finance.yahoo.com/q/op?s=' + self.symbol +
778781
'+Options')
779782

780-
parsed = parse(urllib2.urlopen(url))
783+
parsed = parse(url)
781784
doc = parsed.getroot()
782785
tables = doc.findall('.//table')
783786
puts = tables[13]

0 commit comments

Comments
 (0)