Skip to content

Commit 6f4a22f

Browse files
authored
Replace Yahoo iCharts API (#355)
* Replaces ichart API for single-stock price exports from Yahoo; multi-stock still failing (#315). Restores change necessary for Google to function. Fixes yahoo-actions per API endpoint update. Updates regex pattern for crumbs, per heyuhere's review. 'v' is no longer a valid interval value. Fixes Yahoo intervals and cases where the Yahoo cookie could not be extracted. Implements multi-stock queries to Yahoo API. Adds a pause multiplier for subsequent requests from Yahoo, error handling for empty data requests, and updates some test logic for pandas 0.20.x (notably ix deprecation). Checks object type before checking contents. Replacement regex logic for additional Yahoo cookie token structures, per chris-b1. Improved error handling and refactoring test to best practices, per jreback review. Closes #315.
* better error handling after get_response
* docs for 0.5.0
* remove deprecation warnings: ix usage -> loc/iloc
* remove deprecation warnings: sortlevel usage -> sort_index
* more resource cleaning
* update changelog
* skip enigma tests locally if no api key
* fixturize test_yahoo_options
* add in test.sh script
* CI: use trusty dist
1 parent b4d0a17 commit 6f4a22f

26 files changed

+450
-272
lines changed

.travis.yml

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,30 @@ language: python
55
matrix:
66
fast_finish: true
77
include:
8-
- os: linux
8+
- dist: trusty
99
env:
1010
- PYTHON=2.7 PANDAS=0.17.1
11-
- os: linux
11+
- dist: trusty
1212
env:
1313
- PYTHON=2.7 PANDAS=0.19.2
14-
- os: linux
14+
- dist: trusty
1515
env:
1616
- PYTHON=3.5 PANDAS=0.17.1
17-
- os: linux
17+
- dist: trusty
1818
env:
1919
- PYTHON=3.5 PANDAS=0.18.1
20-
- os: linux
21-
env:
22-
- PYTHON=3.5 PANDAS=0.19.2
23-
- os: linux
20+
- dist: trusty
2421
env:
2522
- PYTHON=3.6 PANDAS=0.19.2
26-
- os: linux
23+
- dist: trusty
2724
env:
28-
- PYTHON=3.6 PANDAS=0.20.1
25+
- PYTHON=3.6 PANDAS=0.20.2
2926
# In allow failures
30-
- os: linux
27+
- dist: trusty
3128
env:
3229
- PYTHON=3.6 PANDAS="MASTER"
3330
allow_failures:
34-
- os: linux
31+
- dist: trusty
3532
env:
3633
- PYTHON=3.6 PANDAS="MASTER"
3734

@@ -70,7 +67,7 @@ install:
7067

7168
script:
7269
- export ENIGMA_API_KEY=$ENIGMA_API_KEY
73-
- pytest -s --cov=pandas_datareader --cov-report xml:/tmp/cov-datareader.xml --junitxml=/tmp/datareader.xml
70+
- pytest -s -r xX --cov=pandas_datareader --cov-report xml:/tmp/cov-datareader.xml --junitxml=/tmp/datareader.xml
7471
- flake8 --version
7572
- flake8 pandas_datareader
7673

docs/source/whatsnew.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ What's New
1818

1919
These are new features and improvements of note in each release.
2020

21+
.. include:: whatsnew/v0.5.0.txt
2122
.. include:: whatsnew/v0.4.0.txt
2223
.. include:: whatsnew/v0.3.0.txt
2324
.. include:: whatsnew/v0.2.1.txt

docs/source/whatsnew/v0.5.0.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
.. _whatsnew_050:
2+
3+
v0.5.0 (July ??, 2017)
4+
----------------------
5+
6+
This is a major release from 0.4.0.
7+
8+
Highlights include:
9+
10+
.. contents:: What's new in v0.5.0
11+
:local:
12+
:backlinks: none
13+
14+
.. _whatsnew_050.enhancements:
15+
16+
Enhancements
17+
~~~~~~~~~~~~
18+
19+
- Compatibility with the new Yahoo API (:issue:`315`)
20+
21+
.. _whatsnew_050.bug_fixes:
22+
23+
Bug Fixes
24+
~~~~~~~~~
25+
26+
- Web sessions are now closed properly after use (:issue:`355`)
27+
- Handle commas in large price quotes (:issue:`345`)
28+
- Test suite fixes for test_get_options_data (:issue:`352`)
29+
- Test suite fixes for test_wdi_download (:issue:`350`)

pandas_datareader/base.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,13 @@ def __init__(self, symbols, start=None, end=None,
5353
self.retry_count = retry_count
5454
self.pause = pause
5555
self.timeout = timeout
56+
self.pause_multiplier = 1
5657
self.session = _init_session(session, retry_count)
5758

59+
def close(self):
60+
""" close my session """
61+
self.session.close()
62+
5863
@property
5964
def url(self):
6065
# must be overridden in subclass
@@ -66,7 +71,10 @@ def params(self):
6671

6772
def read(self):
6873
""" read data """
69-
return self._read_one_data(self.url, self.params)
74+
try:
75+
return self._read_one_data(self.url, self.params)
76+
finally:
77+
self.close()
7078

7179
def _read_one_data(self, url, params):
7280
""" read one data from specified URL """
@@ -85,6 +93,10 @@ def _read_url_as_StringIO(self, url, params=None):
8593
response = self._get_response(url, params=params)
8694
text = self._sanitize_response(response)
8795
out = StringIO()
96+
if len(text) == 0:
97+
service = self.__class__.__name__
98+
raise IOError("{} request returned no data; check URL for invalid "
99+
"inputs: {}".format(service, self.url))
88100
if isinstance(text, compat.binary_type):
89101
out.write(bytes_to_str(text))
90102
else:
@@ -99,7 +111,7 @@ def _sanitize_response(response):
99111
"""
100112
return response.content
101113

102-
def _get_response(self, url, params=None):
114+
def _get_response(self, url, params=None, headers=None):
103115
""" send raw HTTP request to get requests.Response from the specified url
104116
Parameters
105117
----------
@@ -110,15 +122,29 @@ def _get_response(self, url, params=None):
110122
"""
111123

112124
# initial attempt + retry
125+
pause = self.pause
113126
for i in range(self.retry_count + 1):
114-
response = self.session.get(url, params=params)
127+
response = self.session.get(url,
128+
params=params,
129+
headers=headers)
115130
if response.status_code == requests.codes.ok:
116131
return response
117-
time.sleep(self.pause)
132+
133+
time.sleep(pause)
134+
135+
# Increase time between subsequent requests, per subclass.
136+
pause *= self.pause_multiplier
137+
# Get a new breadcrumb if necessary, in case ours is invalidated
138+
if isinstance(params, list) and 'crumb' in params:
139+
params['crumb'] = self._get_crumb(self.retry_count)
118140
if params is not None and len(params) > 0:
119141
url = url + "?" + urlencode(params)
120142
raise RemoteDataError('Unable to read URL: {0}'.format(url))
121143

144+
def _get_crumb(self, *args):
145+
""" To be implemented by subclass """
146+
raise NotImplementedError("Subclass has not implemented method.")
147+
122148
def _read_lines(self, out):
123149
rs = read_csv(out, index_col=0, parse_dates=True, na_values='-')[::-1]
124150
# Yahoo! Finance sometimes does this awesome thing where they

pandas_datareader/data.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pandas_datareader.yahoo.daily import YahooDailyReader
1111
from pandas_datareader.yahoo.quotes import YahooQuotesReader
12-
from pandas_datareader.yahoo.actions import YahooActionReader
12+
from pandas_datareader.yahoo.actions import (YahooActionReader, YahooDivReader)
1313
from pandas_datareader.yahoo.components import _get_data as get_components_yahoo # noqa
1414
from pandas_datareader.yahoo.options import Options as YahooOptions
1515
from pandas_datareader.google.options import Options as GoogleOptions
@@ -121,10 +121,10 @@ def DataReader(name, data_source=None, start=None, end=None,
121121
retry_count=retry_count, pause=pause,
122122
session=session).read()
123123
elif data_source == "yahoo-dividends":
124-
return YahooDailyReader(symbols=name, start=start, end=end,
125-
adjust_price=False, chunksize=25,
126-
retry_count=retry_count, pause=pause,
127-
session=session, interval='v').read()
124+
return YahooDivReader(symbols=name, start=start, end=end,
125+
adjust_price=False, chunksize=25,
126+
retry_count=retry_count, pause=pause,
127+
session=session, interval='d').read()
128128

129129
elif data_source == "google":
130130
return GoogleDailyReader(symbols=name, start=start, end=end,

pandas_datareader/edgar.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,12 @@ def _fix_old_file_paths(self, path):
150150
return path
151151

152152
def read(self):
153+
try:
154+
return self._read()
155+
finally:
156+
self.close()
157+
158+
def _read(self):
153159
try:
154160
self._sec_ftp_session = FTP(_SEC_FTP, timeout=self.timeout)
155161
self._sec_ftp_session.login()

pandas_datareader/enigma.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ def extract_export_url(self, delay=10, max_attempts=10):
100100
return resp.json()[self.export_key]
101101

102102
def read(self):
103+
try:
104+
return self._read()
105+
finally:
106+
self.close()
107+
108+
def _read(self):
103109
export_gzipped_req = self._request(self.extract_export_url())
104110
decompressed_data = self._decompress_export(
105111
export_gzipped_req.content).decode("utf-8")

pandas_datareader/fred.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ def url(self):
2020
return "http://research.stlouisfed.org/fred2/series/"
2121

2222
def read(self):
23+
try:
24+
return self._read()
25+
finally:
26+
self.close()
27+
28+
def _read(self):
2329
if not is_list_like(self.symbols):
2430
names = [self.symbols]
2531
else:

pandas_datareader/tests/google/test_google.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,21 +83,21 @@ def assert_option_result(self, df):
8383

8484
def test_get_quote_string(self):
8585
df = web.get_quote_google('GOOG')
86-
assert df.ix['GOOG']['last'] > 0.0
86+
assert df.loc['GOOG', 'last'] > 0.0
8787
tm.assert_index_equal(df.index, pd.Index(['GOOG']))
8888
self.assert_option_result(df)
8989

9090
def test_get_quote_stringlist(self):
9191
df = web.get_quote_google(['GOOG', 'AMZN', 'GOOG'])
92-
assert_series_equal(df.ix[0], df.ix[2])
92+
assert_series_equal(df.iloc[0], df.iloc[2])
9393
tm.assert_index_equal(df.index, pd.Index(['GOOG', 'AMZN', 'GOOG']))
9494
self.assert_option_result(df)
9595

9696
def test_get_goog_volume(self):
9797
for locale in self.locales:
9898
with tm.set_locale(locale):
9999
df = web.get_data_google('GOOG').sort_index()
100-
assert df.Volume.ix['JAN-02-2015'] == 1446662
100+
assert df.Volume.loc['JAN-02-2015'] == 1446662
101101

102102
def test_get_multi1(self):
103103
for locale in self.locales:
@@ -130,13 +130,13 @@ def test_get_multi2(self):
130130
with tm.set_locale(locale):
131131
pan = web.get_data_google(['GE', 'MSFT', 'INTC'],
132132
'JAN-01-12', 'JAN-31-12')
133-
result = pan.Close.ix['01-18-12']
133+
result = pan.Close.loc['01-18-12']
134134
assert_n_failed_equals_n_null_columns(w, result)
135135

136136
# sanity checking
137137

138138
assert np.issubdtype(result.dtype, np.floating)
139-
result = pan.Open.ix['Jan-15-12':'Jan-20-12']
139+
result = pan.Open.loc['Jan-15-12':'Jan-20-12']
140140

141141
assert result.shape == (4, 3)
142142
assert_n_failed_equals_n_null_columns(w, result)
@@ -158,7 +158,7 @@ def test_unicode_date(self):
158158
def test_google_reader_class(self):
159159
r = GoogleDailyReader('GOOG')
160160
df = r.read()
161-
assert df.Volume.ix['JAN-02-2015'] == 1446662
161+
assert df.Volume.loc['JAN-02-2015'] == 1446662
162162

163163
session = requests.Session()
164164
r = GoogleDailyReader('GOOG', session=session)

pandas_datareader/tests/google/test_options.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
import pandas.util.testing as tm
88

99
import pandas_datareader.data as web
10-
from pandas_datareader._utils import RemoteDataError
11-
from pandas_datareader._testing import skip_on_exception
1210

1311

1412
class TestGoogleOptions(object):
@@ -18,7 +16,6 @@ def setup_class(cls):
1816
# GOOG has monthlies
1917
cls.goog = web.Options('GOOG', 'google')
2018

21-
@skip_on_exception(RemoteDataError)
2219
def test_get_options_data(self):
2320
options = self.goog.get_options_data(expiry=self.goog.expiry_dates[0])
2421

@@ -46,7 +43,6 @@ def test_get_options_data_yearmonth(self):
4643
with pytest.raises(NotImplementedError):
4744
self.goog.get_options_data(month=1, year=2016)
4845

49-
@skip_on_exception(RemoteDataError)
5046
def test_expiry_dates(self):
5147
dates = self.goog.expiry_dates
5248

pandas_datareader/tests/io/test_jsdmx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def test_land_use(self):
5050
result = read_jsdmx(os.path.join(self.dirpath, 'jsdmx',
5151
'land_use.json'))
5252
assert isinstance(result, pd.DataFrame)
53-
result = result.ix['2010':'2011']
53+
result = result.loc['2010':'2011']
5454

5555
exp_col = pd.MultiIndex.from_product([
5656
['Japan', 'United States'],

pandas_datareader/tests/test_data.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import pandas_datareader.data as web
55

66
from pandas import DataFrame
7+
from pandas_datareader._utils import RemoteDataError
8+
from pandas_datareader._testing import skip_on_exception
79
from pandas_datareader.data import DataReader
810

911

@@ -15,10 +17,13 @@ def test_options_source_warning(self):
1517

1618

1719
class TestDataReader(object):
20+
21+
@skip_on_exception(RemoteDataError)
1822
def test_read_yahoo(self):
1923
gs = DataReader("GS", "yahoo")
2024
assert isinstance(gs, DataFrame)
2125

26+
@pytest.mark.xfail(RemoteDataError, reason="failing after #355")
2227
def test_read_yahoo_dividends(self):
2328
gs = DataReader("GS", "yahoo-dividends")
2429
assert isinstance(gs, DataFrame)

pandas_datareader/tests/test_edgar.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
import pandas.util.testing as tm
55
import pandas_datareader.data as web
66

7-
from pandas_datareader._utils import RemoteDataError
8-
from pandas_datareader._testing import skip_on_exception
9-
107

118
class TestEdgarIndex(object):
129

@@ -16,7 +13,6 @@ def setup_class(cls):
1613
# Disabling tests until re-write.
1714
pytest.skip("Disabling tests until re-write.")
1815

19-
@skip_on_exception(RemoteDataError)
2016
def test_get_full_index(self):
2117
ed = web.DataReader('full', 'edgar-index')
2218
assert len(ed) > 1000
@@ -25,7 +21,6 @@ def test_get_full_index(self):
2521
'date_filed', 'filename'], dtype='object')
2622
tm.assert_index_equal(ed.columns, exp_columns)
2723

28-
@skip_on_exception(RemoteDataError)
2924
def test_get_nonzip_index_and_low_date(self):
3025
ed = web.DataReader('daily', 'edgar-index', '1994-06-30',
3126
'1994-07-02')
@@ -38,14 +33,12 @@ def test_get_nonzip_index_and_low_date(self):
3833
'filename'], dtype='object')
3934
tm.assert_index_equal(ed.columns, exp_columns)
4035

41-
@skip_on_exception(RemoteDataError)
4236
def test_get_gz_index_and_no_date(self):
4337
# TODO: Rewrite, as this test causes Travis to timeout.
4438

4539
ed = web.DataReader('daily', 'edgar-index')
4640
assert len(ed) > 2000
4741

48-
@skip_on_exception(RemoteDataError)
4942
def test_6_digit_date(self):
5043
ed = web.DataReader('daily', 'edgar-index', start='1998-05-18',
5144
end='1998-05-18')

0 commit comments

Comments
 (0)