Skip to content

Replace Yahoo iCharts API #331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas_datareader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = version = '0.4.0'
__version__ = version = '0.4.1'

from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo, get_data_enigma, # noqa
get_data_yahoo_actions, get_quote_google, get_quote_yahoo, DataReader, Options) # noqa
25 changes: 21 additions & 4 deletions pandas_datareader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self, symbols, start=None, end=None,
self.retry_count = retry_count
self.pause = pause
self.timeout = timeout
self.pause_multiplier = 1
self.session = _init_session(session, retry_count)

@property
Expand Down Expand Up @@ -85,6 +86,10 @@ def _read_url_as_StringIO(self, url, params=None):
response = self._get_response(url, params=params)
text = self._sanitize_response(response)
out = StringIO()
if len(text) == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, raise a more informative error here (e.g., one that names the service and the requested URL).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the error to include subclass and requested URL, and cleaned up my PR.

service = self.__class__.__name__
raise IOError("{} request returned no data; check URL for invalid "
"inputs: {}".format(service, self.url))
if isinstance(text, compat.binary_type):
out.write(bytes_to_str(text))
else:
Expand All @@ -99,7 +104,7 @@ def _sanitize_response(response):
"""
return response.content

def _get_response(self, url, params=None):
def _get_response(self, url, params=None, headers=None):
""" send raw HTTP request to get requests.Response from the specified url
Parameters
----------
Expand All @@ -110,17 +115,29 @@ def _get_response(self, url, params=None):
"""

# initial attempt + retry
pause = self.pause
for i in range(self.retry_count + 1):
response = self.session.get(url, params=params)
response = self.session.get(url, params=params, headers=headers)
if response.status_code == requests.codes.ok:
return response
time.sleep(self.pause)
time.sleep(pause)

# Increase time between subsequent requests, per subclass.
pause *= self.pause_multiplier
# Get a new breadcrumb if necessary, in case ours is invalidated
if isinstance(params, list) and 'crumb' in params:
params['crumb'] = self._get_crumb(self.retry_count)
if params is not None and len(params) > 0:
url = url + "?" + urlencode(params)
raise RemoteDataError('Unable to read URL: {0}'.format(url))

def _get_crumb(self, *args):
""" To be implemented by subclass.

The retry loop in ``_get_response`` calls this as
``self._get_crumb(self.retry_count)`` and stores the result under
``params['crumb']``. This base version ignores its arguments and always
raises.

Raises
------
NotImplementedError
    Always, in this base implementation.
"""
raise NotImplementedError("Subclass has not implemented method.")

def _read_lines(self, out):
rs = read_csv(out, index_col=0, parse_dates=True, na_values='-')[::-1]
rs = read_csv(out, index_col=0, parse_dates=True,
na_values=('-', 'null'))[::-1]
# Yahoo! Finance sometimes does this awesome thing where they
# return 2 rows for the most recent business day
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
Expand Down
10 changes: 5 additions & 5 deletions pandas_datareader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pandas_datareader.yahoo.daily import YahooDailyReader
from pandas_datareader.yahoo.quotes import YahooQuotesReader
from pandas_datareader.yahoo.actions import YahooActionReader
from pandas_datareader.yahoo.actions import (YahooActionReader, YahooDivReader)
from pandas_datareader.yahoo.components import _get_data as get_components_yahoo # noqa
from pandas_datareader.yahoo.options import Options as YahooOptions
from pandas_datareader.google.options import Options as GoogleOptions
Expand Down Expand Up @@ -121,10 +121,10 @@ def DataReader(name, data_source=None, start=None, end=None,
retry_count=retry_count, pause=pause,
session=session).read()
elif data_source == "yahoo-dividends":
return YahooDailyReader(symbols=name, start=start, end=end,
adjust_price=False, chunksize=25,
retry_count=retry_count, pause=pause,
session=session, interval='v').read()
return YahooDivReader(symbols=name, start=start, end=end,
adjust_price=False, chunksize=25,
retry_count=retry_count, pause=pause,
session=session, interval='d').read()

elif data_source == "google":
return GoogleDailyReader(symbols=name, start=start, end=end,
Expand Down
36 changes: 24 additions & 12 deletions pandas_datareader/tests/yahoo/test_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@

import pandas_datareader.data as web
from pandas_datareader.data import YahooDailyReader
from pandas_datareader._utils import RemoteDataError
from pandas_datareader.yahoo.quotes import _yahoo_codes
from pandas_datareader._testing import skip_on_exception


class TestYahoo(object):
Expand Down Expand Up @@ -87,18 +89,21 @@ def test_get_components_nasdaq_100(self): # pragma: no cover
index=['@^NDX'])
tm.assert_frame_equal(df, expected)

# NOTE(review): skip_on_exception presumably skips the test (rather than
# failing it) when RemoteDataError is raised -- confirm against _testing.
@skip_on_exception(RemoteDataError)
def test_get_data_single_symbol(self):
# Smoke test: request one symbol with no explicit date range and make no
# assertions about the returned data.
# single symbol
# http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d
# just test that we succeed
web.get_data_yahoo('GOOG')

@skip_on_exception(RemoteDataError)
def test_get_data_adjust_price(self):
    """Check that adjust_price=True drops 'Adj Close' and rescales 'Open'.

    The adjusted 'Open' column must equal the unadjusted 'Open' multiplied
    by the 'Adj_Ratio' column of the adjusted frame.
    """
    raw = web.get_data_yahoo('GOOG')
    adjusted = web.get_data_yahoo('GOOG', adjust_price=True)

    # The adjusted frame must no longer carry the 'Adj Close' column.
    assert 'Adj Close' not in adjusted.columns

    expected_open = raw['Open'] * adjusted['Adj_Ratio']
    assert expected_open.equals(adjusted['Open'])

@skip_on_exception(RemoteDataError)
def test_get_data_interval(self):
# daily interval data
pan = web.get_data_yahoo('XOM', '2013-01-01',
Expand All @@ -108,43 +113,42 @@ def test_get_data_interval(self):
# weekly interval data
pan = web.get_data_yahoo('XOM', '2013-01-01',
'2013-12-31', interval='w')
assert len(pan) == 53
assert len(pan) == 52

# montly interval data
pan = web.get_data_yahoo('XOM', '2013-01-01',
# monthly interval data
pan = web.get_data_yahoo('XOM', '2012-12-31',
'2013-12-31', interval='m')
assert len(pan) == 12

# dividend data
pan = web.get_data_yahoo('XOM', '2013-01-01',
'2013-12-31', interval='v')
assert len(pan) == 4

# test fail on invalid interval
with pytest.raises(ValueError):
web.get_data_yahoo('XOM', interval='NOT VALID')

@skip_on_exception(RemoteDataError)
def test_get_data_multiple_symbols(self):
    """Smoke test: requesting several symbols at once must not raise."""
    web.get_data_yahoo(['AAPL', 'AMZN', 'GOOG'], '2012')

@skip_on_exception(RemoteDataError)
def test_get_data_multiple_symbols_two_dates(self):
pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
'JAN-31-12')
result = pan.Close.ix['01-18-12']
assert len(result) == 3
result = pan.Close['01-18-12'].T
assert result.size == 3

# sanity checking
assert np.issubdtype(result.dtype, np.floating)
assert result.dtypes.all() == np.floating

expected = np.array([[18.99, 28.4, 25.18],
[18.58, 28.31, 25.13],
[19.03, 28.16, 25.52],
[18.81, 28.82, 25.87]])
result = pan.Open.ix['Jan-15-12':'Jan-20-12']
df = pan.Open
result = df[(df.index >= 'Jan-15-12') & (df.index <= 'Jan-20-12')]
assert expected.shape == result.shape

@skip_on_exception(RemoteDataError)
def test_get_date_ret_index(self):
pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987',
ret_index=True)
Expand All @@ -158,6 +162,7 @@ def test_get_date_ret_index(self):
# sanity checking
assert np.issubdtype(pan.values.dtype, np.floating)

@skip_on_exception(RemoteDataError)
def test_get_data_yahoo_actions(self):
start = datetime(1990, 1, 1)
end = datetime(2000, 4, 5)
Expand Down Expand Up @@ -191,6 +196,7 @@ def test_yahoo_reader_class(self):
r = YahooDailyReader('GOOG', session=session)
assert r.session is session

@skip_on_exception(RemoteDataError)
def test_yahoo_DataReader(self):
start = datetime(2010, 1, 1)
end = datetime(2015, 5, 9)
Expand All @@ -212,8 +218,14 @@ def test_yahoo_DataReader(self):
0.47, 0.43571, 0.43571, 0.43571,
0.43571, 0.37857, 0.37857, 0.37857]},
index=exp_idx)
exp.index.name = 'Date'

exp = exp.sort_index(axis=1)
result = result.sort_index(axis=1)

tm.assert_frame_equal(result, exp)

@skip_on_exception(RemoteDataError)
def test_yahoo_DataReader_multi(self):
start = datetime(2010, 1, 1)
end = datetime(2015, 5, 9)
Expand Down
96 changes: 44 additions & 52 deletions pandas_datareader/yahoo/actions.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,53 @@
import csv
from pandas import to_datetime, DataFrame
from pandas import (concat, DataFrame)
from pandas_datareader.yahoo.daily import YahooDailyReader

from pandas_datareader.base import _DailyBaseReader


class YahooActionReader(_DailyBaseReader):

class YahooActionReader(YahooDailyReader):
"""
Returns DataFrame of historical corporate actions (dividends and stock
splits) from symbols, over date range, start to end. All dates in the
resulting DataFrame correspond with dividend and stock split ex-dates.
"""
def read(self):
    """Fetch dividends and stock splits and merge them into one DataFrame.

    Dividends are read with ``YahooDivReader`` and splits with
    ``YahooSplitReader``; both are built from this reader's ``symbols``,
    ``start``, ``end``, ``retry_count``, ``pause`` and ``session``.

    When a result is a DataFrame it gets an ``action`` column
    ("DIVIDEND" or "SPLIT") and its data column ('Dividends' or
    'Stock Splits') is renamed to ``value``. Split values written as a
    fraction (e.g. "2/1") are replaced by the reciprocal of that fraction
    (0.5 for "2/1").

    Returns
    -------
    The concatenation of the dividend and split results, sorted by index
    in descending order.
    """
    def _reciprocal_of_split(raw):
        # Parse the fraction by hand instead of calling eval(): the text
        # comes from a remote server, so eval() would execute whatever
        # that server sends. Float division also avoids the Python 2
        # pitfall where 1/eval("2/1") is the integer division 1/2 == 0.
        numerator, _, denominator = str(raw).partition('/')
        # A value without a slash (e.g. "2") is treated as "2/1".
        return float(denominator or 1) / float(numerator)

    # Both sub-readers are configured identically.
    reader_kwargs = dict(symbols=self.symbols,
                         start=self.start,
                         end=self.end,
                         retry_count=self.retry_count,
                         pause=self.pause,
                         session=self.session)

    dividends = YahooDivReader(**reader_kwargs).read()
    # Add a label column so we can combine our two DFs
    if isinstance(dividends, DataFrame):
        dividends["action"] = "DIVIDEND"
        dividends = dividends.rename(columns={'Dividends': 'value'})

    splits = YahooSplitReader(**reader_kwargs).read()
    # Add a label column so we can combine our two DFs
    if isinstance(splits, DataFrame):
        splits["action"] = "SPLIT"
        splits = splits.rename(columns={'Stock Splits': 'value'})
        # Convert fractional-form splits (i.e. "2/1") into conversion
        # ratios, then take the reciprocal.
        splits['value'] = splits['value'].map(_reciprocal_of_split)

    return concat([dividends, splits]).sort_index(ascending=False)


# Reader for Yahoo! dividend data: a YahooDailyReader whose only override is
# the `service` property, which returns 'div'.
class YahooDivReader(YahooDailyReader):

@property
def service(self):
# NOTE(review): presumably consumed by YahooDailyReader to select which
# kind of data to request -- confirm in yahoo/daily.py.
return 'div'


class YahooSplitReader(YahooDailyReader):

@property
def url(self):
return 'http://ichart.finance.yahoo.com/x'

def _get_params(self, symbols=None):
params = {
's': self.symbols,
'a': self.start.month - 1,
'b': self.start.day,
'c': self.start.year,
'd': self.end.month - 1,
'e': self.end.day,
'f': self.end.year,
'g': 'v'
}
return params

def _read_lines(self, out):
actions_index = []
actions_entries = []

for line in csv.reader(out.readlines()):
# Ignore lines that aren't dividends or splits (Yahoo
# add a bunch of irrelevant fields.)
if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
continue

action, date, value = line
if action == 'DIVIDEND':
actions_index.append(to_datetime(date))
actions_entries.append({
'action': action,
'value': float(value)
})
elif action == 'SPLIT' and ':' in value:
# Convert the split ratio to a fraction. For example a
# 4:1 split expressed as a fraction is 1/4 = 0.25.
denominator, numerator = value.split(':', 1)
split_fraction = float(numerator) / float(denominator)

actions_index.append(to_datetime(date))
actions_entries.append({
'action': action,
'value': split_fraction
})

return DataFrame(actions_entries, index=actions_index)
def service(self):
return 'split'
Loading