Skip to content

Replace Yahoo iCharts API #331

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions pandas_datareader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self, symbols, start=None, end=None,
self.retry_count = retry_count
self.pause = pause
self.timeout = timeout
self.pause_multiplier = 1
self.session = _init_session(session, retry_count)

@property
Expand Down Expand Up @@ -85,6 +86,10 @@ def _read_url_as_StringIO(self, url, params=None):
response = self._get_response(url, params=params)
text = self._sanitize_response(response)
out = StringIO()
if len(text) == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, raise a more informative error (e.g., one that includes the service name and the requested URL).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the error to include subclass and requested URL, and cleaned up my PR.

service = self.__class__.__name__
raise IOError("{} request returned no data; check URL for invalid "
"inputs: {}".format(service, self.url))
if isinstance(text, compat.binary_type):
out.write(bytes_to_str(text))
else:
Expand All @@ -99,7 +104,7 @@ def _sanitize_response(response):
"""
return response.content

def _get_response(self, url, params=None):
def _get_response(self, url, params=None, headers=None):
""" send raw HTTP request to get requests.Response from the specified url
Parameters
----------
Expand All @@ -110,15 +115,26 @@ def _get_response(self, url, params=None):
"""

# initial attempt + retry
pause = self.pause
for i in range(self.retry_count + 1):
response = self.session.get(url, params=params)
response = self.session.get(url, params=params, headers=headers)
if response.status_code == requests.codes.ok:
return response
time.sleep(self.pause)
time.sleep(pause)

# Increase time between subsequent requests, per subclass.
pause *= self.pause_multiplier
# Get a new breadcrumb if necessary, in case ours is invalidated
if isinstance(params, list) and 'crumb' in params:
params['crumb'] = self._get_crumb(self.retry_count)
if params is not None and len(params) > 0:
url = url + "?" + urlencode(params)
raise RemoteDataError('Unable to read URL: {0}'.format(url))

def _get_crumb(self, *args):
""" To be implemented by subclass """
raise NotImplementedError("Subclass has not implemented method.")

def _read_lines(self, out):
rs = read_csv(out, index_col=0, parse_dates=True, na_values='-')[::-1]
# Yahoo! Finance sometimes does this awesome thing where they
Expand Down
10 changes: 5 additions & 5 deletions pandas_datareader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from pandas_datareader.yahoo.daily import YahooDailyReader
from pandas_datareader.yahoo.quotes import YahooQuotesReader
from pandas_datareader.yahoo.actions import YahooActionReader
from pandas_datareader.yahoo.actions import (YahooActionReader, YahooDivReader)
from pandas_datareader.yahoo.components import _get_data as get_components_yahoo # noqa
from pandas_datareader.yahoo.options import Options as YahooOptions
from pandas_datareader.google.options import Options as GoogleOptions
Expand Down Expand Up @@ -121,10 +121,10 @@ def DataReader(name, data_source=None, start=None, end=None,
retry_count=retry_count, pause=pause,
session=session).read()
elif data_source == "yahoo-dividends":
return YahooDailyReader(symbols=name, start=start, end=end,
adjust_price=False, chunksize=25,
retry_count=retry_count, pause=pause,
session=session, interval='v').read()
return YahooDivReader(symbols=name, start=start, end=end,
adjust_price=False, chunksize=25,
retry_count=retry_count, pause=pause,
session=session, interval='d').read()

elif data_source == "google":
return GoogleDailyReader(symbols=name, start=start, end=end,
Expand Down
22 changes: 10 additions & 12 deletions pandas_datareader/tests/yahoo/test_yahoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,18 +108,13 @@ def test_get_data_interval(self):
# weekly interval data
pan = web.get_data_yahoo('XOM', '2013-01-01',
'2013-12-31', interval='w')
assert len(pan) == 53
assert len(pan) == 52

# montly interval data
pan = web.get_data_yahoo('XOM', '2013-01-01',
# monthly interval data
pan = web.get_data_yahoo('XOM', '2012-12-31',
'2013-12-31', interval='m')
assert len(pan) == 12

# dividend data
pan = web.get_data_yahoo('XOM', '2013-01-01',
'2013-12-31', interval='v')
assert len(pan) == 4

# test fail on invalid interval
with pytest.raises(ValueError):
web.get_data_yahoo('XOM', interval='NOT VALID')
Expand All @@ -132,17 +127,18 @@ def test_get_data_multiple_symbols(self):
def test_get_data_multiple_symbols_two_dates(self):
pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
'JAN-31-12')
result = pan.Close.ix['01-18-12']
assert len(result) == 3
result = pan.Close['01-18-12'].T
assert result.size == 3

# sanity checking
assert np.issubdtype(result.dtype, np.floating)
assert result.dtypes.all() == np.floating

expected = np.array([[18.99, 28.4, 25.18],
[18.58, 28.31, 25.13],
[19.03, 28.16, 25.52],
[18.81, 28.82, 25.87]])
result = pan.Open.ix['Jan-15-12':'Jan-20-12']
df = pan.Open
result = df[(df.index >= 'Jan-15-12') & (df.index <= 'Jan-20-12')]
assert expected.shape == result.shape

def test_get_date_ret_index(self):
Expand Down Expand Up @@ -212,6 +208,8 @@ def test_yahoo_DataReader(self):
0.47, 0.43571, 0.43571, 0.43571,
0.43571, 0.37857, 0.37857, 0.37857]},
index=exp_idx)
exp.index.name = 'Date'

tm.assert_frame_equal(result, exp)

def test_yahoo_DataReader_multi(self):
Expand Down
96 changes: 44 additions & 52 deletions pandas_datareader/yahoo/actions.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,53 @@
import csv
from pandas import to_datetime, DataFrame
from pandas import (concat, DataFrame)
from pandas_datareader.yahoo.daily import YahooDailyReader

from pandas_datareader.base import _DailyBaseReader


class YahooActionReader(_DailyBaseReader):

class YahooActionReader(YahooDailyReader):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) from symbols, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.
    """

    @staticmethod
    def _split_to_value(ratio):
        """Turn a split ratio such as ``"2/1"`` into its reciprocal.

        Parameters
        ----------
        ratio : str
            Split ratio written as ``"numerator/denominator"``. A ``":"``
            separator (``"2:1"``) is accepted as well.

        Returns
        -------
        float
            ``denominator / numerator``, e.g. ``0.5`` for ``"2/1"``.

        Raises
        ------
        ValueError
            If ``ratio`` cannot be parsed as a fraction.
        ZeroDivisionError
            If either part of the ratio is zero.
        """
        # Imported here so the class does not depend on a file-level import.
        from fractions import Fraction

        # Parse the text instead of calling eval() on it: the value comes
        # from a remote service, and on Python 2 ``1/eval("2/1")`` is an
        # integer division that yields 0 rather than 0.5.
        return float(1 / Fraction(str(ratio).replace(':', '/')))

    def read(self):
        """Read dividends and splits and combine them into one object.

        Both a ``YahooDivReader`` and a ``YahooSplitReader`` are run with
        this reader's symbols, date range, retry settings and session.
        When a result is a DataFrame it is labelled through an ``action``
        column (``"DIVIDEND"`` or ``"SPLIT"``) and its data column is
        renamed to ``value``; split ratios are converted to reciprocals.

        Returns
        -------
        The concatenation of both results, sorted by index in descending
        order.
        """
        dividends = YahooDivReader(symbols=self.symbols,
                                   start=self.start,
                                   end=self.end,
                                   retry_count=self.retry_count,
                                   pause=self.pause,
                                   session=self.session).read()
        # Add a label column so we can combine our two DFs
        if isinstance(dividends, DataFrame):
            dividends["action"] = "DIVIDEND"
            dividends = dividends.rename(columns={'Dividends': 'value'})

        splits = YahooSplitReader(symbols=self.symbols,
                                  start=self.start,
                                  end=self.end,
                                  retry_count=self.retry_count,
                                  pause=self.pause,
                                  session=self.session).read()
        # Add a label column so we can combine our two DFs
        if isinstance(splits, DataFrame):
            splits["action"] = "SPLIT"
            splits = splits.rename(columns={'Stock Splits': 'value'})
            # Converts fractional form splits (i.e. "2/1") into conversion
            # ratios, then takes the reciprocal. Mapping over the column
            # (rather than a row-wise apply) also copes with an empty frame.
            splits['value'] = splits['value'].map(self._split_to_value)

        output = concat([dividends, splits]).sort_index(ascending=False)

        return output


class YahooDivReader(YahooDailyReader):
    """Variant of ``YahooDailyReader`` whose ``service`` is ``'div'``."""

    @property
    def service(self):
        """Service identifier used by this reader: ``'div'``."""
        return 'div'


class YahooSplitReader(YahooDailyReader):

@property
def url(self):
return 'http://ichart.finance.yahoo.com/x'

def _get_params(self, symbols=None):
params = {
's': self.symbols,
'a': self.start.month - 1,
'b': self.start.day,
'c': self.start.year,
'd': self.end.month - 1,
'e': self.end.day,
'f': self.end.year,
'g': 'v'
}
return params

def _read_lines(self, out):
actions_index = []
actions_entries = []

for line in csv.reader(out.readlines()):
# Ignore lines that aren't dividends or splits (Yahoo
# add a bunch of irrelevant fields.)
if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
continue

action, date, value = line
if action == 'DIVIDEND':
actions_index.append(to_datetime(date))
actions_entries.append({
'action': action,
'value': float(value)
})
elif action == 'SPLIT' and ':' in value:
# Convert the split ratio to a fraction. For example a
# 4:1 split expressed as a fraction is 1/4 = 0.25.
denominator, numerator = value.split(':', 1)
split_fraction = float(numerator) / float(denominator)

actions_index.append(to_datetime(date))
actions_entries.append({
'action': action,
'value': split_fraction
})

return DataFrame(actions_entries, index=actions_index)
def service(self):
return 'split'
112 changes: 95 additions & 17 deletions pandas_datareader/yahoo/daily.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
from pandas_datareader.base import _DailyBaseReader
import re
import time
import warnings
import numpy as np
from pandas import Panel
from pandas_datareader.base import (_DailyBaseReader, _in_chunks)
from pandas_datareader._utils import (RemoteDataError, SymbolWarning)


class YahooDailyReader(_DailyBaseReader):
Expand Down Expand Up @@ -39,36 +45,66 @@ class YahooDailyReader(_DailyBaseReader):
"""

def __init__(self, symbols=None, start=None, end=None, retry_count=3,
pause=0.001, session=None, adjust_price=False,
pause=0.35, session=None, adjust_price=False,
ret_index=False, chunksize=25, interval='d'):
super(YahooDailyReader, self).__init__(symbols=symbols,
start=start, end=end,
retry_count=retry_count,
pause=pause, session=session,
chunksize=chunksize)
# Ladder up the wait time between subsequent requests to improve
# probability of a successful retry
self.pause_multiplier = 2.5

self.headers = {
'Connection': 'keep-alive',
'Expires': str(-1),
'Upgrade-Insecure-Requests': str(1),
# Google Chrome:
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' # noqa
}

self.adjust_price = adjust_price
self.ret_index = ret_index

if interval not in ['d', 'w', 'm', 'v']:
raise ValueError("Invalid interval: valid values are "
"'d', 'w', 'm' and 'v'")
self.interval = interval

if self.interval not in ['d', 'wk', 'mo', 'm', 'w']:
raise ValueError("Invalid interval: valid values are 'd', 'wk' and 'mo'. 'm' and 'w' have been implemented for " # noqa
"backward compatibility. 'v' has been moved to the yahoo-actions or yahoo-dividends APIs.") # noqa
elif self.interval in ['m', 'mo']:
self.pdinterval = 'm'
self.interval = 'mo'
elif self.interval in ['w', 'wk']:
self.pdinterval = 'w'
self.interval = 'wk'

self.interval = '1' + self.interval
self.crumb = self._get_crumb(retry_count)

@property
def service(self):
return 'history'

@property
def url(self):
return 'http://ichart.finance.yahoo.com/table.csv'
return 'https://query1.finance.yahoo.com/v7/finance/download/{}'\
.format(self.symbols)

@staticmethod
def yurl(symbol):
return 'https://query1.finance.yahoo.com/v7/finance/download/{}'\
.format(symbol)

def _get_params(self, symbol):
unix_start = int(time.mktime(self.start.timetuple()))
unix_end = int(time.mktime(self.end.timetuple()))

params = {
's': symbol,
'a': self.start.month - 1,
'b': self.start.day,
'c': self.start.year,
'd': self.end.month - 1,
'e': self.end.day,
'f': self.end.year,
'g': self.interval,
'ignore': '.csv'
'period1': unix_start,
'period2': unix_end,
'interval': self.interval,
'events': self.service,
'crumb': self.crumb
}
return params

Expand All @@ -79,7 +115,49 @@ def read(self):
df['Ret_Index'] = _calc_return_index(df['Adj Close'])
if self.adjust_price:
df = _adjust_prices(df)
return df
return df.sort_index()

def _dl_mult_symbols(self, symbols):
    """Download every symbol separately and assemble the results in a Panel.

    Symbols are walked in chunks of ``self.chunksize``. A symbol whose
    download raises ``IOError`` triggers a ``SymbolWarning`` and is later
    filled with an all-NaN copy of the first successfully read frame.

    Parameters
    ----------
    symbols : iterable
        Symbols to download.

    Returns
    -------
    Panel
        Panel built from the per-symbol frames, with the ``items`` and
        ``minor`` axes swapped.

    Raises
    ------
    RemoteDataError
        If no symbol could be read, or if assembling the Panel raises
        ``AttributeError``.
    """
    frames = {}
    succeeded = []
    missing = []
    for chunk in _in_chunks(symbols, self.chunksize):
        for ticker in chunk:
            try:
                frames[ticker] = self._read_one_data(
                    self.yurl(ticker), self._get_params(ticker))
            except IOError:
                text = 'Failed to read symbol: {0!r}, replacing with NaN.'
                warnings.warn(text.format(ticker), SymbolWarning)
                missing.append(ticker)
            else:
                succeeded.append(ticker)

    no_data = "No data fetched using {0!r}"
    if not succeeded:
        raise RemoteDataError(no_data.format(self.__class__.__name__))

    try:
        if frames and missing and succeeded:
            # One NaN-filled template, shaped like the first good frame,
            # stands in for every symbol that failed to download.
            placeholder = frames[succeeded[0]].copy()
            placeholder[:] = np.nan
            for ticker in missing:
                frames[ticker] = placeholder
        return Panel(frames).swapaxes('items', 'minor')
    except AttributeError:
        # cannot construct a panel with just 1D nans indicating no data
        raise RemoteDataError(no_data.format(self.__class__.__name__))

def _get_crumb(self, retries):
    """Scrape the Yahoo history page for a crumb value.

    Parameters
    ----------
    retries : int
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    str
        The first crumb found in the page, with backslash escapes such as
        ``\\u002F`` decoded.

    Raises
    ------
    RemoteDataError
        If the page does not contain a ``CrumbStore`` entry.
    """
    # Scrape a history page for a valid crumb ID:
    tu = "https://finance.yahoo.com/quote/{}/history".format(self.symbols)
    response = self._get_response(tu,
                                  params=self.params, headers=self.headers)
    out = self._sanitize_response(response)
    if isinstance(out, bytes):
        # Decode rather than call str(): on Python 3, str(bytes) gives the
        # "b'...'" repr, which doubles every backslash and would leave
        # escapes like \u002F undecoded in the crumb returned below.
        out = out.decode('utf-8', 'replace')
    # Matches: {"crumb":"AlphaNumeric"}
    rpat = '"CrumbStore":{"crumb":"([^"]+)"}'

    match = re.search(rpat, out)
    if match is None:
        # Fail with a descriptive error instead of a bare IndexError.
        raise RemoteDataError(
            'Unable to find a crumb in the response from: {0}'.format(tu))
    return match.group(1).encode('ascii').decode('unicode-escape')


def _adjust_prices(hist_data, price_list=None):
Expand Down