Skip to content

Commit c86cbf8

Browse files
committed
Replaces ichart API for single-stock price exports from Yahoo, multi-stock still failing (pydata#315)
1 parent 526b832 commit c86cbf8

File tree

2 files changed

+49
-14
lines changed

2 files changed

+49
-14
lines changed

pandas_datareader/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def _sanitize_response(response):
9999
"""
100100
return response.content
101101

102-
def _get_response(self, url, params=None):
102+
def _get_response(self, url, params=None, headers=None):
103103
""" send raw HTTP request to get requests.Response from the specified url
104104
Parameters
105105
----------
@@ -111,7 +111,7 @@ def _get_response(self, url, params=None):
111111

112112
# initial attempt + retry
113113
for i in range(self.retry_count + 1):
114-
response = self.session.get(url, params=params)
114+
response = self.session.get(url, params=params, headers=headers)
115115
if response.status_code == requests.codes.ok:
116116
return response
117117
time.sleep(self.pause)
@@ -120,7 +120,7 @@ def _get_response(self, url, params=None):
120120
raise RemoteDataError('Unable to read URL: {0}'.format(url))
121121

122122
def _read_lines(self, out):
123-
rs = read_csv(out, index_col=0, parse_dates=True, na_values='-')[::-1]
123+
rs = read_csv(out, index_col=0, parse_dates=True, na_values='-')
124124
# Yahoo! Finance sometimes does this awesome thing where they
125125
# return 2 rows for the most recent business day
126126
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover

pandas_datareader/yahoo/daily.py

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import re
2+
import time
3+
import pandas as pd
14
from pandas_datareader.base import _DailyBaseReader
25

36

@@ -46,29 +49,39 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3,
4649
retry_count=retry_count,
4750
pause=pause, session=session,
4851
chunksize=chunksize)
52+
53+
self.headers = {
54+
'Connection': 'keep-alive',
55+
'Expires': str(-1),
56+
'Upgrade-Insecure-Requests': str(1),
57+
# Google Chrome:
58+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' # noqa
59+
}
60+
4961
self.adjust_price = adjust_price
5062
self.ret_index = ret_index
5163

5264
if interval not in ['d', 'w', 'm', 'v']:
5365
raise ValueError("Invalid interval: valid values are "
5466
"'d', 'w', 'm' and 'v'")
55-
self.interval = interval
67+
self.interval = '1' + interval
68+
# self.crumb = '64ZkTeri7Xq'
69+
self.crumb = self._get_crumb(retry_count)
5670

5771
@property
def url(self):
    """Endpoint for Yahoo's v7 CSV price-history download for ``self.symbols``."""
    base = 'https://query1.finance.yahoo.com/v7/finance/download/'
    return base + '{}'.format(self.symbols)
6074

6175
def _get_params(self, symbol):
76+
unix_start = int(time.mktime(self.start.timetuple()))
77+
unix_end = int(time.mktime(self.end.timetuple()))
78+
6279
params = {
63-
's': symbol,
64-
'a': self.start.month - 1,
65-
'b': self.start.day,
66-
'c': self.start.year,
67-
'd': self.end.month - 1,
68-
'e': self.end.day,
69-
'f': self.end.year,
70-
'g': self.interval,
71-
'ignore': '.csv'
80+
'period1': unix_start,
81+
'period2': unix_end,
82+
'interval': self.interval,
83+
'events': 'history',
84+
'crumb': self.crumb
7285
}
7386
return params
7487

@@ -79,8 +92,30 @@ def read(self):
7992
df['Ret_Index'] = _calc_return_index(df['Adj Close'])
8093
if self.adjust_price:
8194
df = _adjust_prices(df)
95+
temp = pd.date_range(self.start, self.end, None, self.interval)
8296
return df
8397

98+
def _get_crumb(self, retries):
99+
# Scrape a history page for a valid crumb ID:
100+
tu = "https://finance.yahoo.com/quote/{}/history".format(self.symbols)
101+
response = self._get_response(tu,
102+
params=self.params, headers=self.headers)
103+
out = str(self._sanitize_response(response))
104+
# Matches: {"crumb":"AlphaNumeric"}
105+
regex = re.search(r'{"crumb" ?: ?"([A-Za-z0-9.]{11,})"}', out)
106+
107+
try:
108+
crumbs = regex.groups()
109+
except:
110+
# It is possible we hit a 401 with frequent requests. Cool-off:
111+
if retries > 0:
112+
time.sleep(2)
113+
retries -= 1
114+
crumbs = [self._get_crumb(retries)]
115+
raise OSError("Unable to retrieve Yahoo breadcrumb, exiting.")
116+
117+
return crumbs[0]
118+
84119

85120
def _adjust_prices(hist_data, price_list=None):
86121
"""

0 commit comments

Comments
 (0)