Skip to content

Commit ee10caa

Browse files
committed
Implement _get_hist_google
1 parent ad89365 commit ee10caa

File tree

2 files changed

+10
-40
lines changed

2 files changed

+10
-40
lines changed

pandas/io/data.py

+8-31
Original file line numberDiff line numberDiff line change
@@ -246,35 +246,24 @@ def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
246246

247247
start, end = _sanitize_dates(start, end)
248248

249-
yahoo_URL = 'http://ichart.yahoo.com/table.csv?'
250-
251-
url = yahoo_URL + 's=%s' % sym + \
252-
'&a=%s' % (start.month - 1) + \
253-
'&b=%s' % start.day + \
254-
'&c=%s' % start.year + \
255-
'&d=%s' % (end.month - 1) + \
256-
'&e=%s' % end.day + \
257-
'&f=%s' % end.year + \
258-
'&g=d' + \
259-
'&ignore=.csv'
249+
google_URL = 'http://www.google.com/finance/historical?'
260250

251+
# www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
252+
url = google_URL + urllib.urlencode({"q": sym, \
253+
"startdate": start.strftime('%b %d, %Y'), \
254+
"enddate": end.strftime('%b %d, %Y'), "output": "csv" })
261255
for _ in range(retry_count):
262256
resp = urllib2.urlopen(url)
263257
if resp.code == 200:
264258
lines = resp.read()
265259
rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
266260
parse_dates=True)[::-1]
267261

268-
# Yahoo! Finance sometimes does this awesome thing where they
269-
# return 2 rows for the most recent business day
270-
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
271-
rs = rs[:-1]
272-
273262
return rs
274263

275264
time.sleep(pause)
276265

277-
raise Exception("after %d tries, Yahoo did not "
266+
raise Exception("after %d tries, Google did not "
278267
"return a 200 for url %s" % (pause, url))
279268

280269

@@ -448,11 +437,10 @@ def dl_mult_symbols(symbols):
448437
return hist_data
449438

450439
def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
451-
adjust_price=False, ret_index=False, chunksize=25,
452-
**kwargs):
440+
chunksize=25, **kwargs):
453441
"""
454442
Returns DataFrame/Panel of historical stock prices from symbols, over date
455-
range, start to end. To avoid being penalized by Yahoo! Finance servers,
443+
range, start to end. To avoid being penalized by Google Finance servers,
456444
pauses between downloading 'chunks' of symbols can be specified.
457445
458446
Parameters
@@ -470,12 +458,6 @@ def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
470458
pause : int, default 0
471459
Time, in seconds, to pause between consecutive queries of chunks. If
472460
single value given for symbol, represents the pause between retries.
473-
adjust_price : bool, default False
474-
If True, adjusts all prices in hist_data ('Open', 'High', 'Low', 'Close')
475-
based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
476-
'Adj Close'.
477-
ret_index : bool, default False
478-
If True, includes a simple return index 'Ret_Index' in hist_data.
479461
chunksize : int, default 25
480462
Number of symbols to download consecutively before initiating pause.
481463
@@ -519,11 +501,6 @@ def dl_mult_symbols(symbols):
519501
except TypeError:
520502
hist_data = dl_mult_symbols(Series(symbols))
521503

522-
if(ret_index):
523-
hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
524-
if(adjust_price):
525-
hist_data = _adjust_prices(hist_data)
526-
527504
return hist_data
528505

529506
def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),

pandas/io/tests/test_google.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ def test_get_quote(self):
5151
def test_get_data(self):
5252
import numpy as np
5353
df = web.get_data_google('GOOG')
54-
assert df.Volume.ix['OCT-08-2010'] == 2859200
54+
print(df.Volume.ix['OCT-08-2010'])
55+
assert df.Volume.ix['OCT-08-2010'] == 2863473
5556

5657
sl = ['AAPL', 'AMZN', 'GOOG']
5758
pan = web.get_data_google(sl, '2012')
@@ -75,14 +76,6 @@ def test_get_data(self):
7576
result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
7677
assert (result == expected).all()
7778

78-
#Check ret_index
79-
pan = web.get_data_google(['GE', 'INTC', 'IBM'], '1977', '1987',
80-
ret_index=True)
81-
tstamp = pan.Ret_Index.INTC.first_valid_index()
82-
result = pan.Ret_Index.ix[tstamp]['INTC']
83-
expected = 1.0
84-
assert result == expected
85-
8679
# sanity checking
8780
t= np.array(pan)
8881
assert np.issubdtype(t.dtype, np.floating)

0 commit comments

Comments
 (0)