Skip to content

Commit 602625c

Browse files
committed
Merge pull request #9358 from dstephens99/master
CLN: Bring pandas up to date with pandas-datareader
2 parents ba895cd + a9c01b0 commit 602625c

File tree

2 files changed

+57
-15
lines changed

2 files changed

+57
-15
lines changed

pandas/io/data.py

+30 -7
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,15 @@ def _retry_read_url(url, retry_count, pause, name):
171171
# return 2 rows for the most recent business day
172172
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
173173
rs = rs[:-1]
174-
return rs
174+
175+
#Get rid of unicode characters in index name.
176+
try:
177+
rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
178+
except AttributeError:
179+
#Python 3 string has no decode method.
180+
rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()
181+
182+
return rs
175183

176184
raise IOError("after %d tries, %s did not "
177185
"return a 200 for url %r" % (retry_count, name, url))
@@ -686,7 +694,7 @@ def _option_frames_from_url(self, url):
686694

687695
if not hasattr(self, 'underlying_price'):
688696
try:
689-
self.underlying_price, self.quote_time = self._get_underlying_price(url)
697+
self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
690698
except IndexError:
691699
self.underlying_price, self.quote_time = np.nan, np.nan
692700

@@ -701,23 +709,38 @@ def _option_frames_from_url(self, url):
701709

702710
return {'calls': calls, 'puts': puts}
703711

704-
def _get_underlying_price(self, url):
712+
def _underlying_price_and_time_from_url(self, url):
705713
root = self._parse_url(url)
706-
underlying_price = float(root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\
707-
.getchildren()[0].text)
714+
underlying_price = self._underlying_price_from_root(root)
715+
quote_time = self._quote_time_from_root(root)
716+
return underlying_price, quote_time
717+
718+
@staticmethod
719+
def _underlying_price_from_root(root):
720+
underlying_price = root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\
721+
.getchildren()[0].text
722+
underlying_price = underlying_price.replace(',', '') #GH11
708723

724+
try:
725+
underlying_price = float(underlying_price)
726+
except ValueError:
727+
underlying_price = np.nan
728+
729+
return underlying_price
730+
731+
@staticmethod
732+
def _quote_time_from_root(root):
709733
#Gets the time of the quote, note this is actually the time of the underlying price.
710734
try:
711735
quote_time_text = root.xpath('.//*[@class="time_rtq Fz-m"]')[0].getchildren()[1].getchildren()[0].text
712736
##TODO: Enable timezone matching when strptime can match EST with %Z
713737
quote_time_text = quote_time_text.split(' ')[0]
714738
quote_time = dt.datetime.strptime(quote_time_text, "%I:%M%p")
715-
716739
quote_time = quote_time.replace(year=CUR_YEAR, month=CUR_MONTH, day=CUR_DAY)
717740
except ValueError:
718741
quote_time = np.nan
719742

720-
return underlying_price, quote_time
743+
return quote_time
721744

722745
def _get_option_data(self, expiry, name):
723746
frame_name = '_frames' + self._expiry_to_string(expiry)

pandas/io/tests/test_data.py

+27 -8
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ def test_get_multi2(self):
117117
self.assertEqual((4, 3), result.shape)
118118
assert_n_failed_equals_n_null_columns(w, result)
119119

120+
@network
120121
def test_dtypes(self):
121122
#GH3995, #GH8980
122123
data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
@@ -126,6 +127,13 @@ def test_dtypes(self):
126127
assert np.issubdtype(data.High.dtype, np.number)
127128
assert np.issubdtype(data.Volume.dtype, np.number)
128129

130+
@network
131+
def test_unicode_date(self):
132+
#GH8967
133+
data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
134+
self.assertEquals(data.index.name, 'Date')
135+
136+
129137
class TestYahoo(tm.TestCase):
130138
@classmethod
131139
def setUpClass(cls):
@@ -366,34 +374,45 @@ def test_get_all_data_calls_only(self):
366374
self.assertTrue(len(data) > 1)
367375

368376
@network
377+
def test_get_underlying_price(self):
378+
#GH7
379+
try:
380+
options_object = web.Options('^spxpm', 'yahoo')
381+
url = options_object._yahoo_url_from_expiry(options_object.expiry_dates[0])
382+
root = options_object._parse_url(url)
383+
quote_price = options_object._underlying_price_from_root(root)
384+
except RemoteDataError as e:
385+
raise nose.SkipTest(e)
386+
self.assert_(isinstance(quote_price, float))
387+
369388
def test_sample_page_price_quote_time1(self):
370389
#Tests the weekend quote time format
371-
price, quote_time = self.aapl._get_underlying_price(self.html1)
372-
self.assertIsInstance(price, (int, float, complex))
373-
self.assertIsInstance(quote_time, (datetime, Timestamp))
390+
price, quote_time = self.aapl._underlying_price_and_time_from_url(self.html1)
391+
self.assert_(isinstance(price, (int, float, complex)))
392+
self.assert_(isinstance(quote_time, (datetime, Timestamp)))
374393

375394
def test_chop(self):
376395
#regression test for #7625
377396
self.aapl.chop_data(self.data1, above_below=2, underlying_price=np.nan)
378397
chopped = self.aapl.chop_data(self.data1, above_below=2, underlying_price=100)
379-
self.assertIsInstance(chopped, DataFrame)
398+
self.assert_(isinstance(chopped, DataFrame))
380399
self.assertTrue(len(chopped) > 1)
381400

382401
def test_chop_out_of_strike_range(self):
383402
#regression test for #7625
384403
self.aapl.chop_data(self.data1, above_below=2, underlying_price=np.nan)
385404
chopped = self.aapl.chop_data(self.data1, above_below=2, underlying_price=100000)
386-
self.assertIsInstance(chopped, DataFrame)
405+
self.assert_(isinstance(chopped, DataFrame))
387406
self.assertTrue(len(chopped) > 1)
388407

389408

390409
@network
391410
def test_sample_page_price_quote_time2(self):
392411
#Tests the EDT page format
393412
#regression test for #8741
394-
price, quote_time = self.aapl._get_underlying_price(self.html2)
395-
self.assertIsInstance(price, (int, float, complex))
396-
self.assertIsInstance(quote_time, (datetime, Timestamp))
413+
price, quote_time = self.aapl._underlying_price_and_time_from_url(self.html2)
414+
self.assert_(isinstance(price, (int, float, complex)))
415+
self.assert_(isinstance(quote_time, (datetime, Timestamp)))
397416

398417
@network
399418
def test_sample_page_chg_float(self):

0 commit comments

Comments
 (0)