Patch summary (free-form commentary; patch tools skip text before the first
"diff --git" header).

pandas/io/data.py
  * _retry_read_url: before returning the frame, drops non-ASCII characters
    from rs.index.name (str.decode path first, str.encode/decode fallback
    when the name has no decode method).
    NOTE(review): both branches call a method on rs.index.name; if the name
    is None the AttributeError handler raises AttributeError itself.
    Confirm the index always carries a name here.
  * Options._get_underlying_price is renamed to
    _underlying_price_and_time_from_url and delegates to two new static
    helpers, _underlying_price_from_root and _quote_time_from_root.
    The price helper strips thousands separators (',') and yields np.nan
    when float() raises ValueError.

pandas/io/tests/test_data.py
  * Marks test_dtypes with @network, adds test_unicode_date and
    test_get_underlying_price, follows the method rename, and replaces
    assertIsInstance(...) with assert_(isinstance(...)).
    NOTE(review): the pre-existing @network decorator now applies to the new
    test_get_underlying_price, so test_sample_page_price_quote_time1 is left
    undecorated.

NOTE(review): indentation and blank context lines below were reconstructed
so that each hunk body matches its header counts; verify against the
originating commit before applying.

diff --git a/pandas/io/data.py b/pandas/io/data.py
index b5cf5f9d9be19..ea635e85ed177 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -171,7 +171,15 @@ def _retry_read_url(url, retry_count, pause, name):
             # return 2 rows for the most recent business day
             if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                 rs = rs[:-1]
-            return rs
+
+            #Get rid of unicode characters in index name.
+            try:
+                rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
+            except AttributeError:
+                #Python 3 string has no decode method.
+                rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()
+
+            return rs
 
     raise IOError("after %d tries, %s did not "
                   "return a 200 for url %r" % (retry_count, name, url))
@@ -686,7 +694,7 @@ def _option_frames_from_url(self, url):
 
         if not hasattr(self, 'underlying_price'):
             try:
-                self.underlying_price, self.quote_time = self._get_underlying_price(url)
+                self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
             except IndexError:
                 self.underlying_price, self.quote_time = np.nan, np.nan
 
@@ -701,23 +709,38 @@ def _option_frames_from_url(self, url):
 
         return {'calls': calls, 'puts': puts}
 
-    def _get_underlying_price(self, url):
+    def _underlying_price_and_time_from_url(self, url):
         root = self._parse_url(url)
-        underlying_price = float(root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\
-            .getchildren()[0].text)
+        underlying_price = self._underlying_price_from_root(root)
+        quote_time = self._quote_time_from_root(root)
+        return underlying_price, quote_time
+
+    @staticmethod
+    def _underlying_price_from_root(root):
+        underlying_price = root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\
+            .getchildren()[0].text
+        underlying_price = underlying_price.replace(',', '') #GH11
 
+        try:
+            underlying_price = float(underlying_price)
+        except ValueError:
+            underlying_price = np.nan
+
+        return underlying_price
+
+    @staticmethod
+    def _quote_time_from_root(root):
         #Gets the time of the quote, note this is actually the time of the underlying price.
         try:
             quote_time_text = root.xpath('.//*[@class="time_rtq Fz-m"]')[0].getchildren()[1].getchildren()[0].text
             ##TODO: Enable timezone matching when strptime can match EST with %Z
             quote_time_text = quote_time_text.split(' ')[0]
             quote_time = dt.datetime.strptime(quote_time_text, "%I:%M%p")
-
             quote_time = quote_time.replace(year=CUR_YEAR, month=CUR_MONTH, day=CUR_DAY)
         except ValueError:
             quote_time = np.nan
 
-        return underlying_price, quote_time
+        return quote_time
 
     def _get_option_data(self, expiry, name):
         frame_name = '_frames' + self._expiry_to_string(expiry)
diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py
index 2d6f14c79633a..b3753f4cb941b 100644
--- a/pandas/io/tests/test_data.py
+++ b/pandas/io/tests/test_data.py
@@ -117,6 +117,7 @@ def test_get_multi2(self):
                 self.assertEqual((4, 3), result.shape)
                 assert_n_failed_equals_n_null_columns(w, result)
 
+    @network
     def test_dtypes(self):
         #GH3995, #GH8980
         data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
@@ -126,6 +127,13 @@ def test_dtypes(self):
         assert np.issubdtype(data.High.dtype, np.number)
         assert np.issubdtype(data.Volume.dtype, np.number)
 
+    @network
+    def test_unicode_date(self):
+        #GH8967
+        data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
+        self.assertEquals(data.index.name, 'Date')
+
+
 class TestYahoo(tm.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -366,24 +374,35 @@ def test_get_all_data_calls_only(self):
         self.assertTrue(len(data) > 1)
 
     @network
+    def test_get_underlying_price(self):
+        #GH7
+        try:
+            options_object = web.Options('^spxpm', 'yahoo')
+            url = options_object._yahoo_url_from_expiry(options_object.expiry_dates[0])
+            root = options_object._parse_url(url)
+            quote_price = options_object._underlying_price_from_root(root)
+        except RemoteDataError as e:
+            raise nose.SkipTest(e)
+        self.assert_(isinstance(quote_price, float))
+
     def test_sample_page_price_quote_time1(self):
         #Tests the weekend quote time format
-        price, quote_time = self.aapl._get_underlying_price(self.html1)
-        self.assertIsInstance(price, (int, float, complex))
-        self.assertIsInstance(quote_time, (datetime, Timestamp))
+        price, quote_time = self.aapl._underlying_price_and_time_from_url(self.html1)
+        self.assert_(isinstance(price, (int, float, complex)))
+        self.assert_(isinstance(quote_time, (datetime, Timestamp)))
 
     def test_chop(self):
         #regression test for #7625
         self.aapl.chop_data(self.data1, above_below=2, underlying_price=np.nan)
         chopped = self.aapl.chop_data(self.data1, above_below=2, underlying_price=100)
-        self.assertIsInstance(chopped, DataFrame)
+        self.assert_(isinstance(chopped, DataFrame))
         self.assertTrue(len(chopped) > 1)
 
     def test_chop_out_of_strike_range(self):
         #regression test for #7625
         self.aapl.chop_data(self.data1, above_below=2, underlying_price=np.nan)
         chopped = self.aapl.chop_data(self.data1, above_below=2, underlying_price=100000)
-        self.assertIsInstance(chopped, DataFrame)
+        self.assert_(isinstance(chopped, DataFrame))
         self.assertTrue(len(chopped) > 1)
 
 
@@ -391,9 +410,9 @@ def test_chop_out_of_strike_range(self):
     def test_sample_page_price_quote_time2(self):
         #Tests the EDT page format
         #regression test for #8741
-        price, quote_time = self.aapl._get_underlying_price(self.html2)
-        self.assertIsInstance(price, (int, float, complex))
-        self.assertIsInstance(quote_time, (datetime, Timestamp))
+        price, quote_time = self.aapl._underlying_price_and_time_from_url(self.html2)
+        self.assert_(isinstance(price, (int, float, complex)))
+        self.assert_(isinstance(quote_time, (datetime, Timestamp)))
 
     @network
     def test_sample_page_chg_float(self):