Skip to content

CLN: Bring pandas up to date with pandas-datareader #9358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 5, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions pandas/io/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,15 @@ def _retry_read_url(url, retry_count, pause, name):
# return 2 rows for the most recent business day
if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover
rs = rs[:-1]
return rs

#Get rid of unicode characters in index name.
try:
rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
except AttributeError:
#Python 3 string has no decode method.
rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()

return rs

raise IOError("after %d tries, %s did not "
"return a 200 for url %r" % (retry_count, name, url))
Expand Down Expand Up @@ -686,7 +694,7 @@ def _option_frames_from_url(self, url):

if not hasattr(self, 'underlying_price'):
try:
self.underlying_price, self.quote_time = self._get_underlying_price(url)
self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
except IndexError:
self.underlying_price, self.quote_time = np.nan, np.nan

Expand All @@ -701,23 +709,38 @@ def _option_frames_from_url(self, url):

return {'calls': calls, 'puts': puts}

def _get_underlying_price(self, url):
def _underlying_price_and_time_from_url(self, url):
root = self._parse_url(url)
underlying_price = float(root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\
.getchildren()[0].text)
underlying_price = self._underlying_price_from_root(root)
quote_time = self._quote_time_from_root(root)
return underlying_price, quote_time

@staticmethod
def _underlying_price_from_root(root):
    """Extract the underlying price from a parsed quote page.

    Parameters
    ----------
    root : object exposing ``xpath``
        Parsed page. The first node whose class is
        ``time_rtq_ticker Fz-30 Fw-b`` is used, and the price is read from
        the text of that node's first child.

    Returns
    -------
    float
        The parsed price, or ``np.nan`` when the price text is missing or
        is not a valid number.

    Raises
    ------
    IndexError
        If the xpath lookup matches nothing or the matched node has no
        children.
    """
    price_node = root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]
    price_text = price_node.getchildren()[0].text

    if price_text is None:
        # An element without text yields None; treat it like any other
        # unparsable quote instead of failing on None.replace().
        return np.nan

    try:
        # GH11: drop thousands separators ("1,234.50") before converting.
        return float(price_text.replace(',', ''))
    except ValueError:
        return np.nan

@staticmethod
def _quote_time_from_root(root):
#Gets the time of the quote, note this is actually the time of the underlying price.
try:
quote_time_text = root.xpath('.//*[@class="time_rtq Fz-m"]')[0].getchildren()[1].getchildren()[0].text
##TODO: Enable timezone matching when strptime can match EST with %Z
quote_time_text = quote_time_text.split(' ')[0]
quote_time = dt.datetime.strptime(quote_time_text, "%I:%M%p")

quote_time = quote_time.replace(year=CUR_YEAR, month=CUR_MONTH, day=CUR_DAY)
except ValueError:
quote_time = np.nan

return underlying_price, quote_time
return quote_time

def _get_option_data(self, expiry, name):
frame_name = '_frames' + self._expiry_to_string(expiry)
Expand Down
35 changes: 27 additions & 8 deletions pandas/io/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def test_get_multi2(self):
self.assertEqual((4, 3), result.shape)
assert_n_failed_equals_n_null_columns(w, result)

@network
def test_dtypes(self):
#GH3995, #GH8980
data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
Expand All @@ -126,6 +127,13 @@ def test_dtypes(self):
assert np.issubdtype(data.High.dtype, np.number)
assert np.issubdtype(data.Volume.dtype, np.number)

@network
def test_unicode_date(self):
    """Check that Google data comes back with the index named 'Date'."""
    #GH8967
    data = web.get_data_google('F', start='JAN-01-10', end='JAN-27-13')
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(data.index.name, 'Date')


class TestYahoo(tm.TestCase):
@classmethod
def setUpClass(cls):
Expand Down Expand Up @@ -366,34 +374,45 @@ def test_get_all_data_calls_only(self):
self.assertTrue(len(data) > 1)

@network
def test_get_underlying_price(self):
    """Check that the price parsed from a live Yahoo options page is a float.

    The test is skipped when fetching the page raises RemoteDataError.
    """
    #GH7
    try:
        options_object = web.Options('^spxpm', 'yahoo')
        url = options_object._yahoo_url_from_expiry(options_object.expiry_dates[0])
        root = options_object._parse_url(url)
        quote_price = options_object._underlying_price_from_root(root)
    except RemoteDataError as e:
        raise nose.SkipTest(e)
    # assertTrue rather than the deprecated assert_ alias.
    self.assertTrue(isinstance(quote_price, float))

def test_sample_page_price_quote_time1(self):
#Tests the weekend quote time format
price, quote_time = self.aapl._get_underlying_price(self.html1)
self.assertIsInstance(price, (int, float, complex))
self.assertIsInstance(quote_time, (datetime, Timestamp))
price, quote_time = self.aapl._underlying_price_and_time_from_url(self.html1)
self.assert_(isinstance(price, (int, float, complex)))
self.assert_(isinstance(quote_time, (datetime, Timestamp)))

def test_chop(self):
#regression test for #7625
self.aapl.chop_data(self.data1, above_below=2, underlying_price=np.nan)
chopped = self.aapl.chop_data(self.data1, above_below=2, underlying_price=100)
self.assertIsInstance(chopped, DataFrame)
self.assert_(isinstance(chopped, DataFrame))
self.assertTrue(len(chopped) > 1)

def test_chop_out_of_strike_range(self):
#regression test for #7625
self.aapl.chop_data(self.data1, above_below=2, underlying_price=np.nan)
chopped = self.aapl.chop_data(self.data1, above_below=2, underlying_price=100000)
self.assertIsInstance(chopped, DataFrame)
self.assert_(isinstance(chopped, DataFrame))
self.assertTrue(len(chopped) > 1)


@network
def test_sample_page_price_quote_time2(self):
#Tests the EDT page format
#regression test for #8741
price, quote_time = self.aapl._get_underlying_price(self.html2)
self.assertIsInstance(price, (int, float, complex))
self.assertIsInstance(quote_time, (datetime, Timestamp))
price, quote_time = self.aapl._underlying_price_and_time_from_url(self.html2)
self.assert_(isinstance(price, (int, float, complex)))
self.assert_(isinstance(quote_time, (datetime, Timestamp)))

@network
def test_sample_page_chg_float(self):
Expand Down