TST/BUG: fix failing data.py tests for good #4054

Merged · 1 commit · Jun 28, 2013
6 changes: 5 additions & 1 deletion doc/source/release.rst
@@ -286,7 +286,11 @@ pandas 0.12
 - Fix ``Series.clip`` for datetime series. NA/NaN threshold values will now throw ValueError (:issue:`3996`)
 - Fixed insertion issue into DataFrame, after rename (:issue:`4032`)
 - Fixed testing issue where too many sockets where open thus leading to a
-  connection reset issue (:issue:`3982`, :issue:`3985`)
+  connection reset issue (:issue:`3982`, :issue:`3985`, :issue:`4028`,
+  :issue:`4054`)
+- Fixed failing tests in test_yahoo, test_google where symbols were not
+  retrieved but were being accessed (:issue:`3982`, :issue:`3985`,
+  :issue:`4028`, :issue:`4054`)


 pandas 0.11.0
6 changes: 5 additions & 1 deletion doc/source/v0.12.0.txt
@@ -429,7 +429,11 @@ Bug Fixes
   connectivity. Plus, new ``optional_args`` decorator factory for decorators.
   (:issue:`3910`, :issue:`3914`)
 - Fixed testing issue where too many sockets where open thus leading to a
-  connection reset issue (:issue:`3982`, :issue:`3985`)
+  connection reset issue (:issue:`3982`, :issue:`3985`, :issue:`4028`,
+  :issue:`4054`)
+- Fixed failing tests in test_yahoo, test_google where symbols were not
+  retrieved but were being accessed (:issue:`3982`, :issue:`3985`,
+  :issue:`4028`, :issue:`4054`)

 See the :ref:`full release notes
 <release>` or issue tracker
4 changes: 2 additions & 2 deletions pandas/io/common.py
@@ -63,8 +63,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
         else:
             errors = 'replace'
             encoding = 'utf-8'
-        bytes = filepath_or_buffer.read()
-        filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
+        bytes = filepath_or_buffer.read().decode(encoding, errors)
+        filepath_or_buffer = StringIO(bytes)
         return filepath_or_buffer, encoding
     return filepath_or_buffer, None
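A note on the common.py change: StringIO holds text, so bytes read from a remote buffer have to be decoded before wrapping, and reading plus decoding in one step avoids keeping a raw bytes object around. A minimal, self-contained sketch of the pattern (the BytesIO here stands in for a urlopen response; this is not pandas' actual helper):

    from io import BytesIO, StringIO

    raw = BytesIO(b'a,b\n1,2\n')                   # stands in for a socket or file of bytes
    text = raw.read().decode('utf-8', 'replace')   # decode once, up front
    buf = StringIO(text)                           # downstream parsers see a text buffer
    print(buf.read())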
28 changes: 17 additions & 11 deletions pandas/io/data.py
@@ -4,6 +4,7 @@

 """
 import warnings
+import tempfile

 import numpy as np
 import datetime as dt
@@ -13,14 +14,14 @@
 from urllib2 import urlopen

 from zipfile import ZipFile
-from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str
+from pandas.util.py3compat import StringIO, bytes_to_str

 from pandas import Panel, DataFrame, Series, read_csv, concat
 from pandas.io.parsers import TextParser


 def DataReader(name, data_source=None, start=None, end=None,
-               retry_count=3, pause=0):
+               retry_count=3, pause=0.001):
     """
     Imports data from a number of online sources.

@@ -137,7 +138,7 @@ def get_quote_google(symbols):
     raise NotImplementedError("Google Finance doesn't have this functionality")

 def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
-                    pause=0, **kwargs):
+                    pause=0.001, **kwargs):
     """
     Get historical data for the given name from yahoo.
     Date format is datetime
@@ -183,7 +184,7 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,


 def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
-                     pause=0, **kwargs):
+                     pause=0.001, **kwargs):
     """
     Get historical data for the given name from google.
     Date format is datetime
@@ -309,7 +310,7 @@ def get_components_yahoo(idx_sym):
     return idx_df


-def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0,
+def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0.001,
                    adjust_price=False, ret_index=False, chunksize=25,
                    **kwargs):
     """
@@ -388,8 +389,8 @@ def dl_mult_symbols(symbols):

     return hist_data

-def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
-                    chunksize=25, **kwargs):
+def get_data_google(symbols=None, start=None, end=None, retry_count=3,
+                    pause=0.001, chunksize=25, **kwargs):
     """
     Returns DataFrame/Panel of historical stock prices from symbols, over date
     range, start to end. To avoid being penalized by Google Finance servers,
@@ -493,8 +494,13 @@ def get_data_famafrench(name, start=None, end=None):
     zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"

     with closing(urlopen(zipFileURL + name + ".zip")) as url:
-        with closing(ZipFile(StringIO(url.read()))) as zf:
-            data = zf.read(name + ".txt").splitlines()
+        raw = url.read()
+
+    with tempfile.TemporaryFile() as tmpf:
+        tmpf.write(raw)
+
+        with closing(ZipFile(tmpf, 'r')) as zf:
+            data = zf.read(name + '.txt').splitlines()

     file_edges = np.where(np.array([len(d) for d in data]) == 2)[0]

@@ -847,7 +853,7 @@ def get_near_stock_price(self, above_below=2, call=True, put=False,

         chop_call = df_c.ix[get_range, :]

-        chop_call = chop_call.dropna()
+        chop_call = chop_call.dropna(how='all')
         chop_call = chop_call.reset_index()

         if put:
@@ -868,7 +874,7 @@

         chop_put = df_p.ix[get_range, :]

-        chop_put = chop_put.dropna()
+        chop_put = chop_put.dropna(how='all')
         chop_put = chop_put.reset_index()

         if call and put:
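The dropna(how='all') switch is the behavioral fix for sparse option chains: bare dropna() discards a row when *any* column is NaN, while how='all' only discards rows that are NaN across the board. A quick illustration:

    import numpy as np
    from pandas import DataFrame

    df = DataFrame({'bid': [1.0, np.nan, np.nan],
                    'ask': [1.1, 2.2, np.nan]})
    print(df.dropna())            # keeps only the fully populated row 0
    print(df.dropna(how='all'))   # keeps rows 0 and 1, drops only the all-NaN row 2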
30 changes: 30 additions & 0 deletions pandas/io/tests/test_data_reader.py
@@ -0,0 +1,30 @@
+import unittest
+
+from pandas.core.generic import PandasObject
+from pandas.io.data import DataReader
+from pandas.util.testing import network
+
+
+class TestDataReader(unittest.TestCase):
+    @network
+    def test_read_yahoo(self):
+        gs = DataReader("GS", "yahoo")
+        assert isinstance(gs, PandasObject)
+
+    @network
+    def test_read_google(self):
+        gs = DataReader("GS", "google")
+        assert isinstance(gs, PandasObject)
+
+    @network
+    def test_read_fred(self):
+        vix = DataReader("VIXCLS", "fred")
+        assert isinstance(vix, PandasObject)
+
+    @network
+    def test_read_famafrench(self):
+        for name in ("F-F_Research_Data_Factors",
+                     "F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
+                     "F-F_ST_Reversal_Factor"):
+            ff = DataReader(name, "famafrench")
+            assert isinstance(ff, dict)
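For readers unfamiliar with pandas.util.testing.network: it marks a test as requiring connectivity so that flaky-network failures can be skipped rather than reported as errors. Roughly this shape (a hedged sketch, not the actual implementation):

    import nose

    def network(test_func):
        def wrapper(*args, **kwargs):
            try:
                return test_func(*args, **kwargs)
            except IOError:
                raise nose.SkipTest("network unavailable")
        wrapper.__name__ = test_func.__name__
        wrapper.network = True   # lets runners select or skip network-bound tests
        return wrapper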
40 changes: 14 additions & 26 deletions pandas/io/tests/test_fred.py
@@ -2,22 +2,15 @@
 import nose
 from datetime import datetime

-from pandas.util.py3compat import StringIO, BytesIO
-
 import pandas as pd
 import numpy as np
 import pandas.io.data as web
-from pandas.util.testing import (network, assert_frame_equal,
-                                 assert_series_equal,
-                                 assert_almost_equal, with_connectivity_check)
-from numpy.testing.decorators import slow
-
-import urllib2
+from pandas.util.testing import network
+from numpy.testing import assert_array_equal


 class TestFred(unittest.TestCase):

-    @slow
-    @with_connectivity_check("http://www.google.com")
+    @network
     def test_fred(self):
         """
         Throws an exception when DataReader can't get a 200 response from
@@ -28,50 +21,45 @@ def test_fred(self):

         self.assertEquals(
             web.DataReader("GDP", "fred", start, end)['GDP'].tail(1),
-            16004.5)
+            15984.1)

-        self.assertRaises(
-            Exception,
-            lambda: web.DataReader("NON EXISTENT SERIES", 'fred',
-                                   start, end))
+        self.assertRaises(Exception, web.DataReader, "NON EXISTENT SERIES",
+                          'fred', start, end)

-    @slow
     @network
     def test_fred_nan(self):
         start = datetime(2010, 1, 1)
         end = datetime(2013, 01, 27)
         df = web.DataReader("DFII5", "fred", start, end)
         assert pd.isnull(df.ix['2010-01-01'])

-    @slow
     @network
     def test_fred_parts(self):
+        import numpy as np
+
         start = datetime(2010, 1, 1)
         end = datetime(2013, 01, 27)
         df = web.get_data_fred("CPIAUCSL", start, end)
-        assert df.ix['2010-05-01'] == 217.23
+        self.assertEqual(df.ix['2010-05-01'], 217.23)

-        t = np.array(df.CPIAUCSL.tolist())
+        t = df.CPIAUCSL.values
         assert np.issubdtype(t.dtype, np.floating)
-        assert t.shape == (37,)
+        self.assertEqual(t.shape, (37,))

     # Test some older ones:
     @network
     def test_fred_part2(self):
         expected = [[576.7],
                     [962.9],
                     [684.7],
                     [848.3],
                     [933.3]]
         result = web.get_data_fred("A09024USA144NNBR", start="1915").ix[:5]
-        assert (result.values == expected).all()
+        assert_array_equal(result.values, np.array(expected))

-    @slow
     @network
     def test_invalid_series(self):
         name = "NOT A REAL SERIES"
         self.assertRaises(Exception, web.get_data_fred, name)


 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
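The assertRaises rewrites above are behavior-preserving: passing the callable and its arguments directly is equivalent to wrapping the call in a lambda, but it reads better and the failure message names the real callable instead of a <lambda>. Both spellings side by side:

    import unittest

    class Demo(unittest.TestCase):
        def test_both_spellings(self):
            self.assertRaises(ValueError, int, 'not a number')          # callable + args
            self.assertRaises(ValueError, lambda: int('not a number'))  # lambda wrapper

    if __name__ == '__main__':
        unittest.main()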
62 changes: 31 additions & 31 deletions pandas/io/tests/test_google.py
@@ -10,7 +10,7 @@

 class TestGoogle(unittest.TestCase):

-    @with_connectivity_check("http://www.google.com")
+    @network
     def test_google(self):
         # asserts that google is minimally working and that it throws
         # an exception when DataReader can't get a 200 response from
@@ -22,51 +22,51 @@ def test_google(self):
             web.DataReader("F", 'google', start, end)['Close'][-1],
             13.68)

-        self.assertRaises(
-            Exception,
-            lambda: web.DataReader("NON EXISTENT TICKER", 'google',
-                                   start, end))
-
+        self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER",
+                          'google', start, end)

     @network
-    def test_get_quote(self):
-        self.assertRaises(NotImplementedError,
-                          lambda: web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG'])))
+    def test_get_quote_fails(self):
+        self.assertRaises(NotImplementedError, web.get_quote_google,
+                          pd.Series(['GOOG', 'AAPL', 'GOOG']))

-    @with_connectivity_check('http://www.google.com')
+    @network
     def test_get_goog_volume(self):
         df = web.get_data_google('GOOG')
-        assert df.Volume.ix['OCT-08-2010'] == 2863473
+        self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)

-    @with_connectivity_check('http://www.google.com')
+    @network
     def test_get_multi1(self):
         sl = ['AAPL', 'AMZN', 'GOOG']
         pan = web.get_data_google(sl, '2012')
-        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
-        assert ts[0].dayofyear == 96
-
-    @with_connectivity_check('http://www.google.com')
+        def testit():
+            ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
+            self.assertEquals(ts[0].dayofyear, 96)
+
+        if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and
+                hasattr(pan.Close, 'AAPL')):
+            testit()
+        else:
+            self.assertRaises(AttributeError, testit)
+
+    @network
     def test_get_multi2(self):
-        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
-        expected = [19.02, 28.23, 25.39]
-        result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
-        assert result == expected
+        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
+                                  'JAN-31-12')
+        result = pan.Close.ix['01-18-12']
+        self.assertEqual(len(result), 3)

         # sanity checking
-        t= np.array(result)
-        assert np.issubdtype(t.dtype, np.floating)
-        assert t.shape == (3,)
+        assert np.issubdtype(result.dtype, np.floating)

-        expected = [[ 18.99, 28.4 , 25.18],
-                    [ 18.58, 28.31, 25.13],
-                    [ 19.03, 28.16, 25.52],
-                    [ 18.81, 28.82, 25.87]]
-        result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
-        assert (result == expected).all()
+        expected = np.array([[ 18.99, 28.4 , 25.18],
+                             [ 18.58, 28.31, 25.13],
+                             [ 19.03, 28.16, 25.52],
+                             [ 18.81, 28.82, 25.87]])
+        result = pan.Open.ix['Jan-15-12':'Jan-20-12']
+        self.assertEqual(np.array(expected).shape, result.shape)

         # sanity checking
         t= np.array(pan)
         assert np.issubdtype(t.dtype, np.floating)

 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
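The testit/hasattr guard in test_get_multi1 is the heart of this fix: when a symbol is never retrieved, the panel simply lacks that attribute, so the test asserts that the access fails loudly instead of crashing mid-assertion. The same shape in miniature, using a plain DataFrame in place of a fetched panel:

    import unittest
    from pandas import DataFrame

    class Demo(unittest.TestCase):
        def test_guarded_access(self):
            pan = DataFrame({'GOOG': [1.0, 2.0]})   # 'AAPL' never came back

            def testit():
                return (pan.AAPL > pan.GOOG).any()

            if hasattr(pan, 'AAPL') and hasattr(pan, 'GOOG'):
                testit()
            else:
                # missing column: attribute access must raise, not silently pass
                self.assertRaises(AttributeError, testit)

    if __name__ == '__main__':
        unittest.main()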
6 changes: 4 additions & 2 deletions pandas/io/tests/test_parsers.py
@@ -2,12 +2,13 @@

 from pandas.util.py3compat import StringIO, BytesIO, PY3
 from datetime import datetime
-from os.path import split as psplit
 import csv
 import os
 import sys
 import re
 import unittest
+from contextlib import closing
+from urllib2 import urlopen

 import nose

@@ -1391,7 +1392,8 @@ def test_url(self):

         except urllib2.URLError:
             try:
-                urllib2.urlopen('http://www.google.com')
+                with closing(urlopen('http://www.google.com')) as resp:
+                    pass
             except urllib2.URLError:
                 raise nose.SkipTest
             else:
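This last hunk is the same socket-hygiene fix as in data.py: a bare urllib2.urlopen(...) whose result is never closed leaves its connection dangling, and enough of those produce the connection resets these tests kept hitting. contextlib.closing guarantees the handle is released even if the block raises. In isolation:

    from contextlib import closing
    from urllib2 import urlopen

    with closing(urlopen('http://www.google.com')) as resp:
        status = resp.getcode()   # connection is released when the block exits
    print(status)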