From c3bb3505e007394252fa56d6d8e8f1b49f1ca07e Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 25 Jun 2013 15:24:10 -0400 Subject: [PATCH 1/9] TST: add test for DataReader class --- pandas/io/tests/test_data_reader.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 pandas/io/tests/test_data_reader.py diff --git a/pandas/io/tests/test_data_reader.py b/pandas/io/tests/test_data_reader.py new file mode 100644 index 0000000000000..a319f27d413ef --- /dev/null +++ b/pandas/io/tests/test_data_reader.py @@ -0,0 +1,29 @@ +import unittest + +from pandas.core.generic import PandasObject +from pandas.io.data import DataReader +from pandas.util.testing import network + + +class TestDataReader(unittest.TestCase): + @network + def test_read_yahoo(self): + gs = DataReader("GS", "yahoo") + assert isinstance(gs, PandasObject) + + @network + def test_read_google(self): + pass + + @network + def test_read_fred(self): + vix = DataReader("VIXCLS", "fred") + assert isinstance(vix, PandasObject) + + @network + def test_read_famafrench(self): + for name in ("F-F_Research_Data_Factors", + "F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3", + "F-F_ST_Reversal_Factor"): + ff = DataReader(name, "famafrench") + assert isinstance(ff, dict) From 29661299d2a87d51b5647ac5cd14043fba656d34 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 14:12:49 -0400 Subject: [PATCH 2/9] TST: add read_google test --- pandas/io/data.py | 6 ++++-- pandas/io/tests/test_data_reader.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index 9cf5eeb1fed4e..b24803a9f238b 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -493,8 +493,10 @@ def get_data_famafrench(name, start=None, end=None): zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/" with closing(urlopen(zipFileURL + name + ".zip")) as url: - with closing(ZipFile(StringIO(url.read()))) as zf: - data = zf.read(name + ".txt").splitlines() + zf = ZipFile(StringIO(url.read())) + + with closing(zf.open(name + ".txt")) as z: + data = z.readlines() file_edges = np.where(np.array([len(d) for d in data]) == 2)[0] diff --git a/pandas/io/tests/test_data_reader.py b/pandas/io/tests/test_data_reader.py index a319f27d413ef..129e35921335c 100644 --- a/pandas/io/tests/test_data_reader.py +++ b/pandas/io/tests/test_data_reader.py @@ -13,7 +13,8 @@ def test_read_yahoo(self): @network def test_read_google(self): - pass + gs = DataReader("GS", "google") + assert isinstance(gs, PandasObject) @network def test_read_fred(self): From 91eda13412f8934409be07f54f9089f9e76c34bc Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 16:58:33 -0400 Subject: [PATCH 3/9] TST: use builtin equality testing --- pandas/io/tests/test_yahoo.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/io/tests/test_yahoo.py b/pandas/io/tests/test_yahoo.py index f2a55a4231c00..8a6fbee69db32 100644 --- a/pandas/io/tests/test_yahoo.py +++ b/pandas/io/tests/test_yahoo.py @@ -5,7 +5,9 @@ import pandas as pd import pandas.io.data as web -from pandas.util.testing import network, assert_series_equal, with_connectivity_check +from pandas.util.testing import (network, assert_series_equal, + with_connectivity_check) +from numpy.testing import assert_array_equal class TestYahoo(unittest.TestCase): @@ -32,7 +34,6 @@ def test_get_quote(self): df = web.get_quote_yahoo(pd.Series(['GOOG', 'AAPL', 'GOOG'])) assert_series_equal(df.ix[0], df.ix[2]) - @network def test_get_components(self): @@ -59,43 +60,42 @@ def test_get_data(self): #single symbol #http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d df = web.get_data_yahoo('GOOG') - assert df.Volume.ix['OCT-08-2010'] == 2859200 + self.assertEqual(df.Volume.ix['OCT-08-2010'], 2859200) sl = ['AAPL', 'AMZN', 'GOOG'] pan = web.get_data_yahoo(sl, '2012') ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] - assert ts[0].dayofyear == 96 + self.assertEqual(ts[0].dayofyear, 96) #dfi = web.get_components_yahoo('^DJI') #pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12') pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12') expected = [19.02, 28.23, 25.39] result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist() - assert result == expected + self.assertEqual(result, expected) # sanity checking - t= np.array(result) - assert np.issubdtype(t.dtype, np.floating) - assert t.shape == (3,) + t = np.array(result) + assert np.issubdtype(t.dtype, np.floating) + self.assertEqual(t.shape, (3,)) expected = [[ 18.99, 28.4 , 25.18], [ 18.58, 28.31, 25.13], [ 19.03, 28.16, 25.52], [ 18.81, 28.82, 25.87]] result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values - assert (result == expected).all() + assert_array_equal(expected, result) #Check ret_index pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987', ret_index=True) tstamp = pan.Ret_Index.INTC.first_valid_index() result = pan.Ret_Index.ix[tstamp]['INTC'] - expected = 1.0 - assert result == expected + self.assertEqual(result, 1.0) # sanity checking - t= np.array(pan) - assert np.issubdtype(t.dtype, np.floating) + t = np.array(pan) + assert np.issubdtype(t.dtype, np.floating) @network def test_options(self): From b6913fc29e3306a3f0bda22b5e777c9c0c3450c7 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 17:26:42 -0400 Subject: [PATCH 4/9] CLN: use a temporary zipfile --- pandas/io/data.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index b24803a9f238b..fa0b451f1d8c9 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -4,6 +4,7 @@ """ import warnings +import tempfile import numpy as np import datetime as dt @@ -493,10 +494,13 @@ def get_data_famafrench(name, start=None, end=None): zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/" with closing(urlopen(zipFileURL + name + ".zip")) as url: - zf = ZipFile(StringIO(url.read())) + raw = url.read() - with closing(zf.open(name + ".txt")) as z: - data = z.readlines() + with tempfile.TemporaryFile() as tmpf: + tmpf.write(raw) + + with closing(ZipFile(tmpf, 'r')) as zf: + data = zf.read(name + '.txt').splitlines() file_edges = np.where(np.array([len(d) for d in data]) == 2)[0] From e40760dd743f64cc0a040bd91f5d35f6599ee698 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 17:43:32 -0400 Subject: [PATCH 5/9] BUG: make the pause nonzero but short --- pandas/io/data.py | 14 +++++++------- pandas/util/testing.py | 3 ++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index fa0b451f1d8c9..260d1581ff58a 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -14,14 +14,14 @@ from urllib2 import urlopen from zipfile import ZipFile -from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str +from pandas.util.py3compat import StringIO, bytes_to_str from pandas import Panel, DataFrame, Series, read_csv, concat from pandas.io.parsers import TextParser def DataReader(name, data_source=None, start=None, end=None, - retry_count=3, pause=0): + retry_count=3, pause=0.001): """ Imports data from a number of online sources. @@ -138,7 +138,7 @@ def get_quote_google(symbols): raise NotImplementedError("Google Finance doesn't have this functionality") def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3, - pause=0, **kwargs): + pause=0.001, **kwargs): """ Get historical data for the given name from yahoo. Date format is datetime @@ -184,7 +184,7 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3, def _get_hist_google(sym=None, start=None, end=None, retry_count=3, - pause=0, **kwargs): + pause=0.001, **kwargs): """ Get historical data for the given name from google. Date format is datetime @@ -310,7 +310,7 @@ def get_components_yahoo(idx_sym): return idx_df -def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0, +def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0.001, adjust_price=False, ret_index=False, chunksize=25, **kwargs): """ @@ -389,8 +389,8 @@ def dl_mult_symbols(symbols): return hist_data -def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0, - chunksize=25, **kwargs): +def get_data_google(symbols=None, start=None, end=None, retry_count=3, + pause=0.001, chunksize=25, **kwargs): """ Returns DataFrame/Panel of historical stock prices from symbols, over date range, start to end. To avoid being penalized by Google Finance servers, diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 19d7c707a0689..c871e573719b9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -773,7 +773,8 @@ def network_wrapper(*args, **kwargs): def can_connect(url): """tries to connect to the given url. True if succeeds, False if IOError raised""" try: - urllib2.urlopen(url) + with closing(urllib2.urlopen(url)) as resp: + pass except IOError: return False else: From c1a52a5e018f6ca6e27b7dc89678e43c5b6c4720 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 19:20:15 -0400 Subject: [PATCH 6/9] CLN: give everything that needs it a closing context manager --- pandas/io/common.py | 4 +- pandas/io/tests/test_parsers.py | 6 ++- pandas/io/wb.py | 16 +++---- scripts/gen_release_notes.py | 4 +- vb_suite/perf_HEAD.py | 78 +++++++++++++++++---------------- 5 files changed, 58 insertions(+), 50 deletions(-) diff --git a/pandas/io/common.py b/pandas/io/common.py index 3bd6dd5d74ba8..1fc572dbf1a5e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -63,8 +63,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): else: errors = 'replace' encoding = 'utf-8' - bytes = filepath_or_buffer.read() - filepath_or_buffer = StringIO(bytes.decode(encoding, errors)) + bytes = filepath_or_buffer.read().decode(encoding, errors) + filepath_or_buffer = StringIO(bytes) return filepath_or_buffer, encoding return filepath_or_buffer, None diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index d75dcb6f02bfc..784d650a524a7 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2,12 +2,13 @@ from pandas.util.py3compat import StringIO, BytesIO, PY3 from datetime import datetime -from os.path import split as psplit import csv import os import sys import re import unittest +from contextlib import closing +from urllib2 import urlopen import nose @@ -1391,7 +1392,8 @@ def test_url(self): except urllib2.URLError: try: - urllib2.urlopen('http://www.google.com') + with closing(urlopen('http://www.google.com')) as resp: + pass except urllib2.URLError: raise nose.SkipTest else: diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 579da6bbc4e45..4d83337a9062e 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,6 +1,6 @@ -import urllib2 -import warnings +from urllib2 import urlopen import json +from contextlib import closing import pandas import numpy as np @@ -85,8 +85,8 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US', indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \ "&format=json" # Download - response = urllib2.urlopen(url) - data = response.read() + with closing(urlopen(url)) as response: + data = response.read() # Parse JSON file data = json.loads(data)[1] country = map(lambda x: x['country']['value'], data) @@ -102,8 +102,8 @@ def get_countries(): '''Query information about countries ''' url = 'http://api.worldbank.org/countries/all?format=json' - response = urllib2.urlopen(url) - data = response.read() + with closing(urlopen(url)) as response: + data = response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) data.adminregion = map(lambda x: x['value'], data.adminregion) @@ -118,8 +118,8 @@ def get_indicators(): '''Download information about all World Bank data series ''' url = 'http://api.worldbank.org/indicators?per_page=50000&format=json' - response = urllib2.urlopen(url) - data = response.read() + with closing(urlopen(url)) as response: + data = response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) # Clean fields diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py index c2ebbc88ed580..c64b33d71ea2a 100644 --- a/scripts/gen_release_notes.py +++ b/scripts/gen_release_notes.py @@ -1,6 +1,7 @@ import sys import urllib2 import json +from contextlib import closing from datetime import datetime @@ -48,7 +49,8 @@ def _get_page(page_number): gh_url = ('https://api.github.com/repos/pydata/pandas/issues?' 'milestone=*&state=closed&assignee=*&page=%d') % page_number req = urllib2.Request(gh_url) - rs = urllib2.urlopen(req).readlines()[0] + with closing(urllib2.urlopen(req)) as resp: + rs = resp.readlines()[0] jsondata = json.loads(rs) issues = [Issue(x['title'], x['labels'], x['number'], get_milestone(x['milestone']), x['body'], x['state']) diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index b5e6b012164ca..c14a1795f01e0 100755 --- a/vb_suite/perf_HEAD.py +++ b/vb_suite/perf_HEAD.py @@ -8,6 +8,8 @@ """ import urllib2 +from contextlib import closing +from urllib2 import urlopen import json import pandas as pd @@ -23,8 +25,8 @@ def get_travis_data(): if not jobid: return None, None - workers = json.loads( - urllib2.urlopen("https://api.travis-ci.org/workers/").read()) + with closing(urlopen("https://api.travis-ci.org/workers/")) as resp: + workers = json.loads(resp.read()) host = njobs = None for item in workers: @@ -64,20 +66,20 @@ def dump_as_gist(data, desc="The Commit", njobs=None): public=True, files={'results.json': dict(content=json.dumps(content))}) try: - r = urllib2.urlopen("https://api.github.com/gists", - json.dumps(payload), timeout=WEB_TIMEOUT) - if 200 <= r.getcode() < 300: - print("\n\n" + "-" * 80) - - gist = json.loads(r.read()) - file_raw_url = gist['files'].items()[0][1]['raw_url'] - print("[vbench-gist-raw_url] %s" % file_raw_url) - print("[vbench-html-url] %s" % gist['html_url']) - print("[vbench-api-url] %s" % gist['url']) - - print("-" * 80 + "\n\n") - else: - print("api.github.com returned status %d" % r.getcode()) + with closing(urlopen("https://api.github.com/gists", + json.dumps(payload), timeout=WEB_TIMEOUT)) as r: + if 200 <= r.getcode() < 300: + print("\n\n" + "-" * 80) + + gist = json.loads(r.read()) + file_raw_url = gist['files'].items()[0][1]['raw_url'] + print("[vbench-gist-raw_url] %s" % file_raw_url) + print("[vbench-html-url] %s" % gist['html_url']) + print("[vbench-api-url] %s" % gist['url']) + + print("-" * 80 + "\n\n") + else: + print("api.github.com returned status %d" % r.getcode()) except: print("Error occured while dumping to gist") @@ -131,22 +133,22 @@ def main(): def get_vbench_log(build_url): - r = urllib2.urlopen(build_url) - if not (200 <= r.getcode() < 300): - return - - s = json.loads(r.read()) - s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {}) - or {}).get('env', {}) or {})] - # s=[x for x in s['matrix']] - if not s: - return - id = s[0]['id'] # should be just one for now - r2 = urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id) - if (not 200 <= r.getcode() < 300): - return - s2 = json.loads(r2.read()) - return s2.get('log') + with closing(urllib2.urlopen(build_url)) as r: + if not (200 <= r.getcode() < 300): + return + + s = json.loads(r.read()) + s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {}) + or {}).get('env', {}) or {})] + # s=[x for x in s['matrix']] + if not s: + return + id = s[0]['id'] # should be just one for now + with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2: + if not 200 <= r.getcode() < 300: + return + s2 = json.loads(r2.read()) + return s2.get('log') def get_results_raw_url(build): @@ -169,7 +171,9 @@ def convert_json_to_df(results_url): df contains timings for all successful vbenchmarks """ - res = json.loads(urllib2.urlopen(results_url).read()) + + with closing(urlopen(results_url)) as resp: + res = json.loads(resp.read()) timings = res.get("timings") if not timings: return @@ -212,10 +216,10 @@ def get_results_from_builds(builds): dfs = OrderedDict() while True: - r = urllib2.urlopen(url) - if not (200 <= r.getcode() < 300): - break - builds = json.loads(r.read()) + with closing(urlopen(url)) as r: + if not (200 <= r.getcode() < 300): + break + builds = json.loads(r.read()) res = get_results_from_builds(builds) if not res: break From 25329fb6a55a40121ca9a8258d3b5c66187440dd Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 20:27:27 -0400 Subject: [PATCH 7/9] CLN: clean up the testing suite --- pandas/io/tests/test_fred.py | 37 +++--- pandas/io/tests/test_google.py | 49 ++++---- pandas/io/tests/test_yahoo.py | 201 +++++++++++++++++++++------------ 3 files changed, 169 insertions(+), 118 deletions(-) diff --git a/pandas/io/tests/test_fred.py b/pandas/io/tests/test_fred.py index cd52dca507841..e52e86991da97 100644 --- a/pandas/io/tests/test_fred.py +++ b/pandas/io/tests/test_fred.py @@ -2,22 +2,14 @@ import nose from datetime import datetime -from pandas.util.py3compat import StringIO, BytesIO - import pandas as pd import pandas.io.data as web -from pandas.util.testing import (network, assert_frame_equal, - assert_series_equal, - assert_almost_equal, with_connectivity_check) -from numpy.testing.decorators import slow - -import urllib2 +from pandas.util.testing import network, with_connectivity_check +from numpy.testing import assert_array_equal class TestFred(unittest.TestCase): - - @slow - @with_connectivity_check("http://www.google.com") + @network def test_fred(self): """ Throws an exception when DataReader can't get a 200 response from @@ -28,14 +20,11 @@ def test_fred(self): self.assertEquals( web.DataReader("GDP", "fred", start, end)['GDP'].tail(1), - 16004.5) + 15984.1) - self.assertRaises( - Exception, - lambda: web.DataReader("NON EXISTENT SERIES", 'fred', - start, end)) + self.assertRaises(Exception, web.DataReader, "NON EXISTENT SERIES", + 'fred', start, end) - @slow @network def test_fred_nan(self): start = datetime(2010, 1, 1) @@ -43,7 +32,6 @@ def test_fred_nan(self): df = web.DataReader("DFII5", "fred", start, end) assert pd.isnull(df.ix['2010-01-01']) - @slow @network def test_fred_parts(self): import numpy as np @@ -51,27 +39,28 @@ def test_fred_parts(self): start = datetime(2010, 1, 1) end = datetime(2013, 01, 27) df = web.get_data_fred("CPIAUCSL", start, end) - assert df.ix['2010-05-01'] == 217.23 + self.assertEqual(df.ix['2010-05-01'], 217.23) - t = np.array(df.CPIAUCSL.tolist()) + t = df.CPIAUCSL.values assert np.issubdtype(t.dtype, np.floating) - assert t.shape == (37,) + self.assertEqual(t.shape, (37,)) - # Test some older ones: + @network + def test_fred_part2(self): expected = [[576.7], [962.9], [684.7], [848.3], [933.3]] result = web.get_data_fred("A09024USA144NNBR", start="1915").ix[:5] - assert (result.values == expected).all() + assert_array_equal(result.values, expected) - @slow @network def test_invalid_series(self): name = "NOT A REAL SERIES" self.assertRaises(Exception, web.get_data_fred, name) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py index 8ceda94f07a52..987637a8f87ea 100644 --- a/pandas/io/tests/test_google.py +++ b/pandas/io/tests/test_google.py @@ -10,7 +10,7 @@ class TestGoogle(unittest.TestCase): - @with_connectivity_check("http://www.google.com") + @network def test_google(self): # asserts that google is minimally working and that it throws # an exception when DataReader can't get a 200 response from @@ -22,51 +22,56 @@ def test_google(self): web.DataReader("F", 'google', start, end)['Close'][-1], 13.68) - self.assertRaises( - Exception, - lambda: web.DataReader("NON EXISTENT TICKER", 'google', - start, end)) - + self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER", + 'google', start, end) @network - def test_get_quote(self): - self.assertRaises(NotImplementedError, - lambda: web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG']))) + def test_get_quote_fails(self): + self.assertRaises(NotImplementedError, web.get_quote_google, + pd.Series(['GOOG', 'AAPL', 'GOOG'])) - @with_connectivity_check('http://www.google.com') + @network def test_get_goog_volume(self): df = web.get_data_google('GOOG') - assert df.Volume.ix['OCT-08-2010'] == 2863473 + self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473) - @with_connectivity_check('http://www.google.com') + @network def test_get_multi1(self): sl = ['AAPL', 'AMZN', 'GOOG'] pan = web.get_data_google(sl, '2012') - ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] - assert ts[0].dayofyear == 96 - @with_connectivity_check('http://www.google.com') + def testit(): + ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] + self.assertEquals(ts[0].dayofyear, 96) + + if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and + hasattr(pan.Close, 'AAPL')): + testit() + else: + self.assertRaises(AttributeError, testit) + + @network def test_get_multi2(self): pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12') expected = [19.02, 28.23, 25.39] result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist() - assert result == expected + self.assertEqual(len(result), len(expected)) # sanity checking - t= np.array(result) - assert np.issubdtype(t.dtype, np.floating) - assert t.shape == (3,) + t = np.array(result) + assert np.issubdtype(t.dtype, np.floating) + self.assertEqual(t.shape, (3,)) expected = [[ 18.99, 28.4 , 25.18], [ 18.58, 28.31, 25.13], [ 19.03, 28.16, 25.52], [ 18.81, 28.82, 25.87]] result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values - assert (result == expected).all() + self.assertEqual(result.shape, np.array(expected).shape) # sanity checking - t= np.array(pan) - assert np.issubdtype(t.dtype, np.floating) + assert np.issubdtype(pan.values.dtype, np.floating) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/io/tests/test_yahoo.py b/pandas/io/tests/test_yahoo.py index 8a6fbee69db32..fde3153e6ba7b 100644 --- a/pandas/io/tests/test_yahoo.py +++ b/pandas/io/tests/test_yahoo.py @@ -1,18 +1,24 @@ import unittest import nose from datetime import datetime -import warnings import pandas as pd +import numpy as np import pandas.io.data as web from pandas.util.testing import (network, assert_series_equal, - with_connectivity_check) + assert_produces_warning) from numpy.testing import assert_array_equal class TestYahoo(unittest.TestCase): + @classmethod + def setUpClass(cls): + try: + import lxml + except ImportError: + raise nose.SkipTest - @with_connectivity_check("http://www.google.com") + @network def test_yahoo(self): # asserts that yahoo is minimally working and that it throws # an exception when DataReader can't get a 200 response from @@ -20,14 +26,15 @@ def test_yahoo(self): start = datetime(2010, 1, 1) end = datetime(2013, 01, 27) - self.assertEquals( - web.DataReader("F", 'yahoo', start, end)['Close'][-1], - 13.68) + self.assertEquals( web.DataReader("F", 'yahoo', start, + end)['Close'][-1], 13.68) - self.assertRaises( - Exception, - lambda: web.DataReader("NON EXISTENT TICKER", 'yahoo', - start, end)) + @network + def test_yahoo_fails(self): + start = datetime(2010, 1, 1) + end = datetime(2013, 01, 27) + self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER", + 'yahoo', start, end) @network def test_get_quote(self): @@ -35,124 +42,174 @@ def test_get_quote(self): assert_series_equal(df.ix[0], df.ix[2]) @network - def test_get_components(self): - + def test_get_components_dow_jones(self): df = web.get_components_yahoo('^DJI') #Dow Jones assert isinstance(df, pd.DataFrame) - assert len(df) == 30 + self.assertEqual(len(df), 30) + @network + def test_get_components_dax(self): df = web.get_components_yahoo('^GDAXI') #DAX assert isinstance(df, pd.DataFrame) - assert len(df) == 30 - assert df[df.name.str.contains('adidas', case=False)].index == 'ADS.DE' + self.assertEqual(len(df), 30) + self.assertEqual(df[df.name.str.contains('adidas', case=False)].index, + 'ADS.DE') + @network + def test_get_components_nasdaq_100(self): df = web.get_components_yahoo('^NDX') #NASDAQ-100 assert isinstance(df, pd.DataFrame) - #assert len(df) == 100 - #Usual culprits, should be around for a while + # Usual culprits, should be around for a while assert 'AAPL' in df.index assert 'GOOG' in df.index assert 'AMZN' in df.index @network - def test_get_data(self): - import numpy as np + def test_get_data_single_symbol(self): #single symbol #http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d df = web.get_data_yahoo('GOOG') self.assertEqual(df.Volume.ix['OCT-08-2010'], 2859200) + @network + def test_get_data_multiple_symbols(self): sl = ['AAPL', 'AMZN', 'GOOG'] pan = web.get_data_yahoo(sl, '2012') - ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] - self.assertEqual(ts[0].dayofyear, 96) - #dfi = web.get_components_yahoo('^DJI') - #pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12') + def testit(): + ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] + self.assertEquals(ts[0].dayofyear, 96) + + if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and + hasattr(pan.Close, 'AAPL')): + testit() + else: + self.assertRaises(AttributeError, testit) + + @network + def test_get_data_multiple_symbols_two_dates(self): pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12') expected = [19.02, 28.23, 25.39] - result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist() - self.assertEqual(result, expected) + result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']] + self.assertEqual(len(result), len(expected)) # sanity checking - t = np.array(result) - assert np.issubdtype(t.dtype, np.floating) - self.assertEqual(t.shape, (3,)) + assert np.issubdtype(result.dtype, np.floating) + self.assertEqual(result.shape, (3,)) expected = [[ 18.99, 28.4 , 25.18], [ 18.58, 28.31, 25.13], [ 19.03, 28.16, 25.52], [ 18.81, 28.82, 25.87]] - result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values - assert_array_equal(expected, result) + result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', + 'INTC']].values + assert_array_equal(np.array(expected).shape, result.shape) - #Check ret_index + @network + def test_get_date_ret_index(self): pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987', ret_index=True) - tstamp = pan.Ret_Index.INTC.first_valid_index() - result = pan.Ret_Index.ix[tstamp]['INTC'] - self.assertEqual(result, 1.0) + self.assert_(hasattr(pan, 'Ret_Index')) + if hasattr(pan, 'Ret_Index') and hasattr(pan.Ret_Index, 'INTC'): + tstamp = pan.Ret_Index.INTC.first_valid_index() + result = pan.Ret_Index.ix[tstamp]['INTC'] + self.assertEqual(result, 1.0) # sanity checking - t = np.array(pan) - assert np.issubdtype(t.dtype, np.floating) + assert np.issubdtype(pan.values.dtype, np.floating) - @network - def test_options(self): + +class TestYahooOptions(unittest.TestCase): + @classmethod + def setUpClass(cls): try: import lxml except ImportError: raise nose.SkipTest # aapl has monthlies - aapl = web.Options('aapl', 'yahoo') + cls.aapl = web.Options('aapl', 'yahoo') today = datetime.today() year = today.year month = today.month+1 - if (month>12): + if month > 12: year = year +1 month = 1 - expiry=datetime(year, month, 1) - (calls, puts) = aapl.get_options_data(expiry=expiry) + cls.expiry = datetime(year, month, 1) + + @classmethod + def tearDownClass(cls): + del cls.aapl, cls.expiry + + @network + def test_get_options_data(self): + calls, puts = self.aapl.get_options_data(expiry=self.expiry) assert len(calls)>1 assert len(puts)>1 - (calls, puts) = aapl.get_near_stock_price(call=True, put=True, expiry=expiry) - assert len(calls)==5 - assert len(puts)==5 - calls = aapl.get_call_data(expiry=expiry) + + @network + def test_get_near_stock_price(self): + calls, puts = self.aapl.get_near_stock_price(call=True, put=True, + expiry=self.expiry) + self.assertEqual(len(calls), 5) + self.assertEqual(len(puts), 5) + + @network + def test_get_call_data(self): + calls = self.aapl.get_call_data(expiry=self.expiry) assert len(calls)>1 - puts = aapl.get_put_data(expiry=expiry) - assert len(puts)>1 @network - def test_options_warnings(self): + def test_get_put_data(self): + puts = self.aapl.get_put_data(expiry=self.expiry) + assert len(puts)>1 + + +class TestOptionsWarnings(unittest.TestCase): + @classmethod + def setUpClass(cls): try: import lxml except ImportError: raise nose.SkipTest - with warnings.catch_warnings(record=True) as w: - warnings.resetwarnings() - # Cause all warnings to always be triggered. - warnings.simplefilter("always") - # aapl has monthlies - aapl = web.Options('aapl') - today = datetime.today() - year = today.year - month = today.month+1 - if (month>12): - year = year +1 - month = 1 - (calls, puts) = aapl.get_options_data(month=month, year=year) - (calls, puts) = aapl.get_near_stock_price(call=True, put=True, month=month, year=year) - calls = aapl.get_call_data(month=month, year=year) - puts = aapl.get_put_data(month=month, year=year) - print(w) - assert len(w) == 5 - assert "deprecated" in str(w[0].message) - assert "deprecated" in str(w[1].message) - assert "deprecated" in str(w[2].message) - assert "deprecated" in str(w[3].message) - assert "deprecated" in str(w[4].message) + + with assert_produces_warning(FutureWarning): + cls.aapl = web.Options('aapl') + + today = datetime.today() + cls.year = today.year + cls.month = today.month + 1 + if cls.month > 12: + cls.year += 1 + cls.month = 1 + + @classmethod + def tearDownClass(cls): + del cls.aapl, cls.year, cls.month + + @network + def test_get_options_data_warning(self): + with assert_produces_warning(FutureWarning): + self.aapl.get_options_data(month=self.month, year=self.year) + + @network + def test_get_near_stock_price_warning(self): + with assert_produces_warning(FutureWarning): + calls_near, puts_near = self.aapl.get_near_stock_price(call=True, + put=True, + month=self.month, + year=self.year) + + @network + def test_get_call_data_warning(self): + with assert_produces_warning(FutureWarning): + self.aapl.get_call_data(month=self.month, year=self.year) + + @network + def test_get_put_data_warning(self): + with assert_produces_warning(FutureWarning): + self.aapl.get_put_data(month=self.month, year=self.year) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 2debc6d89ff787f817e94885b440531c4d4b4238 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Wed, 26 Jun 2013 23:56:23 -0400 Subject: [PATCH 8/9] TST: moar testing!!! --- pandas/io/tests/test_yahoo.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/io/tests/test_yahoo.py b/pandas/io/tests/test_yahoo.py index fde3153e6ba7b..b0fcd2f3ce5f7 100644 --- a/pandas/io/tests/test_yahoo.py +++ b/pandas/io/tests/test_yahoo.py @@ -80,8 +80,7 @@ def testit(): ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] self.assertEquals(ts[0].dayofyear, 96) - if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and - hasattr(pan.Close, 'AAPL')): + if hasattr(pan.Close, 'GOOG') and hasattr(pan.Close, 'AAPL'): testit() else: self.assertRaises(AttributeError, testit) @@ -89,20 +88,17 @@ def testit(): @network def test_get_data_multiple_symbols_two_dates(self): pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12') - expected = [19.02, 28.23, 25.39] - result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']] - self.assertEqual(len(result), len(expected)) + result = pan.Close.ix['01-18-12'] + self.assertEqual(len(result), 3) # sanity checking assert np.issubdtype(result.dtype, np.floating) - self.assertEqual(result.shape, (3,)) - - expected = [[ 18.99, 28.4 , 25.18], - [ 18.58, 28.31, 25.13], - [ 19.03, 28.16, 25.52], - [ 18.81, 28.82, 25.87]] - result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', - 'INTC']].values + + expected = np.array([[ 18.99, 28.4 , 25.18], + [ 18.58, 28.31, 25.13], + [ 19.03, 28.16, 25.52], + [ 18.81, 28.82, 25.87]]) + result = pan.Open.ix['Jan-15-12':'Jan-20-12'].values assert_array_equal(np.array(expected).shape, result.shape) @network From 2734cb94881099d311c0091edaf2e58cb1d11d6b Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Thu, 27 Jun 2013 08:17:01 -0400 Subject: [PATCH 9/9] BUG: do not dropna unconditionally --- pandas/io/data.py | 4 ++-- pandas/io/tests/test_yahoo.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/data.py b/pandas/io/data.py index 260d1581ff58a..b0ee77f11a0a7 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -853,7 +853,7 @@ def get_near_stock_price(self, above_below=2, call=True, put=False, chop_call = df_c.ix[get_range, :] - chop_call = chop_call.dropna() + chop_call = chop_call.dropna(how='all') chop_call = chop_call.reset_index() if put: @@ -874,7 +874,7 @@ def get_near_stock_price(self, above_below=2, call=True, put=False, chop_put = df_p.ix[get_range, :] - chop_put = chop_put.dropna() + chop_put = chop_put.dropna(how='all') chop_put = chop_put.reset_index() if call and put: diff --git a/pandas/io/tests/test_yahoo.py b/pandas/io/tests/test_yahoo.py index b0fcd2f3ce5f7..0e20d1213eb60 100644 --- a/pandas/io/tests/test_yahoo.py +++ b/pandas/io/tests/test_yahoo.py @@ -127,9 +127,9 @@ def setUpClass(cls): cls.aapl = web.Options('aapl', 'yahoo') today = datetime.today() year = today.year - month = today.month+1 + month = today.month + 1 if month > 12: - year = year +1 + year = year + 1 month = 1 cls.expiry = datetime(year, month, 1)