diff --git a/pandas/io/common.py b/pandas/io/common.py
index 3bd6dd5d74ba8..1fc572dbf1a5e 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -63,8 +63,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None):
             else:
                 errors = 'replace'
                 encoding = 'utf-8'
-            bytes = filepath_or_buffer.read()
-            filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
+            bytes = filepath_or_buffer.read().decode(encoding, errors)
+            filepath_or_buffer = StringIO(bytes)
             return filepath_or_buffer, encoding
         return filepath_or_buffer, None
 
diff --git a/pandas/io/data.py b/pandas/io/data.py
index 9cf5eeb1fed4e..b0ee77f11a0a7 100644
--- a/pandas/io/data.py
+++ b/pandas/io/data.py
@@ -4,6 +4,7 @@
 """
 
 import warnings
+import tempfile
 import numpy as np
 import datetime as dt
 
@@ -13,14 +14,14 @@
 from urllib2 import urlopen
 from zipfile import ZipFile
 
-from pandas.util.py3compat import StringIO, BytesIO, bytes_to_str
+from pandas.util.py3compat import StringIO, bytes_to_str
 
 from pandas import Panel, DataFrame, Series, read_csv, concat
 from pandas.io.parsers import TextParser
 
 
 def DataReader(name, data_source=None, start=None, end=None,
-               retry_count=3, pause=0):
+               retry_count=3, pause=0.001):
     """
     Imports data from a number of online sources.
 
@@ -137,7 +138,7 @@ def get_quote_google(symbols):
     raise NotImplementedError("Google Finance doesn't have this functionality")
 
 
 def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
-                    pause=0, **kwargs):
+                    pause=0.001, **kwargs):
     """
     Get historical data for the given name from yahoo.
     Date format is datetime
@@ -183,7 +184,7 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
 
 
 def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
-                     pause=0, **kwargs):
+                     pause=0.001, **kwargs):
     """
     Get historical data for the given name from google.
     Date format is datetime
@@ -309,7 +310,7 @@ def get_components_yahoo(idx_sym):
     return idx_df
 
 
-def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0,
+def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, pause=0.001,
                    adjust_price=False, ret_index=False, chunksize=25,
                    **kwargs):
     """
@@ -388,8 +389,8 @@ def dl_mult_symbols(symbols):
     return hist_data
 
 
-def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
-                    chunksize=25, **kwargs):
+def get_data_google(symbols=None, start=None, end=None, retry_count=3,
+                    pause=0.001, chunksize=25, **kwargs):
     """
     Returns DataFrame/Panel of historical stock prices from symbols, over date
     range, start to end.  To avoid being penalized by Google Finance servers,
@@ -493,8 +494,13 @@ def get_data_famafrench(name, start=None, end=None):
     zipFileURL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/"
 
     with closing(urlopen(zipFileURL + name + ".zip")) as url:
-        with closing(ZipFile(StringIO(url.read()))) as zf:
-            data = zf.read(name + ".txt").splitlines()
+        raw = url.read()
+
+    with tempfile.TemporaryFile() as tmpf:
+        tmpf.write(raw)
+
+        with closing(ZipFile(tmpf, 'r')) as zf:
+            data = zf.read(name + '.txt').splitlines()
 
     file_edges = np.where(np.array([len(d) for d in data]) == 2)[0]
 
@@ -847,7 +853,7 @@ def get_near_stock_price(self, above_below=2, call=True, put=False,
 
             chop_call = df_c.ix[get_range, :]
 
-            chop_call = chop_call.dropna()
+            chop_call = chop_call.dropna(how='all')
             chop_call = chop_call.reset_index()
 
         if put:
@@ -868,7 +874,7 @@ def get_near_stock_price(self, above_below=2, call=True, put=False,
 
             chop_put = df_p.ix[get_range, :]
 
-            chop_put = chop_put.dropna()
+            chop_put = chop_put.dropna(how='all')
             chop_put = chop_put.reset_index()
 
         if call and put:
diff --git a/pandas/io/tests/test_data_reader.py b/pandas/io/tests/test_data_reader.py
new file mode 100644
index 0000000000000..129e35921335c
--- /dev/null
+++ b/pandas/io/tests/test_data_reader.py
@@ -0,0 +1,30 @@
+import unittest
+
+from pandas.core.generic import PandasObject
+from pandas.io.data import DataReader
+from pandas.util.testing import network
+
+
+class TestDataReader(unittest.TestCase):
+    @network
+    def test_read_yahoo(self):
+        gs = DataReader("GS", "yahoo")
+        assert isinstance(gs, PandasObject)
+
+    @network
+    def test_read_google(self):
+        gs = DataReader("GS", "google")
+        assert isinstance(gs, PandasObject)
+
+    @network
+    def test_read_fred(self):
+        vix = DataReader("VIXCLS", "fred")
+        assert isinstance(vix, PandasObject)
+
+    @network
+    def test_read_famafrench(self):
+        for name in ("F-F_Research_Data_Factors",
+                     "F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
+                     "F-F_ST_Reversal_Factor"):
+            ff = DataReader(name, "famafrench")
+            assert isinstance(ff, dict)
diff --git a/pandas/io/tests/test_fred.py b/pandas/io/tests/test_fred.py
index cd52dca507841..e52e86991da97 100644
--- a/pandas/io/tests/test_fred.py
+++ b/pandas/io/tests/test_fred.py
@@ -2,22 +2,14 @@
 import nose
 from datetime import datetime
 
-from pandas.util.py3compat import StringIO, BytesIO
-
 import pandas as pd
 import pandas.io.data as web
-from pandas.util.testing import (network, assert_frame_equal,
-                                 assert_series_equal,
-                                 assert_almost_equal, with_connectivity_check)
-from numpy.testing.decorators import slow
-
-import urllib2
+from pandas.util.testing import network, with_connectivity_check
+from numpy.testing import assert_array_equal
 
 
 class TestFred(unittest.TestCase):
-
-    @slow
-    @with_connectivity_check("http://www.google.com")
+    @network
     def test_fred(self):
         """
         Throws an exception when DataReader can't get a 200 response from
@@ -28,14 +20,11 @@ def test_fred(self):
 
         self.assertEquals(
             web.DataReader("GDP", "fred", start, end)['GDP'].tail(1),
-            16004.5)
+            15984.1)
 
-        self.assertRaises(
-            Exception,
-            lambda: web.DataReader("NON EXISTENT SERIES", 'fred',
-                                   start, end))
+        self.assertRaises(Exception, web.DataReader, "NON EXISTENT SERIES",
+                          'fred', start, end)
 
-    @slow
     @network
     def test_fred_nan(self):
         start = datetime(2010, 1, 1)
@@ -43,7 +32,6 @@ def test_fred_nan(self):
         df = web.DataReader("DFII5", "fred", start, end)
         assert pd.isnull(df.ix['2010-01-01'])
 
-    @slow
     @network
     def test_fred_parts(self):
         import numpy as np
@@ -51,27 +39,28 @@ def test_fred_parts(self):
         start = datetime(2010, 1, 1)
         end = datetime(2013, 01, 27)
         df = web.get_data_fred("CPIAUCSL", start, end)
-        assert df.ix['2010-05-01'] == 217.23
+        self.assertEqual(df.ix['2010-05-01'], 217.23)
 
-        t = np.array(df.CPIAUCSL.tolist())
+        t = df.CPIAUCSL.values
         assert np.issubdtype(t.dtype, np.floating)
-        assert t.shape == (37,)
+        self.assertEqual(t.shape, (37,))
 
-        # Test some older ones:
+    @network
+    def test_fred_part2(self):
         expected = [[576.7],
                     [962.9],
                     [684.7],
                     [848.3],
                     [933.3]]
         result = web.get_data_fred("A09024USA144NNBR", start="1915").ix[:5]
-        assert (result.values == expected).all()
+        assert_array_equal(result.values, expected)
 
-    @slow
     @network
     def test_invalid_series(self):
         name = "NOT A REAL SERIES"
         self.assertRaises(Exception, web.get_data_fred, name)
 
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/io/tests/test_google.py b/pandas/io/tests/test_google.py
index 8ceda94f07a52..987637a8f87ea 100644
--- a/pandas/io/tests/test_google.py
+++ b/pandas/io/tests/test_google.py
@@ -10,7 +10,7 @@
 
 
 class TestGoogle(unittest.TestCase):
-    @with_connectivity_check("http://www.google.com")
+    @network
     def test_google(self):
         # asserts that google is minimally working and that it throws
         # an exception when DataReader can't get a 200 response from
@@ -22,51 +22,56 @@ def test_google(self):
             web.DataReader("F", 'google', start, end)['Close'][-1],
             13.68)
 
-        self.assertRaises(
-            Exception,
-            lambda: web.DataReader("NON EXISTENT TICKER", 'google',
-                                   start, end))
-
+        self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER",
+                          'google', start, end)
 
     @network
-    def test_get_quote(self):
-        self.assertRaises(NotImplementedError,
-                          lambda: web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG'])))
+    def test_get_quote_fails(self):
+        self.assertRaises(NotImplementedError, web.get_quote_google,
+                          pd.Series(['GOOG', 'AAPL', 'GOOG']))
 
-    @with_connectivity_check('http://www.google.com')
+    @network
     def test_get_goog_volume(self):
         df = web.get_data_google('GOOG')
-        assert df.Volume.ix['OCT-08-2010'] == 2863473
+        self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473)
 
-    @with_connectivity_check('http://www.google.com')
+    @network
     def test_get_multi1(self):
         sl = ['AAPL', 'AMZN', 'GOOG']
         pan = web.get_data_google(sl, '2012')
-        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
-        assert ts[0].dayofyear == 96
 
-    @with_connectivity_check('http://www.google.com')
+        def testit():
+            ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
+            self.assertEquals(ts[0].dayofyear, 96)
+
+        if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and
+                hasattr(pan.Close, 'AAPL')):
+            testit()
+        else:
+            self.assertRaises(AttributeError, testit)
+
+    @network
     def test_get_multi2(self):
         pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
                                   'JAN-31-12')
         expected = [19.02, 28.23, 25.39]
         result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
-        assert result == expected
+        self.assertEqual(len(result), len(expected))
 
         # sanity checking
-        t= np.array(result)
-        assert np.issubdtype(t.dtype, np.floating)
-        assert t.shape == (3,)
+        t = np.array(result)
+        assert np.issubdtype(t.dtype, np.floating)
+        self.assertEqual(t.shape, (3,))
 
         expected = [[ 18.99, 28.4 , 25.18],
                     [ 18.58, 28.31, 25.13],
                     [ 19.03, 28.16, 25.52],
                     [ 18.81, 28.82, 25.87]]
         result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
-        assert (result == expected).all()
+        self.assertEqual(result.shape, np.array(expected).shape)
 
         # sanity checking
-        t= np.array(pan)
-        assert np.issubdtype(t.dtype, np.floating)
+        assert np.issubdtype(pan.values.dtype, np.floating)
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index d75dcb6f02bfc..784d650a524a7 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -2,12 +2,13 @@
 
 from pandas.util.py3compat import StringIO, BytesIO, PY3
 from datetime import datetime
-from os.path import split as psplit
 import csv
 import os
 import sys
 import re
 import unittest
+from contextlib import closing
+from urllib2 import urlopen
 
 import nose
@@ -1391,7 +1392,8 @@ def test_url(self):
 
         except urllib2.URLError:
             try:
-                urllib2.urlopen('http://www.google.com')
+                with closing(urlopen('http://www.google.com')) as resp:
+                    pass
             except urllib2.URLError:
                 raise nose.SkipTest
             else:
diff --git a/pandas/io/tests/test_yahoo.py b/pandas/io/tests/test_yahoo.py
index f2a55a4231c00..0e20d1213eb60 100644
--- a/pandas/io/tests/test_yahoo.py
+++ b/pandas/io/tests/test_yahoo.py
@@ -1,16 +1,24 @@
 import unittest
 import nose
 from datetime import datetime
-import warnings
 
 import pandas as pd
+import numpy as np
 import pandas.io.data as web
-from pandas.util.testing import network, assert_series_equal, with_connectivity_check
+from pandas.util.testing import (network, assert_series_equal,
+                                 assert_produces_warning)
+from numpy.testing import assert_array_equal
 
 
 class TestYahoo(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        try:
+            import lxml
+        except ImportError:
+            raise nose.SkipTest
 
-    @with_connectivity_check("http://www.google.com")
+    @network
     def test_yahoo(self):
         # asserts that yahoo is minimally working and that it throws
         # an exception when DataReader can't get a 200 response from
@@ -18,141 +26,186 @@ class TestYahoo(unittest.TestCase):
         start = datetime(2010, 1, 1)
         end = datetime(2013, 01, 27)
 
-        self.assertEquals(
-            web.DataReader("F", 'yahoo', start, end)['Close'][-1],
-            13.68)
+        self.assertEquals( web.DataReader("F", 'yahoo', start,
+                                          end)['Close'][-1], 13.68)
 
-        self.assertRaises(
-            Exception,
-            lambda: web.DataReader("NON EXISTENT TICKER", 'yahoo',
-                                   start, end))
+    @network
+    def test_yahoo_fails(self):
+        start = datetime(2010, 1, 1)
+        end = datetime(2013, 01, 27)
+        self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER",
+                          'yahoo', start, end)
 
     @network
     def test_get_quote(self):
         df = web.get_quote_yahoo(pd.Series(['GOOG', 'AAPL', 'GOOG']))
         assert_series_equal(df.ix[0], df.ix[2])
 
-
     @network
-    def test_get_components(self):
-
+    def test_get_components_dow_jones(self):
         df = web.get_components_yahoo('^DJI') #Dow Jones
         assert isinstance(df, pd.DataFrame)
-        assert len(df) == 30
+        self.assertEqual(len(df), 30)
 
+    @network
+    def test_get_components_dax(self):
         df = web.get_components_yahoo('^GDAXI') #DAX
         assert isinstance(df, pd.DataFrame)
-        assert len(df) == 30
-        assert df[df.name.str.contains('adidas', case=False)].index == 'ADS.DE'
+        self.assertEqual(len(df), 30)
+        self.assertEqual(df[df.name.str.contains('adidas', case=False)].index,
+                         'ADS.DE')
 
+    @network
+    def test_get_components_nasdaq_100(self):
         df = web.get_components_yahoo('^NDX') #NASDAQ-100
         assert isinstance(df, pd.DataFrame)
-        #assert len(df) == 100
-        #Usual culprits, should be around for a while
+        # Usual culprits, should be around for a while
         assert 'AAPL' in df.index
         assert 'GOOG' in df.index
         assert 'AMZN' in df.index
 
     @network
-    def test_get_data(self):
-        import numpy as np
+    def test_get_data_single_symbol(self):
         #single symbol
         #http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d
         df = web.get_data_yahoo('GOOG')
-        assert df.Volume.ix['OCT-08-2010'] == 2859200
+        self.assertEqual(df.Volume.ix['OCT-08-2010'], 2859200)
 
+    @network
+    def test_get_data_multiple_symbols(self):
         sl = ['AAPL', 'AMZN', 'GOOG']
         pan = web.get_data_yahoo(sl, '2012')
-        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
-        assert ts[0].dayofyear == 96
 
-        #dfi = web.get_components_yahoo('^DJI')
-        #pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12')
+        def testit():
+            ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
+            self.assertEquals(ts[0].dayofyear, 96)
+
+        if hasattr(pan.Close, 'GOOG') and hasattr(pan.Close, 'AAPL'):
+            testit()
+        else:
+            self.assertRaises(AttributeError, testit)
+
+    @network
+    def test_get_data_multiple_symbols_two_dates(self):
         pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12',
                                  'JAN-31-12')
-        expected = [19.02, 28.23, 25.39]
-        result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
-        assert result == expected
+        result = pan.Close.ix['01-18-12']
+        self.assertEqual(len(result), 3)
 
         # sanity checking
-        t= np.array(result)
-        assert np.issubdtype(t.dtype, np.floating)
-        assert t.shape == (3,)
-
-        expected = [[ 18.99, 28.4 , 25.18],
-                    [ 18.58, 28.31, 25.13],
-                    [ 19.03, 28.16, 25.52],
-                    [ 18.81, 28.82, 25.87]]
-        result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
-        assert (result == expected).all()
-
-        #Check ret_index
+        assert np.issubdtype(result.dtype, np.floating)
+
+        expected = np.array([[ 18.99, 28.4 , 25.18],
+                             [ 18.58, 28.31, 25.13],
+                             [ 19.03, 28.16, 25.52],
+                             [ 18.81, 28.82, 25.87]])
+        result = pan.Open.ix['Jan-15-12':'Jan-20-12'].values
+        assert_array_equal(np.array(expected).shape, result.shape)
+
+    @network
+    def test_get_date_ret_index(self):
         pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987',
                                  ret_index=True)
-        tstamp = pan.Ret_Index.INTC.first_valid_index()
-        result = pan.Ret_Index.ix[tstamp]['INTC']
-        expected = 1.0
-        assert result == expected
+        self.assert_(hasattr(pan, 'Ret_Index'))
+        if hasattr(pan, 'Ret_Index') and hasattr(pan.Ret_Index, 'INTC'):
+            tstamp = pan.Ret_Index.INTC.first_valid_index()
+            result = pan.Ret_Index.ix[tstamp]['INTC']
+            self.assertEqual(result, 1.0)
 
         # sanity checking
-        t= np.array(pan)
-        assert np.issubdtype(t.dtype, np.floating)
+        assert np.issubdtype(pan.values.dtype, np.floating)
 
-    @network
-    def test_options(self):
+
+class TestYahooOptions(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
         try:
             import lxml
         except ImportError:
             raise nose.SkipTest
         # aapl has monthlies
-        aapl = web.Options('aapl', 'yahoo')
+        cls.aapl = web.Options('aapl', 'yahoo')
         today = datetime.today()
         year = today.year
-        month = today.month+1
-        if (month>12):
-            year = year +1
+        month = today.month + 1
+        if month > 12:
+            year = year + 1
             month = 1
-        expiry=datetime(year, month, 1)
-        (calls, puts) = aapl.get_options_data(expiry=expiry)
+        cls.expiry = datetime(year, month, 1)
+
+    @classmethod
+    def tearDownClass(cls):
+        del cls.aapl, cls.expiry
+
+    @network
+    def test_get_options_data(self):
+        calls, puts = self.aapl.get_options_data(expiry=self.expiry)
         assert len(calls)>1
         assert len(puts)>1
-        (calls, puts) = aapl.get_near_stock_price(call=True, put=True, expiry=expiry)
-        assert len(calls)==5
-        assert len(puts)==5
-        calls = aapl.get_call_data(expiry=expiry)
+
+    @network
+    def test_get_near_stock_price(self):
+        calls, puts = self.aapl.get_near_stock_price(call=True, put=True,
+                                                     expiry=self.expiry)
+        self.assertEqual(len(calls), 5)
+        self.assertEqual(len(puts), 5)
+
+    @network
+    def test_get_call_data(self):
+        calls = self.aapl.get_call_data(expiry=self.expiry)
         assert len(calls)>1
-        puts = aapl.get_put_data(expiry=expiry)
-        assert len(puts)>1
 
     @network
-    def test_options_warnings(self):
+    def test_get_put_data(self):
+        puts = self.aapl.get_put_data(expiry=self.expiry)
+        assert len(puts)>1
+
+
+class TestOptionsWarnings(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
         try:
             import lxml
         except ImportError:
             raise nose.SkipTest
-        with warnings.catch_warnings(record=True) as w:
-            warnings.resetwarnings()
-            # Cause all warnings to always be triggered.
-            warnings.simplefilter("always")
-            # aapl has monthlies
-            aapl = web.Options('aapl')
-            today = datetime.today()
-            year = today.year
-            month = today.month+1
-            if (month>12):
-                year = year +1
-                month = 1
-            (calls, puts) = aapl.get_options_data(month=month, year=year)
-            (calls, puts) = aapl.get_near_stock_price(call=True, put=True, month=month, year=year)
-            calls = aapl.get_call_data(month=month, year=year)
-            puts = aapl.get_put_data(month=month, year=year)
-            print(w)
-            assert len(w) == 5
-            assert "deprecated" in str(w[0].message)
-            assert "deprecated" in str(w[1].message)
-            assert "deprecated" in str(w[2].message)
-            assert "deprecated" in str(w[3].message)
-            assert "deprecated" in str(w[4].message)
+
+        with assert_produces_warning(FutureWarning):
+            cls.aapl = web.Options('aapl')
+
+        today = datetime.today()
+        cls.year = today.year
+        cls.month = today.month + 1
+        if cls.month > 12:
+            cls.year += 1
+            cls.month = 1
+
+    @classmethod
+    def tearDownClass(cls):
+        del cls.aapl, cls.year, cls.month
+
+    @network
+    def test_get_options_data_warning(self):
+        with assert_produces_warning(FutureWarning):
+            self.aapl.get_options_data(month=self.month, year=self.year)
+
+    @network
+    def test_get_near_stock_price_warning(self):
+        with assert_produces_warning(FutureWarning):
+            calls_near, puts_near = self.aapl.get_near_stock_price(call=True,
+                                                                   put=True,
+                                                                   month=self.month,
+                                                                   year=self.year)
+
+    @network
+    def test_get_call_data_warning(self):
+        with assert_produces_warning(FutureWarning):
+            self.aapl.get_call_data(month=self.month, year=self.year)
+
+    @network
+    def test_get_put_data_warning(self):
+        with assert_produces_warning(FutureWarning):
+            self.aapl.get_put_data(month=self.month, year=self.year)
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/io/wb.py b/pandas/io/wb.py
index 579da6bbc4e45..4d83337a9062e 100644
--- a/pandas/io/wb.py
+++ b/pandas/io/wb.py
@@ -1,6 +1,6 @@
-import urllib2
-import warnings
+from urllib2 import urlopen
 import json
+from contextlib import closing
 
 import pandas
 import numpy as np
@@ -85,8 +85,8 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US',
         indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \
         "&format=json"
     # Download
-    response = urllib2.urlopen(url)
-    data = response.read()
+    with closing(urlopen(url)) as response:
+        data = response.read()
     # Parse JSON file
     data = json.loads(data)[1]
     country = map(lambda x: x['country']['value'], data)
@@ -102,8 +102,8 @@ def get_countries():
     '''Query information about countries
     '''
     url = 'http://api.worldbank.org/countries/all?format=json'
-    response = urllib2.urlopen(url)
-    data = response.read()
+    with closing(urlopen(url)) as response:
+        data = response.read()
     data = json.loads(data)[1]
     data = pandas.DataFrame(data)
     data.adminregion = map(lambda x: x['value'], data.adminregion)
@@ -118,8 +118,8 @@ def get_indicators():
     '''Download information about all World Bank data series
     '''
     url = 'http://api.worldbank.org/indicators?per_page=50000&format=json'
-    response = urllib2.urlopen(url)
-    data = response.read()
+    with closing(urlopen(url)) as response:
+        data = response.read()
     data = json.loads(data)[1]
     data = pandas.DataFrame(data)
     # Clean fields
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 19d7c707a0689..c871e573719b9 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -773,7 +773,8 @@ def network_wrapper(*args, **kwargs):
 def can_connect(url):
     """tries to connect to the given url. True if succeeds, False if IOError raised"""
     try:
-        urllib2.urlopen(url)
+        with closing(urllib2.urlopen(url)) as resp:
+            pass
     except IOError:
         return False
     else:
diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py
index c2ebbc88ed580..c64b33d71ea2a 100644
--- a/scripts/gen_release_notes.py
+++ b/scripts/gen_release_notes.py
@@ -1,6 +1,7 @@
 import sys
 import urllib2
 import json
+from contextlib import closing
 from datetime import datetime
 
 
@@ -48,7 +49,8 @@ def _get_page(page_number):
     gh_url = ('https://api.github.com/repos/pydata/pandas/issues?'
               'milestone=*&state=closed&assignee=*&page=%d') % page_number
     req = urllib2.Request(gh_url)
-    rs = urllib2.urlopen(req).readlines()[0]
+    with closing(urllib2.urlopen(req)) as resp:
+        rs = resp.readlines()[0]
     jsondata = json.loads(rs)
     issues = [Issue(x['title'], x['labels'], x['number'],
                     get_milestone(x['milestone']), x['body'], x['state'])
diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py
index b5e6b012164ca..c14a1795f01e0 100755
--- a/vb_suite/perf_HEAD.py
+++ b/vb_suite/perf_HEAD.py
@@ -8,6 +8,8 @@
 """
 
 import urllib2
+from contextlib import closing
+from urllib2 import urlopen
 import json
 
 import pandas as pd
@@ -23,8 +25,8 @@ def get_travis_data():
     if not jobid:
         return None, None
 
-    workers = json.loads(
-        urllib2.urlopen("https://api.travis-ci.org/workers/").read())
+    with closing(urlopen("https://api.travis-ci.org/workers/")) as resp:
+        workers = json.loads(resp.read())
 
     host = njobs = None
     for item in workers:
@@ -64,20 +66,20 @@ def dump_as_gist(data, desc="The Commit", njobs=None):
                    public=True,
                    files={'results.json': dict(content=json.dumps(content))})
     try:
-        r = urllib2.urlopen("https://api.github.com/gists",
-                            json.dumps(payload), timeout=WEB_TIMEOUT)
-        if 200 <= r.getcode() < 300:
-            print("\n\n" + "-" * 80)
-
-            gist = json.loads(r.read())
-            file_raw_url = gist['files'].items()[0][1]['raw_url']
-            print("[vbench-gist-raw_url] %s" % file_raw_url)
-            print("[vbench-html-url] %s" % gist['html_url'])
-            print("[vbench-api-url] %s" % gist['url'])
-
-            print("-" * 80 + "\n\n")
-        else:
-            print("api.github.com returned status %d" % r.getcode())
+        with closing(urlopen("https://api.github.com/gists",
+                             json.dumps(payload), timeout=WEB_TIMEOUT)) as r:
+            if 200 <= r.getcode() < 300:
+                print("\n\n" + "-" * 80)
+
+                gist = json.loads(r.read())
+                file_raw_url = gist['files'].items()[0][1]['raw_url']
+                print("[vbench-gist-raw_url] %s" % file_raw_url)
+                print("[vbench-html-url] %s" % gist['html_url'])
+                print("[vbench-api-url] %s" % gist['url'])
+
+                print("-" * 80 + "\n\n")
+            else:
+                print("api.github.com returned status %d" % r.getcode())
     except:
         print("Error occured while dumping to gist")
 
@@ -131,22 +133,22 @@ def main():
 
 
 def get_vbench_log(build_url):
-    r = urllib2.urlopen(build_url)
-    if not (200 <= r.getcode() < 300):
-        return
-
-    s = json.loads(r.read())
-    s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {})
-                                                 or {}).get('env', {}) or {})]
-    # s=[x for x in s['matrix']]
-    if not s:
-        return
-    id = s[0]['id']  # should be just one for now
-    r2 = urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)
-    if (not 200 <= r.getcode() < 300):
-        return
-    s2 = json.loads(r2.read())
-    return s2.get('log')
+    with closing(urllib2.urlopen(build_url)) as r:
+        if not (200 <= r.getcode() < 300):
+            return
+
+        s = json.loads(r.read())
+        s = [x for x in s['matrix'] if "VBENCH" in ((x.get('config', {})
+                                                     or {}).get('env', {}) or {})]
+        # s=[x for x in s['matrix']]
+        if not s:
+            return
+        id = s[0]['id']  # should be just one for now
+        with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2:
+            if not 200 <= r.getcode() < 300:
+                return
+            s2 = json.loads(r2.read())
+            return s2.get('log')
 
 
 def get_results_raw_url(build):
@@ -169,7 +171,9 @@ def convert_json_to_df(results_url):
 
     df contains timings for all successful vbenchmarks
     """
-    res = json.loads(urllib2.urlopen(results_url).read())
+
+    with closing(urlopen(results_url)) as resp:
+        res = json.loads(resp.read())
     timings = res.get("timings")
     if not timings:
         return
@@ -212,10 +216,10 @@ def get_results_from_builds(builds):
     dfs = OrderedDict()
 
     while True:
-        r = urllib2.urlopen(url)
-        if not (200 <= r.getcode() < 300):
-            break
-        builds = json.loads(r.read())
+        with closing(urlopen(url)) as r:
+            if not (200 <= r.getcode() < 300):
+                break
+            builds = json.loads(r.read())
         res = get_results_from_builds(builds)
         if not res:
             break
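
Background on the closing(urlopen(...)) pattern that recurs throughout this patch: under Python 2, the response object returned by urllib2.urlopen has a close() method but is not a context manager, so it cannot appear directly in a with statement. contextlib.closing wraps any object exposing close() so the underlying socket is released even if an exception is raised mid-read. A minimal standalone sketch of the idea (the URL is illustrative only):

    from contextlib import closing
    from urllib2 import urlopen

    # closing() builds a context manager around the response: resp.close()
    # runs on exit, even when resp.read() raises.
    with closing(urlopen('http://example.com')) as resp:
        data = resp.read()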
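On the get_data_famafrench hunk: ZipFile requires a seekable file object (it locates the archive's central directory by seeking from the end of the file), and spooling the downloaded bytes to a temporary file instead of a StringIO presumably also avoids holding a second in-memory copy of the archive. A rough sketch of the same approach, with an illustrative URL rather than the real Fama/French endpoint:

    import tempfile
    from contextlib import closing
    from urllib2 import urlopen
    from zipfile import ZipFile

    url = 'http://example.com/archive.zip'  # illustrative only

    with closing(urlopen(url)) as response:
        raw = response.read()

    # TemporaryFile gives ZipFile the seekable handle it needs and is
    # deleted automatically when the with-block exits.
    with tempfile.TemporaryFile() as tmpf:
        tmpf.write(raw)
        with closing(ZipFile(tmpf, 'r')) as zf:
            names = zf.namelist()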
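On the dropna(how='all') changes in get_near_stock_price: DataFrame.dropna defaults to how='any', which drops every row containing at least one missing value; how='all' drops only rows that are missing in every column, so option rows that are only partially quoted are presumably now kept. For example:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'bid': [1.0, np.nan, np.nan],
                       'ask': [1.1, 2.2, np.nan]})

    df.dropna()           # how='any': keeps only row 0
    df.dropna(how='all')  # keeps rows 0 and 1; drops only the all-NaN row 2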