diff --git a/.travis.yml b/.travis.yml index 387dec1ed2658..818278eebf5b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,13 +6,13 @@ python: matrix: include: - python: 2.6 - env: NOSE_ARGS="not slow" CLIPBOARD=xclip + env: NOSE_ARGS="not slow" CLIPBOARD=xclip LOCALE_OVERRIDE="it_IT.UTF-8" - python: 2.7 env: NOSE_ARGS="slow and not network" LOCALE_OVERRIDE="zh_CN.GB18030" FULL_DEPS=true JOB_TAG=_LOCALE - python: 2.7 - env: NOSE_ARGS="not slow" FULL_DEPS=true GUI=gtk2 + env: NOSE_ARGS="not slow" FULL_DEPS=true CLIPBOARD_GUI=gtk2 - python: 3.2 - env: NOSE_ARGS="not slow" FULL_DEPS=true GUI=qt4 + env: NOSE_ARGS="not slow" FULL_DEPS=true CLIPBOARD_GUI=qt4 - python: 3.3 env: NOSE_ARGS="not slow" FULL_DEPS=true CLIPBOARD=xsel exclude: @@ -25,28 +25,25 @@ virtualenv: system_site_packages: true before_install: - - echo "Waldo1" + - echo "before_install" - echo $VIRTUAL_ENV - df -h - date - # - export PIP_ARGS=-q # comment this this to debug travis install issues - # - export APT_ARGS=-qq # comment this to debug travis install issues - # - set -x # enable this to see bash commands - - export ZIP_FLAGS=-q # comment this to debug travis install issues - ci/before_install.sh - python -V + # Xvfb stuff for clipboard functionality; see the travis-ci documentation - export DISPLAY=:99.0 - sh -e /etc/init.d/xvfb start install: - - echo "Waldo2" + - echo "install" - ci/install.sh before_script: - mysql -e 'create database pandas_nosetest;' script: - - echo "Waldo3" + - echo "script" - ci/script.sh after_script: diff --git a/ci/install.sh b/ci/install.sh index a30aba9338db2..528d669ae693c 100755 --- a/ci/install.sh +++ b/ci/install.sh @@ -13,20 +13,37 @@ # (no compiling needed), then directly goto script and collect 200$. # -echo "inside $0" +function edit_init() +{ + if [ -n "$LOCALE_OVERRIDE" ]; then + echo "Adding locale to the first line of pandas/__init__.py" + rm -f pandas/__init__.pyc + sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + sed -i "$sedc" pandas/__init__.py + echo "head -4 pandas/__init__.py" + head -4 pandas/__init__.py + echo + fi +} + +edit_init # Install Dependencies -# as of pip 1.4rc2, wheel files are still being broken regularly, this is a known good -# commit. should revert to pypi when a final release is out -pip install -I git+https://github.com/pypa/pip@42102e9deaea99db08b681d06906c2945f6f95e2#egg=pip -pv="${TRAVIS_PYTHON_VERSION:0:1}" -[ "$pv" == "2" ] && pv="" +# as of pip 1.4rc2, wheel files are still being broken regularly, this is a +# known good commit. 
should revert to pypi when a final release is out +pip_commit=42102e9deaea99db08b681d06906c2945f6f95e2 +pip install -I git+https://github.com/pypa/pip@$pip_commit#egg=pip + +python_major_version="${TRAVIS_PYTHON_VERSION:0:1}" +[ "$python_major_version" == "2" ] && python_major_version="" pip install -I -U setuptools pip install wheel # comment this line to disable the fetching of wheel files -PIP_ARGS+=" -I --use-wheel --find-links=http://cache27diy-cpycloud.rhcloud.com/${TRAVIS_PYTHON_VERSION}${JOB_TAG}/" +base_url=http://cache27diy-cpycloud.rhcloud.com +wheel_box=${TRAVIS_PYTHON_VERSION}${JOB_TAG} +PIP_ARGS+=" -I --use-wheel --find-links=$base_url/$wheel_box/" # Force virtualenv to accpet system_site_packages rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt @@ -35,25 +52,37 @@ rm -f $VIRTUAL_ENV/lib/python$TRAVIS_PYTHON_VERSION/no-global-site-packages.txt if [ -n "$LOCALE_OVERRIDE" ]; then # make sure the locale is available # probably useless, since you would need to relogin - sudo locale-gen "$LOCALE_OVERRIDE" + time sudo locale-gen "$LOCALE_OVERRIDE" fi - # show-skipped is working at this particular commit -time pip install git+git://github.com/cpcloud/nose-show-skipped.git@fa4ff84e53c09247753a155b428c1bf2c69cb6c3 -time pip install $PIP_ARGS -r ci/requirements-${TRAVIS_PYTHON_VERSION}${JOB_TAG}.txt -time sudo apt-get install libatlas-base-dev gfortran +show_skipped_commit=fa4ff84e53c09247753a155b428c1bf2c69cb6c3 +time pip install git+git://github.com/cpcloud/nose-show-skipped.git@$show_skipped_commit +time pip install $PIP_ARGS -r ci/requirements-${wheel_box}.txt + +# we need these for numpy +time sudo apt-get $APT_ARGS install libatlas-base-dev gfortran + + +# Need to enable for locale testing. The location of the locale file(s) is +# distro specific. 
For example, on Arch Linux all of the locales are in a
+# commented file--/etc/locale.gen--whose entries must be uncommented before
+# use, whereas Ubuntu looks in /var/lib/locales/supported.d/* and generates
+# locales based on what's in the files in that folder.
+time echo 'it_CH.UTF-8 UTF-8' | sudo tee -a /var/lib/locales/supported.d/it
+time sudo locale-gen

 # install gui for clipboard testing
-if [ -n "$GUI" ]; then
-    echo "Using GUI clipboard: $GUI"
-    [ -n "$pv" ] && py="py"
-    time sudo apt-get $APT_ARGS install python${pv}-${py}${GUI}
+if [ -n "$CLIPBOARD_GUI" ]; then
+    echo "Using CLIPBOARD_GUI: $CLIPBOARD_GUI"
+    [ -n "$python_major_version" ] && py="py"
+    python_cb_gui_pkg=python${python_major_version}-${py}${CLIPBOARD_GUI}
+    time sudo apt-get $APT_ARGS install $python_cb_gui_pkg
 fi

-# install a clipboard
+# install a clipboard if $CLIPBOARD is not empty
 if [ -n "$CLIPBOARD" ]; then
     echo "Using clipboard: $CLIPBOARD"
     time sudo apt-get $APT_ARGS install $CLIPBOARD
@@ -61,13 +90,15 @@ fi

 # Optional Deps
-if [ x"$FULL_DEPS" == x"true" ]; then
+if [ -n "$FULL_DEPS" ]; then
     echo "Installing FULL_DEPS"
-    # for pytables gets the lib as well
+
+    # need libhdf5 for PyTables
     time sudo apt-get $APT_ARGS install libhdf5-serial-dev
 fi

-# build pandas
+
+# build and install pandas
 time python setup.py build_ext install

 true
diff --git a/ci/script.sh b/ci/script.sh
index 2bafe13687505..67dadde2b20fb 100755
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -5,8 +5,8 @@ echo "inside $0"
 if [ -n "$LOCALE_OVERRIDE" ]; then
     export LC_ALL="$LOCALE_OVERRIDE";
     echo "Setting LC_ALL to $LOCALE_OVERRIDE"
-    (cd /; python -c 'import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))')
-
+    pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))'
+    python -c "$pycmd"
 fi

 echo nosetests --exe -w /tmp -A "$NOSE_ARGS" pandas --show-skipped
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 7776ee1efba4f..4a25a98f2cfbe 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -578,6 +578,9 @@ Bug Fixes
   - Fix a bug with ``NDFrame.replace()`` which made replacement appear as
     though it was (incorrectly) using regular expressions (:issue:`5143`).
   - Fix better error message for to_datetime (:issue:`4928`)
+  - Made sure different locales are tested on travis-ci (:issue:`4918`). Also
+    added a couple of utilities for getting and setting locales with a
+    context manager.
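A minimal usage sketch of the two utilities that note describes (illustrative
only, not part of the diff; ``get_locales`` and ``set_locale`` are the helpers
added to ``pandas.util.testing`` later in this change, and the locale prefix
is an assumption):

    import pandas.util.testing as tm

    # run locale-sensitive code under each available US English locale;
    # set_locale restores the previous locale when the block exits
    for loc in tm.get_locales(prefix='en_US'):
        with tm.set_locale(loc) as normalized:
            print('running under %s' % normalized)
            # ... exercise locale-sensitive pandas code here ...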
pandas 0.12.0 ------------- diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index f647b217fb260..4e2331f05001d 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -13,6 +13,7 @@ from pandas.io.data import DataReader, SymbolWarning from pandas.util.testing import (assert_series_equal, assert_produces_warning, network, assert_frame_equal) +import pandas.util.testing as tm from numpy.testing import assert_array_equal @@ -35,6 +36,15 @@ def assert_n_failed_equals_n_null_columns(wngs, obj, cls=SymbolWarning): class TestGoogle(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.locales = tm.get_locales(prefix='en_US') + if not cls.locales: + raise nose.SkipTest("US English locale not available for testing") + + @classmethod + def tearDownClass(cls): + del cls.locales @network def test_google(self): @@ -44,9 +54,10 @@ def test_google(self): start = datetime(2010, 1, 1) end = datetime(2013, 1, 27) - self.assertEquals( - web.DataReader("F", 'google', start, end)['Close'][-1], - 13.68) + for locale in self.locales: + with tm.set_locale(locale): + panel = web.DataReader("F", 'google', start, end) + self.assertEquals(panel.Close[-1], 13.68) self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER", 'google', start, end) @@ -58,38 +69,40 @@ def test_get_quote_fails(self): @network def test_get_goog_volume(self): - df = web.get_data_google('GOOG') - self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473) + for locale in self.locales: + with tm.set_locale(locale): + df = web.get_data_google('GOOG').sort_index() + self.assertEqual(df.Volume.ix['OCT-08-2010'], 2863473) @network def test_get_multi1(self): - sl = ['AAPL', 'AMZN', 'GOOG'] - pan = web.get_data_google(sl, '2012') - - def testit(): + for locale in self.locales: + sl = ['AAPL', 'AMZN', 'GOOG'] + with tm.set_locale(locale): + pan = web.get_data_google(sl, '2012') ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] - self.assertEquals(ts[0].dayofyear, 96) - - if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and - hasattr(pan.Close, 'AAPL')): - testit() - else: - self.assertRaises(AttributeError, testit) + if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and + hasattr(pan.Close, 'AAPL')): + self.assertEquals(ts[0].dayofyear, 96) + else: + self.assertRaises(AttributeError, lambda: pan.Close) @network def test_get_multi2(self): with warnings.catch_warnings(record=True) as w: - pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', - 'JAN-31-12') - result = pan.Close.ix['01-18-12'] - assert_n_failed_equals_n_null_columns(w, result) - - # sanity checking - - assert np.issubdtype(result.dtype, np.floating) - result = pan.Open.ix['Jan-15-12':'Jan-20-12'] - self.assertEqual((4, 3), result.shape) - assert_n_failed_equals_n_null_columns(w, result) + for locale in self.locales: + with tm.set_locale(locale): + pan = web.get_data_google(['GE', 'MSFT', 'INTC'], + 'JAN-01-12', 'JAN-31-12') + result = pan.Close.ix['01-18-12'] + assert_n_failed_equals_n_null_columns(w, result) + + # sanity checking + + assert np.issubdtype(result.dtype, np.floating) + result = pan.Open.ix['Jan-15-12':'Jan-20-12'] + self.assertEqual((4, 3), result.shape) + assert_n_failed_equals_n_null_columns(w, result) class TestYahoo(unittest.TestCase): diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 8c7d89641bdd4..6d392eb265752 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -1,11 
+1,9 @@ # pylint: disable-msg=W0612,E1101 from pandas.compat import range, lrange, StringIO from pandas import compat -from pandas.io.common import URLError import os import unittest -import nose import numpy as np from pandas import Series, DataFrame, DatetimeIndex, Timestamp @@ -16,7 +14,6 @@ assert_series_equal, network, ensure_clean, assert_index_equal) import pandas.util.testing as tm -from numpy.testing.decorators import slow _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() @@ -53,17 +50,35 @@ def setUp(self): self.tsframe = _tsframe.copy() self.mixed_frame = _mixed_frame.copy() + def tearDown(self): + del self.dirpath + + del self.ts + + del self.series + + del self.objSeries + + del self.empty_series + del self.empty_frame + + del self.frame + del self.frame2 + del self.intframe + del self.tsframe + del self.mixed_frame + def test_frame_double_encoded_labels(self): df = DataFrame([['a', 'b'], ['c', 'd']], index=['index " 1', 'index / 2'], columns=['a \\ b', 'y / z']) - assert_frame_equal( - df, read_json(df.to_json(orient='split'), orient='split')) - assert_frame_equal( - df, read_json(df.to_json(orient='columns'), orient='columns')) - assert_frame_equal( - df, read_json(df.to_json(orient='index'), orient='index')) + assert_frame_equal(df, read_json(df.to_json(orient='split'), + orient='split')) + assert_frame_equal(df, read_json(df.to_json(orient='columns'), + orient='columns')) + assert_frame_equal(df, read_json(df.to_json(orient='index'), + orient='index')) df_unser = read_json(df.to_json(orient='records'), orient='records') assert_index_equal(df.columns, df_unser.columns) np.testing.assert_equal(df.values, df_unser.values) @@ -75,10 +90,10 @@ def test_frame_non_unique_index(self): self.assertRaises(ValueError, df.to_json, orient='index') self.assertRaises(ValueError, df.to_json, orient='columns') - assert_frame_equal( - df, read_json(df.to_json(orient='split'), orient='split')) + assert_frame_equal(df, read_json(df.to_json(orient='split'), + orient='split')) unser = read_json(df.to_json(orient='records'), orient='records') - self.assert_(df.columns.equals(unser.columns)) + self.assertTrue(df.columns.equals(unser.columns)) np.testing.assert_equal(df.values, unser.values) unser = read_json(df.to_json(orient='values'), orient='values') np.testing.assert_equal(df.values, unser.values) @@ -102,7 +117,8 @@ def test_frame_non_unique_columns(self): assert_frame_equal(result, df) def _check(df): - result = read_json(df.to_json(orient='split'), orient='split', convert_dates=['x']) + result = read_json(df.to_json(orient='split'), orient='split', + convert_dates=['x']) assert_frame_equal(result, df) for o in [[['a','b'],['c','d']], @@ -112,15 +128,15 @@ def _check(df): _check(DataFrame(o, index=[1,2], columns=['x','x'])) def test_frame_from_json_to_json(self): - - def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None): + def _check_orient(df, orient, dtype=None, numpy=False, + convert_axes=True, check_dtype=True, raise_ok=None): df = df.sort() dfjson = df.to_json(orient=orient) try: unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes) - except (Exception) as detail: + except Exception as detail: if raise_ok is not None: if isinstance(detail, raise_ok): return @@ -151,7 +167,8 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_ if convert_axes: assert_frame_equal(df, unser, check_dtype=check_dtype) else: - assert_frame_equal(df, unser, 
check_less_precise=False, check_dtype=check_dtype) + assert_frame_equal(df, unser, check_less_precise=False, + check_dtype=check_dtype) def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None): @@ -171,17 +188,27 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None): # numpy=True and raise_ok might be not None, so ignore the error if convert_axes: - _check_orient(df, "columns", dtype=dtype, numpy=True, raise_ok=raise_ok) - _check_orient(df, "records", dtype=dtype, numpy=True, raise_ok=raise_ok) - _check_orient(df, "split", dtype=dtype, numpy=True, raise_ok=raise_ok) - _check_orient(df, "index", dtype=dtype, numpy=True, raise_ok=raise_ok) - _check_orient(df, "values", dtype=dtype, numpy=True, raise_ok=raise_ok) - - _check_orient(df, "columns", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok) - _check_orient(df, "records", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok) - _check_orient(df, "split", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok) - _check_orient(df, "index", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok) - _check_orient(df, "values", dtype=dtype, numpy=True, convert_axes=False, raise_ok=raise_ok) + _check_orient(df, "columns", dtype=dtype, numpy=True, + raise_ok=raise_ok) + _check_orient(df, "records", dtype=dtype, numpy=True, + raise_ok=raise_ok) + _check_orient(df, "split", dtype=dtype, numpy=True, + raise_ok=raise_ok) + _check_orient(df, "index", dtype=dtype, numpy=True, + raise_ok=raise_ok) + _check_orient(df, "values", dtype=dtype, numpy=True, + raise_ok=raise_ok) + + _check_orient(df, "columns", dtype=dtype, numpy=True, + convert_axes=False, raise_ok=raise_ok) + _check_orient(df, "records", dtype=dtype, numpy=True, + convert_axes=False, raise_ok=raise_ok) + _check_orient(df, "split", dtype=dtype, numpy=True, + convert_axes=False, raise_ok=raise_ok) + _check_orient(df, "index", dtype=dtype, numpy=True, + convert_axes=False, raise_ok=raise_ok) + _check_orient(df, "values", dtype=dtype, numpy=True, + convert_axes=False, raise_ok=raise_ok) # basic _check_all_orients(self.frame) @@ -202,9 +229,10 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None): # dtypes _check_all_orients(DataFrame(biggie, dtype=np.float64), dtype=np.float64, convert_axes=False) - _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int, convert_axes=False) - _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3', convert_axes=False, - raise_ok=ValueError) + _check_all_orients(DataFrame(biggie, dtype=np.int), dtype=np.int, + convert_axes=False) + _check_all_orients(DataFrame(biggie, dtype='U3'), dtype='U3', + convert_axes=False, raise_ok=ValueError) # empty _check_all_orients(self.empty_frame) @@ -258,37 +286,37 @@ def test_frame_from_json_bad_data(self): def test_frame_from_json_nones(self): df = DataFrame([[1, 2], [4, 5, 6]]) unser = read_json(df.to_json()) - self.assert_(np.isnan(unser[2][0])) + self.assertTrue(np.isnan(unser[2][0])) df = DataFrame([['1', '2'], ['4', '5', '6']]) unser = read_json(df.to_json()) - self.assert_(np.isnan(unser[2][0])) + self.assertTrue(np.isnan(unser[2][0])) unser = read_json(df.to_json(),dtype=False) - self.assert_(unser[2][0] is None) + self.assertTrue(unser[2][0] is None) unser = read_json(df.to_json(),convert_axes=False,dtype=False) - self.assert_(unser['2']['0'] is None) + self.assertTrue(unser['2']['0'] is None) unser = read_json(df.to_json(), numpy=False) - self.assert_(np.isnan(unser[2][0])) + 
self.assertTrue(np.isnan(unser[2][0])) unser = read_json(df.to_json(), numpy=False, dtype=False) - self.assert_(unser[2][0] is None) + self.assertTrue(unser[2][0] is None) unser = read_json(df.to_json(), numpy=False, convert_axes=False, dtype=False) - self.assert_(unser['2']['0'] is None) + self.assertTrue(unser['2']['0'] is None) # infinities get mapped to nulls which get mapped to NaNs during # deserialisation df = DataFrame([[1, 2], [4, 5, 6]]) df[2][0] = np.inf unser = read_json(df.to_json()) - self.assert_(np.isnan(unser[2][0])) + self.assertTrue(np.isnan(unser[2][0])) unser = read_json(df.to_json(), dtype=False) - self.assert_(np.isnan(unser[2][0])) + self.assertTrue(np.isnan(unser[2][0])) df[2][0] = np.NINF unser = read_json(df.to_json()) - self.assert_(np.isnan(unser[2][0])) + self.assertTrue(np.isnan(unser[2][0])) unser = read_json(df.to_json(),dtype=False) - self.assert_(np.isnan(unser[2][0])) + self.assertTrue(np.isnan(unser[2][0])) def test_frame_to_json_except(self): df = DataFrame([1, 2, 3]) @@ -345,7 +373,7 @@ def _check_orient(series, orient, dtype=None, numpy=False): except: raise if orient == "split": - self.assert_(series.name == unser.name) + self.assertEqual(series.name, unser.name) def _check_all_orients(series, dtype=None): _check_orient(series, "columns", dtype=dtype) @@ -403,12 +431,12 @@ def test_reconstruction_index(self): result = read_json(df.to_json()) # the index is serialized as strings....correct? - #assert_frame_equal(result,df) + assert_frame_equal(result, df) def test_path(self): with ensure_clean('test.json') as path: - - for df in [ self.frame, self.frame2, self.intframe, self.tsframe, self.mixed_frame ]: + for df in [self.frame, self.frame2, self.intframe, self.tsframe, + self.mixed_frame]: df.to_json(path) read_json(path) @@ -512,7 +540,6 @@ def test_date_unit(self): assert_frame_equal(result, df) def test_weird_nested_json(self): - # this used to core dump the parser s = r'''{ "status": "success", @@ -528,9 +555,9 @@ def test_weird_nested_json(self): "title": "Another blog post", "body": "More content" } - ] - } -}''' + ] + } + }''' read_json(s) @@ -550,18 +577,19 @@ def test_misc_example(self): # parsing unordered input fails result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]',numpy=True) expected = DataFrame([[1,2],[1,2]],columns=['a','b']) - #assert_frame_equal(result,expected) + with tm.assertRaisesRegexp(AssertionError, + '\[index\] left \[.+\], right \[.+\]'): + assert_frame_equal(result, expected) result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') expected = DataFrame([[1,2],[1,2]],columns=['a','b']) assert_frame_equal(result,expected) @network - @slow def test_round_trip_exception_(self): # GH 3867 - - df = pd.read_csv('https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv') + csv = 'https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv' + df = pd.read_csv(csv) s = df.to_json() result = pd.read_json(s) assert_frame_equal(result.reindex(index=df.index,columns=df.columns),df) @@ -569,12 +597,9 @@ def test_round_trip_exception_(self): @network def test_url(self): url = 'https://api.github.com/repos/pydata/pandas/issues?per_page=5' - result = read_json(url,convert_dates=True) - for c in ['created_at','closed_at','updated_at']: - self.assert_(result[c].dtype == 'datetime64[ns]') - - url = 'http://search.twitter.com/search.json?q=pandas%20python' - result = read_json(url) + result = read_json(url, convert_dates=True) + for c in ['created_at', 'closed_at', 'updated_at']: + self.assertEqual(result[c].dtype, 
'datetime64[ns]') def test_default_handler(self): from datetime import timedelta @@ -585,6 +610,6 @@ def test_default_handler(self): expected, pd.read_json(frame.to_json(default_handler=str))) def my_handler_raises(obj): - raise TypeError - self.assertRaises( - TypeError, frame.to_json, default_handler=my_handler_raises) + raise TypeError("raisin") + self.assertRaises(TypeError, frame.to_json, + default_handler=my_handler_raises) diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 0b3bff7a151cc..06ff5abf7cd13 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -32,6 +32,7 @@ def _skip_if_python_ver(skip_major, skip_minor=None): if major == skip_major and (skip_minor is None or minor == skip_minor): raise nose.SkipTest("skipping Python version %d.%d" % (major, minor)) + json_unicode = (json.dumps if sys.version_info[0] >= 3 else partial(json.dumps, encoding="utf-8")) @@ -194,7 +195,6 @@ def test_invalidDoublePrecision(self): # will throw typeError self.assertRaises(TypeError, ujson.encode, input, double_precision = None) - def test_encodeStringConversion(self): input = "A string \\ / \b \f \n \r \t" output = ujson.encode(input) @@ -220,7 +220,6 @@ def test_encodeControlEscaping(self): self.assertEquals(input, dec) self.assertEquals(enc, json_unicode(input)) - def test_encodeUnicodeConversion2(self): input = "\xe6\x97\xa5\xd1\x88" enc = ujson.encode(input) @@ -259,7 +258,6 @@ def test_encodeUnicode4BytesUTF8Highest(self): self.assertEquals(enc, json_unicode(input)) self.assertEquals(dec, json.loads(enc)) - def test_encodeArrayInArray(self): input = [[[[]]]] output = ujson.encode(input) @@ -286,7 +284,6 @@ def test_encodeIntNegConversion(self): self.assertEquals(input, ujson.decode(output)) pass - def test_encodeLongNegConversion(self): input = -9223372036854775808 output = ujson.encode(input) @@ -448,7 +445,6 @@ def test_encodeDoubleNegInf(self): input = -np.inf assert ujson.encode(input) == 'null', "Expected null" - def test_decodeJibberish(self): input = "fdsa sda v9sa fdsa" try: @@ -566,7 +562,6 @@ def test_decodeNullBroken(self): return assert False, "Wrong exception" - def test_decodeBrokenDictKeyTypeLeakTest(self): input = '{{1337:""}}' for x in range(1000): @@ -667,7 +662,6 @@ def test_decodeNullCharacter(self): input = "\"31337 \\u0000 31337\"" self.assertEquals(ujson.decode(input), json.loads(input)) - def test_encodeListLongConversion(self): input = [9223372036854775807, 9223372036854775807, 9223372036854775807, 9223372036854775807, 9223372036854775807, 9223372036854775807 ] @@ -1147,6 +1141,7 @@ def testArrayNumpyLabelled(self): self.assertTrue((np.array(['1','2','3']) == output[1]).all()) self.assertTrue((np.array(['a', 'b']) == output[2]).all()) + class PandasJSONTests(TestCase): def testDataFrame(self): @@ -1178,7 +1173,6 @@ def testDataFrame(self): assert_array_equal(df.transpose().columns, outp.columns) assert_array_equal(df.transpose().index, outp.index) - def testDataFrameNumpy(self): df = DataFrame([[1,2,3], [4,5,6]], index=['a', 'b'], columns=['x', 'y', 'z']) @@ -1486,7 +1480,6 @@ def test_decodeArrayFaultyUnicode(self): else: assert False, "expected ValueError" - def test_decodeFloatingPointAdditionalTests(self): places = 15 @@ -1529,39 +1522,10 @@ def test_encodeSet(self): self.assertTrue(v in s) -""" -def test_decodeNumericIntFrcOverflow(self): -input = "X.Y" -raise NotImplementedError("Implement this test!") - - -def test_decodeStringUnicodeEscape(self): -input = 
"\u3131" -raise NotImplementedError("Implement this test!") - -def test_decodeStringUnicodeBrokenEscape(self): -input = "\u3131" -raise NotImplementedError("Implement this test!") - -def test_decodeStringUnicodeInvalidEscape(self): -input = "\u3131" -raise NotImplementedError("Implement this test!") - -def test_decodeStringUTF8(self): -input = "someutfcharacters" -raise NotImplementedError("Implement this test!") - - - -""" - def _clean_dict(d): return dict((str(k), v) for k, v in compat.iteritems(d)) + if __name__ == '__main__': - # unittest.main() - import nose - # nose.runmodule(argv=[__file__,'-vvs','-x', '--ipdb-failure'], - # exit=False) nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 1888f2ede35e0..614f5ecc39e9d 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -1,12 +1,21 @@ import os -import nose +import locale +import codecs import unittest +import nose + import numpy as np from numpy.testing import assert_equal +import pandas.util.testing as tm from pandas.tools.util import cartesian_product + +CURRENT_LOCALE = locale.getlocale() +LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None) + + class TestCartesianProduct(unittest.TestCase): def test_simple(self): @@ -16,6 +25,61 @@ def test_simple(self): np.array([ 1, 22, 1, 22, 1, 22])] assert_equal(result, expected) + +class TestLocaleUtils(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.locales = tm.get_locales() + + if not cls.locales: + raise nose.SkipTest("No locales found") + + if os.name == 'nt': # we're on windows + raise nose.SkipTest("Running on Windows") + + @classmethod + def tearDownClass(cls): + del cls.locales + + def test_get_locales(self): + # all systems should have at least a single locale + assert len(tm.get_locales()) > 0 + + def test_get_locales_prefix(self): + if len(self.locales) == 1: + raise nose.SkipTest("Only a single locale found, no point in " + "trying to test filtering locale prefixes") + first_locale = self.locales[0] + assert len(tm.get_locales(prefix=first_locale[:2])) > 0 + + def test_set_locale(self): + if len(self.locales) == 1: + raise nose.SkipTest("Only a single locale found, no point in " + "trying to test setting another locale") + + if LOCALE_OVERRIDE is not None: + lang, enc = LOCALE_OVERRIDE.split('.') + else: + lang, enc = 'it_CH', 'UTF-8' + + enc = codecs.lookup(enc).name + new_locale = lang, enc + + if not tm._can_set_locale('.'.join(new_locale)): + with tm.assertRaises(locale.Error): + with tm.set_locale(new_locale): + pass + else: + with tm.set_locale(new_locale) as normalized_locale: + new_lang, new_enc = normalized_locale.split('.') + new_enc = codecs.lookup(enc).name + normalized_locale = new_lang, new_enc + self.assertEqual(normalized_locale, new_locale) + + current_locale = locale.getlocale() + self.assertEqual(current_locale, CURRENT_LOCALE) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index bfbd28f7bb4a4..d059d229ef22e 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -10,7 +10,7 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular -from pandas.compat import range, lrange +from pandas.compat import lrange import pandas.compat as compat import pandas.lib as lib import 
pandas.core.common as com diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index cfbde75f6ae21..a5e249b77fa52 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -317,7 +317,8 @@ def _test(ax): result = ax.get_xlim() self.assertEqual(int(result[0]), expected[0].ordinal) self.assertEqual(int(result[1]), expected[1].ordinal) - plt.close(ax.get_figure()) + fig = ax.get_figure() + plt.close(fig) ser = tm.makeTimeSeries() ax = ser.plot() diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 3dcfa3621895e..c6c2b418f553d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -35,6 +35,11 @@ from datetime import timedelta, datetime from datetime import time as datetime_time from pandas.compat import parse_date +from sys import version_info + +# GH3363 +cdef bint PY2 = version_info[0] == 2 + # initialize numpy import_array() #import_ufunc() @@ -1757,20 +1762,20 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): # timestamp falls to the right side of the DST transition if v + deltas[pos] == vals[i]: result_b[i] = v - - + + if infer_dst: dst_hours = np.empty(n, dtype=np.int64) dst_hours.fill(NPY_NAT) - + # Get the ambiguous hours (given the above, these are the hours - # where result_a != result_b and neither of them are NAT) + # where result_a != result_b and neither of them are NAT) both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) both_eq = result_a == result_b trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) if trans_idx.size == 1: stamp = Timestamp(vals[trans_idx]) - raise pytz.AmbiguousTimeError("Cannot infer dst time from %s as" + raise pytz.AmbiguousTimeError("Cannot infer dst time from %s as" "there are no repeated times" % stamp) # Split the array into contiguous chunks (where the difference between # indices is 1). These are effectively dst transitions in different years @@ -1779,21 +1784,21 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): if trans_idx.size > 0: one_diff = np.where(np.diff(trans_idx)!=1)[0]+1 trans_grp = np.array_split(trans_idx, one_diff) - + # Iterate through each day, if there are no hours where the delta is negative # (indicates a repeat of hour) the switch cannot be inferred for grp in trans_grp: - + delta = np.diff(result_a[grp]) if grp.size == 1 or np.all(delta>0): stamp = Timestamp(vals[grp[0]]) raise pytz.AmbiguousTimeError(stamp) - + # Find the index for the switch and pull from a for dst and b for standard switch_idx = (delta<=0).nonzero()[0] if switch_idx.size > 1: raise pytz.AmbiguousTimeError("There are %i dst switches " - "when there should only be 1." + "when there should only be 1." 
% switch_idx.size) switch_idx = switch_idx[0]+1 # Pull the only index and adjust a_idx = grp[:switch_idx] @@ -1812,7 +1817,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): else: stamp = Timestamp(vals[i]) raise pytz.AmbiguousTimeError("Cannot infer dst time from %r, "\ - "try using the 'infer_dst' argument" + "try using the 'infer_dst' argument" % stamp) elif left != NPY_NAT: result[i] = left @@ -2549,8 +2554,9 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"), cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] -cdef _period_strftime(int64_t value, int freq, object fmt): +cdef object _period_strftime(int64_t value, int freq, object fmt): import sys + cdef: Py_ssize_t i date_info dinfo @@ -2595,13 +2601,8 @@ cdef _period_strftime(int64_t value, int freq, object fmt): result = result.replace(str_extra_fmts[i], repl) - # Py3? - if not PyString_Check(result): - result = str(result) - - # GH3363 - if sys.version_info[0] == 2: - result = result.decode('utf-8','strict') + if PY2: + result = result.decode('utf-8', 'ignore') return result diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 946a4d94b6045..4787c82282a1f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -9,6 +9,8 @@ import warnings import inspect import os +import subprocess +import locale from datetime import datetime from functools import wraps, partial @@ -20,6 +22,7 @@ import nose +import pandas as pd from pandas.core.common import isnull, _is_sequence import pandas.core.index as index import pandas.core.series as series @@ -28,7 +31,7 @@ import pandas.core.panel4d as panel4d import pandas.compat as compat from pandas.compat import( - map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, + filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, raise_with_traceback, httplib ) @@ -97,6 +100,172 @@ def setUpClass(cls): return cls +#------------------------------------------------------------------------------ +# locale utilities + +def check_output(*popenargs, **kwargs): # shamelessly taken from Python 2.7 source + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... 
stderr=STDOUT)
+    'ls: non_existent_file: No such file or directory\n'
+    """
+    if 'stdout' in kwargs:
+        raise ValueError('stdout argument not allowed, it will be overridden.')
+    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    output, unused_err = process.communicate()
+    retcode = process.poll()
+    if retcode:
+        cmd = kwargs.get("args")
+        if cmd is None:
+            cmd = popenargs[0]
+        raise subprocess.CalledProcessError(retcode, cmd, output=output)
+    return output
+
+
+def _default_locale_getter():
+    try:
+        raw_locales = check_output(['locale -a'], shell=True)
+    except subprocess.CalledProcessError as e:
+        raise type(e)("%s, the 'locale -a' command cannot be found on your "
+                      "system" % e)
+    return raw_locales
+
+
+def get_locales(prefix=None, normalize=True,
+                locale_getter=_default_locale_getter):
+    """Get all the locales that are available on the system.
+
+    Parameters
+    ----------
+    prefix : str
+        If not ``None`` then return only those locales with the prefix
+        provided. For example to get all English language locales (those that
+        start with ``"en"``), pass ``prefix="en"``.
+    normalize : bool
+        Call ``locale.normalize`` on the resulting list of available locales.
+        Either way, only locales that can be set without throwing an
+        ``Exception`` are returned.
+    locale_getter : callable
+        The function to use to retrieve the available locales. This should
+        return a string with each locale separated by a newline character.
+
+    Returns
+    -------
+    locales : list of strings
+        A list of locale strings that can be set with ``locale.setlocale()``.
+        For example::
+
+            locale.setlocale(locale.LC_ALL, locale_string)
+    """
+    raw_locales = locale_getter()
+
+    try:
+        raw_locales = str(raw_locales, encoding=pd.options.display.encoding)
+    except TypeError:
+        pass
+
+    if prefix is None:
+        return _valid_locales(raw_locales.splitlines(), normalize)
+
+    found = re.compile('%s.*' % prefix).findall(raw_locales)
+    return _valid_locales(found, normalize)
+
+
+@contextmanager
+def set_locale(new_locale, lc_var=locale.LC_ALL):
+    """Context manager for temporarily setting a locale.
+
+    Parameters
+    ----------
+    new_locale : str or tuple
+        A string of the form ``language_country.encoding``. For example, to
+        set the current locale to US English with a UTF8 encoding, you would
+        pass "en_US.UTF-8".
+
+    Notes
+    -----
+    This is useful when you want to run a particular block of code under a
+    particular locale, without globally setting the locale. This probably
+    isn't thread-safe.
+    """
+    current_locale = locale.getlocale()
+
+    try:
+        locale.setlocale(lc_var, new_locale)
+
+        try:
+            normalized_locale = locale.getlocale()
+        except ValueError:
+            yield new_locale
+        else:
+            if all(lc is not None for lc in normalized_locale):
+                yield '.'.join(normalized_locale)
+            else:
+                yield new_locale
+    finally:
+        locale.setlocale(lc_var, current_locale)
+
+
+def _can_set_locale(lc):
+    """Check to see if we can set a locale without throwing an exception.
+
+    Parameters
+    ----------
+    lc : str
+        The locale to attempt to set.
+
+    Returns
+    -------
+    isvalid : bool
+        Whether the passed locale can be set.
+    """
+    try:
+        with set_locale(lc):
+            pass
+    except locale.Error:  # horrible name for an Exception subclass
+        return False
+    else:
+        return True
+
+
+def _valid_locales(locales, normalize):
+    """Return a list of normalized locales that do not throw an ``Exception``
+    when set.
+
+    Parameters
+    ----------
+    locales : iterable of str
+        The locales to attempt to validate.
+    normalize : bool
+        Whether to call ``locale.normalize`` on each locale.
+ + Returns + ------- + valid_locales : list + A list of valid locales. + """ + if normalize: + normalizer = lambda x: locale.normalize(x.strip()) + else: + normalizer = lambda x: x.strip() + + return list(filter(_can_set_locale, map(normalizer, locales))) + + #------------------------------------------------------------------------------ # Console debugging tools @@ -169,6 +338,7 @@ def assert_isinstance(obj, class_type_or_tuple): "Expected object to be of type %r, found %r instead" % ( type(obj), class_type_or_tuple)) + def assert_equal(a, b, msg=""): """asserts that a equals b, like nose's assert_equal, but allows custom message to start. Passes a and b to format string as well. So you can use '{0}' and '{1}' to display a and b. @@ -198,11 +368,11 @@ def assert_attr_equal(attr, left, right): right_attr = getattr(right, attr) assert_equal(left_attr,right_attr,"attr is not equal [{0}]" .format(attr)) + def isiterable(obj): return hasattr(obj, '__iter__') - def assert_almost_equal(a, b, check_less_precise=False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) @@ -378,6 +548,7 @@ def assert_contains_all(iterable, dic): for k in iterable: assert k in dic, "Did not contain item: '%r'" % k + def assert_copy(iter1, iter2, **eql_kwargs): """ iter1, iter2: iterables that produce elements comparable with assert_almost_equal @@ -412,6 +583,7 @@ def makeFloatIndex(k=10): values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) return Index(values * (10 ** np.random.randint(0, 9))) + def makeDateIndex(k=10): dt = datetime(2000, 1, 1) dr = bdate_range(dt, periods=k) @@ -446,6 +618,7 @@ def getSeriesData(): index = makeStringIndex(N) return dict((c, Series(randn(N), index=index)) for c in getCols(K)) + def makeTimeSeries(nper=None): if nper is None: nper = N @@ -503,11 +676,13 @@ def makePanel(nper=None): data = dict((c, makeTimeDataFrame(nper)) for c in cols) return Panel.fromDict(data) + def makePeriodPanel(nper=None): cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] data = dict((c, makePeriodFrame(nper)) for c in cols) return Panel.fromDict(data) + def makePanel4D(nper=None): return Panel4D(dict(l1=makePanel(nper), l2=makePanel(nper), l3=makePanel(nper)))
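A self-contained sketch of the probe-and-restore pattern that ``set_locale``
and ``_can_set_locale`` implement above, using only the standard library (the
candidate locale names are assumptions and may not exist on a given system):

    import locale
    from contextlib import contextmanager

    @contextmanager
    def _temp_locale(loc, category=locale.LC_ALL):
        # same try/finally shape as set_locale: always restore the previous
        # setting, even if the body raises
        saved = locale.setlocale(category)
        try:
            yield locale.setlocale(category, loc)  # locale.Error if unsupported
        finally:
            locale.setlocale(category, saved)

    def _settable(loc):
        # mirrors _can_set_locale: a locale is usable iff setting it succeeds
        try:
            with _temp_locale(loc):
                pass
        except locale.Error:
            return False
        return True

    for candidate in ['C', 'it_CH.UTF-8', 'xx_XX.bogus']:
        print('%s -> %s' % (candidate, _settable(candidate)))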