diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 4758c7f979da0..be8468d426946 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -3,15 +3,20 @@ import numpy as np from numpy.random import RandomState +from numpy import nan +import datetime -from pandas.core.api import Series, Categorical, CategoricalIndex +from pandas import Series, Categorical, CategoricalIndex, Index import pandas as pd from pandas import compat +import pandas.algos as _algos +from pandas.compat import lrange import pandas.core.algorithms as algos import pandas.util.testing as tm import pandas.hashtable as hashtable from pandas.compat.numpy import np_array_datetime64_compat +from pandas.util.testing import assert_almost_equal class TestMatch(tm.TestCase): @@ -705,6 +710,315 @@ def test_unique_label_indices(): tm.assert_numpy_array_equal(left, right) +def test_rank(): + tm._skip_if_no_scipy() + from scipy.stats import rankdata + + def _check(arr): + mask = ~np.isfinite(arr) + arr = arr.copy() + result = _algos.rank_1d_float64(arr) + arr[mask] = np.inf + exp = rankdata(arr) + exp[mask] = nan + assert_almost_equal(result, exp) + + _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan])) + _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan])) + + +def test_pad_backfill_object_segfault(): + + old = np.array([], dtype='O') + new = np.array([datetime.datetime(2010, 12, 31)], dtype='O') + + result = _algos.pad_object(old, new) + expected = np.array([-1], dtype=np.int64) + assert (np.array_equal(result, expected)) + + result = _algos.pad_object(new, old) + expected = np.array([], dtype=np.int64) + assert (np.array_equal(result, expected)) + + result = _algos.backfill_object(old, new) + expected = np.array([-1], dtype=np.int64) + assert (np.array_equal(result, expected)) + + result = _algos.backfill_object(new, old) + expected = np.array([], dtype=np.int64) + assert (np.array_equal(result, expected)) + + +def test_arrmap(): + values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') + result = _algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) + assert (result.dtype == np.bool_) + + +class TestTseriesUtil(tm.TestCase): + _multiprocess_can_split_ = True + + def test_combineFunc(self): + pass + + def test_reindex(self): + pass + + def test_isnull(self): + pass + + def test_groupby(self): + pass + + def test_groupby_withnull(self): + pass + + def test_backfill(self): + old = Index([1, 5, 10]) + new = Index(lrange(12)) + + filler = _algos.backfill_int64(old.values, new.values) + + expect_filler = np.array([0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 2, -1], dtype=np.int64) + self.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = Index([1, 4]) + new = Index(lrange(5, 10)) + filler = _algos.backfill_int64(old.values, new.values) + + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) + self.assert_numpy_array_equal(filler, expect_filler) + + def test_pad(self): + old = Index([1, 5, 10]) + new = Index(lrange(12)) + + filler = _algos.pad_int64(old.values, new.values) + + expect_filler = np.array([-1, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 2, 2], dtype=np.int64) + self.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = Index([5, 10]) + new = Index(lrange(5)) + filler = _algos.pad_int64(old.values, new.values) + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) + self.assert_numpy_array_equal(filler, expect_filler) + + +def test_left_join_indexer_unique(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([2, 2, 3, 4, 4], dtype=np.int64) + + result = _algos.left_join_indexer_unique_int64(b, a) + expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) + assert (np.array_equal(result, expected)) + + +def test_left_outer_join_bug(): + left = np.array([0, 1, 0, 1, 1, 2, 3, 1, 0, 2, 1, 2, 0, 1, 1, 2, 3, 2, 3, + 2, 1, 1, 3, 0, 3, 2, 3, 0, 0, 2, 3, 2, 0, 3, 1, 3, 0, 1, + 3, 0, 0, 1, 0, 3, 1, 0, 1, 0, 1, 1, 0, 2, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 0, 3, 1, 3, 2, 2, 0, 1, 3, 0, 2, 3, 2, 3, 3, + 2, 3, 3, 1, 3, 2, 0, 0, 3, 1, 1, 1, 0, 2, 3, 3, 1, 2, 0, + 3, 1, 2, 0, 2], dtype=np.int64) + + right = np.array([3, 1], dtype=np.int64) + max_groups = 4 + + lidx, ridx = _algos.left_outer_join(left, right, max_groups, sort=False) + + exp_lidx = np.arange(len(left)) + exp_ridx = -np.ones(len(left)) + exp_ridx[left == 1] = 1 + exp_ridx[left == 3] = 0 + + assert (np.array_equal(lidx, exp_lidx)) + assert (np.array_equal(ridx, exp_ridx)) + + +def test_inner_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = _algos.inner_join_indexer_int64(a, b) + + index_exp = np.array([3, 5], dtype=np.int64) + assert_almost_equal(index, index_exp) + + aexp = np.array([2, 4], dtype=np.int64) + bexp = np.array([1, 2], dtype=np.int64) + assert_almost_equal(ares, aexp) + assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = _algos.inner_join_indexer_int64(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) + + +def test_outer_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = _algos.outer_join_indexer_int64(a, b) + + index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) + assert_almost_equal(index, index_exp) + + aexp = np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.int64) + bexp = np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.int64) + assert_almost_equal(ares, aexp) + assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = _algos.outer_join_indexer_int64(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) + + +def test_left_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = _algos.left_join_indexer_int64(a, b) + + assert_almost_equal(index, a) + + aexp = np.array([0, 1, 2, 3, 4], dtype=np.int64) + bexp = np.array([-1, -1, 1, -1, 2], dtype=np.int64) + assert_almost_equal(ares, aexp) + assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = _algos.left_join_indexer_int64(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) + + +def test_left_join_indexer2(): + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + + res, lidx, ridx = _algos.left_join_indexer_int64(idx2.values, idx.values) + + exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) + assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) + assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) + assert_almost_equal(ridx, exp_ridx) + + +def test_outer_join_indexer2(): + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + + res, lidx, ridx = _algos.outer_join_indexer_int64(idx2.values, idx.values) + + exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) + assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) + assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) + assert_almost_equal(ridx, exp_ridx) + + +def test_inner_join_indexer2(): + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + + res, lidx, ridx = _algos.inner_join_indexer_int64(idx2.values, idx.values) + + exp_res = np.array([1, 1, 2, 5], dtype=np.int64) + assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2], dtype=np.int64) + assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) + assert_almost_equal(ridx, exp_ridx) + + +def test_is_lexsorted(): + failure = [ + np.array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, + 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0]), + np.array([30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, + 15, 14, + 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, + 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, + 12, 11, + 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, + 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22, + 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, + 6, 5, + 4, 3, 2, 1, 0])] + + assert (not _algos.is_lexsorted(failure)) + +# def test_get_group_index(): +# a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64) +# b = np.array([1, 0, 3, 2, 0, 2, 3, 0], dtype=np.int64) +# expected = np.array([1, 4, 11, 2, 8, 6, 3, 0], dtype=np.int64) + +# result = lib.get_group_index([a, b], (3, 4)) + +# assert(np.array_equal(result, expected)) + + +def test_groupsort_indexer(): + a = np.random.randint(0, 1000, 100).astype(np.int64) + b = np.random.randint(0, 1000, 100).astype(np.int64) + + result = _algos.groupsort_indexer(a, 1000)[0] + + # need to use a stable sort + expected = np.argsort(a, kind='mergesort') + assert (np.array_equal(result, expected)) + + # compare with lexsort + key = a * 1000 + b + result = _algos.groupsort_indexer(key, 1000000)[0] + expected = np.lexsort((b, a)) + assert (np.array_equal(result, expected)) + + +def test_ensure_platform_int(): + arr = np.arange(100) + + result = _algos.ensure_platform_int(arr) + assert (result is arr) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_infer_and_convert.py b/pandas/tests/test_infer_and_convert.py new file mode 100644 index 0000000000000..06e2a82e07dee --- /dev/null +++ b/pandas/tests/test_infer_and_convert.py @@ -0,0 +1,384 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime, timedelta, date, time + +import numpy as np +import pandas as pd +import pandas.lib as lib +import pandas.util.testing as tm +from pandas import Index + +from pandas.compat import long, u, PY2 + + +class TestInference(tm.TestCase): + + def test_infer_dtype_bytes(self): + compare = 'string' if PY2 else 'bytes' + + # string array of bytes + arr = np.array(list('abc'), dtype='S1') + self.assertEqual(pd.lib.infer_dtype(arr), compare) + + # object array of bytes + arr = arr.astype(object) + self.assertEqual(pd.lib.infer_dtype(arr), compare) + + def test_isinf_scalar(self): + # GH 11352 + self.assertTrue(lib.isposinf_scalar(float('inf'))) + self.assertTrue(lib.isposinf_scalar(np.inf)) + self.assertFalse(lib.isposinf_scalar(-np.inf)) + self.assertFalse(lib.isposinf_scalar(1)) + self.assertFalse(lib.isposinf_scalar('a')) + + self.assertTrue(lib.isneginf_scalar(float('-inf'))) + self.assertTrue(lib.isneginf_scalar(-np.inf)) + self.assertFalse(lib.isneginf_scalar(np.inf)) + self.assertFalse(lib.isneginf_scalar(1)) + self.assertFalse(lib.isneginf_scalar('a')) + + def test_maybe_convert_numeric_infinities(self): + # see gh-13274 + infinities = ['inf', 'inF', 'iNf', 'Inf', + 'iNF', 'InF', 'INf', 'INF'] + na_values = set(['', 'NULL', 'nan']) + + pos = np.array(['inf'], dtype=np.float64) + neg = np.array(['-inf'], dtype=np.float64) + + msg = "Unable to parse string" + + for infinity in infinities: + for maybe_int in (True, False): + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(['-' + infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, neg) + + out = lib.maybe_convert_numeric( + np.array([u(infinity)], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(['+' + infinity], dtype=object), + na_values, maybe_int) + tm.assert_numpy_array_equal(out, pos) + + # too many characters + with tm.assertRaisesRegexp(ValueError, msg): + lib.maybe_convert_numeric( + np.array(['foo_' + infinity], dtype=object), + na_values, maybe_int) + + def test_maybe_convert_numeric_post_floatify_nan(self): + # see gh-13314 + data = np.array(['1.200', '-999.000', '4.500'], dtype=object) + expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) + nan_values = set([-999, -999.0]) + + for coerce_type in (True, False): + out = lib.maybe_convert_numeric(data, nan_values, coerce_type) + tm.assert_numpy_array_equal(out, expected) + + def test_convert_infs(self): + arr = np.array(['inf', 'inf', 'inf'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False) + self.assertTrue(result.dtype == np.float64) + + arr = np.array(['-inf', '-inf', '-inf'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False) + self.assertTrue(result.dtype == np.float64) + + def test_scientific_no_exponent(self): + # See PR 12215 + arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') + result = lib.maybe_convert_numeric(arr, set(), False, True) + self.assertTrue(np.all(np.isnan(result))) + + +class TestTypeInference(tm.TestCase): + _multiprocess_can_split_ = True + + def test_length_zero(self): + result = lib.infer_dtype(np.array([], dtype='i4')) + self.assertEqual(result, 'integer') + + result = lib.infer_dtype([]) + self.assertEqual(result, 'empty') + + def test_integers(self): + arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'integer') + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed-integer') + + arr = np.array([1, 2, 3, 4, 5], dtype='i4') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'integer') + + def test_bools(self): + arr = np.array([True, False, True, True, True], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'boolean') + + arr = np.array([np.bool_(True), np.bool_(False)], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'boolean') + + arr = np.array([True, False, True, 'foo'], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed') + + arr = np.array([True, False, True], dtype=bool) + result = lib.infer_dtype(arr) + self.assertEqual(result, 'boolean') + + def test_floats(self): + arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'floating') + + arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], + dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed-integer') + + arr = np.array([1, 2, 3, 4, 5], dtype='f4') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'floating') + + arr = np.array([1, 2, 3, 4, 5], dtype='f8') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'floating') + + def test_string(self): + pass + + def test_unicode(self): + pass + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + self.assertEqual(index.inferred_type, 'datetime64') + + def test_date(self): + + dates = [date(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + self.assertEqual(index.inferred_type, 'date') + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + result = lib.to_object_array_tuples(values) + + try: + # make sure record array works + from collections import namedtuple + record = namedtuple('record', 'x y') + r = record(5, 6) + values = [r] + result = lib.to_object_array_tuples(values) # noqa + except ImportError: + pass + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed') + + def test_categorical(self): + + # GH 8974 + from pandas import Categorical, Series + arr = Categorical(list('abc')) + result = lib.infer_dtype(arr) + self.assertEqual(result, 'categorical') + + result = lib.infer_dtype(Series(arr)) + self.assertEqual(result, 'categorical') + + arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) + result = lib.infer_dtype(arr) + self.assertEqual(result, 'categorical') + + result = lib.infer_dtype(Series(arr)) + self.assertEqual(result, 'categorical') + + +class TestConvert(tm.TestCase): + + def test_convert_objects(self): + arr = np.array(['a', 'b', np.nan, np.nan, 'd', 'e', 'f'], dtype='O') + result = lib.maybe_convert_objects(arr) + self.assertTrue(result.dtype == np.object_) + + def test_convert_objects_ints(self): + # test that we can detect many kinds of integers + dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8'] + + for dtype_str in dtypes: + arr = np.array(list(np.arange(20, dtype=dtype_str)), dtype='O') + self.assertTrue(arr[0].dtype == np.dtype(dtype_str)) + result = lib.maybe_convert_objects(arr) + self.assertTrue(issubclass(result.dtype.type, np.integer)) + + def test_convert_objects_complex_number(self): + for dtype in np.sctypes['complex']: + arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') + self.assertTrue(arr[0].dtype == np.dtype(dtype)) + result = lib.maybe_convert_objects(arr) + self.assertTrue(issubclass(result.dtype.type, np.complexfloating)) + + +class Testisscalar(tm.TestCase): + + def test_isscalar_builtin_scalars(self): + self.assertTrue(lib.isscalar(None)) + self.assertTrue(lib.isscalar(True)) + self.assertTrue(lib.isscalar(False)) + self.assertTrue(lib.isscalar(0.)) + self.assertTrue(lib.isscalar(np.nan)) + self.assertTrue(lib.isscalar('foobar')) + self.assertTrue(lib.isscalar(b'foobar')) + self.assertTrue(lib.isscalar(u('efoobar'))) + self.assertTrue(lib.isscalar(datetime(2014, 1, 1))) + self.assertTrue(lib.isscalar(date(2014, 1, 1))) + self.assertTrue(lib.isscalar(time(12, 0))) + self.assertTrue(lib.isscalar(timedelta(hours=1))) + self.assertTrue(lib.isscalar(pd.NaT)) + + def test_isscalar_builtin_nonscalars(self): + self.assertFalse(lib.isscalar({})) + self.assertFalse(lib.isscalar([])) + self.assertFalse(lib.isscalar([1])) + self.assertFalse(lib.isscalar(())) + self.assertFalse(lib.isscalar((1, ))) + self.assertFalse(lib.isscalar(slice(None))) + self.assertFalse(lib.isscalar(Ellipsis)) + + def test_isscalar_numpy_array_scalars(self): + self.assertTrue(lib.isscalar(np.int64(1))) + self.assertTrue(lib.isscalar(np.float64(1.))) + self.assertTrue(lib.isscalar(np.int32(1))) + self.assertTrue(lib.isscalar(np.object_('foobar'))) + self.assertTrue(lib.isscalar(np.str_('foobar'))) + self.assertTrue(lib.isscalar(np.unicode_(u('foobar')))) + self.assertTrue(lib.isscalar(np.bytes_(b'foobar'))) + self.assertTrue(lib.isscalar(np.datetime64('2014-01-01'))) + self.assertTrue(lib.isscalar(np.timedelta64(1, 'h'))) + + def test_isscalar_numpy_zerodim_arrays(self): + for zerodim in [np.array(1), np.array('foobar'), + np.array(np.datetime64('2014-01-01')), + np.array(np.timedelta64(1, 'h')), + np.array(np.datetime64('NaT'))]: + self.assertFalse(lib.isscalar(zerodim)) + self.assertTrue(lib.isscalar(lib.item_from_zerodim(zerodim))) + + def test_isscalar_numpy_arrays(self): + self.assertFalse(lib.isscalar(np.array([]))) + self.assertFalse(lib.isscalar(np.array([[]]))) + self.assertFalse(lib.isscalar(np.matrix('1; 2'))) + + def test_isscalar_pandas_scalars(self): + self.assertTrue(lib.isscalar(pd.Timestamp('2014-01-01'))) + self.assertTrue(lib.isscalar(pd.Timedelta(hours=1))) + self.assertTrue(lib.isscalar(pd.Period('2014-01-01'))) + + def test_lisscalar_pandas_containers(self): + self.assertFalse(lib.isscalar(pd.Series())) + self.assertFalse(lib.isscalar(pd.Series([1]))) + self.assertFalse(lib.isscalar(pd.DataFrame())) + self.assertFalse(lib.isscalar(pd.DataFrame([[1]]))) + self.assertFalse(lib.isscalar(pd.Panel())) + self.assertFalse(lib.isscalar(pd.Panel([[[1]]]))) + self.assertFalse(lib.isscalar(pd.Index([]))) + self.assertFalse(lib.isscalar(pd.Index([1]))) + + +class TestParseSQL(tm.TestCase): + + def test_convert_sql_column_floats(self): + arr = np.array([1.5, None, 3, 4.2], dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_strings(self): + arr = np.array(['1.5', None, '3', '4.2'], dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_unicode(self): + arr = np.array([u('1.5'), None, u('3'), u('4.2')], + dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], + dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_ints(self): + arr = np.array([1, 2, 3, 4], dtype='O') + arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') + result = lib.convert_sql_column(arr) + result2 = lib.convert_sql_column(arr2) + expected = np.array([1, 2, 3, 4], dtype='i8') + self.assert_numpy_array_equal(result, expected) + self.assert_numpy_array_equal(result2, expected) + + arr = np.array([1, 2, 3, None, 4], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_longs(self): + arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, 4], dtype='i8') + self.assert_numpy_array_equal(result, expected) + + arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_bools(self): + arr = np.array([True, False, True, False], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([True, False, True, False], dtype=bool) + self.assert_numpy_array_equal(result, expected) + + arr = np.array([True, False, None, False], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([True, False, np.nan, False], dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_decimals(self): + from decimal import Decimal + arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) + result = lib.convert_sql_column(arr) + expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') + self.assert_numpy_array_equal(result, expected) + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index c6a703673a4c4..bfac0aa83b434 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -1,19 +1,9 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta, date, time - import numpy as np -import pandas as pd import pandas.lib as lib import pandas.util.testing as tm -from pandas.compat import long, u, PY2 - - -def _assert_same_values_and_dtype(res, exp): - tm.assert_equal(res.dtype, exp.dtype) - tm.assert_almost_equal(res, exp) - class TestMisc(tm.TestCase): @@ -34,16 +24,8 @@ def test_max_len_string_array(self): tm.assertRaises(TypeError, lambda: lib.max_len_string_array(arr.astype('U'))) - def test_infer_dtype_bytes(self): - compare = 'string' if PY2 else 'bytes' - - # string array of bytes - arr = np.array(list('abc'), dtype='S1') - self.assertEqual(pd.lib.infer_dtype(arr), compare) - # object array of bytes - arr = arr.astype(object) - self.assertEqual(pd.lib.infer_dtype(arr), compare) +class TestIndexing(tm.TestCase): def test_maybe_indices_to_slice_left_edge(self): target = np.arange(100) @@ -174,203 +156,58 @@ def test_maybe_indices_to_slice_middle(self): self.assert_numpy_array_equal(maybe_slice, indices) self.assert_numpy_array_equal(target[indices], target[maybe_slice]) - def test_isinf_scalar(self): - # GH 11352 - self.assertTrue(lib.isposinf_scalar(float('inf'))) - self.assertTrue(lib.isposinf_scalar(np.inf)) - self.assertFalse(lib.isposinf_scalar(-np.inf)) - self.assertFalse(lib.isposinf_scalar(1)) - self.assertFalse(lib.isposinf_scalar('a')) - - self.assertTrue(lib.isneginf_scalar(float('-inf'))) - self.assertTrue(lib.isneginf_scalar(-np.inf)) - self.assertFalse(lib.isneginf_scalar(np.inf)) - self.assertFalse(lib.isneginf_scalar(1)) - self.assertFalse(lib.isneginf_scalar('a')) - - -# tests related to functions imported from inference.pyx -class TestInference(tm.TestCase): - def test_maybe_convert_numeric_infinities(self): - # see gh-13274 - infinities = ['inf', 'inF', 'iNf', 'Inf', - 'iNF', 'InF', 'INf', 'INF'] - na_values = set(['', 'NULL', 'nan']) - - pos = np.array(['inf'], dtype=np.float64) - neg = np.array(['-inf'], dtype=np.float64) - - msg = "Unable to parse string" - - for infinity in infinities: - for maybe_int in (True, False): - out = lib.maybe_convert_numeric( - np.array([infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['-' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, neg) - - out = lib.maybe_convert_numeric( - np.array([u(infinity)], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['+' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - # too many characters - with tm.assertRaisesRegexp(ValueError, msg): - lib.maybe_convert_numeric( - np.array(['foo_' + infinity], dtype=object), - na_values, maybe_int) - - def test_maybe_convert_numeric_post_floatify_nan(self): - # see gh-13314 - data = np.array(['1.200', '-999.000', '4.500'], dtype=object) - expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) - nan_values = set([-999, -999.0]) - - for coerce_type in (True, False): - out = lib.maybe_convert_numeric(data, nan_values, coerce_type) - tm.assert_numpy_array_equal(out, expected) - - -class Testisscalar(tm.TestCase): - - def test_isscalar_builtin_scalars(self): - self.assertTrue(lib.isscalar(None)) - self.assertTrue(lib.isscalar(True)) - self.assertTrue(lib.isscalar(False)) - self.assertTrue(lib.isscalar(0.)) - self.assertTrue(lib.isscalar(np.nan)) - self.assertTrue(lib.isscalar('foobar')) - self.assertTrue(lib.isscalar(b'foobar')) - self.assertTrue(lib.isscalar(u('efoobar'))) - self.assertTrue(lib.isscalar(datetime(2014, 1, 1))) - self.assertTrue(lib.isscalar(date(2014, 1, 1))) - self.assertTrue(lib.isscalar(time(12, 0))) - self.assertTrue(lib.isscalar(timedelta(hours=1))) - self.assertTrue(lib.isscalar(pd.NaT)) - - def test_isscalar_builtin_nonscalars(self): - self.assertFalse(lib.isscalar({})) - self.assertFalse(lib.isscalar([])) - self.assertFalse(lib.isscalar([1])) - self.assertFalse(lib.isscalar(())) - self.assertFalse(lib.isscalar((1, ))) - self.assertFalse(lib.isscalar(slice(None))) - self.assertFalse(lib.isscalar(Ellipsis)) - - def test_isscalar_numpy_array_scalars(self): - self.assertTrue(lib.isscalar(np.int64(1))) - self.assertTrue(lib.isscalar(np.float64(1.))) - self.assertTrue(lib.isscalar(np.int32(1))) - self.assertTrue(lib.isscalar(np.object_('foobar'))) - self.assertTrue(lib.isscalar(np.str_('foobar'))) - self.assertTrue(lib.isscalar(np.unicode_(u('foobar')))) - self.assertTrue(lib.isscalar(np.bytes_(b'foobar'))) - self.assertTrue(lib.isscalar(np.datetime64('2014-01-01'))) - self.assertTrue(lib.isscalar(np.timedelta64(1, 'h'))) - - def test_isscalar_numpy_zerodim_arrays(self): - for zerodim in [np.array(1), np.array('foobar'), - np.array(np.datetime64('2014-01-01')), - np.array(np.timedelta64(1, 'h')), - np.array(np.datetime64('NaT'))]: - self.assertFalse(lib.isscalar(zerodim)) - self.assertTrue(lib.isscalar(lib.item_from_zerodim(zerodim))) - - def test_isscalar_numpy_arrays(self): - self.assertFalse(lib.isscalar(np.array([]))) - self.assertFalse(lib.isscalar(np.array([[]]))) - self.assertFalse(lib.isscalar(np.matrix('1; 2'))) - - def test_isscalar_pandas_scalars(self): - self.assertTrue(lib.isscalar(pd.Timestamp('2014-01-01'))) - self.assertTrue(lib.isscalar(pd.Timedelta(hours=1))) - self.assertTrue(lib.isscalar(pd.Period('2014-01-01'))) - - def test_lisscalar_pandas_containers(self): - self.assertFalse(lib.isscalar(pd.Series())) - self.assertFalse(lib.isscalar(pd.Series([1]))) - self.assertFalse(lib.isscalar(pd.DataFrame())) - self.assertFalse(lib.isscalar(pd.DataFrame([[1]]))) - self.assertFalse(lib.isscalar(pd.Panel())) - self.assertFalse(lib.isscalar(pd.Panel([[[1]]]))) - self.assertFalse(lib.isscalar(pd.Index([]))) - self.assertFalse(lib.isscalar(pd.Index([1]))) - - -class TestParseSQL(tm.TestCase): - - def test_convert_sql_column_floats(self): - arr = np.array([1.5, None, 3, 4.2], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - _assert_same_values_and_dtype(result, expected) - - def test_convert_sql_column_strings(self): - arr = np.array(['1.5', None, '3', '4.2'], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) - _assert_same_values_and_dtype(result, expected) - - def test_convert_sql_column_unicode(self): - arr = np.array([u('1.5'), None, u('3'), u('4.2')], - dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], - dtype=object) - _assert_same_values_and_dtype(result, expected) - - def test_convert_sql_column_ints(self): - arr = np.array([1, 2, 3, 4], dtype='O') - arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') - result = lib.convert_sql_column(arr) - result2 = lib.convert_sql_column(arr2) - expected = np.array([1, 2, 3, 4], dtype='i8') - _assert_same_values_and_dtype(result, expected) - _assert_same_values_and_dtype(result2, expected) - - arr = np.array([1, 2, 3, None, 4], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - _assert_same_values_and_dtype(result, expected) - - def test_convert_sql_column_longs(self): - arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, 4], dtype='i8') - _assert_same_values_and_dtype(result, expected) - - arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - _assert_same_values_and_dtype(result, expected) - - def test_convert_sql_column_bools(self): - arr = np.array([True, False, True, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, True, False], dtype=bool) - _assert_same_values_and_dtype(result, expected) - - arr = np.array([True, False, None, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, np.nan, False], dtype=object) - _assert_same_values_and_dtype(result, expected) - - def test_convert_sql_column_decimals(self): - from decimal import Decimal - arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - _assert_same_values_and_dtype(result, expected) + def test_maybe_booleans_to_slice(self): + arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) + result = lib.maybe_booleans_to_slice(arr) + self.assertTrue(result.dtype == np.bool_) + + result = lib.maybe_booleans_to_slice(arr[:0]) + self.assertTrue(result == slice(0, 0)) + + def test_get_reverse_indexer(self): + indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) + result = lib.get_reverse_indexer(indexer, 5) + expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) + self.assertTrue(np.array_equal(result, expected)) + + +def test_duplicated_with_nas(): + keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) + + result = lib.duplicated(keys) + expected = [False, False, False, True, False, True] + assert (np.array_equal(result, expected)) + + result = lib.duplicated(keys, keep='first') + expected = [False, False, False, True, False, True] + assert (np.array_equal(result, expected)) + + result = lib.duplicated(keys, keep='last') + expected = [True, False, True, False, False, False] + assert (np.array_equal(result, expected)) + + result = lib.duplicated(keys, keep=False) + expected = [True, False, True, True, False, True] + assert (np.array_equal(result, expected)) + + keys = np.empty(8, dtype=object) + for i, t in enumerate(zip([0, 0, np.nan, np.nan] * 2, + [0, np.nan, 0, np.nan] * 2)): + keys[i] = t + + result = lib.duplicated(keys) + falses = [False] * 4 + trues = [True] * 4 + expected = falses + trues + assert (np.array_equal(result, expected)) + + result = lib.duplicated(keys, keep='last') + expected = trues + falses + assert (np.array_equal(result, expected)) + + result = lib.duplicated(keys, keep=False) + expected = trues + trues + assert (np.array_equal(result, expected)) if __name__ == '__main__': import nose diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py deleted file mode 100644 index 4dd1cf54a5527..0000000000000 --- a/pandas/tests/test_tseries.py +++ /dev/null @@ -1,714 +0,0 @@ -# -*- coding: utf-8 -*- -from numpy import nan -import numpy as np -from pandas import Index, isnull, Timestamp -from pandas.util.testing import assert_almost_equal -import pandas.util.testing as tm -from pandas.compat import range, lrange, zip -import pandas.lib as lib -import pandas._period as period -import pandas.algos as algos -from pandas.core import common as com -import datetime - - -class TestTseriesUtil(tm.TestCase): - _multiprocess_can_split_ = True - - def test_combineFunc(self): - pass - - def test_reindex(self): - pass - - def test_isnull(self): - pass - - def test_groupby(self): - pass - - def test_groupby_withnull(self): - pass - - def test_backfill(self): - old = Index([1, 5, 10]) - new = Index(lrange(12)) - - filler = algos.backfill_int64(old.values, new.values) - - expect_filler = np.array([0, 0, 1, 1, 1, 1, - 2, 2, 2, 2, 2, -1], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) - - # corner case - old = Index([1, 4]) - new = Index(lrange(5, 10)) - filler = algos.backfill_int64(old.values, new.values) - - expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) - - def test_pad(self): - old = Index([1, 5, 10]) - new = Index(lrange(12)) - - filler = algos.pad_int64(old.values, new.values) - - expect_filler = np.array([-1, 0, 0, 0, 0, 1, - 1, 1, 1, 1, 2, 2], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) - - # corner case - old = Index([5, 10]) - new = Index(lrange(5)) - filler = algos.pad_int64(old.values, new.values) - expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) - self.assert_numpy_array_equal(filler, expect_filler) - - -def test_left_join_indexer_unique(): - a = np.array([1, 2, 3, 4, 5], dtype=np.int64) - b = np.array([2, 2, 3, 4, 4], dtype=np.int64) - - result = algos.left_join_indexer_unique_int64(b, a) - expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) - assert (np.array_equal(result, expected)) - - -def test_left_outer_join_bug(): - left = np.array([0, 1, 0, 1, 1, 2, 3, 1, 0, 2, 1, 2, 0, 1, 1, 2, 3, 2, 3, - 2, 1, 1, 3, 0, 3, 2, 3, 0, 0, 2, 3, 2, 0, 3, 1, 3, 0, 1, - 3, 0, 0, 1, 0, 3, 1, 0, 1, 0, 1, 1, 0, 2, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 0, 3, 1, 3, 2, 2, 0, 1, 3, 0, 2, 3, 2, 3, 3, - 2, 3, 3, 1, 3, 2, 0, 0, 3, 1, 1, 1, 0, 2, 3, 3, 1, 2, 0, - 3, 1, 2, 0, 2], dtype=np.int64) - - right = np.array([3, 1], dtype=np.int64) - max_groups = 4 - - lidx, ridx = algos.left_outer_join(left, right, max_groups, sort=False) - - exp_lidx = np.arange(len(left)) - exp_ridx = -np.ones(len(left)) - exp_ridx[left == 1] = 1 - exp_ridx[left == 3] = 0 - - assert (np.array_equal(lidx, exp_lidx)) - assert (np.array_equal(ridx, exp_ridx)) - - -def test_inner_join_indexer(): - a = np.array([1, 2, 3, 4, 5], dtype=np.int64) - b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - - index, ares, bres = algos.inner_join_indexer_int64(a, b) - - index_exp = np.array([3, 5], dtype=np.int64) - assert_almost_equal(index, index_exp) - - aexp = np.array([2, 4], dtype=np.int64) - bexp = np.array([1, 2], dtype=np.int64) - assert_almost_equal(ares, aexp) - assert_almost_equal(bres, bexp) - - a = np.array([5], dtype=np.int64) - b = np.array([5], dtype=np.int64) - - index, ares, bres = algos.inner_join_indexer_int64(a, b) - tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) - tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) - tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) - - -def test_outer_join_indexer(): - a = np.array([1, 2, 3, 4, 5], dtype=np.int64) - b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - - index, ares, bres = algos.outer_join_indexer_int64(a, b) - - index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) - assert_almost_equal(index, index_exp) - - aexp = np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.int64) - bexp = np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.int64) - assert_almost_equal(ares, aexp) - assert_almost_equal(bres, bexp) - - a = np.array([5], dtype=np.int64) - b = np.array([5], dtype=np.int64) - - index, ares, bres = algos.outer_join_indexer_int64(a, b) - tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) - tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) - tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) - - -def test_left_join_indexer(): - a = np.array([1, 2, 3, 4, 5], dtype=np.int64) - b = np.array([0, 3, 5, 7, 9], dtype=np.int64) - - index, ares, bres = algos.left_join_indexer_int64(a, b) - - assert_almost_equal(index, a) - - aexp = np.array([0, 1, 2, 3, 4], dtype=np.int64) - bexp = np.array([-1, -1, 1, -1, 2], dtype=np.int64) - assert_almost_equal(ares, aexp) - assert_almost_equal(bres, bexp) - - a = np.array([5], dtype=np.int64) - b = np.array([5], dtype=np.int64) - - index, ares, bres = algos.left_join_indexer_int64(a, b) - tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) - tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) - tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) - - -def test_left_join_indexer2(): - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - - res, lidx, ridx = algos.left_join_indexer_int64(idx2.values, idx.values) - - exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) - assert_almost_equal(res, exp_res) - - exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) - assert_almost_equal(lidx, exp_lidx) - - exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) - assert_almost_equal(ridx, exp_ridx) - - -def test_outer_join_indexer2(): - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - - res, lidx, ridx = algos.outer_join_indexer_int64(idx2.values, idx.values) - - exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) - assert_almost_equal(res, exp_res) - - exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) - assert_almost_equal(lidx, exp_lidx) - - exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) - assert_almost_equal(ridx, exp_ridx) - - -def test_inner_join_indexer2(): - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - - res, lidx, ridx = algos.inner_join_indexer_int64(idx2.values, idx.values) - - exp_res = np.array([1, 1, 2, 5], dtype=np.int64) - assert_almost_equal(res, exp_res) - - exp_lidx = np.array([0, 0, 1, 2], dtype=np.int64) - assert_almost_equal(lidx, exp_lidx) - - exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) - assert_almost_equal(ridx, exp_ridx) - - -def test_is_lexsorted(): - failure = [ - np.array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, - 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0]), - np.array([30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, - 15, 14, - 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, - 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, - 12, 11, - 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, - 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, - 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22, - 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, - 6, 5, - 4, 3, 2, 1, 0])] - - assert (not algos.is_lexsorted(failure)) - -# def test_get_group_index(): -# a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64) -# b = np.array([1, 0, 3, 2, 0, 2, 3, 0], dtype=np.int64) -# expected = np.array([1, 4, 11, 2, 8, 6, 3, 0], dtype=np.int64) - -# result = lib.get_group_index([a, b], (3, 4)) - -# assert(np.array_equal(result, expected)) - - -def test_groupsort_indexer(): - a = np.random.randint(0, 1000, 100).astype(np.int64) - b = np.random.randint(0, 1000, 100).astype(np.int64) - - result = algos.groupsort_indexer(a, 1000)[0] - - # need to use a stable sort - expected = np.argsort(a, kind='mergesort') - assert (np.array_equal(result, expected)) - - # compare with lexsort - key = a * 1000 + b - result = algos.groupsort_indexer(key, 1000000)[0] - expected = np.lexsort((b, a)) - assert (np.array_equal(result, expected)) - - -def test_ensure_platform_int(): - arr = np.arange(100) - - result = algos.ensure_platform_int(arr) - assert (result is arr) - - -def test_duplicated_with_nas(): - keys = np.array([0, 1, nan, 0, 2, nan], dtype=object) - - result = lib.duplicated(keys) - expected = [False, False, False, True, False, True] - assert (np.array_equal(result, expected)) - - result = lib.duplicated(keys, keep='first') - expected = [False, False, False, True, False, True] - assert (np.array_equal(result, expected)) - - result = lib.duplicated(keys, keep='last') - expected = [True, False, True, False, False, False] - assert (np.array_equal(result, expected)) - - result = lib.duplicated(keys, keep=False) - expected = [True, False, True, True, False, True] - assert (np.array_equal(result, expected)) - - keys = np.empty(8, dtype=object) - for i, t in enumerate(zip([0, 0, nan, nan] * 2, [0, nan, 0, nan] * 2)): - keys[i] = t - - result = lib.duplicated(keys) - falses = [False] * 4 - trues = [True] * 4 - expected = falses + trues - assert (np.array_equal(result, expected)) - - result = lib.duplicated(keys, keep='last') - expected = trues + falses - assert (np.array_equal(result, expected)) - - result = lib.duplicated(keys, keep=False) - expected = trues + trues - assert (np.array_equal(result, expected)) - - -def test_maybe_booleans_to_slice(): - arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) - result = lib.maybe_booleans_to_slice(arr) - assert (result.dtype == np.bool_) - - result = lib.maybe_booleans_to_slice(arr[:0]) - assert (result == slice(0, 0)) - - -def test_convert_objects(): - arr = np.array(['a', 'b', nan, nan, 'd', 'e', 'f'], dtype='O') - result = lib.maybe_convert_objects(arr) - assert (result.dtype == np.object_) - - -def test_convert_infs(): - arr = np.array(['inf', 'inf', 'inf'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False) - assert (result.dtype == np.float64) - - arr = np.array(['-inf', '-inf', '-inf'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False) - assert (result.dtype == np.float64) - - -def test_scientific_no_exponent(): - # See PR 12215 - arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False, True) - assert np.all(np.isnan(result)) - - -def test_convert_objects_ints(): - # test that we can detect many kinds of integers - dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8'] - - for dtype_str in dtypes: - arr = np.array(list(np.arange(20, dtype=dtype_str)), dtype='O') - assert (arr[0].dtype == np.dtype(dtype_str)) - result = lib.maybe_convert_objects(arr) - assert (issubclass(result.dtype.type, np.integer)) - - -def test_convert_objects_complex_number(): - for dtype in np.sctypes['complex']: - arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') - assert (arr[0].dtype == np.dtype(dtype)) - result = lib.maybe_convert_objects(arr) - assert (issubclass(result.dtype.type, np.complexfloating)) - - -def test_rank(): - tm._skip_if_no_scipy() - from scipy.stats import rankdata - - def _check(arr): - mask = ~np.isfinite(arr) - arr = arr.copy() - result = algos.rank_1d_float64(arr) - arr[mask] = np.inf - exp = rankdata(arr) - exp[mask] = nan - assert_almost_equal(result, exp) - - _check(np.array([nan, nan, 5., 5., 5., nan, 1, 2, 3, nan])) - _check(np.array([4., nan, 5., 5., 5., nan, 1, 2, 4., nan])) - - -def test_get_reverse_indexer(): - indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) - result = lib.get_reverse_indexer(indexer, 5) - expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) - assert (np.array_equal(result, expected)) - - -def test_pad_backfill_object_segfault(): - - old = np.array([], dtype='O') - new = np.array([datetime.datetime(2010, 12, 31)], dtype='O') - - result = algos.pad_object(old, new) - expected = np.array([-1], dtype=np.int64) - assert (np.array_equal(result, expected)) - - result = algos.pad_object(new, old) - expected = np.array([], dtype=np.int64) - assert (np.array_equal(result, expected)) - - result = algos.backfill_object(old, new) - expected = np.array([-1], dtype=np.int64) - assert (np.array_equal(result, expected)) - - result = algos.backfill_object(new, old) - expected = np.array([], dtype=np.int64) - assert (np.array_equal(result, expected)) - - -def test_arrmap(): - values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') - result = algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) - assert (result.dtype == np.bool_) - - -def test_series_grouper(): - from pandas import Series - obj = Series(np.random.randn(10)) - dummy = obj[:0] - - labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) - - grouper = lib.SeriesGrouper(obj, np.mean, labels, 2, dummy) - result, counts = grouper.get_result() - - expected = np.array([obj[3:6].mean(), obj[6:].mean()]) - assert_almost_equal(result, expected) - - exp_counts = np.array([3, 4], dtype=np.int64) - assert_almost_equal(counts, exp_counts) - - -def test_series_bin_grouper(): - from pandas import Series - obj = Series(np.random.randn(10)) - dummy = obj[:0] - - bins = np.array([3, 6]) - - grouper = lib.SeriesBinGrouper(obj, np.mean, bins, dummy) - result, counts = grouper.get_result() - - expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) - assert_almost_equal(result, expected) - - exp_counts = np.array([3, 3, 4], dtype=np.int64) - assert_almost_equal(counts, exp_counts) - - -class TestBinGroupers(tm.TestCase): - _multiprocess_can_split_ = True - - def setUp(self): - self.obj = np.random.randn(10, 1) - self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64) - self.bins = np.array([3, 6], dtype=np.int64) - - def test_generate_bins(self): - from pandas.core.groupby import generate_bins_generic - values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - binner = np.array([0, 3, 6, 9], dtype=np.int64) - - for func in [lib.generate_bins_dt64, generate_bins_generic]: - bins = func(values, binner, closed='left') - assert ((bins == np.array([2, 5, 6])).all()) - - bins = func(values, binner, closed='right') - assert ((bins == np.array([3, 6, 6])).all()) - - for func in [lib.generate_bins_dt64, generate_bins_generic]: - values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) - binner = np.array([0, 3, 6], dtype=np.int64) - - bins = func(values, binner, closed='right') - assert ((bins == np.array([3, 6])).all()) - - self.assertRaises(ValueError, generate_bins_generic, values, [], - 'right') - self.assertRaises(ValueError, generate_bins_generic, values[:0], - binner, 'right') - - self.assertRaises(ValueError, generate_bins_generic, values, [4], - 'right') - self.assertRaises(ValueError, generate_bins_generic, values, [-3, -1], - 'right') - - -def test_group_ohlc(): - def _check(dtype): - obj = np.array(np.random.randn(20), dtype=dtype) - - bins = np.array([6, 12, 20]) - out = np.zeros((3, 4), dtype) - counts = np.zeros(len(out), dtype=np.int64) - labels = com._ensure_int64(np.repeat(np.arange(3), - np.diff(np.r_[0, bins]))) - - func = getattr(algos, 'group_ohlc_%s' % dtype) - func(out, counts, obj[:, None], labels) - - def _ohlc(group): - if isnull(group).all(): - return np.repeat(nan, 4) - return [group[0], group.max(), group.min(), group[-1]] - - expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), - _ohlc(obj[12:])]) - - assert_almost_equal(out, expected) - tm.assert_numpy_array_equal(counts, - np.array([6, 6, 8], dtype=np.int64)) - - obj[:6] = nan - func(out, counts, obj[:, None], labels) - expected[0] = nan - assert_almost_equal(out, expected) - - _check('float32') - _check('float64') - - -def test_try_parse_dates(): - from dateutil.parser import parse - - arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - - result = lib.try_parse_dates(arr, dayfirst=True) - expected = [parse(d, dayfirst=True) for d in arr] - assert (np.array_equal(result, expected)) - - -class TestTypeInference(tm.TestCase): - _multiprocess_can_split_ = True - - def test_length_zero(self): - result = lib.infer_dtype(np.array([], dtype='i4')) - self.assertEqual(result, 'integer') - - result = lib.infer_dtype([]) - self.assertEqual(result, 'empty') - - def test_integers(self): - arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='i4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - def test_bools(self): - arr = np.array([True, False, True, True, True], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([np.bool_(True), np.bool_(False)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([True, False, True, 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - arr = np.array([True, False, True], dtype=bool) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - def test_floats(self): - arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], - dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='f4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, 4, 5], dtype='f8') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - def test_string(self): - pass - - def test_unicode(self): - pass - - def test_datetime(self): - - dates = [datetime.datetime(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'datetime64') - - def test_date(self): - - dates = [datetime.date(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'date') - - def test_to_object_array_tuples(self): - r = (5, 6) - values = [r] - result = lib.to_object_array_tuples(values) - - try: - # make sure record array works - from collections import namedtuple - record = namedtuple('record', 'x y') - r = record(5, 6) - values = [r] - result = lib.to_object_array_tuples(values) # noqa - except ImportError: - pass - - def test_object(self): - - # GH 7431 - # cannot infer more than this as only a single element - arr = np.array([None], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - def test_categorical(self): - - # GH 8974 - from pandas import Categorical, Series - arr = Categorical(list('abc')) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - -class TestMoments(tm.TestCase): - pass - - -class TestReducer(tm.TestCase): - def test_int_index(self): - from pandas.core.series import Series - - arr = np.random.randn(100, 4) - result = lib.reduce(arr, np.sum, labels=Index(np.arange(4))) - expected = arr.sum(0) - assert_almost_equal(result, expected) - - result = lib.reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) - expected = arr.sum(1) - assert_almost_equal(result, expected) - - dummy = Series(0., index=np.arange(100)) - result = lib.reduce(arr, np.sum, dummy=dummy, - labels=Index(np.arange(4))) - expected = arr.sum(0) - assert_almost_equal(result, expected) - - dummy = Series(0., index=np.arange(4)) - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) - expected = arr.sum(1) - assert_almost_equal(result, expected) - - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) - assert_almost_equal(result, expected) - - -class TestTsUtil(tm.TestCase): - def test_min_valid(self): - # Ensure that Timestamp.min is a valid Timestamp - Timestamp(Timestamp.min) - - def test_max_valid(self): - # Ensure that Timestamp.max is a valid Timestamp - Timestamp(Timestamp.max) - - def test_to_datetime_bijective(self): - # Ensure that converting to datetime and back only loses precision - # by going from nanoseconds to microseconds. - self.assertEqual( - Timestamp(Timestamp.max.to_pydatetime()).value / 1000, - Timestamp.max.value / 1000) - self.assertEqual( - Timestamp(Timestamp.min.to_pydatetime()).value / 1000, - Timestamp.min.value / 1000) - - -class TestPeriodField(tm.TestCase): - def test_get_period_field_raises_on_out_of_range(self): - self.assertRaises(ValueError, period.get_period_field, -1, 0, 0) - - def test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, period.get_period_field_arr, -1, - np.empty(1), 0) diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tseries/tests/test_bin_groupby.py new file mode 100644 index 0000000000000..6b6c468b7c391 --- /dev/null +++ b/pandas/tseries/tests/test_bin_groupby.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- + +from numpy import nan +import numpy as np + +from pandas import Index, isnull +from pandas.util.testing import assert_almost_equal +import pandas.util.testing as tm +import pandas.lib as lib +import pandas.algos as algos +from pandas.core import common as com + + +def test_series_grouper(): + from pandas import Series + obj = Series(np.random.randn(10)) + dummy = obj[:0] + + labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) + + grouper = lib.SeriesGrouper(obj, np.mean, labels, 2, dummy) + result, counts = grouper.get_result() + + expected = np.array([obj[3:6].mean(), obj[6:].mean()]) + assert_almost_equal(result, expected) + + exp_counts = np.array([3, 4], dtype=np.int64) + assert_almost_equal(counts, exp_counts) + + +def test_series_bin_grouper(): + from pandas import Series + obj = Series(np.random.randn(10)) + dummy = obj[:0] + + bins = np.array([3, 6]) + + grouper = lib.SeriesBinGrouper(obj, np.mean, bins, dummy) + result, counts = grouper.get_result() + + expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) + assert_almost_equal(result, expected) + + exp_counts = np.array([3, 3, 4], dtype=np.int64) + assert_almost_equal(counts, exp_counts) + + +class TestBinGroupers(tm.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + self.obj = np.random.randn(10, 1) + self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64) + self.bins = np.array([3, 6], dtype=np.int64) + + def test_generate_bins(self): + from pandas.core.groupby import generate_bins_generic + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + binner = np.array([0, 3, 6, 9], dtype=np.int64) + + for func in [lib.generate_bins_dt64, generate_bins_generic]: + bins = func(values, binner, closed='left') + assert ((bins == np.array([2, 5, 6])).all()) + + bins = func(values, binner, closed='right') + assert ((bins == np.array([3, 6, 6])).all()) + + for func in [lib.generate_bins_dt64, generate_bins_generic]: + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + binner = np.array([0, 3, 6], dtype=np.int64) + + bins = func(values, binner, closed='right') + assert ((bins == np.array([3, 6])).all()) + + self.assertRaises(ValueError, generate_bins_generic, values, [], + 'right') + self.assertRaises(ValueError, generate_bins_generic, values[:0], + binner, 'right') + + self.assertRaises(ValueError, generate_bins_generic, values, [4], + 'right') + self.assertRaises(ValueError, generate_bins_generic, values, [-3, -1], + 'right') + + +def test_group_ohlc(): + def _check(dtype): + obj = np.array(np.random.randn(20), dtype=dtype) + + bins = np.array([6, 12, 20]) + out = np.zeros((3, 4), dtype) + counts = np.zeros(len(out), dtype=np.int64) + labels = com._ensure_int64(np.repeat(np.arange(3), + np.diff(np.r_[0, bins]))) + + func = getattr(algos, 'group_ohlc_%s' % dtype) + func(out, counts, obj[:, None], labels) + + def _ohlc(group): + if isnull(group).all(): + return np.repeat(nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), + _ohlc(obj[12:])]) + + assert_almost_equal(out, expected) + tm.assert_numpy_array_equal(counts, + np.array([6, 6, 8], dtype=np.int64)) + + obj[:6] = nan + func(out, counts, obj[:, None], labels) + expected[0] = nan + assert_almost_equal(out, expected) + + _check('float32') + _check('float64') + + +class TestMoments(tm.TestCase): + pass + + +class TestReducer(tm.TestCase): + def test_int_index(self): + from pandas.core.series import Series + + arr = np.random.randn(100, 4) + result = lib.reduce(arr, np.sum, labels=Index(np.arange(4))) + expected = arr.sum(0) + assert_almost_equal(result, expected) + + result = lib.reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) + expected = arr.sum(1) + assert_almost_equal(result, expected) + + dummy = Series(0., index=np.arange(100)) + result = lib.reduce(arr, np.sum, dummy=dummy, + labels=Index(np.arange(4))) + expected = arr.sum(0) + assert_almost_equal(result, expected) + + dummy = Series(0., index=np.arange(4)) + result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) + expected = arr.sum(1) + assert_almost_equal(result, expected) + + result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) + assert_almost_equal(result, expected) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 8e6d339b87623..de23306c80b71 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -8,7 +8,7 @@ from datetime import datetime, date, timedelta -from pandas import Timestamp +from pandas import Timestamp, _period from pandas.tseries.frequencies import MONTHS, DAYS, _period_code_map from pandas.tseries.period import Period, PeriodIndex, period_range from pandas.tseries.index import DatetimeIndex, date_range, Index @@ -4450,6 +4450,14 @@ def test_ops_frame_period(self): tm.assert_frame_equal(df - df2, -exp) +class TestPeriodField(tm.TestCase): + def test_get_period_field_raises_on_out_of_range(self): + self.assertRaises(ValueError, _period.get_period_field, -1, 0, 0) + + def test_get_period_field_array_raises_on_out_of_range(self): + self.assertRaises(ValueError, _period.get_period_field_arr, -1, + np.empty(1), 0) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index d7426daa794c3..c6436163b9edb 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -2,7 +2,7 @@ from distutils.version import LooseVersion import numpy as np -from pandas import tslib +from pandas import tslib, lib import pandas._period as period import datetime @@ -25,6 +25,35 @@ from pandas.util.testing import assert_series_equal, _skip_if_has_locale +class TestTsUtil(tm.TestCase): + + def test_try_parse_dates(self): + from dateutil.parser import parse + arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) + + result = lib.try_parse_dates(arr, dayfirst=True) + expected = [parse(d, dayfirst=True) for d in arr] + self.assertTrue(np.array_equal(result, expected)) + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + def test_to_datetime_bijective(self): + # Ensure that converting to datetime and back only loses precision + # by going from nanoseconds to microseconds. + self.assertEqual( + Timestamp(Timestamp.max.to_pydatetime()).value / 1000, + Timestamp.max.value / 1000) + self.assertEqual( + Timestamp(Timestamp.min.to_pydatetime()).value / 1000, + Timestamp.min.value / 1000) + + class TestTimestamp(tm.TestCase): def test_constructor(self):