diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 13d61957eea00..770ad8a268f11 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -612,5 +612,8 @@ Bug Fixes - Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`) - Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) + - Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) - Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead the original dtype (:issue: `9431`) +- Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` may not raise ``IndexError`` against invalid index (:issue:`10295`) +- Bug in ``PeriodIndex.order`` reset freq (:issue:`10295`) diff --git a/pandas/core/index.py b/pandas/core/index.py index ce6c60df2fd94..a9878f493251b 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2486,7 +2486,7 @@ def get_slice_bound(self, label, side, kind): if is_bool_dtype(slc): slc = lib.maybe_booleans_to_slice(slc.view('u1')) else: - slc = lib.maybe_indices_to_slice(slc.astype('i8')) + slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self)) if isinstance(slc, np.ndarray): raise KeyError( "Cannot get %s slice bound for non-unique label:" @@ -5108,7 +5108,7 @@ def _maybe_to_slice(loc): if not isinstance(loc, np.ndarray) or loc.dtype != 'int64': return loc - loc = lib.maybe_indices_to_slice(loc) + loc = lib.maybe_indices_to_slice(loc, len(self)) if isinstance(loc, slice): return loc diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 4805a33e5b496..e839210fbbada 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -633,17 +633,42 @@ def convert_timestamps(ndarray values): return out -def 
maybe_indices_to_slice(ndarray[int64_t] indices): + +def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) + int k, vstart, vlast, v + + if n == 0: + return slice(0, 0) - if not n or indices[0] < 0: + vstart = indices[0] + if vstart < 0 or max_len <= vstart: return indices - for i in range(1, n): - if indices[i] - indices[i - 1] != 1: - return indices - return slice(indices[0], indices[n - 1] + 1) + if n == 1: + return slice(vstart, vstart + 1) + + vlast = indices[n - 1] + if vlast < 0 or max_len <= vlast: + return indices + + k = indices[1] - indices[0] + if k == 0: + return indices + else: + for i in range(2, n): + v = indices[i] + if v - indices[i - 1] != k: + return indices + + if k > 0: + return slice(vstart, vlast + 1, k) + else: + if vlast == 0: + return slice(vstart, None, k) + else: + return slice(vstart, vlast - 1, k) def maybe_booleans_to_slice(ndarray[uint8_t] mask): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 2699e780f0edb..15023b77694e6 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -2266,6 +2266,16 @@ def test_get_loc_na(self): idx = Float64Index([np.nan, 1, np.nan]) self.assertEqual(idx.get_loc(1), 1) + + # representable by slice [0:2:2] + # self.assertRaises(KeyError, idx.slice_locs, np.nan) + sliced = idx.slice_locs(np.nan) + self.assertTrue(isinstance(sliced, tuple)) + self.assertEqual(sliced, (0, 3)) + + # not representable by slice + idx = Float64Index([np.nan, 1, np.nan, np.nan]) + self.assertEqual(idx.get_loc(1), 1) self.assertRaises(KeyError, idx.slice_locs, np.nan) def test_contains_nans(self): diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 6d9bea29cf44d..cfc98f5c20360 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from pandas.lib import isscalar, item_from_zerodim, max_len_string_array +import pandas.lib as lib import 
pandas.util.testing as tm from pandas.compat import u, PY2 @@ -14,19 +14,19 @@ class TestMisc(tm.TestCase): def test_max_len_string_array(self): arr = a = np.array(['foo', 'b', np.nan], dtype='object') - self.assertTrue(max_len_string_array(arr), 3) + self.assertTrue(lib.max_len_string_array(arr), 3) # unicode arr = a.astype('U').astype(object) - self.assertTrue(max_len_string_array(arr), 3) + self.assertTrue(lib.max_len_string_array(arr), 3) # bytes for python3 arr = a.astype('S').astype(object) - self.assertTrue(max_len_string_array(arr), 3) + self.assertTrue(lib.max_len_string_array(arr), 3) # raises tm.assertRaises(TypeError, - lambda: max_len_string_array(arr.astype('U'))) + lambda: lib.max_len_string_array(arr.astype('U'))) def test_infer_dtype_bytes(self): compare = 'string' if PY2 else 'bytes' @@ -39,68 +39,197 @@ def test_infer_dtype_bytes(self): arr = arr.astype(object) self.assertEqual(pd.lib.infer_dtype(arr), compare) - -class TestIsscalar(tm.TestCase): + def test_maybe_indices_to_slice_left_edge(self): + target = np.arange(100) + + # slice + indices = np.array([], dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + for end in [1, 2, 5, 20, 99]: + for step in [1, 2, 4]: + indices = np.arange(0, end, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = 
lib.maybe_indices_to_slice(indices, len(target)) + self.assertFalse(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(maybe_slice, indices) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_right_edge(self): + target = np.arange(100) + + # slice + for start in [0, 2, 5, 20, 97, 98]: + for step in [1, 2, 4]: + indices = np.arange(start, 99, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + indices = np.array([97, 98, 99, 100], dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertFalse(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(maybe_slice, indices) + with self.assertRaises(IndexError): + target[indices] + with self.assertRaises(IndexError): + target[maybe_slice] + + indices = np.array([100, 99, 98, 97], dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertFalse(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(maybe_slice, indices) + with self.assertRaises(IndexError): + target[indices] + with self.assertRaises(IndexError): + target[maybe_slice] + + for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertFalse(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(maybe_slice, indices) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_both_edges(self): + target = np.arange(10) + + # slice + for 
step in [1, 2, 4, 5, 8, 9]: + indices = np.arange(0, 9, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertFalse(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(maybe_slice, indices) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_middle(self): + target = np.arange(100) + + # slice + for start, end in [(2, 10), (5, 25), (65, 97)]: + for step in [1, 2, 4, 20]: + indices = np.arange(start, end, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertTrue(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + self.assertFalse(isinstance(maybe_slice, slice)) + self.assert_numpy_array_equal(maybe_slice, indices) + self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + +class Testisscalar(tm.TestCase): def test_isscalar_builtin_scalars(self): - self.assertTrue(isscalar(None)) - self.assertTrue(isscalar(True)) - 
self.assertTrue(isscalar(False)) - self.assertTrue(isscalar(0.)) - self.assertTrue(isscalar(np.nan)) - self.assertTrue(isscalar('foobar')) - self.assertTrue(isscalar(b'foobar')) - self.assertTrue(isscalar(u('efoobar'))) - self.assertTrue(isscalar(datetime(2014, 1, 1))) - self.assertTrue(isscalar(date(2014, 1, 1))) - self.assertTrue(isscalar(time(12, 0))) - self.assertTrue(isscalar(timedelta(hours=1))) - self.assertTrue(isscalar(pd.NaT)) + self.assertTrue(lib.isscalar(None)) + self.assertTrue(lib.isscalar(True)) + self.assertTrue(lib.isscalar(False)) + self.assertTrue(lib.isscalar(0.)) + self.assertTrue(lib.isscalar(np.nan)) + self.assertTrue(lib.isscalar('foobar')) + self.assertTrue(lib.isscalar(b'foobar')) + self.assertTrue(lib.isscalar(u('efoobar'))) + self.assertTrue(lib.isscalar(datetime(2014, 1, 1))) + self.assertTrue(lib.isscalar(date(2014, 1, 1))) + self.assertTrue(lib.isscalar(time(12, 0))) + self.assertTrue(lib.isscalar(timedelta(hours=1))) + self.assertTrue(lib.isscalar(pd.NaT)) def test_isscalar_builtin_nonscalars(self): - self.assertFalse(isscalar({})) - self.assertFalse(isscalar([])) - self.assertFalse(isscalar([1])) - self.assertFalse(isscalar(())) - self.assertFalse(isscalar((1,))) - self.assertFalse(isscalar(slice(None))) - self.assertFalse(isscalar(Ellipsis)) + self.assertFalse(lib.isscalar({})) + self.assertFalse(lib.isscalar([])) + self.assertFalse(lib.isscalar([1])) + self.assertFalse(lib.isscalar(())) + self.assertFalse(lib.isscalar((1,))) + self.assertFalse(lib.isscalar(slice(None))) + self.assertFalse(lib.isscalar(Ellipsis)) def test_isscalar_numpy_array_scalars(self): - self.assertTrue(isscalar(np.int64(1))) - self.assertTrue(isscalar(np.float64(1.))) - self.assertTrue(isscalar(np.int32(1))) - self.assertTrue(isscalar(np.object_('foobar'))) - self.assertTrue(isscalar(np.str_('foobar'))) - self.assertTrue(isscalar(np.unicode_(u('foobar')))) - self.assertTrue(isscalar(np.bytes_(b'foobar'))) - 
self.assertTrue(isscalar(np.datetime64('2014-01-01'))) - self.assertTrue(isscalar(np.timedelta64(1, 'h'))) + self.assertTrue(lib.isscalar(np.int64(1))) + self.assertTrue(lib.isscalar(np.float64(1.))) + self.assertTrue(lib.isscalar(np.int32(1))) + self.assertTrue(lib.isscalar(np.object_('foobar'))) + self.assertTrue(lib.isscalar(np.str_('foobar'))) + self.assertTrue(lib.isscalar(np.unicode_(u('foobar')))) + self.assertTrue(lib.isscalar(np.bytes_(b'foobar'))) + self.assertTrue(lib.isscalar(np.datetime64('2014-01-01'))) + self.assertTrue(lib.isscalar(np.timedelta64(1, 'h'))) def test_isscalar_numpy_zerodim_arrays(self): for zerodim in [np.array(1), np.array('foobar'), np.array(np.datetime64('2014-01-01')), np.array(np.timedelta64(1, 'h'))]: - self.assertFalse(isscalar(zerodim)) - self.assertTrue(isscalar(item_from_zerodim(zerodim))) + self.assertFalse(lib.isscalar(zerodim)) + self.assertTrue(lib.isscalar(lib.item_from_zerodim(zerodim))) def test_isscalar_numpy_arrays(self): - self.assertFalse(isscalar(np.array([]))) - self.assertFalse(isscalar(np.array([[]]))) - self.assertFalse(isscalar(np.matrix('1; 2'))) + self.assertFalse(lib.isscalar(np.array([]))) + self.assertFalse(lib.isscalar(np.array([[]]))) + self.assertFalse(lib.isscalar(np.matrix('1; 2'))) def test_isscalar_pandas_scalars(self): - self.assertTrue(isscalar(pd.Timestamp('2014-01-01'))) - self.assertTrue(isscalar(pd.Timedelta(hours=1))) - self.assertTrue(isscalar(pd.Period('2014-01-01'))) - - def test_isscalar_pandas_containers(self): - self.assertFalse(isscalar(pd.Series())) - self.assertFalse(isscalar(pd.Series([1]))) - self.assertFalse(isscalar(pd.DataFrame())) - self.assertFalse(isscalar(pd.DataFrame([[1]]))) - self.assertFalse(isscalar(pd.Panel())) - self.assertFalse(isscalar(pd.Panel([[[1]]]))) - self.assertFalse(isscalar(pd.Index([]))) - self.assertFalse(isscalar(pd.Index([1]))) + self.assertTrue(lib.isscalar(pd.Timestamp('2014-01-01'))) + self.assertTrue(lib.isscalar(pd.Timedelta(hours=1))) + 
self.assertTrue(lib.isscalar(pd.Period('2014-01-01'))) + + def test_lisscalar_pandas_containers(self): + self.assertFalse(lib.isscalar(pd.Series())) + self.assertFalse(lib.isscalar(pd.Series([1]))) + self.assertFalse(lib.isscalar(pd.DataFrame())) + self.assertFalse(lib.isscalar(pd.DataFrame([[1]]))) + self.assertFalse(lib.isscalar(pd.Panel())) + self.assertFalse(lib.isscalar(pd.Panel([[[1]]]))) + self.assertFalse(lib.isscalar(pd.Index([]))) + self.assertFalse(lib.isscalar(pd.Index([1]))) + + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) \ No newline at end of file diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index b3d10a80e0b50..6d20b0128f164 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -163,17 +163,26 @@ def order(self, return_indexer=False, ascending=True): return sorted_index, _as else: sorted_values = np.sort(self.values) + attribs = self._get_attributes_dict() + freq = attribs['freq'] + from pandas.tseries.period import PeriodIndex + if freq is not None and not isinstance(self, PeriodIndex): + if freq.n > 0 and not ascending: + freq = freq * -1 + elif freq.n < 0 and ascending: + freq = freq * -1 + attribs['freq'] = freq + if not ascending: sorted_values = sorted_values[::-1] - attribs = self._get_attributes_dict() - attribs['freq'] = None + return self._simple_new(sorted_values, **attribs) def take(self, indices, axis=0): """ Analogous to ndarray.take """ - maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices)) + maybe_slice = lib.maybe_indices_to_slice(com._ensure_int64(indices), len(self)) if isinstance(maybe_slice, slice): return self[maybe_slice] return super(DatetimeIndexOpsMixin, self).take(indices, axis) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 8ee6a1bc64e4e..5471bc076341b 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -180,8 +180,8 @@ def _join_i8_wrapper(joinf, 
**kwargs): tz = None offset = None - _comparables = ['name','freqstr','tz'] - _attributes = ['name','freq','tz'] + _comparables = ['name', 'freqstr', 'tz'] + _attributes = ['name', 'freq', 'tz'] _datetimelike_ops = ['year','month','day','hour','minute','second', 'weekofyear','week','dayofweek','weekday','dayofyear','quarter', 'days_in_month', 'daysinmonth', 'date','time','microsecond','nanosecond','is_month_start','is_month_end', @@ -1550,7 +1550,7 @@ def delete(self, loc): freq = self.freq else: if com.is_list_like(loc): - loc = lib.maybe_indices_to_slice(com._ensure_int64(np.array(loc))) + loc = lib.maybe_indices_to_slice(com._ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 6413ce9cd5a03..bb0eda8260704 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -267,7 +267,11 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result = object.__new__(cls) result._data = values result.name = name + + if freq is None: + raise ValueError('freq not specified') result.freq = freq + result._reset_identity() return result diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index f1871e78e21a1..d7172dd304b6b 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -126,8 +126,8 @@ def _join_i8_wrapper(joinf, **kwargs): _engine_type = _index.TimedeltaEngine - _comparables = ['name','freq'] - _attributes = ['name','freq'] + _comparables = ['name', 'freq'] + _attributes = ['name', 'freq'] _is_numeric_dtype = True freq = None @@ -853,7 +853,7 @@ def delete(self, loc): freq = self.freq else: if com.is_list_like(loc): - loc = lib.maybe_indices_to_slice(com._ensure_int64(np.array(loc))) + loc = lib.maybe_indices_to_slice(com._ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if (loc.start in (0, None) or 
loc.stop in (len(self), None)): freq = self.freq diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 1b38f51ed4f71..3d9e80f351c44 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -8,6 +8,7 @@ from pandas.tseries.common import is_datetimelike from pandas import (Series, Index, Int64Index, Timestamp, DatetimeIndex, PeriodIndex, TimedeltaIndex, Timedelta, timedelta_range, date_range, Float64Index) +import pandas.tseries.offsets as offsets import pandas.tslib as tslib import nose @@ -297,6 +298,72 @@ def test_nonunique_contains(self): ['2015', '2015', '2016'], ['2015', '2015', '2014'])): tm.assertIn(idx[0], idx) + def test_order(self): + # with freq + idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], freq='D', name='idx') + idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], + freq='H', tz='Asia/Tokyo', name='tzidx') + + for idx in [idx1, idx2]: + ordered = idx.order() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = idx.order(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, np.array([0, 1, 2])) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 0])) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + # without freq + idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], name='idx1') + exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], 
name='idx1') + + idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], + tz='Asia/Tokyo', name='idx2') + exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], + tz='Asia/Tokyo', name='idx2') + + idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05', + '2011-01-02', pd.NaT], name='idx3') + exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', + '2011-01-05'], name='idx3') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: + ordered = idx.order() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.order(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, np.array([0, 4, 3, 1, 2])) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 3, 4, 0])) + self.assertIsNone(ordered.freq) + def test_getitem(self): idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') idx2 = pd.date_range('2011-01-01', '2011-01-31', freq='D', tz='Asia/Tokyo', name='idx') @@ -318,7 +385,7 @@ def test_getitem(self): self.assertEqual(result.freq, expected.freq) result = idx[-20:-5:3] - expected = pd.date_range('2011-01-12', '2011-01-25', freq='3D', + expected = pd.date_range('2011-01-12', '2011-01-24', freq='3D', tz=idx.tz, name='idx') self.assert_index_equal(result, expected) self.assertEqual(result.freq, expected.freq) @@ -343,6 +410,45 @@ def test_drop_duplicates_metadata(self): self.assert_index_equal(idx, result) self.assertIsNone(result.freq) + def test_take(self): + #GH 10295 + idx1 = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx') + idx2 = 
pd.date_range('2011-01-01', '2011-01-31', freq='D', tz='Asia/Tokyo', name='idx') + + for idx in [idx1, idx2]: + result = idx.take([0]) + self.assertEqual(result, pd.Timestamp('2011-01-01', tz=idx.tz)) + + result = idx.take([0, 1, 2]) + expected = pd.date_range('2011-01-01', '2011-01-03', freq='D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.date_range('2011-01-01', '2011-01-05', freq='2D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.date_range('2011-01-08', '2011-01-02', freq='-3D', + tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = DatetimeIndex(['2011-01-04', '2011-01-03', '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = DatetimeIndex(['2011-01-29', '2011-01-03', '2011-01-06'], + freq=None, tz=idx.tz, name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + class TestTimedeltaIndexOps(Ops): @@ -762,7 +868,7 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), expected) idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', '1 days 09:00:00', - '1 days 08:00:00', '1 days 08:00:00', pd.NaT]) + '1 days 08:00:00', '1 days 08:00:00', pd.NaT]) exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) expected = Series([3, 2], index=exp_idx) @@ -788,6 +894,66 @@ def test_unknown_attribute(self): self.assertNotIn('foo',ts.__dict__.keys()) self.assertRaises(AttributeError,lambda : ts.foo) + def test_order(self): + #GH 10295 + idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', name='idx') + idx2 = TimedeltaIndex(['1 hour', '2 
hour', '3 hour'], freq='H', name='idx') + + for idx in [idx1, idx2]: + ordered = idx.order() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = idx.order(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, np.array([0, 1, 2])) + self.assertEqual(ordered.freq, idx.freq) + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + self.assert_index_equal(ordered, idx[::-1]) + self.assertEqual(ordered.freq, expected.freq) + self.assertEqual(ordered.freq.n, -1) + + idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour', + '2 hour ', '1 hour'], name='idx1') + exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour', + '3 hour', '5 hour'], name='idx1') + + idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', + '2 day', '1 day'], name='idx2') + exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', + '3 day', '5 day'], name='idx2') + + idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', + '2 minute', pd.NaT], name='idx3') + exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', + '5 minute'], name='idx3') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: + ordered = idx.order() + self.assert_index_equal(ordered, expected) + self.assertIsNone(ordered.freq) + + ordered = idx.order(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, np.array([0, 4, 3, 1, 2])) + self.assertIsNone(ordered.freq) + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 
3, 4, 0])) + self.assertIsNone(ordered.freq) + def test_getitem(self): idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') @@ -806,7 +972,7 @@ def test_getitem(self): self.assertEqual(result.freq, expected.freq) result = idx[-20:-5:3] - expected = pd.timedelta_range('12 day', '25 day', freq='3D', name='idx') + expected = pd.timedelta_range('12 day', '24 day', freq='3D', name='idx') self.assert_index_equal(result, expected) self.assertEqual(result.freq, expected.freq) @@ -829,6 +995,42 @@ def test_drop_duplicates_metadata(self): self.assert_index_equal(idx, result) self.assertIsNone(result.freq) + def test_take(self): + #GH 10295 + idx1 = pd.timedelta_range('1 day', '31 day', freq='D', name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Timedelta('1 day')) + + result = idx.take([-1]) + self.assertEqual(result, pd.Timedelta('31 day')) + + result = idx.take([0, 1, 2]) + expected = pd.timedelta_range('1 day', '3 day', freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.timedelta_range('1 day', '5 day', freq='2D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.timedelta_range('8 day', '2 day', freq='-3D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(['4 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(['29 day', '3 day', '6 day'], name='idx') + self.assert_index_equal(result, expected) + self.assertIsNone(result.freq) + class TestPeriodIndexOps(Ops): @@ -1268,6 +1470,209 @@ def test_drop_duplicates_metadata(self): self.assert_index_equal(idx, result) 
self.assertEqual(idx.freq, result.freq) + def test_order_compat(self): + + def _check_freq(index, expected_index): + if isinstance(index, PeriodIndex): + self.assertEqual(index.freq, expected_index.freq) + + pidx = PeriodIndex(['2011', '2012', '2013'], name='pidx', freq='A') + # for compatibility check + iidx = Index([2011, 2012, 2013], name='idx') + for idx in [pidx, iidx]: + ordered = idx.order() + self.assert_index_equal(ordered, idx) + _check_freq(ordered, idx) + + ordered = idx.order(ascending=False) + self.assert_index_equal(ordered, idx[::-1]) + _check_freq(ordered, idx[::-1]) + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, np.array([0, 1, 2])) + _check_freq(ordered, idx) + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + self.assert_index_equal(ordered, idx[::-1]) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 0])) + _check_freq(ordered, idx[::-1]) + + pidx = PeriodIndex(['2011', '2013', '2015', '2012', '2011'], name='pidx', freq='A') + pexpected = PeriodIndex(['2011', '2011', '2012', '2013', '2015'], name='pidx', freq='A') + # for compatibility check + iidx = Index([2011, 2013, 2015, 2012, 2011], name='idx') + iexpected = Index([2011, 2011, 2012, 2013, 2015], name='idx') + for idx, expected in [(pidx, pexpected), (iidx, iexpected)]: + ordered = idx.order() + self.assert_index_equal(ordered, expected) + _check_freq(ordered, idx) + + ordered = idx.order(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + _check_freq(ordered, idx) + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, np.array([0, 4, 3, 1, 2])) + _check_freq(ordered, idx) + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 3, 4, 0])) + 
_check_freq(ordered, idx) + + pidx = PeriodIndex(['2011', '2013', 'NaT', '2011'], name='pidx', freq='D') + + result = pidx.order() + expected = PeriodIndex(['NaT', '2011', '2011', '2013'], name='pidx', freq='D') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, 'D') + + result = pidx.order(ascending=False) + expected = PeriodIndex(['2013', '2011', '2011', 'NaT'], name='pidx', freq='D') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, 'D') + + def test_order(self): + idx1 = PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + freq='D', name='idx') + + for idx in [idx1]: + ordered = idx.order() + self.assert_index_equal(ordered, idx) + self.assertEqual(ordered.freq, idx.freq) + + ordered = idx.order(ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, 'D') + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, idx) + self.assert_numpy_array_equal(indexer, np.array([0, 1, 2])) + self.assertEqual(ordered.freq, 'D') + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + expected = idx[::-1] + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 0])) + self.assertEqual(ordered.freq, 'D') + + idx1 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], freq='D', name='idx1') + exp1 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], freq='D', name='idx1') + + idx2 = PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-02', '2011-01-01'], + freq='D', name='idx2') + exp2 = PeriodIndex(['2011-01-01', '2011-01-01', '2011-01-02', + '2011-01-03', '2011-01-05'], + freq='D', name='idx2') + + idx3 = PeriodIndex([pd.NaT, '2011-01-03', '2011-01-05', + '2011-01-02', pd.NaT], freq='D', name='idx3') + exp3 = PeriodIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03', + '2011-01-05'], 
freq='D', name='idx3') + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: + ordered = idx.order() + self.assert_index_equal(ordered, expected) + self.assertEqual(ordered.freq, 'D') + + ordered = idx.order(ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assertEqual(ordered.freq, 'D') + + ordered, indexer = idx.order(return_indexer=True) + self.assert_index_equal(ordered, expected) + self.assert_numpy_array_equal(indexer, np.array([0, 4, 3, 1, 2])) + self.assertEqual(ordered.freq, 'D') + + ordered, indexer = idx.order(return_indexer=True, ascending=False) + self.assert_index_equal(ordered, expected[::-1]) + self.assert_numpy_array_equal(indexer, np.array([2, 1, 3, 4, 0])) + self.assertEqual(ordered.freq, 'D') + + def test_getitem(self): + idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx') + + for idx in [idx1]: + result = idx[0] + self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + + result = idx[-1] + self.assertEqual(result, pd.Period('2011-01-31', freq='D')) + + result = idx[0:5] + expected = pd.period_range('2011-01-01', '2011-01-05', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[0:10:2] + expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05', + '2011-01-07', '2011-01-09'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[-20:-5:3] + expected = pd.PeriodIndex(['2011-01-12', '2011-01-15', '2011-01-18', + '2011-01-21', '2011-01-24'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx[4::-1] + expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03', + '2011-01-02', '2011-01-01'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + def test_take(self): + #GH 
10295 + idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx') + + for idx in [idx1]: + result = idx.take([0]) + self.assertEqual(result, pd.Period('2011-01-01', freq='D')) + + result = idx.take([5]) + self.assertEqual(result, pd.Period('2011-01-06', freq='D')) + + result = idx.take([0, 1, 2]) + expected = pd.period_range('2011-01-01', '2011-01-03', freq='D', + name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([0, 2, 4]) + expected = pd.PeriodIndex(['2011-01-01', '2011-01-03', '2011-01-05'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([7, 4, 1]) + expected = pd.PeriodIndex(['2011-01-08', '2011-01-05', '2011-01-02'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([3, 2, 5]) + expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + + result = idx.take([-3, 2, 5]) + expected = PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'], + freq='D', name='idx') + self.assert_index_equal(result, expected) + self.assertEqual(result.freq, expected.freq) + if __name__ == '__main__': import nose