From 04de5118a8c92c8baa6ed38af48b20523664cbda Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 27 Sep 2018 15:26:29 +0900 Subject: [PATCH 1/2] BUG: DatetimeIndex slicing with boolean Index raises TypeError --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 2 +- .../tests/indexes/datetimes/test_indexing.py | 22 ++++++++++++ pandas/tests/indexes/multi/test_indexing.py | 27 +++++++++++++++ pandas/tests/indexes/test_base.py | 22 ++++++++++++ pandas/tests/indexes/test_numeric.py | 22 ++++++++++++ pandas/tests/test_base.py | 34 ++++++++++++++++--- 9 files changed, 127 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 149d618c4a621..41ed6130f4077 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1221,7 +1221,7 @@ Indexing - Bug in `MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`) - Bug where setting a timedelta column by ``Index`` causes it to be casted to double, and therefore lose precision (:issue:`23511`) - Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) - +- Bug in :class:`Index` slicing with boolean :class:`Index` may raise ``TypeError`` (:issue:`22533`) Missing ^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 92de1fe2e0679..7f1c86938a354 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -161,7 +161,7 @@ def __getitem__(self, key): return self._box_func(val) if com.is_bool_indexer(key): - key = np.asarray(key) + key = np.asarray(key, dtype=bool) if key.all(): key = slice(0, None, None) else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6e65d6899787f..fcced091b3794 100644 --- 
a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2078,7 +2078,7 @@ def __getitem__(self, key): return promote(getitem(key)) if com.is_bool_indexer(key): - key = np.asarray(key) + key = np.asarray(key, dtype=bool) key = com.values_from_object(key) result = getitem(key) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c694289efc493..9c981c24190a4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1614,7 +1614,7 @@ def __getitem__(self, key): return tuple(retval) else: if com.is_bool_indexer(key): - key = np.asarray(key) + key = np.asarray(key, dtype=bool) sortorder = self.sortorder else: # cannot be sure whether the result will be sorted diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index b66475612fe40..e1a75f2e86eac 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -603,3 +603,25 @@ def test_get_loc_nat(self): # GH#20464 index = DatetimeIndex(['1/3/2000', 'NaT']) assert index.get_loc(pd.NaT) == 1 + + @pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) + @pytest.mark.parametrize('ind2', [[True, False, True, False, False], + pd.Index([True, False, True, False, + False])]) + def test_getitem_bool_index_all(self, ind1, ind2): + # GH#22533 + idx = pd.date_range('2011-01-01', '2011-01-05', freq='D', name='idx') + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.DatetimeIndex(['2011-01-01', '2011-01-03'], name='idx') + tm.assert_index_equal(idx[ind2], expected) + + @pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) + @pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) + def test_getitem_bool_index_single(self, ind1, ind2): + # GH#22533 + idx = pd.DatetimeIndex(['2011-01-01'], name='idx') + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.DatetimeIndex([], name='idx') + tm.assert_index_equal(idx[ind2], expected) 
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 2b5f16b0ea0c8..563027364134d 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -226,6 +226,33 @@ def test_get_indexer_consistency(idx): assert indexer.dtype == np.intp +@pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) +@pytest.mark.parametrize('ind2', [[True, False, True, False, False], + pd.Index([True, False, True, False, + False])]) +def test_getitem_bool_index_all(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), + (40, 4), (50, 5)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) + tm.assert_index_equal(idx[ind2], expected) + + +@pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) +@pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) +def test_getitem_bool_index_single(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.MultiIndex(levels=[np.array([], dtype=np.int64), + np.array([], dtype=np.int64)], + labels=[[], []]) + tm.assert_index_equal(idx[ind2], expected) + + def test_get_loc(idx): assert idx.get_loc(('foo', 'two')) == 1 assert idx.get_loc(('baz', 'two')) == 3 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 724dffc49dd3b..7ace163dbd13d 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -718,6 +718,28 @@ def test_getitem_error(self, indices, itm): with pytest.raises(IndexError): indices[itm] + @pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) + @pytest.mark.parametrize('ind2', [[True, False, True, False, False], + pd.Index([True, False, True, False, + False])]) + def test_getitem_bool_index_all(self, ind1, ind2): + # GH#22533 + idx = pd.Index(['a', 'b', 'c', 'd', 'e'], name='idx') + 
tm.assert_index_equal(idx[ind1], idx) + + expected = pd.Index(['a', 'c'], name='idx') + tm.assert_index_equal(idx[ind2], expected) + + @pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) + @pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) + def test_getitem_bool_index_single(self, ind1, ind2): + # GH#22533 + idx = pd.Index(['a'], name='idx') + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.Index([], name='idx') + tm.assert_index_equal(idx[ind2], expected) + def test_intersection(self): first = self.strIndex[:20] second = self.strIndex[:10] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 8373cbc89149a..49a5f55e57c1f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -839,6 +839,28 @@ def test_join_outer(self): tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) + @pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) + @pytest.mark.parametrize('ind2', [[True, False, True, False, False], + pd.Index([True, False, True, False, + False])]) + def test_getitem_bool_index_all(self, ind1, ind2): + # GH#22533 + idx = pd.Int64Index([1, 2, 3, 4, 5], name='idx') + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.Int64Index([1, 3], name='idx') + tm.assert_index_equal(idx[ind2], expected) + + @pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) + @pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) + def test_getitem_bool_index_single(self, ind1, ind2): + # GH#22533 + idx = pd.Int64Index([1], name='idx') + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.Int64Index([], name='idx') + tm.assert_index_equal(idx[ind2], expected) + class TestUInt64Index(NumericInt): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index fe2956adc35af..07d357b70f94b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -178,19 +178,20 @@ def setup_method(self, method): 
self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) + self.bool_series = Series(arr, index=self.bool_index, name='a') self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') + self.unicode_series = Series(arr, index=self.unicode_index, name='a') types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode'] - fmts = ["{0}_{1}".format(t, f) - for t in types for f in ['index', 'series']] - self.objs = [getattr(self, f) - for f in fmts if getattr(self, f, None) is not None] + self.indexes = [getattr(self, '{}_index'.format(t)) for t in types] + self.series = [getattr(self, '{}_series'.format(t)) for t in types] + self.objs = self.indexes + self.series def check_ops_properties(self, props, filter=None, ignore_failures=False): for op in props: @@ -997,6 +998,31 @@ def test_validate_bool_args(self): with pytest.raises(ValueError): self.int_series.drop_duplicates(inplace=value) + def test_getitem(self): + for i in self.indexes: + s = pd.Series(i) + + assert i[0] == s.iloc[0] + assert i[5] == s.iloc[5] + assert i[-1] == s.iloc[-1] + + assert i[-1] == i[9] + + pytest.raises(IndexError, i.__getitem__, 20) + pytest.raises(IndexError, s.iloc.__getitem__, 20) + + @pytest.mark.parametrize('indexer_klass', [list, pd.Index]) + @pytest.mark.parametrize('indexer', [[True] * 10, [False] * 10, + [True, False, True, True, False, + False, True, True, False, True]]) + def test_bool_indexing(self, indexer_klass, indexer): + # GH 22533 + for idx in self.indexes: + exp_idx = [i for i in range(len(indexer)) if indexer[i]] + tm.assert_index_equal(idx[indexer_klass(indexer)], idx[exp_idx]) + s = pd.Series(idx) + 
tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx]) + class TestTranspose(Ops): errmsg = "the 'axes' parameter is not supported" From 420e5c4d4b6164a88bb2c9478dcdb9299190a131 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 1 Nov 2018 10:00:07 +0900 Subject: [PATCH 2/2] remove duplicated tests --- .../tests/indexes/datetimes/test_indexing.py | 22 ------------------- pandas/tests/indexes/test_base.py | 22 ------------------- pandas/tests/indexes/test_numeric.py | 22 ------------------- 3 files changed, 66 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index e1a75f2e86eac..b66475612fe40 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -603,25 +603,3 @@ def test_get_loc_nat(self): # GH#20464 index = DatetimeIndex(['1/3/2000', 'NaT']) assert index.get_loc(pd.NaT) == 1 - - @pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) - @pytest.mark.parametrize('ind2', [[True, False, True, False, False], - pd.Index([True, False, True, False, - False])]) - def test_getitem_bool_index_all(self, ind1, ind2): - # GH#22533 - idx = pd.date_range('2011-01-01', '2011-01-05', freq='D', name='idx') - tm.assert_index_equal(idx[ind1], idx) - - expected = pd.DatetimeIndex(['2011-01-01', '2011-01-03'], name='idx') - tm.assert_index_equal(idx[ind2], expected) - - @pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) - @pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) - def test_getitem_bool_index_single(self, ind1, ind2): - # GH#22533 - idx = pd.DatetimeIndex(['2011-01-01'], name='idx') - tm.assert_index_equal(idx[ind1], idx) - - expected = pd.DatetimeIndex([], name='idx') - tm.assert_index_equal(idx[ind2], expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 7ace163dbd13d..724dffc49dd3b 100644 --- a/pandas/tests/indexes/test_base.py +++ 
b/pandas/tests/indexes/test_base.py @@ -718,28 +718,6 @@ def test_getitem_error(self, indices, itm): with pytest.raises(IndexError): indices[itm] - @pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) - @pytest.mark.parametrize('ind2', [[True, False, True, False, False], - pd.Index([True, False, True, False, - False])]) - def test_getitem_bool_index_all(self, ind1, ind2): - # GH#22533 - idx = pd.Index(['a', 'b', 'c', 'd', 'e'], name='idx') - tm.assert_index_equal(idx[ind1], idx) - - expected = pd.Index(['a', 'c'], name='idx') - tm.assert_index_equal(idx[ind2], expected) - - @pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) - @pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) - def test_getitem_bool_index_single(self, ind1, ind2): - # GH#22533 - idx = pd.Index(['a'], name='idx') - tm.assert_index_equal(idx[ind1], idx) - - expected = pd.Index([], name='idx') - tm.assert_index_equal(idx[ind2], expected) - def test_intersection(self): first = self.strIndex[:20] second = self.strIndex[:10] diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 49a5f55e57c1f..8373cbc89149a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -839,28 +839,6 @@ def test_join_outer(self): tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) - @pytest.mark.parametrize('ind1', [[True] * 5, pd.Index([True] * 5)]) - @pytest.mark.parametrize('ind2', [[True, False, True, False, False], - pd.Index([True, False, True, False, - False])]) - def test_getitem_bool_index_all(self, ind1, ind2): - # GH#22533 - idx = pd.Int64Index([1, 2, 3, 4, 5], name='idx') - tm.assert_index_equal(idx[ind1], idx) - - expected = pd.Int64Index([1, 3], name='idx') - tm.assert_index_equal(idx[ind2], expected) - - @pytest.mark.parametrize('ind1', [[True], pd.Index([True])]) - @pytest.mark.parametrize('ind2', [[False], pd.Index([False])]) - def 
test_getitem_bool_index_single(self, ind1, ind2): - # GH#22533 - idx = pd.Int64Index([1], name='idx') - tm.assert_index_equal(idx[ind1], idx) - - expected = pd.Int64Index([], name='idx') - tm.assert_index_equal(idx[ind2], expected) - class TestUInt64Index(NumericInt):