diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bd3112403b31b..d08e8e009811a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -393,6 +393,7 @@ Indexing - Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where int64 arrays are returned instead of intp. (:issue:`36359`) - Bug in :meth:`DataFrame.sort_index` where parameter ascending passed as a list on a single level index gives wrong result. (:issue:`32334`) - Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`) +- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`) Missing ^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d6417bfe3f8e8..eb5e5b03fe243 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -65,7 +65,7 @@ from pandas.core.arrays._mixins import NDArrayBackedExtensionArray import pandas.core.common as com from pandas.core.construction import array, extract_array -from pandas.core.indexers import check_array_indexer, check_setitem_lengths +from pandas.core.indexers import check_setitem_lengths from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import invalid_comparison, make_invalid_op @@ -284,23 +284,6 @@ def __getitem__(self, key): result._freq = self._get_getitem_freq(key) return result - def _validate_getitem_key(self, key): - if com.is_bool_indexer(key): - # first convert to boolean, because check_array_indexer doesn't - # allow object dtype - if is_object_dtype(key): - key = np.asarray(key, dtype=bool) - - key = check_array_indexer(self, key) - key = lib.maybe_booleans_to_slice(key.view(np.uint8)) - elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice): - # see https://github.com/pandas-dev/pandas/issues/31299, need to allow - # this for now (would otherwise raise in check_array_indexer) - pass - else: - key = super()._validate_getitem_key(key) - return key - def _get_getitem_freq(self, key): """ Find the `freq` attribute to assign to the result of a __getitem__ lookup. @@ -322,6 +305,10 @@ def _get_getitem_freq(self, key): # GH#21282 indexing with Ellipsis is similar to a full slice, # should preserve `freq` attribute freq = self.freq + elif com.is_bool_indexer(key): + new_key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + if isinstance(new_key, slice): + return self._get_getitem_freq(new_key) return freq def __setitem__( diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5baa103a25d51..3845a601000a0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -190,12 +190,14 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): indices = ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) - if isinstance(maybe_slice, slice): - return self[maybe_slice] - return ExtensionIndex.take( + result = ExtensionIndex.take( self, indices, axis, allow_fill, fill_value, **kwargs ) + if isinstance(maybe_slice, slice): + freq = self._data._get_getitem_freq(maybe_slice) + result._data._freq = freq + return result @doc(IndexOpsMixin.searchsorted, klass="Datetime-like Index") def searchsorted(self, value, side="left", sorter=None): diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index e2b71b1f2f412..3f88f4193e770 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import Index, Series +from pandas import Index, Series, date_range import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -128,3 +128,19 @@ def test_get_set_boolean_different_order(string_series): sel = string_series[ordered > 0] exp = string_series[string_series > 0] tm.assert_series_equal(sel, exp) + + +def test_getitem_boolean_dt64_copies(): + # GH#36210 + dti = date_range("2016-01-01", periods=4, tz="US/Pacific") + key = np.array([True, True, False, False]) + + ser = Series(dti._data) + + res = ser[key] + assert res._values._data.base is None + + # compare with numeric case for reference + ser2 = Series(range(4)) + res2 = ser2[key] + assert res2._values.base is None