Skip to content

CLN: dont special-case DatetimeArray indexing #36654

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 12, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ Indexing
- Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where int64 arrays are returned instead of intp. (:issue:`36359`)
- Bug in :meth:`DataFrame.sort_index` where parameter ascending passed as a list on a single level index gives wrong result. (:issue:`32334`)
- Bug in :meth:`DataFrame.reset_index` was incorrectly raising a ``ValueError`` for input with a :class:`MultiIndex` with missing values in a level with ``Categorical`` dtype (:issue:`24206`)
- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`)

Missing
^^^^^^^
Expand Down
23 changes: 5 additions & 18 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
import pandas.core.common as com
from pandas.core.construction import array, extract_array
from pandas.core.indexers import check_array_indexer, check_setitem_lengths
from pandas.core.indexers import check_setitem_lengths
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import invalid_comparison, make_invalid_op

Expand Down Expand Up @@ -284,23 +284,6 @@ def __getitem__(self, key):
result._freq = self._get_getitem_freq(key)
return result

def _validate_getitem_key(self, key):
if com.is_bool_indexer(key):
# first convert to boolean, because check_array_indexer doesn't
# allow object dtype
if is_object_dtype(key):
key = np.asarray(key, dtype=bool)

key = check_array_indexer(self, key)
key = lib.maybe_booleans_to_slice(key.view(np.uint8))
elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice):
# see https://github.com/pandas-dev/pandas/issues/31299, need to allow
# this for now (would otherwise raise in check_array_indexer)
pass
else:
key = super()._validate_getitem_key(key)
return key

def _get_getitem_freq(self, key):
"""
Find the `freq` attribute to assign to the result of a __getitem__ lookup.
Expand All @@ -322,6 +305,10 @@ def _get_getitem_freq(self, key):
# GH#21282 indexing with Ellipsis is similar to a full slice,
# should preserve `freq` attribute
freq = self.freq
elif com.is_bool_indexer(key):
new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
if isinstance(new_key, slice):
return self._get_getitem_freq(new_key)
return freq

def __setitem__(
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,14 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
indices = ensure_int64(indices)

maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
if isinstance(maybe_slice, slice):
return self[maybe_slice]

return ExtensionIndex.take(
result = ExtensionIndex.take(
self, indices, axis, allow_fill, fill_value, **kwargs
)
if isinstance(maybe_slice, slice):
freq = self._data._get_getitem_freq(maybe_slice)
result._data._freq = freq
return result

@doc(IndexOpsMixin.searchsorted, klass="Datetime-like Index")
def searchsorted(self, value, side="left", sorter=None):
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/series/indexing/test_boolean.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas import Index, Series
from pandas import Index, Series, date_range
import pandas._testing as tm
from pandas.core.indexing import IndexingError

Expand Down Expand Up @@ -128,3 +128,19 @@ def test_get_set_boolean_different_order(string_series):
sel = string_series[ordered > 0]
exp = string_series[string_series > 0]
tm.assert_series_equal(sel, exp)


def test_getitem_boolean_dt64_copies():
# GH#36210
dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
key = np.array([True, True, False, False])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you replicate the OP here as well (e.g. add the slice test for this too)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure i understand. this is exactly the example from the OP

Copy link
Contributor

@jreback jreback Oct 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the OP had 2 example inputs

ser1 = pd.Series(dti._data)
ser2 = pd.Series(range(4))


ser = Series(dti._data)

res = ser[key]
assert res._values._data.base is None

# compare with numeric case for reference
ser2 = Series(range(4))
res2 = ser2[key]
assert res2._values.base is None