Skip to content

Commit 151bdfe

Browse files
committed
updates
1 parent 3dd59ca commit 151bdfe

File tree

10 files changed

+26
-20
lines changed

10 files changed

+26
-20
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,7 @@ Datetimelike
706706
- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`)
707707
- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
708708
- Bug in :func:`pandas.to_datetime` failing for ``deque`` objects when using ``cache=True`` (the default) (:issue:`29403`)
709+
- Bug in datetimelike indexes and arrays not validating that the length of a boolean mask matches the array (:issue:`30308`)
709710
- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
710711
-
711712

pandas/core/arrays/datetimelike.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,9 @@ def __getitem__(self, key):
416416
return self._box_func(val)
417417

418418
if com.is_bool_indexer(key):
419-
key = np.asarray(key, dtype=bool)
419+
from pandas.core.indexing import check_bool_indexer
420+
421+
key = check_bool_indexer(self, key)
420422
if key.all():
421423
key = slice(0, None, None)
422424
else:

pandas/core/common.py

+6-13
Original file line numberDiff line numberDiff line change
@@ -112,35 +112,28 @@ def is_bool_indexer(key: Any) -> bool:
112112
bool
113113
Whether `key` is a valid boolean indexer.
114114
115-
Raises
116-
------
117-
ValueError
118-
When the array is an object-dtype ndarray or ExtensionArray
119-
and contains missing values.
115+
Notes
116+
-----
117+
This function is inexpensive for `bool` and `BooleanDtype`.
118+
It is expensive for object-dtype backed arrays. In this case
119+
a scan of the data to check that all the values are bool is
120+
needed.
120121
121122
See Also
122123
--------
123124
api.extensions.check_bool_array_indexer : Check that `key`
124125
is a valid mask for an array, and convert to an ndarray.
125126
"""
126-
na_msg = "cannot index with vector containing NA / NaN values"
127127
if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
128128
is_array_like(key) and is_extension_array_dtype(key.dtype)
129129
):
130130
if key.dtype == np.object_:
131131
key = np.asarray(values_from_object(key))
132132

133133
if not lib.is_bool_array(key):
134-
if isna(key).any():
135-
raise ValueError(na_msg)
136134
return False
137135
return True
138136
elif is_bool_dtype(key.dtype):
139-
# an ndarray with bool-dtype by definition has no missing values.
140-
# So we only need to check for NAs in ExtensionArrays
141-
if is_extension_array_dtype(key.dtype):
142-
if np.any(key.isna()):
143-
raise ValueError(na_msg)
144137
return True
145138
elif isinstance(key, list):
146139
try:

pandas/core/frame.py

-4
Original file line numberDiff line numberDiff line change
@@ -2799,10 +2799,6 @@ def _setitem_slice(self, key, value):
27992799
def _setitem_array(self, key, value):
28002800
# also raises Exception if object array with NA values
28012801
if com.is_bool_indexer(key):
2802-
if len(key) != len(self.index):
2803-
raise ValueError(
2804-
f"Item wrong length {len(key)} instead of {len(self.index)}!"
2805-
)
28062802
key = check_bool_indexer(self.index, key)
28072803
indexer = key.nonzero()[0]
28082804
self._check_setitem_copy()

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3998,6 +3998,8 @@ def __getitem__(self, key):
39983998
corresponding `Index` subclass.
39993999
40004000
"""
4001+
from pandas.core.indexing import check_bool_indexer
4002+
40014003
# There's no custom logic to be implemented in __getslice__, so it's
40024004
# not overloaded intentionally.
40034005
getitem = self._data.__getitem__
@@ -4013,7 +4015,7 @@ def __getitem__(self, key):
40134015
return promote(getitem(key))
40144016

40154017
if com.is_bool_indexer(key):
4016-
key = np.asarray(key, dtype=bool)
4018+
key = check_bool_indexer(self, key)
40174019

40184020
key = com.values_from_object(key)
40194021
result = getitem(key)

pandas/core/indexes/datetimes.py

+2
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
11151115
_has_same_tz = ea_passthrough(DatetimeArray._has_same_tz)
11161116

11171117
def __getitem__(self, key):
1118+
# if com.is_bool_indexer(key):
1119+
# breakpoint()
11181120
result = self._data.__getitem__(key)
11191121
if is_scalar(result):
11201122
return result

pandas/core/indexes/multi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
ensure_index,
4343
)
4444
from pandas.core.indexes.frozen import FrozenList
45+
from pandas.core.indexing import check_bool_indexer
4546
import pandas.core.missing as missing
4647
from pandas.core.sorting import (
4748
get_group_index,
@@ -1934,7 +1935,7 @@ def __getitem__(self, key):
19341935
return tuple(retval)
19351936
else:
19361937
if com.is_bool_indexer(key):
1937-
key = np.asarray(key, dtype=bool)
1938+
key = check_bool_indexer(self, key)
19381939
sortorder = self.sortorder
19391940
else:
19401941
# cannot be sure whether the result will be sorted

pandas/core/indexing.py

+3
Original file line numberDiff line numberDiff line change
@@ -1595,6 +1595,7 @@ def _validate_key(self, key, axis: int):
15951595
return
15961596

15971597
if com.is_bool_indexer(key):
1598+
# XXX: do we need to verify no NA here?
15981599
return
15991600

16001601
if not is_list_like_indexer(key):
@@ -1681,6 +1682,7 @@ def _getitem_axis(self, key, axis: int):
16811682
self._validate_key(key, axis)
16821683
return self._get_slice_axis(key, axis=axis)
16831684
elif com.is_bool_indexer(key):
1685+
# check_bool_indexer is called in _getbool_axis
16841686
return self._getbool_axis(key, axis=axis)
16851687
elif is_list_like_indexer(key):
16861688

@@ -2030,6 +2032,7 @@ def _getitem_axis(self, key, axis: int):
20302032
key = np.asarray(key)
20312033

20322034
if com.is_bool_indexer(key):
2035+
# check_bool_indexer is called in _getbool_axis
20332036
self._validate_key(key, axis)
20342037
return self._getbool_axis(key, axis=axis)
20352038

pandas/core/series.py

+1
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ def __getitem__(self, key):
879879
elif key is Ellipsis:
880880
return self
881881
elif com.is_bool_indexer(key):
882+
# We check later on.
882883
pass
883884
else:
884885

pandas/tests/indexes/common.py

+5
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,11 @@ def test_get_indexer_consistency(self, indices):
220220
assert isinstance(indexer, np.ndarray)
221221
assert indexer.dtype == np.intp
222222

223+
def test_getitem_mask_wrong_length(self, indices):
224+
mask = np.array([True])
225+
with pytest.raises(IndexError, match="Item wrong length 1"):
226+
indices[mask]
227+
223228
def test_ndarray_compat_properties(self):
224229
idx = self.create_index()
225230
assert idx.T.equals(idx)

0 commit comments

Comments
 (0)