Skip to content

Commit d94146c

Browse files
krsnik93 authored and TomAugspurger committed
BUG: Raise a ValueError when index and data lengths don't match (pandas-dev#26911)
* Raise a ValueError when index and data lengths don't match
1 parent 19d5d53 commit d94146c

File tree

4 files changed

+62
-15
lines changed

4 files changed

+62
-15
lines changed

doc/source/whatsnew/v0.25.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,8 @@ Indexing
673673
^^^^^^^^
674674

675675
- Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`).
676+
- Improved exception message when calling ``.iloc`` or ``.loc`` with a boolean indexer of a different length than the indexed axis (:issue:`26658`).
677+
- Bug in ``.iloc`` and ``.loc`` with a boolean indexer not raising an ``IndexError`` when too few items are passed (:issue:`26658`).
676678
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`).
677679
- Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`).
678680
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).

pandas/core/indexing.py

+37-12
Original file line numberDiff line numberDiff line change
@@ -2407,29 +2407,54 @@ def convert_to_index_sliceable(obj, key):
24072407
return None
24082408

24092409

2410-
def check_bool_indexer(ax, key):
2411-
# boolean indexing, need to check that the data are aligned, otherwise
2412-
# disallowed
2410+
def check_bool_indexer(index: Index, key) -> np.ndarray:
2411+
"""
2412+
Check if key is a valid boolean indexer for an object with such index and
2413+
perform reindexing or conversion if needed.
2414+
2415+
This function assumes that is_bool_indexer(key) == True.
2416+
2417+
Parameters
2418+
----------
2419+
index : Index
2420+
Index of the object on which the indexing is done
2421+
key : list-like
2422+
Boolean indexer to check
24132423
2414-
# this function assumes that is_bool_indexer(key) == True
2424+
Returns
2425+
-------
2426+
result: np.array
2427+
Resulting key
24152428
2429+
Raises
2430+
------
2431+
IndexError
2432+
If the key does not have the same length as index
2433+
2434+
IndexingError
2435+
If the index of the key is unalignable to index
2436+
2437+
"""
24162438
result = key
2417-
if isinstance(key, ABCSeries) and not key.index.equals(ax):
2418-
result = result.reindex(ax)
2439+
if isinstance(key, ABCSeries) and not key.index.equals(index):
2440+
result = result.reindex(index)
24192441
mask = isna(result._values)
24202442
if mask.any():
24212443
raise IndexingError('Unalignable boolean Series provided as '
24222444
'indexer (index of the boolean Series and of '
2423-
'the indexed object do not match')
2445+
'the indexed object do not match).')
24242446
result = result.astype(bool)._values
2425-
elif is_sparse(result):
2426-
result = result.to_dense()
2427-
result = np.asarray(result, dtype=bool)
24282447
else:
2429-
# is_bool_indexer has already checked for nulls in the case of an
2430-
# object array key, so no check needed here
2448+
if is_sparse(result):
2449+
result = result.to_dense()
24312450
result = np.asarray(result, dtype=bool)
24322451

2452+
# GH26658
2453+
if len(result) != len(index):
2454+
raise IndexError(
2455+
'Item wrong length {} instead of {}.'.format(len(result),
2456+
len(index)))
2457+
24332458
return result
24342459

24352460

pandas/tests/indexing/test_iloc.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,16 @@ def test_iloc_getitem_bool(self):
265265
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
266266
fails=IndexError)
267267

268+
@pytest.mark.parametrize('index', [[True, False],
269+
[True, False, True, False]])
270+
def test_iloc_getitem_bool_diff_len(self, index):
271+
# GH26658
272+
s = Series([1, 2, 3])
273+
with pytest.raises(IndexError,
274+
match=('Item wrong length {} instead of {}.'.format(
275+
len(index), len(s)))):
276+
_ = s.iloc[index]
277+
268278
def test_iloc_getitem_slice(self):
269279

270280
# slices
@@ -614,10 +624,10 @@ def test_iloc_mask(self):
614624
'cannot use an indexable as a mask'),
615625
('locs', ''): 'Unalignable boolean Series provided as indexer '
616626
'(index of the boolean Series and of the indexed '
617-
'object do not match',
627+
'object do not match).',
618628
('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
619629
'(index of the boolean Series and of the '
620-
'indexed object do not match',
630+
'indexed object do not match).',
621631
('locs', '.iloc'): ('iLocation based boolean indexing on an '
622632
'integer type is not available'),
623633
}

pandas/tests/indexing/test_loc.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,17 @@ def test_loc_getitem_bool(self):
194194
typs=['ints', 'uints', 'labels',
195195
'mixed', 'ts', 'floats'])
196196
self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'],
197-
fails=KeyError)
197+
fails=IndexError)
198+
199+
@pytest.mark.parametrize('index', [[True, False],
200+
[True, False, True, False]])
201+
def test_loc_getitem_bool_diff_len(self, index):
202+
# GH26658
203+
s = Series([1, 2, 3])
204+
with pytest.raises(IndexError,
205+
match=('Item wrong length {} instead of {}.'.format(
206+
len(index), len(s)))):
207+
_ = s.loc[index]
198208

199209
def test_loc_getitem_int_slice(self):
200210

0 commit comments

Comments
 (0)