Skip to content

Commit 6245e8c

Browse files
TST: add tests for take() on empty arrays (#20582)
1 parent 4a34497 commit 6245e8c

File tree

5 files changed

+88
-2
lines changed

5 files changed

+88
-2
lines changed

pandas/core/arrays/base.py

+18
Original file line numberDiff line numberDiff line change
@@ -458,11 +458,23 @@ def take(self, indexer, allow_fill=True, fill_value=None):
458458
Fill value to replace -1 values with. If applicable, this should
459459
use the sentinel missing value for this type.
460460
461+
Returns
462+
-------
463+
ExtensionArray
464+
465+
Raises
466+
------
467+
IndexError
468+
When the indexer is out of bounds for the array.
469+
461470
Notes
462471
-----
463472
This should follow pandas' semantics where -1 indicates missing values.
464473
Positions where indexer is ``-1`` should be filled with the missing
465474
value for this type.
475+
This gives rise to the special case of a take on an empty
476+
ExtensionArray that does not raise an IndexError straight away
477+
when the `indexer` is all ``-1``.
466478
467479
This is called by ``Series.__getitem__``, ``.loc``, ``.iloc``, when the
468480
indexer is a sequence of values.
@@ -477,6 +489,12 @@ def take(self, indexer, allow_fill=True, fill_value=None):
477489
def take(self, indexer, allow_fill=True, fill_value=None):
478490
indexer = np.asarray(indexer)
479491
mask = indexer == -1
492+
493+
# take on empty array not handled as desired by numpy
494+
# in case of -1 (all missing take)
495+
if not len(self) and mask.all():
496+
return type(self)([np.nan] * len(indexer))
497+
480498
result = self.data.take(indexer)
481499
result[mask] = np.nan # NA for this type
482500
return type(self)(result)

pandas/tests/extension/base/getitem.py

+47
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import pytest
12
import numpy as np
23

34
import pandas as pd
5+
import pandas.util.testing as tm
46

57
from .base import BaseExtensionTests
68

@@ -120,3 +122,48 @@ def test_take_sequence(self, data):
120122
assert result.iloc[0] == data[0]
121123
assert result.iloc[1] == data[1]
122124
assert result.iloc[2] == data[3]
125+
126+
def test_take(self, data, na_value, na_cmp):
127+
result = data.take([0, -1])
128+
assert result.dtype == data.dtype
129+
assert result[0] == data[0]
130+
na_cmp(result[1], na_value)
131+
132+
with tm.assert_raises_regex(IndexError, "out of bounds"):
133+
data.take([len(data) + 1])
134+
135+
def test_take_empty(self, data, na_value, na_cmp):
136+
empty = data[:0]
137+
result = empty.take([-1])
138+
na_cmp(result[0], na_value)
139+
140+
with tm.assert_raises_regex(IndexError, "cannot do a non-empty take"):
141+
empty.take([0, 1])
142+
143+
@pytest.mark.xfail(reason="Series.take with extension array buggy for -1")
144+
def test_take_series(self, data):
145+
s = pd.Series(data)
146+
result = s.take([0, -1])
147+
expected = pd.Series(
148+
data._constructor_from_sequence([data[0], data[len(data) - 1]]),
149+
index=[0, len(data) - 1])
150+
self.assert_series_equal(result, expected)
151+
152+
def test_reindex(self, data, na_value):
153+
s = pd.Series(data)
154+
result = s.reindex([0, 1, 3])
155+
expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3])
156+
self.assert_series_equal(result, expected)
157+
158+
n = len(data)
159+
result = s.reindex([-1, 0, n])
160+
expected = pd.Series(
161+
data._constructor_from_sequence([na_value, data[0], na_value]),
162+
index=[-1, 0, n])
163+
self.assert_series_equal(result, expected)
164+
165+
result = s.reindex([n, n + 1])
166+
expected = pd.Series(
167+
data._constructor_from_sequence([na_value, na_value]),
168+
index=[n, n + 1])
169+
self.assert_series_equal(result, expected)

pandas/tests/extension/category/test_categorical.py

+13
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,19 @@ def test_getitem_scalar(self):
8484
# to break things by changing.
8585
pass
8686

87+
@pytest.mark.xfail(reason="Categorical.take buggy")
88+
def test_take(self):
89+
# TODO remove this once Categorical.take is fixed
90+
pass
91+
92+
@pytest.mark.xfail(reason="Categorical.take buggy")
93+
def test_take_empty(self):
94+
pass
95+
96+
@pytest.mark.xfail(reason="test not written correctly for categorical")
97+
def test_reindex(self):
98+
pass
99+
87100

88101
class TestSetitem(base.BaseSetitemTests):
89102
pass

pandas/tests/extension/decimal/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ def take(self, indexer, allow_fill=True, fill_value=None):
8181
indexer = np.asarray(indexer)
8282
mask = indexer == -1
8383

84+
# take on empty array not handled as desired by numpy in case of -1
85+
if not len(self) and mask.all():
86+
return type(self)([self._na_value] * len(indexer))
87+
8488
indexer = _ensure_platform_int(indexer)
8589
out = self.values.take(indexer)
8690
out[mask] = self._na_value

pandas/tests/extension/json/array.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,12 @@ def isna(self):
8989
return np.array([x == self._na_value for x in self.data])
9090

9191
def take(self, indexer, allow_fill=True, fill_value=None):
92-
output = [self.data[loc] if loc != -1 else self._na_value
93-
for loc in indexer]
92+
try:
93+
output = [self.data[loc] if loc != -1 else self._na_value
94+
for loc in indexer]
95+
except IndexError:
96+
raise IndexError("Index is out of bounds or cannot do a "
97+
"non-empty take from an empty array.")
9498
return self._constructor_from_sequence(output)
9599

96100
def copy(self, deep=False):

0 commit comments

Comments
 (0)