From 9095693a213f9ff5b19e540113530e16837ef421 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Oct 2020 19:53:18 -0700 Subject: [PATCH 1/4] BUG: IntervalIndex.take without fill_value --- pandas/core/arrays/_mixins.py | 7 ++++++- pandas/core/arrays/interval.py | 4 +++- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/interval.py | 7 ------- pandas/tests/indexes/categorical/test_astype.py | 2 +- pandas/tests/indexes/common.py | 11 +++++++++++ 6 files changed, 22 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 948ffdc1f7c01..5edab7fe81bf9 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -51,12 +51,17 @@ def take( indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None, + axis: int = 0, ) -> _T: if allow_fill: fill_value = self._validate_fill_value(fill_value) new_data = take( - self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value + self._ndarray, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + axis=axis, ) return self._from_backing_data(new_data) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 09488b9576212..161cf3bf3a677 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -544,7 +544,9 @@ def __eq__(self, other): if is_interval_dtype(other_dtype): if self.closed != other.categories.closed: return np.zeros(len(self), dtype=bool) - other = other.categories.take(other.codes) + other = other.categories.take( + other.codes, allow_fill=True, fill_value=other.categories._na_value + ) # interval-like -> need same closed and matching endpoints if is_interval_dtype(other_dtype): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f336eec8c4cce..fc7cf110738cf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -749,7 +749,7 @@ def _assert_take_fillable( values, indices, allow_fill=allow_fill, fill_value=na_value ) else: - taken = values.take(indices) + taken = algos.take(values, indices, allow_fill=False, fill_value=na_value) return taken _index_shared_docs[ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index cb25ef1241ce0..dd524641dba2f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -921,13 +921,6 @@ def insert(self, loc, item): result = IntervalArray.from_arrays(new_left, new_right, closed=self.closed) return self._shallow_copy(result) - @Appender(_index_shared_docs["take"] % _index_doc_kwargs) - def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): - result = self._data.take( - indices, axis=axis, allow_fill=allow_fill, fill_value=fill_value, **kwargs - ) - return self._shallow_copy(result) - # -------------------------------------------------------------------- # Rendering Methods # __repr__ associated methods are based on MultiIndex diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index a4a0cb1978325..44c4bcc951194 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -25,7 +25,7 @@ def test_astype(self): ) result = ci.astype("interval") - expected = ii.take([0, 1, -1]) + expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan) tm.assert_index_equal(result, expected) result = IntervalIndex(result.values) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index d6d8854f4f78a..13993f1112cb6 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -407,6 +407,17 @@ def test_take_invalid_kwargs(self): with pytest.raises(ValueError, match=msg): idx.take(indices, mode="clip") + def test_take_minus1_without_fill(self, index): + # -1 does not get treated as NA unless allow_fill=True is passed + if len(index) == 0: + # Test is not applicable + return + + result = index.take([0, 0, -1]) + + expected = index.take([0, 0, len(index) - 1]) + tm.assert_index_equal(result, expected) + def test_repeat(self): rep = 2 i = self.create_index() From c8b99a17389d411bb41384209b1f09ce66456fd4 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 22 Oct 2020 18:05:13 -0700 Subject: [PATCH 2/4] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cdd8e50d98077..2669765c0f9fb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -417,7 +417,7 @@ Strings Interval ^^^^^^^^ - +- Bug in :meth:`IntervalIndex.take` with negative indices and ``fill_value=None`` (:issue:`37330`) - - From 9d5168443f4c3126a2c1575d69865f07fed1f5fd Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 25 Oct 2020 08:24:59 -0700 Subject: [PATCH 3/4] dummy commit to force CI From 655983fa349a06abd1ead31f65f5872ec705afdd Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 29 Oct 2020 19:43:17 -0700 Subject: [PATCH 4/4] fix slow tests --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9c78166ce0480..812f914f83372 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -714,7 +714,7 @@ def value_counts( # lab is a Categorical with categories an IntervalIndex lab = cut(Series(val), bins, include_lowest=True) lev = lab.cat.categories - lab = lev.take(lab.cat.codes) + lab = lev.take(lab.cat.codes, allow_fill=True, fill_value=lev._na_value) llab = lambda lab, inc: lab[inc]._multiindex.codes[-1] if is_interval_dtype(lab.dtype):