-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
BUG/DEPR: deprecate Categorical take default behaviour + fix Series[categorical].take #20841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
644138f
b874eed
07a561c
aa7266c
3a98ede
69701f2
f777fa1
d1c7e38
6056f7a
f7e0e6d
635db4f
12485c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
import numpy as np | ||
from warnings import warn | ||
import textwrap | ||
import types | ||
|
||
from pandas import compat | ||
|
@@ -29,7 +30,7 @@ | |
is_scalar, | ||
is_dict_like) | ||
|
||
from pandas.core.algorithms import factorize, take_1d, unique1d | ||
from pandas.core.algorithms import factorize, take_1d, unique1d, take | ||
from pandas.core.accessor import PandasDelegate | ||
from pandas.core.base import (PandasObject, | ||
NoNewAttributesMixin, _shared_docs) | ||
|
@@ -48,6 +49,17 @@ | |
from .base import ExtensionArray | ||
|
||
|
||
_take_msg = textwrap.dedent("""\ | ||
Interpreting negative values in 'indexer' as missing values. | ||
In the future, this will change to meaning positional indicies | ||
from the right. | ||
|
||
Use 'allow_fill=True' to retain the previous behavior and silence this | ||
warning. | ||
|
||
Use 'allow_fill=False' to accept the new behavior.""") | ||
|
||
|
||
def _cat_compare_op(op): | ||
def f(self, other): | ||
# On python2, you can usually compare any type to any type, and | ||
|
@@ -1732,17 +1744,25 @@ def fillna(self, value=None, method=None, limit=None): | |
return self._constructor(values, categories=self.categories, | ||
ordered=self.ordered, fastpath=True) | ||
|
||
def take_nd(self, indexer, allow_fill=True, fill_value=None): | ||
""" Take the codes by the indexer, fill with the fill_value. | ||
def take_nd(self, indexer, allow_fill=None, fill_value=None): | ||
""" | ||
Return the indices | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -> |
||
|
||
For internal compatibility with numpy arrays. | ||
""" | ||
indexer = np.asarray(indexer, dtype=np.intp) | ||
if allow_fill is None: | ||
if (indexer < 0).any(): | ||
warn(_take_msg, FutureWarning, stacklevel=2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe just put the warning inline here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That messes with the formatting. |
||
allow_fill = True | ||
|
||
# filling must always be None/nan here | ||
# but is passed thru internally | ||
assert isna(fill_value) | ||
if fill_value is None or isna(fill_value): | ||
# For backwards compatability, we have to override | ||
# any na values for `fill_value` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand this comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is to explain why we have the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh I understand. Will fix |
||
fill_value = -1 | ||
|
||
codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1) | ||
codes = take(self._codes, indexer, allow_fill=allow_fill, | ||
fill_value=fill_value) | ||
result = self._constructor(codes, categories=self.categories, | ||
ordered=self.ordered, fastpath=True) | ||
return result | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3499,7 +3499,14 @@ def _take(self, indices, axis=0, convert=True, is_copy=False): | |
|
||
indices = _ensure_platform_int(indices) | ||
new_index = self.index.take(indices) | ||
new_values = self._values.take(indices) | ||
|
||
if is_categorical_dtype(self): | ||
# https://github.com/pandas-dev/pandas/issues/20664 | ||
# TODO: remove when the default Categorical.take behavior changes | ||
kwargs = {'allow_fill': False} | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so you changed the default? it was True before There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fact that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Though since allow_fill isn't a keyword for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah nevermind. allow_fill isn't a keyword for ndarray.take. That's why the kwargs is needed. |
||
kwargs = {} | ||
new_values = self._values.take(indices, **kwargs) | ||
|
||
result = (self._constructor(new_values, index=new_index, | ||
fastpath=True).__finalize__(self)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,18 @@ | |
import pandas.util.testing as tm | ||
|
||
|
||
@pytest.fixture(params=[True, False]) | ||
def allow_fill(request): | ||
"""Boolean 'allow_fill' parameter for Categorical.take""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these can prob be moved into a higher level conftest There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do when that's necessary. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls make a conftest here, otherwise these are easily lost |
||
return request.param | ||
|
||
|
||
@pytest.fixture(params=[True, False]) | ||
def ordered(request): | ||
"""Boolean 'ordered' parameter for Categorical.""" | ||
return request.param | ||
|
||
|
||
@pytest.mark.parametrize('ordered', [True, False]) | ||
@pytest.mark.parametrize('categories', [ | ||
['b', 'a', 'c'], | ||
|
@@ -69,3 +81,45 @@ def test_isin_empty(empty): | |
|
||
result = s.isin(empty) | ||
tm.assert_numpy_array_equal(expected, result) | ||
|
||
|
||
class TestTake(object): | ||
# https://github.com/pandas-dev/pandas/issues/20664 | ||
|
||
def test_take_warns(self): | ||
cat = pd.Categorical(['a', 'b']) | ||
with tm.assert_produces_warning(FutureWarning): | ||
cat.take([0, -1]) | ||
|
||
def test_take_positive_no_warning(self): | ||
cat = pd.Categorical(['a', 'b']) | ||
with tm.assert_produces_warning(None): | ||
cat.take([0, 0]) | ||
|
||
def test_take_bounds(self, allow_fill): | ||
# https://github.com/pandas-dev/pandas/issues/20664 | ||
cat = pd.Categorical(['a', 'b', 'a']) | ||
with pytest.raises(IndexError): | ||
cat.take([4, 5], allow_fill=allow_fill) | ||
|
||
def test_take_empty(self, allow_fill): | ||
# https://github.com/pandas-dev/pandas/issues/20664 | ||
cat = pd.Categorical([], categories=['a', 'b']) | ||
with pytest.raises(IndexError): | ||
cat.take([0], allow_fill=allow_fill) | ||
|
||
def test_positional_take(self, ordered): | ||
cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'], | ||
ordered=ordered) | ||
result = cat.take([0, 1, 2], allow_fill=False) | ||
expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories, | ||
ordered=ordered) | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
def test_positional_take_unobserved(self, ordered): | ||
cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'], | ||
ordered=ordered) | ||
result = cat.take([1, 0], allow_fill=False) | ||
expected = pd.Categorical(['b', 'a'], categories=cat.categories, | ||
ordered=ordered) | ||
tm.assert_categorical_equal(result, expected) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -753,6 +753,16 @@ def test_take(): | |
s.take([-1, 3, 4], convert=False) | ||
|
||
|
||
def test_take_categorical(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of this test (or in addition to), I think you should be able to remove the skip from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That still fails because of #20747. The unobserved categories are still present. I've updated the skip. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, yes, that's still there :) |
||
# https://github.com/pandas-dev/pandas/issues/20664 | ||
s = Series(pd.Categorical(['a', 'b', 'c'])) | ||
result = s.take([-2, -2, 0]) | ||
expected = Series(pd.Categorical(['b', 'b', 'a'], | ||
categories=['a', 'b', 'c']), | ||
index=[1, 1, 0]) | ||
assert_series_equal(result, expected) | ||
|
||
|
||
def test_head_tail(test_data): | ||
assert_series_equal(test_data.series.head(), test_data.series[:5]) | ||
assert_series_equal(test_data.series.head(0), test_data.series[0:0]) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
indicies -> indices
I would also add "to be consistent with
Series.take
" at the end