Skip to content

BUG: fix categorical comparison with missing values (#26504) #26514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 38 commits into from
Jun 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
e41387d
BUG: None comparison evaluates to True #26504
another-green May 24, 2019
9af03ce
ENH - Index set operation modifications to address issue #23525 (#23538)
ms7463 May 21, 2019
620fa59
DOC/CLN: wil -> will (#26484)
simonjayhawkins May 21, 2019
aad1bf9
Fixed typo mutiplication -> multiplication. (#26489)
javabrett May 22, 2019
ef87d02
DOC: fix SyntaxError in doc build on Windows (#26499)
simonjayhawkins May 23, 2019
89cc7f2
DOC: Highlighted role of index alignment in DataFrame.dot(other) (#26…
matsmaiwald May 23, 2019
babd572
DOC/CLN: Change API reference section title (#26486)
simonjayhawkins May 24, 2019
4c231a7
CLN: Remove StringMixin from PandasObject (#26505)
topper-123 May 24, 2019
cffbaac
Fix type annotations in pandas.core.indexes.datetimes (#26404)
vaibhavhrt May 24, 2019
a8af7a1
Better error message for DataFrame.hist() without numerical columns (…
matsmaiwald May 24, 2019
ac02674
Excel Test Cleanup - ReadWriteClass (#26473)
WillAyd May 24, 2019
9151211
CLN: pd.TimeGrouper (#26477)
mroeschke May 24, 2019
8c8a175
CLN: Remove ExcelWriter.sheetname (#26464)
mroeschke May 25, 2019
9d6d959
CLN: Remove deprecated parse_cols from read_excel (#26522)
mroeschke May 25, 2019
3bb4766
[TEST] Add two more parameters to the test_dti_add_sub_nonzero_mth_of…
makbigc May 25, 2019
014abdc
Remove py.path special handling from io.common (#26458)
nandahkrishna May 26, 2019
420eee5
CLN: remove StringMixin from code base, except core.computation (#26523)
topper-123 May 26, 2019
48a4b8c
MAINT: port numpy#13188 for np_datetime simplification (#26516)
xcz011 May 26, 2019
3e20569
fix categorical comparison with missing values #26504
another-green May 29, 2019
4d92eed
Merge branch 'master' into master
another-green May 29, 2019
7e6662d
Update test_operators.py
another-green May 29, 2019
16dac3a
Update categorical.py
another-green May 29, 2019
9464f72
Update test_operators.py
another-green May 29, 2019
c2b7343
Update test_operators.py
another-green May 29, 2019
65014e7
Update doc/source/whatsnew/v0.25.0.rst
another-green May 29, 2019
8964f0a
Update test_operators.py
another-green May 30, 2019
7f404d2
Update test_operators.py
another-green May 30, 2019
19e3711
Update v0.25.0.rst
another-green May 30, 2019
2fc1d27
Update test_operators.py
another-green May 30, 2019
c80c2dc
Update test_operators.py
another-green May 30, 2019
2e01686
Update test_operators.py
another-green May 30, 2019
924f693
Update test_operators.py
another-green May 30, 2019
3b4a42a
Update categorical.py
another-green May 30, 2019
57480bd
Update test_operators.py
another-green May 30, 2019
99204a4
Merge remote-tracking branch 'upstream/master' into cat
another-green May 31, 2019
045c912
Merge remote-tracking branch 'upstream/master' into cat
another-green May 31, 2019
8bb9bcf
fix categorical comparison with missing values
another-green May 31, 2019
d83c4f4
Merge remote-tracking branch 'upstream/master' into cat
another-green Jun 1, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ Categorical
^^^^^^^^^^^

- Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`)
-
- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in True (:issue:`26504`)
-

Datetimelike
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,23 @@ def f(self, other):
else:
other_codes = other._codes

na_mask = (self._codes == -1) | (other_codes == -1)
mask = (self._codes == -1) | (other_codes == -1)
f = getattr(self._codes, op)
ret = f(other_codes)
if na_mask.any():
if mask.any():
# In other series, this leads to False, so do that here too
ret[na_mask] = False
ret[mask] = False
return ret

if is_scalar(other):
if other in self.categories:
i = self.categories.get_loc(other)
return getattr(self._codes, op)(i)
ret = getattr(self._codes, op)(i)

# check for NaN in self
mask = (self._codes == -1)
ret[mask] = False
return ret
else:
if op == '__eq__':
return np.repeat(False, len(self))
Expand Down
32 changes: 31 additions & 1 deletion pandas/tests/arrays/categorical/test_operators.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import operator
import warnings

import numpy as np
import pytest
Expand All @@ -17,7 +18,6 @@ def test_categories_none_comparisons(self):
tm.assert_categorical_equal(factor, self.factor)

def test_comparisons(self):

result = self.factor[self.factor == 'a']
expected = self.factor[np.asarray(self.factor) == 'a']
tm.assert_categorical_equal(result, expected)
Expand Down Expand Up @@ -186,6 +186,36 @@ def test_comparison_with_unknown_scalars(self):
tm.assert_numpy_array_equal(cat != 4,
np.array([True, True, True]))

def test_comparison_of_ordered_categorical_with_nan_to_scalar(
self, compare_operators_no_eq_ne):
# Regression test for
# https://github.com/pandas-dev/pandas/issues/26504
# BUG: fix ordered categorical comparison with missing values (#26504):
# comparing an ordered Categorical that contains a missing value with a
# scalar that is one of its categories should evaluate to False at the
# missing position.
# NOTE(review): compare_operators_no_eq_ne is presumably a fixture that
# supplies the name of a comparison method other than __eq__/__ne__ -
# confirm against the test suite's conftest.

cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
scalar = 2
with warnings.catch_warnings():
# Ignore RuntimeWarning while building the reference result from the
# plain NumPy array (presumably emitted when comparing against the
# missing value - TODO confirm).
warnings.simplefilter("ignore", RuntimeWarning)
expected = getattr(np.array(cat),
compare_operators_no_eq_ne)(scalar)
# The Categorical comparison must match the NumPy reference element-wise.
actual = getattr(cat, compare_operators_no_eq_ne)(scalar)
tm.assert_numpy_array_equal(actual, expected)

def test_comparison_of_ordered_categorical_with_nan_to_listlike(
self, compare_operators_no_eq_ne):
# Regression test for
# https://github.com/pandas-dev/pandas/issues/26504
# Comparing an ordered Categorical that contains a missing value with a
# list-like (here another Categorical with the same categories) should
# evaluate to False at the missing position.
# NOTE(review): compare_operators_no_eq_ne is presumably a fixture that
# supplies the name of a comparison method other than __eq__/__ne__ -
# confirm against the test suite's conftest.

cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
with warnings.catch_warnings():
# Ignore RuntimeWarning while building the reference result; `other`
# holds the value 2 in every position, so the reference compares the
# plain NumPy array against the scalar 2.
warnings.simplefilter("ignore", RuntimeWarning)
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
# The Categorical-vs-Categorical comparison must match the reference.
actual = getattr(cat, compare_operators_no_eq_ne)(other)
tm.assert_numpy_array_equal(actual, expected)

@pytest.mark.parametrize('data,reverse,base', [
(list("abc"), list("cba"), list("bbb")),
([1, 2, 3], [3, 2, 1], [2, 2, 2])]
Expand Down