Skip to content

Commit 2619889

Browse files
committed
Merge pull request #10304 from evanpw/remove_cat_nan
BUG: Categorical.remove_categories(np.nan) fails when underlying dtype is float
2 parents 39da6da + e462c34 commit 2619889

File tree

3 files changed

+35
-3
lines changed

3 files changed

+35
-3
lines changed

doc/source/whatsnew/v0.16.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ Bug Fixes
163163
- Bug in ``GroupBy.get_group`` raising ``ValueError`` when the group key contains ``NaT`` (:issue:`6992`)
164164
- Bug in ``SparseSeries`` constructor ignores input data name (:issue:`10258`)
165165

166+
- Bug in ``Categorical.remove_categories`` causing a ``ValueError`` when removing the ``NaN`` category if the underlying dtype is floating-point (:issue:`10156`)
167+
166168
- Bug where ``infer_freq`` infers a time rule (``WOM-5XXX``) unsupported by ``to_offset`` (:issue:`9425`)
167169
- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`)
168170
- Bug in handling masking of an empty ``DataFrame`` (:issue:`10126`)

pandas/core/categorical.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -749,11 +749,19 @@ def remove_categories(self, removals, inplace=False):
749749
"""
750750
if not is_list_like(removals):
751751
removals = [removals]
752-
removals = set(list(removals))
753-
not_included = removals - set(self._categories)
752+
753+
removal_set = set(list(removals))
754+
not_included = removal_set - set(self._categories)
755+
new_categories = [ c for c in self._categories if c not in removal_set ]
756+
757+
# GH 10156
758+
if any(isnull(removals)):
759+
not_included = [x for x in not_included if notnull(x)]
760+
new_categories = [x for x in new_categories if notnull(x)]
761+
754762
if len(not_included) != 0:
755763
raise ValueError("removals must all be in old categories: %s" % str(not_included))
756-
new_categories = [ c for c in self._categories if c not in removals ]
764+
757765
return self.set_categories(new_categories, ordered=self.ordered, rename=False,
758766
inplace=inplace)
759767

pandas/tests/test_categorical.py

+22
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,28 @@ def test_nan_handling(self):
854854
self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan],dtype=np.object_))
855855
self.assert_numpy_array_equal(c._codes , np.array([0,2,-1,0]))
856856

857+
# Remove null categories (GH 10156)
858+
cases = [
859+
([1.0, 2.0, np.nan], [1.0, 2.0]),
860+
(['a', 'b', None], ['a', 'b']),
861+
([pd.Timestamp('2012-05-01'), pd.NaT], [pd.Timestamp('2012-05-01')])
862+
]
863+
864+
null_values = [np.nan, None, pd.NaT]
865+
866+
for with_null, without in cases:
867+
base = Categorical([], with_null)
868+
expected = Categorical([], without)
869+
870+
for nullval in null_values:
871+
result = base.remove_categories(nullval)
872+
self.assert_categorical_equal(result, expected)
873+
874+
# Different null values are indistinguishable
875+
for i, j in [(0, 1), (0, 2), (1, 2)]:
876+
nulls = [null_values[i], null_values[j]]
877+
self.assertRaises(ValueError, lambda: Categorical([], categories=nulls))
878+
857879

858880
def test_isnull(self):
859881
exp = np.array([False, False, True])

0 commit comments

Comments
 (0)