Skip to content

Commit c7e095d

Browse files
committed
Allow add_categories() to accept Series/np.array and make raising on duplicates optional, fixes #9927
1 parent 529cd3d commit c7e095d

File tree

3 files changed

+21
-3
lines changed

3 files changed

+21
-3
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Enhancements
5555
- Allow Panel.shift with ``axis='items'`` (:issue:`9890`)
5656

5757
- Trying to write an excel file now raises ``NotImplementedError`` if the ``DataFrame`` has a ``MultiIndex`` instead of writing a broken Excel file. (:issue:`9794`)
58+
- Allow ``Categorical.add_categories`` to accept a ``Series`` or ``np.array``, and add optional keyword ``raise_on_duplicates``. (:issue:`9927`)
5859

5960
.. _whatsnew_0161.api:
6061

pandas/core/categorical.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -650,7 +650,7 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False):
650650
raise ValueError("items in new_categories are not the same as in old categories")
651651
return self.set_categories(new_categories, ordered=ordered, inplace=inplace)
652652

653-
def add_categories(self, new_categories, inplace=False):
653+
def add_categories(self, new_categories, inplace=False, raise_on_duplicates=True):
654654
""" Add new categories.
655655
656656
`new_categories` will be included at the last/highest place in the categories and will be
@@ -668,6 +668,8 @@ def add_categories(self, new_categories, inplace=False):
668668
inplace : boolean (default: False)
669669
Whether or not to add the categories inplace or return a copy of this categorical
670670
with added categories.
671+
raise_on_duplicates : bool
672+
Whether to raise an error if new categories overlap with existing categories
671673
672674
Returns
673675
-------
@@ -684,10 +686,12 @@ def add_categories(self, new_categories, inplace=False):
684686
if not is_list_like(new_categories):
685687
new_categories = [new_categories]
686688
already_included = set(new_categories) & set(self._categories)
687-
if len(already_included) != 0:
689+
690+
if raise_on_duplicates and len(already_included) != 0:
688691
msg = "new categories must not include old categories: %s" % str(already_included)
689692
raise ValueError(msg)
690-
new_categories = list(self._categories) + (new_categories)
693+
694+
new_categories = list(self._categories) + list(set(new_categories) - already_included)
691695
new_categories = self._validate_categories(new_categories)
692696
cat = self if inplace else self.copy()
693697
cat._categories = new_categories

pandas/tests/test_categorical.py

+13
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,19 @@ def f():
727727
cat.add_categories(["d"])
728728
self.assertRaises(ValueError, f)
729729

730+
# GH 9927
731+
# with raise_on_duplicates=False
732+
cat = Categorical(list("abc"), ordered=True)
733+
res = cat.add_categories(list("cd"), raise_on_duplicates=False)
734+
expected = Categorical(list("abc"), categories=list("abcd"), ordered=True)
735+
self.assert_categorical_equal(res, expected)
736+
737+
# with Series or np.array
738+
res = cat.add_categories(Series(["c", "d"]), raise_on_duplicates=False)
739+
self.assert_categorical_equal(res, expected)
740+
res = cat.add_categories(np.array(["c", "d"]), raise_on_duplicates=False)
741+
self.assert_categorical_equal(res, expected)
742+
730743
def test_remove_categories(self):
731744
cat = Categorical(["a","b","c","a"], ordered=True)
732745
old = cat.copy()

0 commit comments

Comments
 (0)