Skip to content

Commit 9159d98

Browse files
committed
BUG: fix reindexing to an all-nan Categorical (GH8076)
1 parent ba955a9 commit 9159d98

File tree

3 files changed

+42
-6
lines changed

3 files changed

+42
-6
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -346,7 +346,7 @@ Categoricals in Series/DataFrame
346346

347347
:class:`~pandas.Categorical` can now be included in `Series` and `DataFrames` and gained new
348348
methods to manipulate. Thanks to Jan Schultz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`,
349-
:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`, :issue:`7768`, :issue:`8006`, :issue:`3678`, :issue:`8075`).
349+
:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`, :issue:`7768`, :issue:`8006`, :issue:`3678`, :issue:`8075`, :issue:`8076`).
350350

351351
For full docs, see the :ref:`Categorical introduction <categorical>` and the
352352
:ref:`API documentation <api.categorical>`.

pandas/core/categorical.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -743,12 +743,14 @@ def fillna(self, fill_value=None, method=None, limit=None, **kwargs):
743743
name=self.name, fastpath=True)
744744

745745
def take_nd(self, indexer, allow_fill=True, fill_value=None):
746-
""" Take the values by the indexer, fill with the fill_value. """
747-
if allow_fill and fill_value is None:
748-
fill_value = np.nan
746+
""" Take the codes by the indexer, fill with the fill_value. """
747+
748+
# filling must always be None/nan here
749+
# but is passed thru internally
750+
assert isnull(fill_value)
749751

750-
values = com.take_1d(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value)
751-
result = Categorical(values=values, levels=self.levels, ordered=self.ordered,
752+
codes = com.take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
753+
result = Categorical(codes, levels=self.levels, ordered=self.ordered,
752754
name=self.name, fastpath=True)
753755
return result
754756

pandas/tests/test_categorical.py

+34
Original file line number | Diff line number | Diff line change
@@ -858,6 +858,40 @@ def test_construction_series(self):
858858
res = Series(l,dtype='category')
859859
tm.assert_series_equal(res, exp)
860860

861+
# insert into frame with different index
862+
# GH 8076
863+
index = pd.date_range('20000101', periods=3)
864+
expected = Series(Categorical(values=[np.nan,np.nan,np.nan],levels=['a', 'b', 'c']))
865+
expected.index = index
866+
867+
expected = DataFrame({'x': expected})
868+
df = DataFrame({'x': Series(['a', 'b', 'c'],dtype='category')}, index=index)
869+
tm.assert_frame_equal(df, expected)
870+
871+
def test_reindex(self):
872+
873+
index = pd.date_range('20000101', periods=3)
874+
875+
# reindexing to an invalid Categorical
876+
s = Series(['a', 'b', 'c'],dtype='category')
877+
result = s.reindex(index)
878+
expected = Series(Categorical(values=[np.nan,np.nan,np.nan],levels=['a', 'b', 'c']))
879+
expected.index = index
880+
tm.assert_series_equal(result, expected)
881+
882+
# partial reindexing
883+
expected = Series(Categorical(values=['b','c'],levels=['a', 'b', 'c']))
884+
expected.index = [1,2]
885+
result = s.reindex([1,2])
886+
tm.assert_series_equal(result, expected)
887+
888+
expected = Series(Categorical(values=['c',np.nan],levels=['a', 'b', 'c']))
889+
expected.index = [2,3]
890+
result = s.reindex([2,3])
891+
tm.assert_series_equal(result, expected)
892+
893+
894+
861895
def test_sideeffects_free(self):
862896

863897
# Passing a categorical to a Series and then changing values in either the series or the

0 commit comments

Comments
 (0)