pandas-dev · TomAugspurger · Dec 7, 2018 · Dec 7, 2018 · TomAugspurger · Dec 7, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1297,6 +1297,7 @@ Datetimelike
 - Bug in the :class:`Series` repr with period-dtype data missing a space before the data (:issue:`23601`)
 - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`)
 - Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`)
+- Bug in :meth:`Series.combine_first` not properly aligning categoricals, so that missing values in ``self`` were not filled by valid values from ``other`` (:issue:`24147`)
 - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)
 - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`)
 - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
@@ -64,7 +64,8 @@ cdef class BlockPlacement:
 
         return '%s(%r)' % (self.__class__.__name__, v)
 
-    __repr__ = __str__
+    def __repr__(self):
+        return str(self)
 
     def __len__(self):
         cdef:

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1887,7 +1887,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
                                       allow_fill=True)
 
         # if we are a 1-dim object, then always place at 0
-        if self.ndim == 1:
+        if self.ndim == 1 and new_mgr_locs is None:
             new_mgr_locs = [0]
         else:
             if new_mgr_locs is None:

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -164,6 +164,15 @@ def test_combine_add(self, data_repeated):
             orig_data1._from_sequence([a + val for a in list(orig_data1)]))
         self.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(reason="GH-24147", strict=True)
+    def test_combine_first(self, data):
+        # https://github.com/pandas-dev/pandas/issues/24147
+        a = pd.Series(data[:3])
+        b = pd.Series(data[2:5], index=[2, 3, 4])
+        result = a.combine_first(b)
+        expected = pd.Series(data[:5])
+        self.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize('frame', [True, False])
     @pytest.mark.parametrize('periods, indices', [
         (-2, [2, 3, 4, -1, -1]),

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
@@ -1283,3 +1283,12 @@ def test_validate_ndim():
 
     with pytest.raises(ValueError, match=msg):
         make_block(values, placement, ndim=2)
+
+
+def test_block_shape():
+    idx = pd.Index([0, 1, 2, 3, 4])
+    a = pd.Series([1, 2, 3]).reindex(idx)
+    b = pd.Series(pd.Categorical([1, 2, 3])).reindex(idx)
+
+    assert (a._data.blocks[0].mgr_locs.indexer ==
+            b._data.blocks[0].mgr_locs.indexer)