diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 53ae3200d2adb..083e0198c1d15 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1297,6 +1297,7 @@ Datetimelike - Bug in the :class:`Series` repr with period-dtype data missing a space before the data (:issue:`23601`) - Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) - Bug in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`) +- Bug in :meth:`Series.combine_first` not properly aligning categoricals, so that missing values in ``self`` were not filled by valid values from ``other`` (:issue:`24147`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 681530ed494d7..e0f26357cae6f 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -64,7 +64,8 @@ cdef class BlockPlacement: return '%s(%r)' % (self.__class__.__name__, v) - __repr__ = __str__ + def __repr__(self): + return str(self) def __len__(self): cdef: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9c2d4cd5729d2..51c47a81f8e2f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1887,7 +1887,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): allow_fill=True) # if we are a 1-dim object, then always place at 0 - if self.ndim == 1: + if self.ndim == 1 and new_mgr_locs is None: 
new_mgr_locs = [0] else: if new_mgr_locs is None: diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e9a89c1af2f22..f28f69c9d2893 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -164,6 +164,15 @@ def test_combine_add(self, data_repeated): orig_data1._from_sequence([a + val for a in list(orig_data1)])) self.assert_series_equal(result, expected) + @pytest.mark.xfail(reason="GH-24147", strict=True) + def test_combine_first(self, data): + # https://github.com/pandas-dev/pandas/issues/24147 + a = pd.Series(data[:3]) + b = pd.Series(data[2:5], index=[2, 3, 4]) + result = a.combine_first(b) + expected = pd.Series(data[:5]) + self.assert_series_equal(result, expected) + @pytest.mark.parametrize('frame', [True, False]) @pytest.mark.parametrize('periods, indices', [ (-2, [2, 3, 4, -1, -1]), diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 14ef6237e8ddd..26cd39c4b807c 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1283,3 +1283,12 @@ def test_validate_ndim(): with pytest.raises(ValueError, match=msg): make_block(values, placement, ndim=2) + + +def test_block_shape(): + idx = pd.Index([0, 1, 2, 3, 4]) + a = pd.Series([1, 2, 3]).reindex(idx) + b = pd.Series(pd.Categorical([1, 2, 3])).reindex(idx) + + assert (a._data.blocks[0].mgr_locs.indexer == + b._data.blocks[0].mgr_locs.indexer)