diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8e2592a603716..de4e3b76420af 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -307,9 +307,6 @@ def shape(self): def dtype(self): return self.values.dtype - def merge(self, other): - return _merge_blocks([self, other]) - def concat_same_type(self, to_concat): """ Concatenate list of single blocks of the same type. @@ -2903,32 +2900,6 @@ def _block_shape(values, ndim=1, shape=None): return values -def _merge_blocks(blocks, dtype=None, _can_consolidate=True): - - if len(blocks) == 1: - return blocks[0] - - if _can_consolidate: - - if dtype is None: - if len({b.dtype for b in blocks}) != 1: - raise AssertionError("_merge_blocks are invalid!") - - # FIXME: optimization potential in case all mgrs contain slices and - # combination of those slices is a slice, too. - new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) - new_values = np.vstack([b.values for b in blocks]) - - argsort = np.argsort(new_mgr_locs) - new_values = new_values[argsort] - new_mgr_locs = new_mgr_locs[argsort] - - return make_block(new_values, placement=new_mgr_locs) - - # no merge - return blocks - - def _safe_reshape(arr, new_shape): """ If possible, reshape `arr` to have shape `new_shape`, diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index ebb4899c1ba9a..45027bde58f14 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -43,7 +43,6 @@ ExtensionBlock, ObjectValuesExtensionBlock, _extend_blocks, - _merge_blocks, _safe_reshape, get_block_type, make_block, @@ -1891,12 +1890,40 @@ def _consolidate(blocks): new_blocks = [] for (_can_consolidate, dtype), group_blocks in grouper: merged_blocks = _merge_blocks( - list(group_blocks), dtype=dtype, _can_consolidate=_can_consolidate + list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate ) new_blocks = _extend_blocks(merged_blocks, new_blocks) return new_blocks +def _merge_blocks( + blocks: List[Block], dtype: DtypeObj, can_consolidate: bool +) -> List[Block]: + + if len(blocks) == 1: + return blocks + + if can_consolidate: + + if dtype is None: + if len({b.dtype for b in blocks}) != 1: + raise AssertionError("_merge_blocks are invalid!") + + # TODO: optimization potential in case all mgrs contain slices and + # combination of those slices is a slice, too. + new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) + new_values = np.vstack([b.values for b in blocks]) + + argsort = np.argsort(new_mgr_locs) + new_values = new_values[argsort] + new_mgr_locs = new_mgr_locs[argsort] + + return [make_block(new_values, placement=new_mgr_locs)] + + # can't consolidate --> no merge + return blocks + + def _compare_or_regex_search(a, b, regex=False): """ Compare two array_like inputs of the same shape or two scalar values diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 91ec1c29873cf..657849874f091 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -232,21 +232,6 @@ def test_attrs(self): assert self.fblock.dtype == self.fblock.values.dtype assert len(self.fblock) == len(self.fblock.values) - def test_merge(self): - avals = tm.randn(2, 10) - bvals = tm.randn(2, 10) - - ref_cols = Index(["e", "a", "b", "d", "f"]) - - ablock = make_block(avals, ref_cols.get_indexer(["e", "b"])) - bblock = make_block(bvals, ref_cols.get_indexer(["a", "d"])) - merged = ablock.merge(bblock) - tm.assert_numpy_array_equal( - merged.mgr_locs.as_array, np.array([0, 1, 2, 3], dtype=np.int64) - ) - tm.assert_numpy_array_equal(merged.values[[0, 2]], np.array(avals)) - tm.assert_numpy_array_equal(merged.values[[1, 3]], np.array(bvals)) - def test_copy(self): cop = self.fblock.copy() assert cop is not self.fblock