Skip to content

REF: remove Block.concat_same_type #33486

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 43 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
cb8f6c6
REF: reshape.concat operate on arrays, not SingleBlockManagers
jbrockmendel Mar 29, 2020
5fe3348
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Mar 30, 2020
2a2c9e7
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Mar 30, 2020
2e774f2
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Mar 31, 2020
e008f40
xfail more selectively
jbrockmendel Mar 31, 2020
a244f15
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 4, 2020
9d52e7e
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 6, 2020
3f0ee1b
Revert PandasArray.astype patch
jbrockmendel Apr 6, 2020
2da47de
DOC: Fix examples in `pandas/core/strings.py` (#33328)
ShaharNaveh Apr 6, 2020
9585a41
DOC: do not include type hints in signature in html docs (#33312)
jorisvandenbossche Apr 6, 2020
ed862c0
BUG: DataFrame fail to construct when data is list and columns is nes…
charlesdong1991 Apr 6, 2020
c57f6e7
API/CLN: simplify CategoricalBlock.replace (#33279)
jbrockmendel Apr 6, 2020
2b322d2
REF: BlockManager.delete -> idelete (#33332)
jbrockmendel Apr 6, 2020
d4d7538
TST: Don't use 'is' on strings to avoid SyntaxWarning (#33322)
rebecca-palmer Apr 6, 2020
e3eb29c
CLN: remove fill_tuple kludge (#33310)
jbrockmendel Apr 6, 2020
fcfa7c4
TST: misplaced reduction/indexing tests (#33307)
jbrockmendel Apr 6, 2020
7a468b0
BUG: Don't raise on value_counts for empty Int64 (#33339)
dsaxton Apr 6, 2020
0a2b9cd
REGR: Fix bug when replacing categorical value with self (#33292)
dsaxton Apr 6, 2020
5a38119
Pass method in __finalize__ (#33273)
TomAugspurger Apr 6, 2020
4f1fb46
DOC: Added an example for each series.dt field accessor (#33259)
ShaharNaveh Apr 6, 2020
8150c11
BUG: Timestamp+- ndarray[td64] (#33296)
jbrockmendel Apr 6, 2020
9585ae4
BUG: 2D indexing on DTA/TDA/PA (#33290)
jbrockmendel Apr 6, 2020
c05d28b
REF: dispatch TDBlock.to_native_types to TDA._format_native_types (#3…
jbrockmendel Apr 6, 2020
047e5d7
REF: put concatenate_block_managers in internals.concat (#33231)
jbrockmendel Apr 6, 2020
0e382f2
TST: Add tests for duplicated and drop_duplicates (#32575)
mproszewska Apr 6, 2020
717662b
Ods loses spaces 32207 (#33233)
detrout Apr 6, 2020
9c1984c
PERF: masked ops for reductions (min/max) (#33261)
jorisvandenbossche Apr 6, 2020
efce8fc
REF: do concat on values, avoid blocks
jbrockmendel Apr 7, 2020
362e86c
CLN: Clean nanops.get_corr_func (#33244)
dsaxton Apr 7, 2020
3ad2110
[DOC]: Mention default behaviour of index_col in readcsv (#32977)
bharatr21 Apr 7, 2020
629d7c5
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 7, 2020
859327d
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 8, 2020
87c1006
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 10, 2020
3a84357
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 10, 2020
3ee8363
Remove Block.concat_same_type
jbrockmendel Apr 11, 2020
9e6c7ed
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 11, 2020
fd7c72e
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 11, 2020
41d6da0
use concat_compat
jbrockmendel Apr 11, 2020
5d567f0
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 12, 2020
2e070ca
combine cases
jbrockmendel Apr 12, 2020
9b6d3ac
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 13, 2020
858658a
Merge branch 'master' of https://github.com/pandas-dev/pandas into no…
jbrockmendel Apr 14, 2020
675a948
Dummy commit to force CI
jbrockmendel Apr 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 0 additions & 55 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
is_timedelta64_dtype,
pandas_dtype,
)
from pandas.core.dtypes.concat import concat_categorical, concat_datetime
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -110,7 +109,6 @@ class Block(PandasObject):
_can_consolidate = True
_verify_integrity = True
_validate_ndim = True
_concatenator = staticmethod(np.concatenate)

def __init__(self, values, placement, ndim=None):
self.ndim = self._check_ndim(values, ndim)
Expand Down Expand Up @@ -309,16 +307,6 @@ def shape(self):
def dtype(self):
return self.values.dtype

def concat_same_type(self, to_concat):
"""
Concatenate list of single blocks of the same type.
"""
values = self._concatenator(
[blk.values for blk in to_concat], axis=self.ndim - 1
)
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
return self.make_block_same_class(values, placement=placement)

def iget(self, i):
return self.values[i]

Expand Down Expand Up @@ -1770,14 +1758,6 @@ def _slice(self, slicer):

return self.values[slicer]

def concat_same_type(self, to_concat):
"""
Concatenate list of single blocks of the same type.
"""
values = self._holder._concat_same_type([blk.values for blk in to_concat])
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
return self.make_block_same_class(values, placement=placement)

def fillna(self, value, limit=None, inplace=False, downcast=None):
values = self.values if inplace else self.values.copy()
values = values.fillna(value=value, limit=limit)
Expand Down Expand Up @@ -2258,20 +2238,6 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]:
new_values = new_values.astype("timedelta64[ns]")
return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]

def concat_same_type(self, to_concat):
# need to handle concat([tz1, tz2]) here, since DatetimeArray
# only handles cases where all the tzs are the same.
# Instead of placing the condition here, it could also go into the
# is_uniform_join_units check, but I'm not sure what is better.
if len({x.dtype for x in to_concat}) > 1:
values = concat_datetime([x.values for x in to_concat])

values = values.astype(object, copy=False)
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))

return self.make_block(values, placement=placement)
return super().concat_same_type(to_concat)

def fillna(self, value, limit=None, inplace=False, downcast=None):
# We support filling a DatetimeTZ with a `value` whose timezone
# is different by coercing to object.
Expand Down Expand Up @@ -2642,7 +2608,6 @@ class CategoricalBlock(ExtensionBlock):
is_categorical = True
_verify_integrity = True
_can_hold_na = True
_concatenator = staticmethod(concat_categorical)

should_store = Block.should_store

Expand All @@ -2656,26 +2621,6 @@ def __init__(self, values, placement, ndim=None):
def _holder(self):
return Categorical

def concat_same_type(self, to_concat):
"""
Concatenate list of single blocks of the same type.

Note that this CategoricalBlock.concat_same_type *may* not
return a CategoricalBlock. When the categories in `to_concat`
differ, this will return an object ndarray.

If / when we decide we don't like that behavior:

1. Change Categorical._concat_same_type to use union_categoricals
2. Delete this method.
"""
values = self._concatenator(
[blk.values for blk in to_concat], axis=self.ndim - 1
)
placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
# not using self.make_block_same_class as values can be object dtype
return self.make_block(values, placement=placement)

def replace(
self,
to_replace,
Expand Down
19 changes: 16 additions & 3 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# TODO: Needs a better name; too many modules are already called "concat"
from collections import defaultdict
import copy
from typing import List

import numpy as np

Expand Down Expand Up @@ -61,8 +62,18 @@ def concatenate_block_managers(
values = values.view()
b = b.make_block_same_class(values, placement=placement)
elif _is_uniform_join_units(join_units):
b = join_units[0].block.concat_same_type([ju.block for ju in join_units])
b.mgr_locs = placement
blk = join_units[0].block
vals = [ju.block.values for ju in join_units]

if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
# datetimetz and categorical can have the same type but multiple
# dtypes, concatting does not necessarily preserve dtype
values = concat_compat(vals, axis=blk.ndim - 1)
else:
# TODO(EA2D): special-casing not needed with 2D EAs
values = concat_compat(vals)

b = make_block(values, placement=placement, ndim=blk.ndim)
else:
b = make_block(
_concatenate_join_units(join_units, concat_axis, copy=copy),
Expand Down Expand Up @@ -419,13 +430,15 @@ def _get_empty_dtype_and_na(join_units):
raise AssertionError(msg)


def _is_uniform_join_units(join_units) -> bool:
def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
"""
Check if the join units consist of blocks of uniform type whose values can
be concatenated directly with `concat_compat` instead of going through the
generic _concatenate_join_units.

"""
# TODO: require dtype match in addition to same type? e.g. DatetimeTZBlock
# cannot necessarily join
return (
# all blocks need to have the same type
all(type(ju.block) is type(join_units[0].block) for ju in join_units)
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/extension/test_external_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@ def df():
return pd.DataFrame(block_manager)


def test_concat_dataframe(df):
# GH17728
res = pd.concat([df, df])
assert isinstance(res._mgr.blocks[1], CustomBlock)


def test_concat_axis1(df):
# GH17954
df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]})
Expand Down