Skip to content

CLN: dont consolidate in reshape.concat #34683

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Dec 17, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
64e9692
CLN: dont consolidate in reshape.concat
jbrockmendel Jun 10, 2020
9251046
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Jun 11, 2020
a5ea5e7
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 7, 2020
53ee564
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 12, 2020
562aa9c
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 13, 2020
5f6d062
PERF: optimizations for concat
jbrockmendel Sep 14, 2020
e5d0218
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 19, 2020
1c69c93
PERF: fastpaths
jbrockmendel Sep 19, 2020
2558233
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 20, 2020
2b5717c
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 21, 2020
0b6ded7
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 22, 2020
5823a67
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Sep 22, 2020
19dace8
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Oct 14, 2020
1911951
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Oct 16, 2020
3e7c2b5
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Nov 21, 2020
fe8bec7
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Nov 24, 2020
feff1ae
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Dec 17, 2020
6ed6c28
Merge branch 'master' of https://github.com/pandas-dev/pandas into cl…
jbrockmendel Dec 17, 2020
55637ad
use isinstance check
jbrockmendel Dec 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 26 additions & 17 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,16 @@

import numpy as np

from pandas._libs import NaT, algos as libalgos, internals as libinternals, lib, writers
from pandas._libs import (
Interval,
NaT,
Period,
Timestamp,
algos as libalgos,
internals as libinternals,
lib,
writers,
)
from pandas._libs.internals import BlockPlacement
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import tz_compare
Expand Down Expand Up @@ -39,10 +48,8 @@
is_float_dtype,
is_integer,
is_integer_dtype,
is_interval_dtype,
is_list_like,
is_object_dtype,
is_period_dtype,
is_re,
is_re_compilable,
is_sparse,
Expand Down Expand Up @@ -2669,35 +2676,37 @@ def get_block_type(values, dtype=None):
-------
cls : class, subclass of Block
"""
# We use vtype and kind checks because they are much more performant
# than is_foo_dtype
dtype = dtype or values.dtype
vtype = dtype.type
kind = dtype.kind

cls: Type[Block]

if is_sparse(dtype):
# Need this first(ish) so that Sparse[datetime] is sparse
cls = ExtensionBlock
elif is_categorical_dtype(values.dtype):
elif dtype.name == "category":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not check for Categorical directly, since we are removing comparisons against 'category' generally (in your other PR)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could do `isinstance(dtype, CategoricalDtype)`. Either way is fine by me.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I prefer that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, and CI is green.

cls = CategoricalBlock
elif issubclass(vtype, np.datetime64):
assert not is_datetime64tz_dtype(values.dtype)
cls = DatetimeBlock
elif is_datetime64tz_dtype(values.dtype):
elif vtype is Timestamp:
cls = DatetimeTZBlock
elif is_interval_dtype(dtype) or is_period_dtype(dtype):
elif vtype is Interval or vtype is Period:
cls = ObjectValuesExtensionBlock
elif is_extension_array_dtype(values.dtype):
elif isinstance(dtype, ExtensionDtype):
cls = ExtensionBlock
elif issubclass(vtype, np.floating):
cls = FloatBlock
elif issubclass(vtype, np.timedelta64):
assert issubclass(vtype, np.integer)

elif kind == "M":
cls = DatetimeBlock
elif kind == "m":
cls = TimeDeltaBlock
elif issubclass(vtype, np.complexfloating):
elif kind == "f":
cls = FloatBlock
elif kind == "c":
cls = ComplexBlock
elif issubclass(vtype, np.integer):
elif kind == "i" or kind == "u":
cls = IntBlock
elif dtype == np.bool_:
elif kind == "b":
cls = BoolBlock
else:
cls = ObjectBlock
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,21 @@ def concatenate_block_managers(
vals = [ju.block.values for ju in join_units]

if not blk.is_extension:
values = concat_compat(vals, axis=blk.ndim - 1)
# _is_uniform_join_units ensures a single dtype, so
# we can use np.concatenate, which is more performant
# than concat_compat
values = np.concatenate(vals, axis=blk.ndim - 1)
else:
# TODO(EA2D): special-casing not needed with 2D EAs
values = concat_compat(vals)
if not isinstance(values, ExtensionArray):
values = values.reshape(1, len(values))

b = make_block(values, placement=placement, ndim=blk.ndim)
if blk.values.dtype == values.dtype:
# Fast-path
b = blk.make_block_same_class(values, placement=placement)
else:
b = make_block(values, placement=placement, ndim=blk.ndim)
else:
b = make_block(
_concatenate_join_units(join_units, concat_axis, copy=copy),
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np

from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Label
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
Expand Down Expand Up @@ -349,7 +350,7 @@ def __init__(
if len(objs) == 0:
raise ValueError("All objects passed were None")

# consolidate data & figure out what our result ndim is going to be
# figure out what our result ndim is going to be
ndims = set()
for obj in objs:
if not isinstance(obj, (ABCSeries, ABCDataFrame)):
Expand All @@ -359,8 +360,6 @@ def __init__(
)
raise TypeError(msg)

# consolidate
obj._consolidate_inplace()
ndims.add(obj.ndim)

# get the sample
Expand Down Expand Up @@ -518,7 +517,7 @@ def _get_result_dim(self) -> int:
def _get_new_axes(self) -> List[Index]:
ndim = self._get_result_dim()
return [
self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
self._get_concat_axis if i == self.bm_axis else self._get_comb_axis(i)
for i in range(ndim)
]

Expand All @@ -532,6 +531,7 @@ def _get_comb_axis(self, i: int) -> Index:
copy=self.copy,
)

@cache_readonly
def _get_concat_axis(self) -> Index:
Comment on lines +559 to 560
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it is a property, then maybe `_concat_axis`, without the `get`, would be a better name?

"""
Return index to be used along concatenation axis.
Expand Down