diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index df5cd12a479f0..7337d3a7c8f0b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2481,9 +2481,9 @@ def _can_hold_na(self): @classmethod def _concat_same_type(self, to_concat): - from pandas.core.dtypes.concat import _concat_categorical + from pandas.core.dtypes.concat import concat_categorical - return _concat_categorical(to_concat) + return concat_categorical(to_concat) def isin(self, values): """ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ac74ad5726a99..b37a0b6fae674 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -25,7 +25,6 @@ ABCIndexClass, ABCPeriodIndex, ABCRangeIndex, - ABCSparseDataFrame, ABCTimedeltaIndex, ) @@ -71,41 +70,7 @@ def get_dtype_kinds(l): return typs -def _get_series_result_type(result, objs=None): - """ - return appropriate class of Series concat - input is either dict or array-like - """ - from pandas import SparseSeries, SparseDataFrame, DataFrame - - # concat Series with axis 1 - if isinstance(result, dict): - # concat Series with axis 1 - if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()): - return SparseDataFrame - else: - return DataFrame - - # otherwise it is a SingleBlockManager (axis = 0) - return objs[0]._constructor - - -def _get_frame_result_type(result, objs): - """ - return appropriate class of DataFrame-like concat - if all blocks are sparse, return SparseDataFrame - otherwise, return 1st obj - """ - - if result.blocks and (any(isinstance(obj, ABCSparseDataFrame) for obj in objs)): - from pandas.core.sparse.api import SparseDataFrame - - return SparseDataFrame - else: - return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame)) - - -def _concat_compat(to_concat, axis=0): +def concat_compat(to_concat, axis=0): """ provide concatenation of an array of arrays each of which is a single 'normalized' dtypes (in that for example, if it's object, then it is a @@ -142,12 +107,12 @@ def is_nonempty(x): _contains_period = any(typ.startswith("period") for typ in typs) if "category" in typs: - # this must be prior to _concat_datetime, + # this must be prior to concat_datetime, # to support Categorical + datetime-like - return _concat_categorical(to_concat, axis=axis) + return concat_categorical(to_concat, axis=axis) elif _contains_datetime or "timedelta" in typs or _contains_period: - return _concat_datetime(to_concat, axis=axis, typs=typs) + return concat_datetime(to_concat, axis=axis, typs=typs) # these are mandated to handle empties as well elif "sparse" in typs: @@ -174,7 +139,7 @@ def is_nonempty(x): return np.concatenate(to_concat, axis=axis) -def _concat_categorical(to_concat, axis=0): +def concat_categorical(to_concat, axis=0): """Concatenate an object/categorical array of arrays, each of which is a single dtype @@ -214,7 +179,7 @@ def _concat_categorical(to_concat, axis=0): else np.asarray(x.astype(object)) for x in to_concat ] - result = _concat_compat(to_concat) + result = concat_compat(to_concat) if axis == 1: result = result.reshape(1, len(result)) return result @@ -404,7 +369,7 @@ def _concatenate_2d(to_concat, axis): return np.concatenate(to_concat, axis=axis) -def _concat_datetime(to_concat, axis=0, typs=None): +def concat_datetime(to_concat, axis=0, typs=None): """ provide concatenation of an datetimelike array of arrays each of which is a single M8[ns], datetimet64[ns, tz] or m8[ns] dtype diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bf94bf7ab6489..cded70813b87d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -15,6 +15,7 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.core.dtypes import concat as _concat from pandas.core.dtypes.cast import maybe_cast_to_integer_array from pandas.core.dtypes.common import ( ensure_categorical, @@ -45,7 +46,7 @@ is_unsigned_integer_dtype, pandas_dtype, ) -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDateOffset, @@ -2542,7 +2543,7 @@ def _union(self, other, sort): if len(indexer) > 0: other_diff = algos.take_nd(rvals, indexer, allow_fill=False) - result = _concat._concat_compat((lvals, other_diff)) + result = concat_compat((lvals, other_diff)) else: result = lvals @@ -2788,7 +2789,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): right_indexer = (indexer == -1).nonzero()[0] right_diff = other.values.take(right_indexer) - the_diff = _concat._concat_compat([left_diff, right_diff]) + the_diff = concat_compat([left_diff, right_diff]) if sort is None: try: the_diff = sorting.safe_sort(the_diff) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5024eebe03bb4..e9296eea2b8a3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -18,7 +18,7 @@ is_scalar, is_string_like, ) -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna @@ -608,7 +608,7 @@ def _fast_union(self, other, sort=None): left_start = left[0] loc = right.searchsorted(left_start, side="left") right_chunk = right.values[:loc] - dates = _concat._concat_compat((left.values, right_chunk)) + dates = concat_compat((left.values, right_chunk)) return self._shallow_copy(dates) # DTIs are not in the "correct" order and we want # to sort @@ -624,7 +624,7 @@ def _fast_union(self, other, sort=None): if left_end < right_end: loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] - dates = _concat._concat_compat((left.values, right_chunk)) + dates = concat_compat((left.values, right_chunk)) return self._shallow_copy(dates) else: return left diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 19d0d2341dac1..1c2a8c4f0c10c 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -18,7 +18,7 @@ is_timedelta64_ns_dtype, pandas_dtype, ) -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.missing import isna from pandas.core.accessor import delegate_names @@ -462,7 +462,7 @@ def _fast_union(self, other): if left_end < right_end: loc = right.searchsorted(left_end, side="right") right_chunk = right.values[loc:] - dates = _concat._concat_compat((left.values, right_chunk)) + dates = concat_compat((left.values, right_chunk)) return self._shallow_copy(dates) else: return left diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5aee37bc3b833..fb6974110d80b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -21,7 +21,7 @@ is_sequence, is_sparse, ) -from pandas.core.dtypes.concat import _concat_compat +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import _infer_fill_value, isna @@ -607,7 +607,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if len(self.obj._values): # GH#22717 handle casting compatibility that np.concatenate # does incorrectly - new_values = _concat_compat([self.obj._values, new_values]) + new_values = concat_compat([self.obj._values, new_values]) self.obj._data = self.obj._constructor( new_values, index=new_index, name=self.obj.name )._data diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ace57938f948c..16c93e6267da2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -49,7 +49,7 @@ is_timedelta64_dtype, pandas_dtype, ) -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.concat import concat_categorical, concat_datetime from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -2563,7 +2563,7 @@ def concat_same_type(self, to_concat, placement=None): # Instead of placing the condition here, it could also go into the # is_uniform_join_units check, but I'm not sure what is better. if len({x.dtype for x in to_concat}) > 1: - values = _concat._concat_datetime([x.values for x in to_concat]) + values = concat_datetime([x.values for x in to_concat]) placement = placement or slice(0, len(values), 1) if self.ndim > 1: @@ -3088,7 +3088,7 @@ class CategoricalBlock(ExtensionBlock): is_categorical = True _verify_integrity = True _can_hold_na = True - _concatenator = staticmethod(_concat._concat_categorical) + _concatenator = staticmethod(concat_categorical) def __init__(self, values, placement, ndim=None): from pandas.core.arrays.categorical import _maybe_to_categorical diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 9ccd4b80869a0..121c61d8d3623 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -19,7 +19,7 @@ is_sparse, is_timedelta64_dtype, ) -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos @@ -211,7 +211,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): if not self.indexers: if not self.block._can_consolidate: - # preserve these for validation in _concat_compat + # preserve these for validation in concat_compat return self.block.values if self.block.is_bool and not self.block.is_categorical: @@ -265,7 +265,7 @@ def concatenate_join_units(join_units, concat_axis, copy): else: concat_values = concat_values.copy() else: - concat_values = _concat._concat_compat(to_concat, axis=concat_axis) + concat_values = concat_compat(to_concat, axis=concat_axis) return concat_values @@ -380,7 +380,7 @@ def is_uniform_join_units(join_units): """ Check if the join units consist of blocks of uniform type that can be concatenated using Block.concat_same_type instead of the generic - concatenate_join_units (which uses `_concat._concat_compat`). + concatenate_join_units (which uses `concat_compat`). """ return ( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 394c0773409f2..344d41ed26943 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -26,7 +26,7 @@ is_scalar, is_sparse, ) -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries from pandas.core.dtypes.missing import isna @@ -532,7 +532,7 @@ def get_axe(block, qs, axes): return self.__class__(blocks, new_axes) # single block, i.e. ndim == {1} - values = _concat._concat_compat([b.values for b in blocks]) + values = concat_compat([b.values for b in blocks]) # compute the orderings of our original data if len(self.blocks) > 1: @@ -1647,11 +1647,11 @@ def concat(self, to_concat, new_axis): new_block = blocks[0].concat_same_type(blocks) else: values = [x.values for x in blocks] - values = _concat._concat_compat(values) + values = concat_compat(values) new_block = make_block(values, placement=slice(0, len(values), 1)) else: values = [x._block.values for x in to_concat] - values = _concat._concat_compat(values) + values = concat_compat(values) new_block = make_block(values, placement=slice(0, len(values), 1)) mgr = SingleBlockManager(new_block, new_axis) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 5a476dceca1f3..ca4175e4a474a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -6,7 +6,7 @@ import numpy as np -import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.generic import ABCSparseDataFrame from pandas import DataFrame, Index, MultiIndex, Series from pandas.core import common as com @@ -439,13 +439,13 @@ def get_result(self): mgr = self.objs[0]._data.concat( [x._data for x in self.objs], self.new_axes ) - cons = _concat._get_series_result_type(mgr, self.objs) + cons = _get_series_result_type(mgr, self.objs) return cons(mgr, name=name).__finalize__(self, method="concat") # combine as columns in a frame else: data = dict(zip(range(len(self.objs)), self.objs)) - cons = _concat._get_series_result_type(data) + cons = _get_series_result_type(data) index, columns = self.new_axes df = cons(data, index=index) @@ -475,7 +475,7 @@ def get_result(self): if not self.copy: new_data._consolidate_inplace() - cons = _concat._get_frame_result_type(new_data, self.objs) + cons = _get_frame_result_type(new_data, self.objs) return cons._from_axes(new_data, self.new_axes).__finalize__( self, method="concat" ) @@ -708,3 +708,37 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): return MultiIndex( levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False ) + + +def _get_series_result_type(result, objs=None): + """ + return appropriate class of Series concat + input is either dict or array-like + """ + from pandas import SparseSeries, SparseDataFrame, DataFrame + + # concat Series with axis 1 + if isinstance(result, dict): + # concat Series with axis 1 + if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()): + return SparseDataFrame + else: + return DataFrame + + # otherwise it is a SingleBlockManager (axis = 0) + return objs[0]._constructor + + +def _get_frame_result_type(result, objs): + """ + return appropriate class of DataFrame-like concat + if all blocks are sparse, return SparseDataFrame + otherwise, return 1st obj + """ + + if result.blocks and (any(isinstance(obj, ABCSparseDataFrame) for obj in objs)): + from pandas.core.sparse.api import SparseDataFrame + + return SparseDataFrame + else: + return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame)) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 9a69942a70e01..187a1913c3e15 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -171,9 +171,9 @@ def lreshape(data, groups, dropna=True, label=None): for target, names in zip(keys, values): to_concat = [data[col].values for col in names] - import pandas.core.dtypes.concat as _concat + from pandas.core.dtypes.concat import concat_compat - mdata[target] = _concat._concat_compat(to_concat) + mdata[target] = concat_compat(to_concat) pivot_cols.append(target) for col in id_cols: