Skip to content

Commit c570c51

Browse files
jbrockmendel authored and quintusdias committed
REF: de-privatize dtypes.concat functions (pandas-dev#27499)
* de-privatize _concat_compat
* de-privatize _concat_categorical
* de-privatize _concat_datetime
* move private functions to the one place they are used
1 parent ea2b810 commit c570c51

File tree

11 files changed

+71
-71
lines changed

11 files changed

+71
-71
lines changed

pandas/core/arrays/categorical.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2480,9 +2480,9 @@ def _can_hold_na(self):
24802480

24812481
@classmethod
24822482
def _concat_same_type(self, to_concat):
2483-
from pandas.core.dtypes.concat import _concat_categorical
2483+
from pandas.core.dtypes.concat import concat_categorical
24842484

2485-
return _concat_categorical(to_concat)
2485+
return concat_categorical(to_concat)
24862486

24872487
def isin(self, values):
24882488
"""

pandas/core/dtypes/concat.py

+7 -42
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
ABCIndexClass,
2626
ABCPeriodIndex,
2727
ABCRangeIndex,
28-
ABCSparseDataFrame,
2928
ABCTimedeltaIndex,
3029
)
3130

@@ -71,41 +70,7 @@ def get_dtype_kinds(l):
7170
return typs
7271

7372

74-
def _get_series_result_type(result, objs=None):
75-
"""
76-
return appropriate class of Series concat
77-
input is either dict or array-like
78-
"""
79-
from pandas import SparseSeries, SparseDataFrame, DataFrame
80-
81-
# concat Series with axis 1
82-
if isinstance(result, dict):
83-
# concat Series with axis 1
84-
if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()):
85-
return SparseDataFrame
86-
else:
87-
return DataFrame
88-
89-
# otherwise it is a SingleBlockManager (axis = 0)
90-
return objs[0]._constructor
91-
92-
93-
def _get_frame_result_type(result, objs):
94-
"""
95-
return appropriate class of DataFrame-like concat
96-
if all blocks are sparse, return SparseDataFrame
97-
otherwise, return 1st obj
98-
"""
99-
100-
if result.blocks and (any(isinstance(obj, ABCSparseDataFrame) for obj in objs)):
101-
from pandas.core.sparse.api import SparseDataFrame
102-
103-
return SparseDataFrame
104-
else:
105-
return next(obj for obj in objs if not isinstance(obj, ABCSparseDataFrame))
106-
107-
108-
def _concat_compat(to_concat, axis=0):
73+
def concat_compat(to_concat, axis=0):
10974
"""
11075
provide concatenation of an array of arrays each of which is a single
11176
'normalized' dtypes (in that for example, if it's object, then it is a
@@ -142,12 +107,12 @@ def is_nonempty(x):
142107
_contains_period = any(typ.startswith("period") for typ in typs)
143108

144109
if "category" in typs:
145-
# this must be prior to _concat_datetime,
110+
# this must be prior to concat_datetime,
146111
# to support Categorical + datetime-like
147-
return _concat_categorical(to_concat, axis=axis)
112+
return concat_categorical(to_concat, axis=axis)
148113

149114
elif _contains_datetime or "timedelta" in typs or _contains_period:
150-
return _concat_datetime(to_concat, axis=axis, typs=typs)
115+
return concat_datetime(to_concat, axis=axis, typs=typs)
151116

152117
# these are mandated to handle empties as well
153118
elif "sparse" in typs:
@@ -174,7 +139,7 @@ def is_nonempty(x):
174139
return np.concatenate(to_concat, axis=axis)
175140

176141

177-
def _concat_categorical(to_concat, axis=0):
142+
def concat_categorical(to_concat, axis=0):
178143
"""Concatenate an object/categorical array of arrays, each of which is a
179144
single dtype
180145
@@ -214,7 +179,7 @@ def _concat_categorical(to_concat, axis=0):
214179
else np.asarray(x.astype(object))
215180
for x in to_concat
216181
]
217-
result = _concat_compat(to_concat)
182+
result = concat_compat(to_concat)
218183
if axis == 1:
219184
result = result.reshape(1, len(result))
220185
return result
@@ -400,7 +365,7 @@ def _concatenate_2d(to_concat, axis):
400365
return np.concatenate(to_concat, axis=axis)
401366

402367

403-
def _concat_datetime(to_concat, axis=0, typs=None):
368+
def concat_datetime(to_concat, axis=0, typs=None):
404369
"""
405370
provide concatenation of an datetimelike array of arrays each of which is a
406371
single M8[ns], datetimet64[ns, tz] or m8[ns] dtype

pandas/core/indexes/base.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
from pandas.compat.numpy import function as nv
1616
from pandas.util._decorators import Appender, Substitution, cache_readonly
1717

18+
from pandas.core.dtypes import concat as _concat
1819
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
1920
from pandas.core.dtypes.common import (
2021
ensure_categorical,
@@ -45,7 +46,7 @@
4546
is_unsigned_integer_dtype,
4647
pandas_dtype,
4748
)
48-
import pandas.core.dtypes.concat as _concat
49+
from pandas.core.dtypes.concat import concat_compat
4950
from pandas.core.dtypes.generic import (
5051
ABCDataFrame,
5152
ABCDateOffset,
@@ -2540,7 +2541,7 @@ def _union(self, other, sort):
25402541

25412542
if len(indexer) > 0:
25422543
other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
2543-
result = _concat._concat_compat((lvals, other_diff))
2544+
result = concat_compat((lvals, other_diff))
25442545

25452546
else:
25462547
result = lvals
@@ -2786,7 +2787,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
27862787
right_indexer = (indexer == -1).nonzero()[0]
27872788
right_diff = other.values.take(right_indexer)
27882789

2789-
the_diff = _concat._concat_compat([left_diff, right_diff])
2790+
the_diff = concat_compat([left_diff, right_diff])
27902791
if sort is None:
27912792
try:
27922793
the_diff = sorting.safe_sort(the_diff)

pandas/core/indexes/datetimes.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
is_scalar,
1919
is_string_like,
2020
)
21-
import pandas.core.dtypes.concat as _concat
21+
from pandas.core.dtypes.concat import concat_compat
2222
from pandas.core.dtypes.dtypes import DatetimeTZDtype
2323
from pandas.core.dtypes.missing import isna
2424

@@ -608,7 +608,7 @@ def _fast_union(self, other, sort=None):
608608
left_start = left[0]
609609
loc = right.searchsorted(left_start, side="left")
610610
right_chunk = right.values[:loc]
611-
dates = _concat._concat_compat((left.values, right_chunk))
611+
dates = concat_compat((left.values, right_chunk))
612612
return self._shallow_copy(dates)
613613
# DTIs are not in the "correct" order and we want
614614
# to sort
@@ -624,7 +624,7 @@ def _fast_union(self, other, sort=None):
624624
if left_end < right_end:
625625
loc = right.searchsorted(left_end, side="right")
626626
right_chunk = right.values[loc:]
627-
dates = _concat._concat_compat((left.values, right_chunk))
627+
dates = concat_compat((left.values, right_chunk))
628628
return self._shallow_copy(dates)
629629
else:
630630
return left

pandas/core/indexes/timedeltas.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
is_timedelta64_ns_dtype,
1919
pandas_dtype,
2020
)
21-
import pandas.core.dtypes.concat as _concat
21+
from pandas.core.dtypes.concat import concat_compat
2222
from pandas.core.dtypes.missing import isna
2323

2424
from pandas.core.accessor import delegate_names
@@ -462,7 +462,7 @@ def _fast_union(self, other):
462462
if left_end < right_end:
463463
loc = right.searchsorted(left_end, side="right")
464464
right_chunk = right.values[loc:]
465-
dates = _concat._concat_compat((left.values, right_chunk))
465+
dates = concat_compat((left.values, right_chunk))
466466
return self._shallow_copy(dates)
467467
else:
468468
return left

pandas/core/indexing.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
is_sequence,
2222
is_sparse,
2323
)
24-
from pandas.core.dtypes.concat import _concat_compat
24+
from pandas.core.dtypes.concat import concat_compat
2525
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
2626
from pandas.core.dtypes.missing import _infer_fill_value, isna
2727

@@ -607,7 +607,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
607607
if len(self.obj._values):
608608
# GH#22717 handle casting compatibility that np.concatenate
609609
# does incorrectly
610-
new_values = _concat_compat([self.obj._values, new_values])
610+
new_values = concat_compat([self.obj._values, new_values])
611611
self.obj._data = self.obj._constructor(
612612
new_values, index=new_index, name=self.obj.name
613613
)._data

pandas/core/internals/blocks.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@
4949
is_timedelta64_dtype,
5050
pandas_dtype,
5151
)
52-
import pandas.core.dtypes.concat as _concat
52+
from pandas.core.dtypes.concat import concat_categorical, concat_datetime
5353
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
5454
from pandas.core.dtypes.generic import (
5555
ABCDataFrame,
@@ -2563,7 +2563,7 @@ def concat_same_type(self, to_concat, placement=None):
25632563
# Instead of placing the condition here, it could also go into the
25642564
# is_uniform_join_units check, but I'm not sure what is better.
25652565
if len({x.dtype for x in to_concat}) > 1:
2566-
values = _concat._concat_datetime([x.values for x in to_concat])
2566+
values = concat_datetime([x.values for x in to_concat])
25672567
placement = placement or slice(0, len(values), 1)
25682568

25692569
if self.ndim > 1:
@@ -3082,7 +3082,7 @@ class CategoricalBlock(ExtensionBlock):
30823082
is_categorical = True
30833083
_verify_integrity = True
30843084
_can_hold_na = True
3085-
_concatenator = staticmethod(_concat._concat_categorical)
3085+
_concatenator = staticmethod(concat_categorical)
30863086

30873087
def __init__(self, values, placement, ndim=None):
30883088
from pandas.core.arrays.categorical import _maybe_to_categorical

pandas/core/internals/concat.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
is_sparse,
2020
is_timedelta64_dtype,
2121
)
22-
import pandas.core.dtypes.concat as _concat
22+
from pandas.core.dtypes.concat import concat_compat
2323
from pandas.core.dtypes.missing import isna
2424

2525
import pandas.core.algorithms as algos
@@ -211,7 +211,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
211211

212212
if not self.indexers:
213213
if not self.block._can_consolidate:
214-
# preserve these for validation in _concat_compat
214+
# preserve these for validation in concat_compat
215215
return self.block.values
216216

217217
if self.block.is_bool and not self.block.is_categorical:
@@ -265,7 +265,7 @@ def concatenate_join_units(join_units, concat_axis, copy):
265265
else:
266266
concat_values = concat_values.copy()
267267
else:
268-
concat_values = _concat._concat_compat(to_concat, axis=concat_axis)
268+
concat_values = concat_compat(to_concat, axis=concat_axis)
269269

270270
return concat_values
271271

@@ -380,7 +380,7 @@ def is_uniform_join_units(join_units):
380380
"""
381381
Check if the join units consist of blocks of uniform type that can
382382
be concatenated using Block.concat_same_type instead of the generic
383-
concatenate_join_units (which uses `_concat._concat_compat`).
383+
concatenate_join_units (which uses `concat_compat`).
384384
385385
"""
386386
return (

pandas/core/internals/managers.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
is_scalar,
2727
is_sparse,
2828
)
29-
import pandas.core.dtypes.concat as _concat
29+
from pandas.core.dtypes.concat import concat_compat
3030
from pandas.core.dtypes.dtypes import ExtensionDtype
3131
from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries
3232
from pandas.core.dtypes.missing import isna
@@ -532,7 +532,7 @@ def get_axe(block, qs, axes):
532532
return self.__class__(blocks, new_axes)
533533

534534
# single block, i.e. ndim == {1}
535-
values = _concat._concat_compat([b.values for b in blocks])
535+
values = concat_compat([b.values for b in blocks])
536536

537537
# compute the orderings of our original data
538538
if len(self.blocks) > 1:
@@ -1647,11 +1647,11 @@ def concat(self, to_concat, new_axis):
16471647
new_block = blocks[0].concat_same_type(blocks)
16481648
else:
16491649
values = [x.values for x in blocks]
1650-
values = _concat._concat_compat(values)
1650+
values = concat_compat(values)
16511651
new_block = make_block(values, placement=slice(0, len(values), 1))
16521652
else:
16531653
values = [x._block.values for x in to_concat]
1654-
values = _concat._concat_compat(values)
1654+
values = concat_compat(values)
16551655
new_block = make_block(values, placement=slice(0, len(values), 1))
16561656

16571657
mgr = SingleBlockManager(new_block, new_axis)

pandas/core/reshape/concat.py

+38 -4
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
import numpy as np
88

9-
import pandas.core.dtypes.concat as _concat
9+
from pandas.core.dtypes.generic import ABCSparseDataFrame
1010

1111
from pandas import DataFrame, Index, MultiIndex, Series
1212
from pandas.core import common as com
@@ -439,13 +439,13 @@ def get_result(self):
439439
mgr = self.objs[0]._data.concat(
440440
[x._data for x in self.objs], self.new_axes
441441
)
442-
cons = _concat._get_series_result_type(mgr, self.objs)
442+
cons = _get_series_result_type(mgr, self.objs)
443443
return cons(mgr, name=name).__finalize__(self, method="concat")
444444

445445
# combine as columns in a frame
446446
else:
447447
data = dict(zip(range(len(self.objs)), self.objs))
448-
cons = _concat._get_series_result_type(data)
448+
cons = _get_series_result_type(data)
449449

450450
index, columns = self.new_axes
451451
df = cons(data, index=index)
@@ -475,7 +475,7 @@ def get_result(self):
475475
if not self.copy:
476476
new_data._consolidate_inplace()
477477

478-
cons = _concat._get_frame_result_type(new_data, self.objs)
478+
cons = _get_frame_result_type(new_data, self.objs)
479479
return cons._from_axes(new_data, self.new_axes).__finalize__(
480480
self, method="concat"
481481
)
@@ -708,3 +708,37 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
708708
return MultiIndex(
709709
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
710710
)
711+
712+
713+
def _get_series_result_type(result, objs=None):
714+
"""
715+
return appropriate class of Series concat
716+
input is either dict or array-like
717+
"""
718+
from pandas import SparseSeries, SparseDataFrame, DataFrame
719+
720+
# concat Series with axis 1
721+
if isinstance(result, dict):
722+
# concat Series with axis 1
723+
if all(isinstance(c, (SparseSeries, SparseDataFrame)) for c in result.values()):
724+
return SparseDataFrame
725+
else:
726+
return DataFrame
727+
728+
# otherwise it is a SingleBlockManager (axis = 0)
729+
return objs[0]._constructor
730+
731+
732+
def _get_frame_result_type(result, objs):
    """
    Pick what the result of a DataFrame-like concat should be built from.

    Returns the ``SparseDataFrame`` class when ``result.blocks`` is
    non-empty and at least one of ``objs`` is a sparse frame; otherwise
    returns the first of ``objs`` that is not a sparse frame.
    """
    if not result.blocks or not any(
        isinstance(candidate, ABCSparseDataFrame) for candidate in objs
    ):
        # No sparse input (or nothing in the result): hand back the first
        # non-sparse object among the inputs.
        return next(
            candidate
            for candidate in objs
            if not isinstance(candidate, ABCSparseDataFrame)
        )

    from pandas.core.sparse.api import SparseDataFrame

    return SparseDataFrame

pandas/core/reshape/melt.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -171,9 +171,9 @@ def lreshape(data, groups, dropna=True, label=None):
171171
for target, names in zip(keys, values):
172172
to_concat = [data[col].values for col in names]
173173

174-
import pandas.core.dtypes.concat as _concat
174+
from pandas.core.dtypes.concat import concat_compat
175175

176-
mdata[target] = _concat._concat_compat(to_concat)
176+
mdata[target] = concat_compat(to_concat)
177177
pivot_cols.append(target)
178178

179179
for col in id_cols:

0 commit comments

Comments
 (0)