Skip to content

Commit 4c76505

Browse files
jbrockmendel authored and jreback committed
REF: define concat classmethods in the appropriate places (#27727)
1 parent 9b1c005 commit 4c76505

File tree

5 files changed

+68
-86
lines changed

5 files changed

+68
-86
lines changed

pandas/core/dtypes/concat.py

+4 -79
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,11 @@
2020
is_timedelta64_dtype,
2121
)
2222
from pandas.core.dtypes.generic import (
23+
ABCCategoricalIndex,
2324
ABCDatetimeArray,
24-
ABCDatetimeIndex,
2525
ABCIndexClass,
26-
ABCPeriodIndex,
2726
ABCRangeIndex,
28-
ABCTimedeltaIndex,
27+
ABCSeries,
2928
)
3029

3130

@@ -285,14 +284,14 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False):
285284
[b, c, a, b]
286285
Categories (3, object): [b, c, a]
287286
"""
288-
from pandas import Index, Categorical, CategoricalIndex, Series
287+
from pandas import Index, Categorical
289288
from pandas.core.arrays.categorical import _recode_for_categories
290289

291290
if len(to_union) == 0:
292291
raise ValueError("No Categoricals to union")
293292

294293
def _maybe_unwrap(x):
295-
if isinstance(x, (CategoricalIndex, Series)):
294+
if isinstance(x, (ABCCategoricalIndex, ABCSeries)):
296295
return x.values
297296
elif isinstance(x, Categorical):
298297
return x
@@ -450,31 +449,6 @@ def _concat_datetimetz(to_concat, name=None):
450449
return sample._concat_same_type(to_concat)
451450

452451

453-
def _concat_index_same_dtype(indexes, klass=None):
454-
klass = klass if klass is not None else indexes[0].__class__
455-
return klass(np.concatenate([x._values for x in indexes]))
456-
457-
458-
def _concat_index_asobject(to_concat, name=None):
459-
"""
460-
concat all inputs as object. DatetimeIndex, TimedeltaIndex and
461-
PeriodIndex are converted to object dtype before concatenation
462-
"""
463-
from pandas import Index
464-
from pandas.core.arrays import ExtensionArray
465-
466-
klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray)
467-
to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat]
468-
469-
self = to_concat[0]
470-
attribs = self._get_attributes_dict()
471-
attribs["name"] = name
472-
473-
to_concat = [x._values if isinstance(x, Index) else x for x in to_concat]
474-
475-
return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)
476-
477-
478452
def _concat_sparse(to_concat, axis=0, typs=None):
479453
"""
480454
provide concatenation of an sparse/dense array of arrays each of which is a
@@ -505,52 +479,3 @@ def _concat_sparse(to_concat, axis=0, typs=None):
505479
]
506480

507481
return SparseArray._concat_same_type(to_concat)
508-
509-
510-
def _concat_rangeindex_same_dtype(indexes):
511-
"""
512-
Concatenates multiple RangeIndex instances. All members of "indexes" must
513-
be of type RangeIndex; result will be RangeIndex if possible, Int64Index
514-
otherwise. E.g.:
515-
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
516-
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
517-
"""
518-
from pandas import Int64Index, RangeIndex
519-
520-
start = step = next_ = None
521-
522-
# Filter the empty indexes
523-
non_empty_indexes = [obj for obj in indexes if len(obj)]
524-
525-
for obj in non_empty_indexes:
526-
rng = obj._range # type: range
527-
528-
if start is None:
529-
# This is set by the first non-empty index
530-
start = rng.start
531-
if step is None and len(rng) > 1:
532-
step = rng.step
533-
elif step is None:
534-
# First non-empty index had only one element
535-
if rng.start == start:
536-
return _concat_index_same_dtype(indexes, klass=Int64Index)
537-
step = rng.start - start
538-
539-
non_consecutive = (step != rng.step and len(rng) > 1) or (
540-
next_ is not None and rng.start != next_
541-
)
542-
if non_consecutive:
543-
return _concat_index_same_dtype(indexes, klass=Int64Index)
544-
545-
if step is not None:
546-
next_ = rng[-1] + step
547-
548-
if non_empty_indexes:
549-
# Get the stop value from "next" or alternatively
550-
# from the last non-empty index
551-
stop = non_empty_indexes[-1].stop if next_ is None else next_
552-
return RangeIndex(start, stop, step)
553-
554-
# Here all "indexes" had 0 length, i.e. were empty.
555-
# In this case return an empty range index.
556-
return RangeIndex(0, 0)

pandas/core/indexes/base.py

+14 -2
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@
5151
ABCDataFrame,
5252
ABCDateOffset,
5353
ABCDatetimeArray,
54+
ABCDatetimeIndex,
5455
ABCIndexClass,
5556
ABCMultiIndex,
5657
ABCPandasArray,
@@ -4312,14 +4313,25 @@ def _concat(self, to_concat, name):
43124313

43134314
if len(typs) == 1:
43144315
return self._concat_same_dtype(to_concat, name=name)
4315-
return _concat._concat_index_asobject(to_concat, name=name)
4316+
return Index._concat_same_dtype(self, to_concat, name=name)
43164317

43174318
def _concat_same_dtype(self, to_concat, name):
43184319
"""
43194320
Concatenate to_concat which has the same class.
43204321
"""
43214322
# must be overridden in specific classes
4322-
return _concat._concat_index_asobject(to_concat, name)
4323+
klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, ExtensionArray)
4324+
to_concat = [
4325+
x.astype(object) if isinstance(x, klasses) else x for x in to_concat
4326+
]
4327+
4328+
self = to_concat[0]
4329+
attribs = self._get_attributes_dict()
4330+
attribs["name"] = name
4331+
4332+
to_concat = [x._values if isinstance(x, Index) else x for x in to_concat]
4333+
4334+
return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)
43234335

43244336
def putmask(self, mask, value):
43254337
"""

pandas/core/indexes/numeric.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717
needs_i8_conversion,
1818
pandas_dtype,
1919
)
20-
import pandas.core.dtypes.concat as _concat
2120
from pandas.core.dtypes.generic import (
2221
ABCFloat64Index,
2322
ABCInt64Index,
@@ -129,7 +128,8 @@ def _assert_safe_casting(cls, data, subarr):
129128
pass
130129

131130
def _concat_same_dtype(self, indexes, name):
132-
return _concat._concat_index_same_dtype(indexes).rename(name)
131+
result = type(indexes[0])(np.concatenate([x._values for x in indexes]))
132+
return result.rename(name)
133133

134134
@property
135135
def is_all_dates(self):

pandas/core/indexes/range.py

+47 -2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
from pandas.compat.numpy import function as nv
1212
from pandas.util._decorators import Appender, cache_readonly
1313

14-
from pandas.core.dtypes import concat as _concat
1514
from pandas.core.dtypes.common import (
1615
ensure_platform_int,
1716
ensure_python_int,
@@ -647,7 +646,53 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
647646
return super().join(other, how, level, return_indexers, sort)
648647

649648
def _concat_same_dtype(self, indexes, name):
650-
return _concat._concat_rangeindex_same_dtype(indexes).rename(name)
649+
"""
650+
Concatenates multiple RangeIndex instances. All members of "indexes" must
651+
be of type RangeIndex; result will be RangeIndex if possible, Int64Index
652+
otherwise. E.g.:
653+
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
654+
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
655+
"""
656+
start = step = next_ = None
657+
658+
# Filter the empty indexes
659+
non_empty_indexes = [obj for obj in indexes if len(obj)]
660+
661+
for obj in non_empty_indexes:
662+
rng = obj._range # type: range
663+
664+
if start is None:
665+
# This is set by the first non-empty index
666+
start = rng.start
667+
if step is None and len(rng) > 1:
668+
step = rng.step
669+
elif step is None:
670+
# First non-empty index had only one element
671+
if rng.start == start:
672+
result = Int64Index(np.concatenate([x._values for x in indexes]))
673+
return result.rename(name)
674+
675+
step = rng.start - start
676+
677+
non_consecutive = (step != rng.step and len(rng) > 1) or (
678+
next_ is not None and rng.start != next_
679+
)
680+
if non_consecutive:
681+
result = Int64Index(np.concatenate([x._values for x in indexes]))
682+
return result.rename(name)
683+
684+
if step is not None:
685+
next_ = rng[-1] + step
686+
687+
if non_empty_indexes:
688+
# Get the stop value from "next" or alternatively
689+
# from the last non-empty index
690+
stop = non_empty_indexes[-1].stop if next_ is None else next_
691+
return RangeIndex(start, stop, step).rename(name)
692+
693+
# Here all "indexes" had 0 length, i.e. were empty.
694+
# In this case return an empty range index.
695+
return RangeIndex(0, 0).rename(name)
651696

652697
def __len__(self):
653698
"""

pandas/tests/indexes/test_category.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -411,7 +411,7 @@ def test_append(self):
411411
tm.assert_index_equal(result, expected, exact=True)
412412

413413
def test_append_to_another(self):
414-
# hits _concat_index_asobject
414+
# hits Index._concat_same_dtype
415415
fst = Index(["a", "b"])
416416
snd = CategoricalIndex(["d", "e"])
417417
result = fst.append(snd)

0 commit comments

Comments
 (0)