Skip to content

Commit d106b81

Browse files
authored
REF: simplify concat_datetime (#33526)
1 parent 7a1d715 commit d106b81

File tree

3 files changed: 36 additions (+36) and 76 deletions (-76).

pandas/core/arrays/datetimelike.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -723,7 +723,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
723723
return type(self)(new_values, dtype=self.dtype)
724724

725725
@classmethod
726-
def _concat_same_type(cls, to_concat):
726+
def _concat_same_type(cls, to_concat, axis: int = 0):
727727

728728
# do not pass tz to set because tzlocal cannot be hashed
729729
dtypes = {str(x.dtype) for x in to_concat}
@@ -733,14 +733,15 @@ def _concat_same_type(cls, to_concat):
733733
obj = to_concat[0]
734734
dtype = obj.dtype
735735

736-
values = np.concatenate([x.asi8 for x in to_concat])
736+
i8values = [x.asi8 for x in to_concat]
737+
values = np.concatenate(i8values, axis=axis)
737738

738-
if is_period_dtype(to_concat[0].dtype):
739+
new_freq = None
740+
if is_period_dtype(dtype):
739741
new_freq = obj.freq
740-
else:
742+
elif axis == 0:
741743
# GH 3232: If the concat result is evenly spaced, we can retain the
742744
# original frequency
743-
new_freq = None
744745
to_concat = [x for x in to_concat if len(x)]
745746

746747
if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):

pandas/core/dtypes/concat.py

+26 -67
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,7 @@
44

55
import numpy as np
66

7-
from pandas._libs import tslib, tslibs
8-
97
from pandas.core.dtypes.common import (
10-
DT64NS_DTYPE,
11-
TD64NS_DTYPE,
128
is_bool_dtype,
139
is_categorical_dtype,
1410
is_datetime64_dtype,
@@ -19,13 +15,7 @@
1915
is_sparse,
2016
is_timedelta64_dtype,
2117
)
22-
from pandas.core.dtypes.generic import (
23-
ABCCategoricalIndex,
24-
ABCDatetimeArray,
25-
ABCIndexClass,
26-
ABCRangeIndex,
27-
ABCSeries,
28-
)
18+
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
2919

3020

3121
def get_dtype_kinds(l):
@@ -390,70 +380,39 @@ def concat_datetime(to_concat, axis=0, typs=None):
390380
if typs is None:
391381
typs = get_dtype_kinds(to_concat)
392382

393-
# multiple types, need to coerce to object
394-
if len(typs) != 1:
395-
return _concatenate_2d(
396-
[_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
397-
)
398-
399-
# must be single dtype
400-
if any(typ.startswith("datetime") for typ in typs):
401-
402-
if "datetime" in typs:
403-
to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
404-
return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
405-
else:
406-
# when to_concat has different tz, len(typs) > 1.
407-
# thus no need to care
408-
return _concat_datetimetz(to_concat)
409-
410-
elif "timedelta" in typs:
411-
return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
412-
TD64NS_DTYPE
413-
)
414-
415-
elif any(typ.startswith("period") for typ in typs):
416-
assert len(typs) == 1
417-
cls = to_concat[0]
418-
new_values = cls._concat_same_type(to_concat)
419-
return new_values
420-
383+
to_concat = [_wrap_datetimelike(x) for x in to_concat]
384+
single_dtype = len({x.dtype for x in to_concat}) == 1
421385

422-
def _convert_datetimelike_to_object(x):
423-
# coerce datetimelike array to object dtype
386+
# multiple types, need to coerce to object
387+
if not single_dtype:
388+
# wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
389+
return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)
424390

425-
# if dtype is of datetimetz or timezone
426-
if x.dtype.kind == DT64NS_DTYPE.kind:
427-
if getattr(x, "tz", None) is not None:
428-
x = np.asarray(x.astype(object))
429-
else:
430-
shape = x.shape
431-
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
432-
x = x.reshape(shape)
391+
if axis == 1:
392+
# TODO(EA2D): kludge not necessary with 2D EAs
393+
to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
433394

434-
elif x.dtype == TD64NS_DTYPE:
435-
shape = x.shape
436-
x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
437-
x = x.reshape(shape)
395+
result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
438396

439-
return x
397+
if result.ndim == 2 and is_extension_array_dtype(result.dtype):
398+
# TODO(EA2D): kludge not necessary with 2D EAs
399+
assert result.shape[0] == 1
400+
result = result[0]
401+
return result
440402

441403

442-
def _concat_datetimetz(to_concat, name=None):
404+
def _wrap_datetimelike(arr):
443405
"""
444-
concat DatetimeIndex with the same tz
445-
all inputs must be DatetimeIndex
446-
it is used in DatetimeIndex.append also
406+
Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
407+
408+
DTA/TDA handle .astype(object) correctly.
447409
"""
448-
# Right now, internals will pass a List[DatetimeArray] here
449-
# for reductions like quantile. I would like to disentangle
450-
# all this before we get here.
451-
sample = to_concat[0]
452-
453-
if isinstance(sample, ABCIndexClass):
454-
return sample._concat_same_dtype(to_concat, name=name)
455-
elif isinstance(sample, ABCDatetimeArray):
456-
return sample._concat_same_type(to_concat)
410+
from pandas.core.construction import array as pd_array, extract_array
411+
412+
arr = extract_array(arr, extract_numpy=True)
413+
if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
414+
arr = pd_array(arr)
415+
return arr
457416

458417

459418
def _concat_sparse(to_concat, axis=0, typs=None):

pandas/core/indexes/datetimelike.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
778778
left, right = self, other
779779
left_start = left[0]
780780
loc = right.searchsorted(left_start, side="left")
781-
right_chunk = right.values[:loc]
782-
dates = concat_compat((left.values, right_chunk))
781+
right_chunk = right._values[:loc]
782+
dates = concat_compat([left._values, right_chunk])
783783
result = self._shallow_copy(dates)
784784
result._set_freq("infer")
785785
# TODO: can we infer that it has self.freq?
@@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
793793
# concatenate
794794
if left_end < right_end:
795795
loc = right.searchsorted(left_end, side="right")
796-
right_chunk = right.values[loc:]
797-
dates = concat_compat((left.values, right_chunk))
796+
right_chunk = right._values[loc:]
797+
dates = concat_compat([left._values, right_chunk])
798798
result = self._shallow_copy(dates)
799799
result._set_freq("infer")
800800
# TODO: can we infer that it has self.freq?

0 commit comments

Comments
 (0)