REF: simplify concat_datetime #33526

Merged (3 commits) on Apr 15, 2020
11 changes: 6 additions & 5 deletions pandas/core/arrays/datetimelike.py
@@ -723,7 +723,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
         return type(self)(new_values, dtype=self.dtype)

     @classmethod
-    def _concat_same_type(cls, to_concat):
+    def _concat_same_type(cls, to_concat, axis: int = 0):

         # do not pass tz to set because tzlocal cannot be hashed
         dtypes = {str(x.dtype) for x in to_concat}
@@ -733,14 +733,15 @@ def _concat_same_type(cls, to_concat):
         obj = to_concat[0]
         dtype = obj.dtype

-        values = np.concatenate([x.asi8 for x in to_concat])
+        i8values = [x.asi8 for x in to_concat]
+        values = np.concatenate(i8values, axis=axis)

-        if is_period_dtype(to_concat[0].dtype):
+        new_freq = None
+        if is_period_dtype(dtype):
             new_freq = obj.freq
-        else:
+        elif axis == 0:
             # GH 3232: If the concat result is evenly spaced, we can retain the
             #  original frequency
-            new_freq = None
             to_concat = [x for x in to_concat if len(x)]

             if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
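
The new axis keyword lets higher-level (block) concatenation call _concat_same_type directly, and the GH 3232 frequency-retention branch now runs only for axis == 0. A minimal sketch of the intended behaviour, assuming the post-PR signature; the ranges, the "H" freq, and the use of the private ._data / DatetimeArray attributes are illustrative only, not taken from the PR's tests:

import pandas as pd

# Two contiguous hourly pieces with the same freq; ._data exposes the
# DatetimeArray backing each DatetimeIndex.
left = pd.date_range("2020-01-01 00:00", periods=3, freq="H")._data
right = pd.date_range("2020-01-01 03:00", periods=3, freq="H")._data

# axis=0 is the only case where the evenly-spaced check may retain the freq.
result = type(left)._concat_same_type([left, right], axis=0)
print(result.freq)  # expected <Hour> when the pieces line up evenly, else None
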
93 changes: 26 additions & 67 deletions pandas/core/dtypes/concat.py
@@ -4,11 +4,7 @@

 import numpy as np

-from pandas._libs import tslib, tslibs
-
 from pandas.core.dtypes.common import (
-    DT64NS_DTYPE,
-    TD64NS_DTYPE,
     is_bool_dtype,
     is_categorical_dtype,
     is_datetime64_dtype,
@@ -19,13 +15,7 @@
     is_sparse,
     is_timedelta64_dtype,
 )
-from pandas.core.dtypes.generic import (
-    ABCCategoricalIndex,
-    ABCDatetimeArray,
-    ABCIndexClass,
-    ABCRangeIndex,
-    ABCSeries,
-)
+from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries


 def get_dtype_kinds(l):
@@ -390,70 +380,39 @@ def concat_datetime(to_concat, axis=0, typs=None):
     if typs is None:
         typs = get_dtype_kinds(to_concat)

-    # multiple types, need to coerce to object
-    if len(typs) != 1:
-        return _concatenate_2d(
-            [_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
-        )
-
-    # must be single dtype
-    if any(typ.startswith("datetime") for typ in typs):
-
-        if "datetime" in typs:
-            to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
-            return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
-        else:
-            # when to_concat has different tz, len(typs) > 1.
-            # thus no need to care
-            return _concat_datetimetz(to_concat)
-
-    elif "timedelta" in typs:
-        return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
-            TD64NS_DTYPE
-        )
-
-    elif any(typ.startswith("period") for typ in typs):
-        assert len(typs) == 1
-        cls = to_concat[0]
-        new_values = cls._concat_same_type(to_concat)
-        return new_values
-
-
-def _convert_datetimelike_to_object(x):
-    # coerce datetimelike array to object dtype
-
-    # if dtype is of datetimetz or timezone
-    if x.dtype.kind == DT64NS_DTYPE.kind:
-        if getattr(x, "tz", None) is not None:
-            x = np.asarray(x.astype(object))
-        else:
-            shape = x.shape
-            x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
-            x = x.reshape(shape)
-
-    elif x.dtype == TD64NS_DTYPE:
-        shape = x.shape
-        x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
-        x = x.reshape(shape)
-
-    return x
+    to_concat = [_wrap_datetimelike(x) for x in to_concat]
+    single_dtype = len({x.dtype for x in to_concat}) == 1
+
+    # multiple types, need to coerce to object
+    if not single_dtype:
+        # wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
+        return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)
+
+    if axis == 1:
+        # TODO(EA2D): kludge not necessary with 2D EAs
+        to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
+
+    result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
+
+    if result.ndim == 2 and is_extension_array_dtype(result.dtype):
+        # TODO(EA2D): kludge not necessary with 2D EAs
+        assert result.shape[0] == 1
+        result = result[0]
+    return result


-def _concat_datetimetz(to_concat, name=None):
+def _wrap_datetimelike(arr):
     """
-    concat DatetimeIndex with the same tz
-    all inputs must be DatetimeIndex
-    it is used in DatetimeIndex.append also
+    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
+
+    DTA/TDA handle .astype(object) correctly.
     """
-    # Right now, internals will pass a List[DatetimeArray] here
-    # for reductions like quantile. I would like to disentangle
-    # all this before we get here.
-    sample = to_concat[0]
-
-    if isinstance(sample, ABCIndexClass):
-        return sample._concat_same_dtype(to_concat, name=name)
-    elif isinstance(sample, ABCDatetimeArray):
-        return sample._concat_same_type(to_concat)
+    from pandas.core.construction import array as pd_array, extract_array
+
+    arr = extract_array(arr, extract_numpy=True)
+    if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
+        arr = pd_array(arr)
+    return arr


 def _concat_sparse(to_concat, axis=0, typs=None):
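
The rewritten concat_datetime no longer hand-rolls i8 views and tz special cases: inputs are first wrapped via _wrap_datetimelike, mixed dtypes fall back to object-dtype concatenation, and everything else dispatches to _concat_same_type. A rough illustration of why the wrapping step makes the object fallback uniform, assuming the public pandas.array constructor (the same function the pd_array alias points to); the sample values are illustrative only:

import numpy as np
import pandas as pd

# A raw datetime64 ndarray and a tz-aware DatetimeArray have different dtypes,
# so concat_datetime would take the object-dtype path for this pair.
raw = np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]")
aware = pd.array(pd.to_datetime(["2020-01-01", "2020-01-02"]).tz_localize("UTC"))

# Wrapping the ndarray in a DatetimeArray makes astype(object) produce
# Timestamp objects for both operands, matching the tz-aware side.
wrapped = pd.array(raw)
print(type(wrapped.astype(object)[0]))  # Timestamp
print(type(aware.astype(object)[0]))    # Timestamp
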
8 changes: 4 additions & 4 deletions pandas/core/indexes/datetimelike.py
@@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
             left, right = self, other
             left_start = left[0]
             loc = right.searchsorted(left_start, side="left")
-            right_chunk = right.values[:loc]
-            dates = concat_compat((left.values, right_chunk))
+            right_chunk = right._values[:loc]
+            dates = concat_compat([left._values, right_chunk])
             result = self._shallow_copy(dates)
             result._set_freq("infer")
             # TODO: can we infer that it has self.freq?
@@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
         # concatenate
         if left_end < right_end:
             loc = right.searchsorted(left_end, side="right")
-            right_chunk = right.values[loc:]
-            dates = concat_compat((left.values, right_chunk))
+            right_chunk = right._values[loc:]
+            dates = concat_compat([left._values, right_chunk])
             result = self._shallow_copy(dates)
             result._set_freq("infer")
             # TODO: can we infer that it has self.freq?
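
The switch from .values to ._values in _fast_union matters for tz-aware indexes: .values on a tz-aware DatetimeIndex drops to a tz-naive datetime64[ns] ndarray, while ._values returns the tz-preserving DatetimeArray that the new concat_compat/concat_datetime path expects. A small sketch of the difference (the index below is illustrative, not from the PR):

import pandas as pd

idx = pd.date_range("2020-01-01", periods=2, freq="D", tz="US/Eastern")

print(idx.values.dtype)   # datetime64[ns] -- the timezone is lost
print(idx._values.dtype)  # datetime64[ns, US/Eastern] -- DatetimeArray keeps it
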