Skip to content

REF: put EA concat logic in _concat_arrays #33535

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
79 changes: 76 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ensure_platform_int,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dict_like,
is_dtype_equal,
is_extension_array_dtype,
Expand Down Expand Up @@ -2348,10 +2349,82 @@ def _can_hold_na(self):
return True

@classmethod
def _concat_same_type(self, to_concat):
from pandas.core.dtypes.concat import concat_categorical
def _concat_same_type(cls, to_concat):
return cls._concat_arrays(to_concat)
# TODO: lock down stricter behavior?

return concat_categorical(to_concat)
@classmethod
def _concat_same_dtype(
    cls,
    to_concat,
    axis: int = 0,
    sort_categories: bool = False,
    ignore_order: bool = False,
):
    """
    Like _concat_same_type, but with the added restriction of matching dtypes.
    """
    head = to_concat[0]
    categories = head.categories
    ordered = head.ordered

    if all(head.categories.equals(other.categories) for other in to_concat[1:]):
        # identical categories - fastpath: codes can be concatenated as-is
        new_codes = np.concatenate([cat.codes for cat in to_concat])
    else:
        # recode every trailing Categorical into the head's category order
        recoded = [
            recode_for_categories(other.codes, other.categories, head.categories)
            for other in to_concat[1:]
        ]
        new_codes = np.concatenate([head.codes, *recoded])

    if sort_categories and not ignore_order and ordered:
        raise TypeError("Cannot use sort_categories=True with ordered Categoricals")

    if sort_categories and not categories.is_monotonic_increasing:
        categories = categories.sort_values()
        indexer = categories.get_indexer(head.categories)

        new_codes = take_1d(indexer, new_codes, fill_value=-1)

    if ignore_order:
        ordered = False

    return cls(new_codes, categories=categories, ordered=ordered, fastpath=True)

@classmethod
def _concat_arrays(cls, to_concat, axis: int = 0):
    """
    Concatenate a sequence of arrays of which at least one is categorical.

    When every input is categorical with an identical dtype the result stays
    categorical; otherwise everything is coerced and concatenated as a
    plain (object or common-dtype) ndarray.
    """
    from pandas.core.dtypes.concat import concat_compat, union_categoricals

    categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)]

    # validate the categories
    if len(categoricals) == len(to_concat):
        # all inputs are categorical; union only when dtypes are identical
        head = to_concat[0]
        if all(head.is_dtype_equal(other) for other in to_concat[1:]):
            return union_categoricals(categoricals)

    # extract the categoricals & coerce to object if needed
    coerced = []
    for arr in to_concat:
        if is_categorical_dtype(arr.dtype):
            coerced.append(arr._internal_get_values())
        elif is_datetime64tz_dtype(arr):
            # astype(object) wraps values in Timestamp before np.asarray
            coerced.append(np.asarray(arr.astype(object)))
        else:
            coerced.append(np.asarray(arr).ravel())

    result = concat_compat(coerced)
    if axis == 1:
        # TODO(EA2D): this is a kludge for 1D EAs
        result = result.reshape(1, len(result))
    return result

def isin(self, values):
"""
Expand Down
25 changes: 25 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
is_datetime64tz_dtype,
is_datetime_or_timedelta_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
Expand Down Expand Up @@ -751,6 +752,30 @@ def _concat_same_type(cls, to_concat, axis: int = 0):

return cls._simple_new(values, dtype=dtype, freq=new_freq)

@classmethod
def _concat_arrays(cls, to_concat, axis: int = 0):
    """
    Concatenate a sequence of datetimelike arrays, upcasting ndarrays of
    dt64/td64 dtype first, falling back to object dtype when the inputs
    do not share a single dtype.
    """
    from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

    arrays = [maybe_upcast_datetimelike_array(x) for x in to_concat]

    dtypes = {x.dtype for x in arrays}
    if len(dtypes) == 1:
        # homogeneous dtype: delegate to _concat_same_type
        if axis == 1 and is_extension_array_dtype(arrays[0].dtype):
            # TODO(EA2D): not necessary with 2D EAs
            # NOTE: mutating axis here also (deliberately) disables the
            # reshape below for the 1D-EA case.
            axis = 0

        result = cls._concat_same_type(arrays, axis=axis)

        if axis == 1 and result.ndim == 1:
            # TODO(EA2D): not necessary with 2D EAs
            result = result.reshape(1, -1)
        return result

    # heterogeneous dtypes: coerce each piece to object and concatenate
    as_object = [x.astype(object) for x in arrays]
    if axis == 1:
        # TODO(EA2D): not necessary with 2D EAs
        as_object = [np.atleast_2d(x) for x in as_object]
    return np.concatenate(as_object, axis=axis)

def copy(self):
    """Return a copy of this array; the underlying i8 integer data is copied."""
    values = self.asi8.copy()
    return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,19 @@ def _concat_same_type(cls, to_concat):

return cls(data, sparse_index=sp_index, fill_value=fill_value)

@classmethod
def _concat_arrays(cls, to_concat, axis: int = 0):
    """
    Concatenate a sequence of sparse/dense arrays into a single SparseArray,
    wrapping any dense inputs using the first sparse input's fill_value.
    """
    sparse_inputs = [x for x in to_concat if isinstance(x, cls)]
    fill_value = sparse_inputs[0].fill_value

    # TODO: Fix join unit generation so we aren't passed this.
    wrapped = []
    for arr in to_concat:
        if isinstance(arr, cls):
            wrapped.append(arr)
        else:
            wrapped.append(cls(arr.squeeze(), fill_value=fill_value))

    return cls._concat_same_type(wrapped)

def astype(self, dtype=None, copy=True):
"""
Change the dtype of a SparseArray.
Expand Down
173 changes: 12 additions & 161 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ def is_nonempty(x) -> bool:
_contains_datetime = any(typ.startswith("datetime") for typ in typs)
_contains_period = any(typ.startswith("period") for typ in typs)

from pandas.core.arrays import Categorical, SparseArray, datetimelike as dtl
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And to make my suggestion more concrete: instead of importing Categorical here, it would be from pandas.core.arrays.categorical import _concat_arrays as concat_categorical (or whatever name we give it)

from pandas.core.ops.array_ops import maybe_upcast_datetimelike_array

to_concat = [maybe_upcast_datetimelike_array(x) for x in to_concat]

all_empty = not len(non_empties)
single_dtype = len({x.dtype for x in to_concat}) == 1
any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat)
Expand All @@ -106,14 +111,15 @@ def is_nonempty(x) -> bool:
elif "category" in typs:
# this must be prior to concat_datetime,
# to support Categorical + datetime-like
return concat_categorical(to_concat, axis=axis)
return Categorical._concat_arrays(to_concat, axis=axis)

elif _contains_datetime or "timedelta" in typs or _contains_period:
return concat_datetime(to_concat, axis=axis, typs=typs)
obj = [x for x in to_concat if isinstance(x, dtl.DatetimeLikeArrayMixin)][0]
return type(obj)._concat_arrays(to_concat, axis=axis)

# these are mandated to handle empties as well
elif "sparse" in typs:
return _concat_sparse(to_concat, axis=axis, typs=typs)
return SparseArray._concat_arrays(to_concat, axis=axis)

elif any_ea and axis == 1:
to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat]
Expand All @@ -136,52 +142,6 @@ def is_nonempty(x) -> bool:
return np.concatenate(to_concat, axis=axis)


def concat_categorical(to_concat, axis: int = 0):
    """
    Concatenate an object/categorical array of arrays, each of which is a
    single dtype.

    Parameters
    ----------
    to_concat : array of arrays
    axis : int
        Axis of concatenation; in the current implementation this is
        always 0, e.g. we only have 1D categoricals.

    Returns
    -------
    Categorical
        A single array, preserving the combined dtypes.
    """
    # we could have object blocks and categoricals here
    # if we only have a single categoricals then combine everything
    # else its a non-compat categorical
    categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)]

    # validate the categories
    if len(categoricals) == len(to_concat):
        # all inputs are categorical; union only when dtypes are identical
        head = to_concat[0]
        if all(head.is_dtype_equal(other) for other in to_concat[1:]):
            return union_categoricals(categoricals)

    # extract the categoricals & coerce to object if needed
    coerced = []
    for arr in to_concat:
        if is_categorical_dtype(arr.dtype):
            coerced.append(arr._internal_get_values())
        elif is_datetime64tz_dtype(arr):
            coerced.append(np.asarray(arr.astype(object)))
        else:
            coerced.append(np.asarray(arr).ravel())

    result = concat_compat(coerced)
    if axis == 1:
        result = result.reshape(1, len(result))
    return result


def union_categoricals(
to_union, sort_categories: bool = False, ignore_order: bool = False
):
Expand Down Expand Up @@ -309,28 +269,10 @@ def _maybe_unwrap(x):
ordered = False
if all(first.is_dtype_equal(other) for other in to_union[1:]):
# identical categories - fastpath
categories = first.categories
ordered = first.ordered

if all(first.categories.equals(other.categories) for other in to_union[1:]):
new_codes = np.concatenate([c.codes for c in to_union])
else:
codes = [first.codes] + [
recode_for_categories(other.codes, other.categories, first.categories)
for other in to_union[1:]
]
new_codes = np.concatenate(codes)
return Categorical._concat_same_dtype(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also move the whole of union_categoricals to the categorical array module?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would be my preference too, but trying to keep the already-broad scope/diff limited

to_union, sort_categories=sort_categories, ignore_order=ignore_order,
)

if sort_categories and not ignore_order and ordered:
raise TypeError("Cannot use sort_categories=True with ordered Categoricals")

if sort_categories and not categories.is_monotonic_increasing:
categories = categories.sort_values()
indexer = categories.get_indexer(first.categories)

from pandas.core.algorithms import take_1d

new_codes = take_1d(indexer, new_codes, fill_value=-1)
elif ignore_order or all(not c.ordered for c in to_union):
# different categories - union and recode
cats = first.categories.append([c.categories for c in to_union[1:]])
Expand All @@ -354,94 +296,3 @@ def _maybe_unwrap(x):
ordered = False

return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True)


def _concatenate_2d(to_concat, axis: int):
# coerce to 2d if needed & concatenate
if axis == 1:
to_concat = [np.atleast_2d(x) for x in to_concat]
return np.concatenate(to_concat, axis=axis)


def concat_datetime(to_concat, axis=0, typs=None):
    """
    Provide concatenation of a datetimelike array of arrays each of which is a
    single M8[ns], datetime64[ns, tz] or m8[ns] dtype.

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    typs : set of to_concat dtypes

    Returns
    -------
    a single array, preserving the combined dtypes
    """
    if typs is None:
        typs = get_dtype_kinds(to_concat)

    wrapped = [_wrap_datetimelike(x) for x in to_concat]

    if len({x.dtype for x in wrapped}) > 1:
        # multiple types, need to coerce to object
        # wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
        return _concatenate_2d([x.astype(object) for x in wrapped], axis=axis)

    if axis == 1:
        # TODO(EA2D): kludge not necessary with 2D EAs
        wrapped = [x if x.ndim != 1 else x.reshape(1, -1) for x in wrapped]

    result = type(wrapped[0])._concat_same_type(wrapped, axis=axis)

    if result.ndim == 2 and is_extension_array_dtype(result.dtype):
        # TODO(EA2D): kludge not necessary with 2D EAs
        assert result.shape[0] == 1
        result = result[0]
    return result


def _wrap_datetimelike(arr):
"""
Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.

DTA/TDA handle .astype(object) correctly.
"""
from pandas.core.construction import array as pd_array, extract_array

arr = extract_array(arr, extract_numpy=True)
if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
arr = pd_array(arr)
return arr


def _concat_sparse(to_concat, axis=0, typs=None):
"""
provide concatenation of an sparse/dense array of arrays each of which is a
single dtype

Parameters
----------
to_concat : array of arrays
axis : axis to provide concatenation
typs : set of to_concat dtypes

Returns
-------
a single array, preserving the combined dtypes
"""
from pandas.core.arrays import SparseArray

fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)]
fill_value = fill_values[0]

# TODO: Fix join unit generation so we aren't passed this.
to_concat = [
x
if isinstance(x, SparseArray)
else SparseArray(x.squeeze(), fill_value=fill_value)
for x in to_concat
]

return SparseArray._concat_same_type(to_concat)