diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 624e71a5cf760..5b46bee96d4b3 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,7 +1,7 @@ """ Utility functions related to concat. """ -from typing import Set, cast +from typing import cast import numpy as np @@ -14,49 +14,13 @@ is_extension_array_dtype, is_sparse, ) -from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries +from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseArray from pandas.core.construction import array, ensure_wrapped_if_datetimelike -def _get_dtype_kinds(arrays) -> Set[str]: - """ - Parameters - ---------- - arrays : list of arrays - - Returns - ------- - set[str] - A set of kinds that exist in this list of arrays. - """ - typs: Set[str] = set() - for arr in arrays: - # Note: we use dtype.kind checks because they are much more performant - # than is_foo_dtype - - dtype = arr.dtype - if not isinstance(dtype, np.dtype): - # ExtensionDtype so we get - # e.g. "categorical", "datetime64[ns, US/Central]", "Sparse[itn64, 0]" - typ = str(dtype) - elif isinstance(arr, ABCRangeIndex): - typ = "range" - elif dtype.kind == "M": - typ = "datetime" - elif dtype.kind == "m": - typ = "timedelta" - elif dtype.kind in ["O", "b"]: - typ = str(dtype) # i.e. "object", "bool" - else: - typ = dtype.kind - - typs.add(typ) - return typs - - def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: """ Helper function for `arr.astype(common_dtype)` but handling all special @@ -130,8 +94,7 @@ def is_nonempty(x) -> bool: if non_empties and axis == 0: to_concat = non_empties - typs = _get_dtype_kinds(to_concat) - _contains_datetime = any(typ.startswith("datetime") for typ in typs) + kinds = {obj.dtype.kind for obj in to_concat} all_empty = not len(non_empties) single_dtype = len({x.dtype for x in to_concat}) == 1 @@ -150,17 +113,16 @@ def is_nonempty(x) -> bool: else: return np.concatenate(to_concat) - elif _contains_datetime or "timedelta" in typs: + elif any(kind in ["m", "M"] for kind in kinds): return _concat_datetime(to_concat, axis=axis) elif all_empty: # we have all empties, but may need to coerce the result dtype to # object if we have non-numeric type operands (numpy would otherwise # cast this to float) - typs = _get_dtype_kinds(to_concat) - if len(typs) != 1: + if len(kinds) != 1: - if not len(typs - {"i", "u", "f"}) or not len(typs - {"bool", "i", "u"}): + if not len(kinds - {"i", "u", "f"}) or not len(kinds - {"b", "i", "u"}): # let numpy coerce pass else: diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index 53d53e35c6eb5..a749955d35494 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -3,83 +3,10 @@ import pandas.core.dtypes.concat as _concat import pandas as pd -from pandas import DatetimeIndex, Period, PeriodIndex, Series, TimedeltaIndex +from pandas import Series import pandas._testing as tm -@pytest.mark.parametrize( - "to_concat, expected", - [ - # int/float/str - ([["a"], [1, 2]], ["i", "object"]), - ([[3, 4], [1, 2]], ["i"]), - ([[3, 4], [1, 2.1]], ["i", "f"]), - # datetimelike - ([DatetimeIndex(["2011-01-01"]), DatetimeIndex(["2011-01-02"])], ["datetime"]), - ([TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 days"])], ["timedelta"]), - # datetimelike object - ( - [ - DatetimeIndex(["2011-01-01"]), - DatetimeIndex(["2011-01-02"], tz="US/Eastern"), - ], - ["datetime", "datetime64[ns, US/Eastern]"], - ), - ( - [ - DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"), - DatetimeIndex(["2011-01-02"], tz="US/Eastern"), - ], - ["datetime64[ns, Asia/Tokyo]", "datetime64[ns, US/Eastern]"], - ), - ([TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 hours"])], ["timedelta"]), - ( - [ - DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"), - TimedeltaIndex(["1 days"]), - ], - ["datetime64[ns, Asia/Tokyo]", "timedelta"], - ), - ], -) -def test_get_dtype_kinds(index_or_series, to_concat, expected): - to_concat_klass = [index_or_series(c) for c in to_concat] - result = _concat._get_dtype_kinds(to_concat_klass) - assert result == set(expected) - - -@pytest.mark.parametrize( - "to_concat, expected", - [ - ( - [PeriodIndex(["2011-01"], freq="M"), PeriodIndex(["2011-01"], freq="M")], - ["period[M]"], - ), - ( - [ - Series([Period("2011-01", freq="M")]), - Series([Period("2011-02", freq="M")]), - ], - ["period[M]"], - ), - ( - [PeriodIndex(["2011-01"], freq="M"), PeriodIndex(["2011-01"], freq="D")], - ["period[M]", "period[D]"], - ), - ( - [ - Series([Period("2011-01", freq="M")]), - Series([Period("2011-02", freq="D")]), - ], - ["period[M]", "period[D]"], - ), - ], -) -def test_get_dtype_kinds_period(to_concat, expected): - result = _concat._get_dtype_kinds(to_concat) - assert result == set(expected) - - def test_concat_mismatched_categoricals_with_empty(): # concat_compat behavior on series._values should match pd.concat on series ser1 = Series(["a", "b", "c"], dtype="category")