Skip to content

CLN: dtypes.concat #39572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 6 additions & 44 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Utility functions related to concat.
"""
from typing import Set, cast
from typing import cast

import numpy as np

Expand All @@ -14,49 +14,13 @@
is_extension_array_dtype,
is_sparse,
)
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries

from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.sparse import SparseArray
from pandas.core.construction import array, ensure_wrapped_if_datetimelike


def _get_dtype_kinds(arrays) -> Set[str]:
"""
Parameters
----------
arrays : list of arrays

Returns
-------
set[str]
A set of kinds that exist in this list of arrays.
"""
typs: Set[str] = set()
for arr in arrays:
# Note: we use dtype.kind checks because they are much more performant
# than is_foo_dtype

dtype = arr.dtype
if not isinstance(dtype, np.dtype):
# ExtensionDtype so we get
# e.g. "categorical", "datetime64[ns, US/Central]", "Sparse[itn64, 0]"
typ = str(dtype)
elif isinstance(arr, ABCRangeIndex):
typ = "range"
elif dtype.kind == "M":
typ = "datetime"
elif dtype.kind == "m":
typ = "timedelta"
elif dtype.kind in ["O", "b"]:
typ = str(dtype) # i.e. "object", "bool"
else:
typ = dtype.kind

typs.add(typ)
return typs


def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
"""
Helper function for `arr.astype(common_dtype)` but handling all special
Expand Down Expand Up @@ -130,8 +94,7 @@ def is_nonempty(x) -> bool:
if non_empties and axis == 0:
to_concat = non_empties

typs = _get_dtype_kinds(to_concat)
_contains_datetime = any(typ.startswith("datetime") for typ in typs)
kinds = {obj.dtype.kind for obj in to_concat}

all_empty = not len(non_empties)
single_dtype = len({x.dtype for x in to_concat}) == 1
Expand All @@ -150,17 +113,16 @@ def is_nonempty(x) -> bool:
else:
return np.concatenate(to_concat)

elif _contains_datetime or "timedelta" in typs:
elif any(kind in ["m", "M"] for kind in kinds):
return _concat_datetime(to_concat, axis=axis)

elif all_empty:
# we have all empties, but may need to coerce the result dtype to
# object if we have non-numeric type operands (numpy would otherwise
# cast this to float)
typs = _get_dtype_kinds(to_concat)
if len(typs) != 1:
if len(kinds) != 1:

if not len(typs - {"i", "u", "f"}) or not len(typs - {"bool", "i", "u"}):
if not len(kinds - {"i", "u", "f"}) or not len(kinds - {"b", "i", "u"}):
# let numpy coerce
pass
else:
Expand Down
75 changes: 1 addition & 74 deletions pandas/tests/dtypes/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,83 +3,10 @@
import pandas.core.dtypes.concat as _concat

import pandas as pd
from pandas import DatetimeIndex, Period, PeriodIndex, Series, TimedeltaIndex
from pandas import Series
import pandas._testing as tm


@pytest.mark.parametrize(
    ["to_concat", "expected"],
    [
        # plain int / float / str inputs
        ([["a"], [1, 2]], ["i", "object"]),
        ([[3, 4], [1, 2]], ["i"]),
        ([[3, 4], [1, 2.1]], ["i", "f"]),
        # datetimelike inputs
        (
            [DatetimeIndex(["2011-01-01"]), DatetimeIndex(["2011-01-02"])],
            ["datetime"],
        ),
        (
            [TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 days"])],
            ["timedelta"],
        ),
        # datetimelike inputs mixing naive / tz-aware / timedelta
        (
            [
                DatetimeIndex(["2011-01-01"]),
                DatetimeIndex(["2011-01-02"], tz="US/Eastern"),
            ],
            ["datetime", "datetime64[ns, US/Eastern]"],
        ),
        (
            [
                DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"),
                DatetimeIndex(["2011-01-02"], tz="US/Eastern"),
            ],
            ["datetime64[ns, Asia/Tokyo]", "datetime64[ns, US/Eastern]"],
        ),
        (
            [TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 hours"])],
            ["timedelta"],
        ),
        (
            [
                DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"),
                TimedeltaIndex(["1 days"]),
            ],
            ["datetime64[ns, Asia/Tokyo]", "timedelta"],
        ),
    ],
)
def test_get_dtype_kinds(index_or_series, to_concat, expected):
    """Check the kind labels reported for each combination of inputs."""
    # wrap every raw input with the constructor supplied by the fixture
    wrapped = list(map(index_or_series, to_concat))
    assert _concat._get_dtype_kinds(wrapped) == set(expected)


@pytest.mark.parametrize(
    ["to_concat", "expected"],
    [
        # same frequency, Index inputs
        (
            [
                PeriodIndex(["2011-01"], freq="M"),
                PeriodIndex(["2011-01"], freq="M"),
            ],
            ["period[M]"],
        ),
        # same frequency, Series inputs
        (
            [
                Series([Period("2011-01", freq="M")]),
                Series([Period("2011-02", freq="M")]),
            ],
            ["period[M]"],
        ),
        # differing frequencies, Index inputs
        (
            [
                PeriodIndex(["2011-01"], freq="M"),
                PeriodIndex(["2011-01"], freq="D"),
            ],
            ["period[M]", "period[D]"],
        ),
        # differing frequencies, Series inputs
        (
            [
                Series([Period("2011-01", freq="M")]),
                Series([Period("2011-02", freq="D")]),
            ],
            ["period[M]", "period[D]"],
        ),
    ],
)
def test_get_dtype_kinds_period(to_concat, expected):
    """Check the kind labels reported for period-dtype inputs."""
    kinds = _concat._get_dtype_kinds(to_concat)
    assert kinds == set(expected)


def test_concat_mismatched_categoricals_with_empty():
# concat_compat behavior on series._values should match pd.concat on series
ser1 = Series(["a", "b", "c"], dtype="category")
Expand Down