Skip to content

Commit f3f90c3

Browse files
authored
REF: use concat_compat in union_with_duplicates (#44125)
1 parent 8e1160d commit f3f90c3

File tree

2 files changed

+26
-33
lines changed

2 files changed

+26
-33
lines changed

pandas/core/algorithms.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
is_timedelta64_dtype,
6262
needs_i8_conversion,
6363
)
64+
from pandas.core.dtypes.concat import concat_compat
6465
from pandas.core.dtypes.dtypes import PandasDtype
6566
from pandas.core.dtypes.generic import (
6667
ABCDatetimeArray,
@@ -1834,17 +1835,18 @@ def union_with_duplicates(lvals: ArrayLike, rvals: ArrayLike) -> ArrayLike:
18341835
-------
18351836
np.ndarray or ExtensionArray
18361837
Containing the unsorted union of both arrays.
1838+
1839+
Notes
1840+
-----
1841+
Caller is responsible for ensuring lvals.dtype == rvals.dtype.
18371842
"""
18381843
indexer = []
18391844
l_count = value_counts(lvals, dropna=False)
18401845
r_count = value_counts(rvals, dropna=False)
18411846
l_count, r_count = l_count.align(r_count, fill_value=0)
1842-
unique_array = unique(np.append(lvals, rvals))
1843-
if not isinstance(lvals, np.ndarray):
1844-
# i.e. ExtensionArray
1845-
# Note: we only get here with lvals.dtype == rvals.dtype
1846-
# TODO: are there any cases where union won't be type/dtype preserving?
1847-
unique_array = type(lvals)._from_sequence(unique_array, dtype=lvals.dtype)
1847+
unique_array = unique(concat_compat([lvals, rvals]))
1848+
unique_array = ensure_wrapped_if_datetimelike(unique_array)
1849+
18481850
for i, value in enumerate(unique_array):
18491851
indexer += [i] * int(max(l_count[value], r_count[value]))
18501852
return unique_array.take(indexer)

pandas/core/dtypes/concat.py

+18-27
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
"""
22
Utility functions related to concat.
33
"""
4-
from typing import cast
4+
from typing import (
5+
TYPE_CHECKING,
6+
cast,
7+
)
58

69
import numpy as np
710

@@ -10,7 +13,10 @@
1013
DtypeObj,
1114
)
1215

13-
from pandas.core.dtypes.cast import find_common_type
16+
from pandas.core.dtypes.cast import (
17+
astype_array,
18+
find_common_type,
19+
)
1420
from pandas.core.dtypes.common import (
1521
is_categorical_dtype,
1622
is_dtype_equal,
@@ -19,15 +25,12 @@
1925
from pandas.core.dtypes.dtypes import ExtensionDtype
2026
from pandas.core.dtypes.generic import (
2127
ABCCategoricalIndex,
28+
ABCExtensionArray,
2229
ABCSeries,
2330
)
2431

25-
from pandas.core.arrays import ExtensionArray
26-
from pandas.core.arrays.sparse import SparseArray
27-
from pandas.core.construction import (
28-
array as pd_array,
29-
ensure_wrapped_if_datetimelike,
30-
)
32+
if TYPE_CHECKING:
33+
from pandas.core.arrays.sparse import SparseArray
3134

3235

3336
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
@@ -59,26 +62,11 @@ def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
5962
# SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
6063
# Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
6164
# Tuple[Any, Any]]]" [arg-type]
62-
arr = cast(SparseArray, arr)
65+
arr = cast("SparseArray", arr)
6366
return arr.to_dense().astype(dtype, copy=False) # type: ignore[arg-type]
6467

65-
if (
66-
isinstance(arr, np.ndarray)
67-
and arr.dtype.kind in ["m", "M"]
68-
and dtype is np.dtype("object")
69-
):
70-
# wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
71-
# this can happen when concat_compat is called directly on arrays (when arrays
72-
# are not coming from Index/Series._values), eg in BlockManager.quantile
73-
arr = ensure_wrapped_if_datetimelike(arr)
74-
75-
if isinstance(dtype, ExtensionDtype):
76-
if isinstance(arr, np.ndarray):
77-
# numpy's astype cannot handle ExtensionDtypes
78-
return pd_array(arr, dtype=dtype, copy=False)
79-
return arr.astype(dtype, copy=False)
80-
81-
return arr.astype(dtype, copy=False)
68+
# astype_array includes ensure_wrapped_if_datetimelike
69+
return astype_array(arr, dtype=dtype, copy=False)
8270

8371

8472
def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
@@ -135,7 +123,8 @@ def is_nonempty(x) -> bool:
135123
target_dtype = find_common_type([x.dtype for x in to_concat])
136124
to_concat = [cast_to_common_type(arr, target_dtype) for arr in to_concat]
137125

138-
if isinstance(to_concat[0], ExtensionArray):
126+
if isinstance(to_concat[0], ABCExtensionArray):
127+
# TODO: what about EA-backed Index?
139128
cls = type(to_concat[0])
140129
return cls._concat_same_type(to_concat)
141130
else:
@@ -346,6 +335,8 @@ def _concat_datetime(to_concat, axis=0):
346335
-------
347336
a single array, preserving the combined dtypes
348337
"""
338+
from pandas.core.construction import ensure_wrapped_if_datetimelike
339+
349340
to_concat = [ensure_wrapped_if_datetimelike(x) for x in to_concat]
350341

351342
single_dtype = len({x.dtype for x in to_concat}) == 1

0 commit comments

Comments
 (0)