20
20
ensure_categorical , ensure_int64 , ensure_object , ensure_platform_int ,
21
21
is_bool , is_bool_dtype , is_categorical , is_categorical_dtype ,
22
22
is_datetime64_any_dtype , is_datetime64tz_dtype , is_dtype_equal ,
23
- is_dtype_union_equal , is_extension_array_dtype , is_float , is_float_dtype ,
24
- is_hashable , is_integer , is_integer_dtype , is_interval_dtype , is_iterator ,
25
- is_list_like , is_object_dtype , is_period_dtype , is_scalar ,
26
- is_signed_integer_dtype , is_timedelta64_dtype , is_unsigned_integer_dtype ,
27
- pandas_dtype )
23
+ is_extension_array_dtype , is_float , is_float_dtype , is_hashable ,
24
+ is_integer , is_integer_dtype , is_interval_dtype , is_iterator , is_list_like ,
25
+ is_object_dtype , is_period_dtype , is_scalar , is_signed_integer_dtype ,
26
+ is_timedelta64_dtype , is_unsigned_integer_dtype , pandas_dtype )
28
27
import pandas .core .dtypes .concat as _concat
29
28
from pandas .core .dtypes .generic import (
30
29
ABCDataFrame , ABCDateOffset , ABCDatetimeArray , ABCIndexClass ,
@@ -2262,6 +2261,47 @@ def _get_reconciled_name_object(self, other):
2262
2261
return self ._shallow_copy (name = name )
2263
2262
return self
2264
2263
2264
+ def _union_incompatible_dtypes (self , other , sort ):
2265
+ """
2266
+ Cast this index and `other` to object dtype to allow the formation
2267
+ of a union between incompatible types.
2268
+
2269
+ Parameters
2270
+ ----------
2271
+ other : Index or array-like
2272
+ sort : False or None
2273
+ Whether to sort the resulting index.
2274
+
2275
+ * False : do not sort the result.
2276
+ * None : sort the result, except when `self` and `other` are equal
2277
+ or when the values cannot be compared.
2278
+
2279
+ Returns
2280
+ -------
2281
+ Index
2282
+ """
2283
+ this = self .astype (object , copy = False )
2284
+ # wrap in Index first so that a list-like `other` can be cast via astype
2285
+ other = Index (other ).astype (object , copy = False )
2286
+ return Index .union (this , other , sort = sort ).astype (object , copy = False )
2287
+
2288
+ def _is_compatible_with_other (self , other ):
2289
+ """
2290
+ Check whether this index and `other` are compatible with each other,
2291
+ meaning a union can be formed between them without needing to cast
2292
+ them to dtype object.
2293
+
2294
+ Parameters
2295
+ ----------
2296
+ other : Index or array-like
2297
+
2298
+ Returns
2299
+ -------
2300
+ bool
2301
+ """
2302
+ return (type (self ) is type (other )  # exact type match; short-circuits before `other.dtype` is read
2303
+ and is_dtype_equal (self .dtype , other .dtype ))
2304
+
2265
2305
def _validate_sort_keyword (self , sort ):
2266
2306
if sort not in [None , False ]:
2267
2307
raise ValueError ("The 'sort' keyword only takes the values of "
@@ -2271,6 +2311,11 @@ def union(self, other, sort=None):
2271
2311
"""
2272
2312
Form the union of two Index objects.
2273
2313
2314
+ If the Index objects are incompatible, both Index objects will be
2315
+ cast to dtype('object') first.
2316
+
2317
+ .. versionchanged:: 0.25.0
2318
+
2274
2319
Parameters
2275
2320
----------
2276
2321
other : Index or array-like
@@ -2300,30 +2345,54 @@ def union(self, other, sort=None):
2300
2345
Examples
2301
2346
--------
2302
2347
2348
+ Union matching dtypes
2349
+
2303
2350
>>> idx1 = pd.Index([1, 2, 3, 4])
2304
2351
>>> idx2 = pd.Index([3, 4, 5, 6])
2305
2352
>>> idx1.union(idx2)
2306
2353
Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')
2354
+
2355
+ Union mismatched dtypes
2356
+
2357
+ >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
2358
+ >>> idx2 = pd.Index([1, 2, 3, 4])
2359
+ >>> idx1.union(idx2)
2360
+ Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
2307
2361
"""
2308
2362
self ._validate_sort_keyword (sort )
2309
2363
self ._assert_can_do_setop (other )
2310
- other = ensure_index (other )
2311
2364
2312
- if len (other ) == 0 or self .equals (other ):
2365
+ if not self ._is_compatible_with_other (other ):
2366
+ return self ._union_incompatible_dtypes (other , sort = sort )
2367
+
2368
+ return self ._union (other , sort = sort )
2369
+
2370
+ def _union (self , other , sort ):
2371
+ """
2372
+ Specific union logic should go here. In subclasses, union behavior
2373
+ should be overridden here rather than in `self.union`.
2374
+
2375
+ Parameters
2376
+ ----------
2377
+ other : Index or array-like
2378
+ sort : False or None
2379
+ Whether to sort the resulting index.
2380
+
2381
+ * False : do not sort the result.
2382
+ * None : sort the result, except when `self` and `other` are equal
2383
+ or when the values cannot be compared.
2384
+
2385
+ Returns
2386
+ -------
2387
+ Index
2388
+ """
2389
+
2390
+ if not len (other ) or self .equals (other ):
2313
2391
return self ._get_reconciled_name_object (other )
2314
2392
2315
- if len (self ) == 0 :
2393
+ if not len (self ):
2316
2394
return other ._get_reconciled_name_object (self )
2317
2395
2318
- # TODO: is_dtype_union_equal is a hack around
2319
- # 1. buggy set ops with duplicates (GH #13432)
2320
- # 2. CategoricalIndex lacking setops (GH #10186)
2321
- # Once those are fixed, this workaround can be removed
2322
- if not is_dtype_union_equal (self .dtype , other .dtype ):
2323
- this = self .astype ('O' )
2324
- other = other .astype ('O' )
2325
- return this .union (other , sort = sort )
2326
-
2327
2396
# TODO(EA): setops-refactor, clean all this up
2328
2397
if is_period_dtype (self ) or is_datetime64tz_dtype (self ):
2329
2398
lvals = self ._ndarray_values
@@ -2370,6 +2439,7 @@ def union(self, other, sort=None):
2370
2439
def _wrap_setop_result (self , other , result ):
2371
2440
return self ._constructor (result , name = get_op_result_name (self , other ))  # wrap `result` via this index's constructor; name comes from get_op_result_name(self, other)
2372
2441
2442
+ # TODO: standardize the return type of non-union setops (type(self) vs. type(other))
2373
2443
def intersection (self , other , sort = False ):
2374
2444
"""
2375
2445
Form the intersection of two Index objects.
0 commit comments