@@ -878,7 +878,7 @@ def _get_merge_keys(self):
878
878
return left_keys , right_keys , join_names
879
879
880
880
def _maybe_coerce_merge_keys (self ):
881
- # we have valid mergee's but we may have to further
881
+ # we have valid mergees but we may have to further
882
882
# coerce these if they are originally incompatible types
883
883
#
884
884
# for example if these are categorical, but are not dtype_equal
@@ -890,12 +890,16 @@ def _maybe_coerce_merge_keys(self):
890
890
if (len (lk ) and not len (rk )) or (not len (lk ) and len (rk )):
891
891
continue
892
892
893
+ lk_is_cat = is_categorical_dtype (lk )
894
+ rk_is_cat = is_categorical_dtype (rk )
895
+
893
896
# if either left or right is a categorical
894
897
# then they must match exactly in categories & ordered
895
- if is_categorical_dtype ( lk ) and is_categorical_dtype ( rk ) :
898
+ if lk_is_cat and rk_is_cat :
896
899
if lk .is_dtype_equal (rk ):
897
900
continue
898
- elif is_categorical_dtype (lk ) or is_categorical_dtype (rk ):
901
+
902
+ elif lk_is_cat or rk_is_cat :
899
903
pass
900
904
901
905
elif is_dtype_equal (lk .dtype , rk .dtype ):
@@ -905,7 +909,7 @@ def _maybe_coerce_merge_keys(self):
905
909
# kinds to proceed, e.g. int64 and int8
906
910
# further if we are object, but we infer to
907
911
# the same, then proceed
908
- if ( is_numeric_dtype (lk ) and is_numeric_dtype (rk ) ):
912
+ if is_numeric_dtype (lk ) and is_numeric_dtype (rk ):
909
913
if lk .dtype .kind == rk .dtype .kind :
910
914
continue
911
915
@@ -914,13 +918,20 @@ def _maybe_coerce_merge_keys(self):
914
918
continue
915
919
916
920
# Houston, we have a problem!
917
- # let's coerce to object
921
+ # let's coerce to object if the dtypes aren't
922
+ # categorical, otherwise coerce to the categories'
923
+ # dtype. If we coerced categories to object,
924
+ # then we would lose type information on some
925
+ # columns, and end up trying to merge
926
+ # incompatible dtypes. See GH 16900.
918
927
if name in self .left .columns :
928
+ typ = lk .categories .dtype if lk_is_cat else object
919
929
self .left = self .left .assign (
920
- ** {name : self .left [name ].astype (object )})
930
+ ** {name : self .left [name ].astype (typ )})
921
931
if name in self .right .columns :
932
+ typ = rk .categories .dtype if rk_is_cat else object
922
933
self .right = self .right .assign (
923
- ** {name : self .right [name ].astype (object )})
934
+ ** {name : self .right [name ].astype (typ )})
924
935
925
936
def _validate_specification (self ):
926
937
# Hm, any way to make this logic less complicated??
0 commit comments