Skip to content

Commit 7d8fd4b

Browse files
Licht-Ttm9k1
authored and committed
BUG: Fix combine_first converts other columns type into floats unexpectedly (pandas-dev#20965)
1 parent 1fd3501 commit 7d8fd4b

File tree

3 files changed

+29
-4
lines changed

3 files changed

+29
-4
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -911,4 +911,4 @@ Other
911911
- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly.
912912
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
913913
- Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`)
914-
-
914+
- Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`)

pandas/core/frame.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -5095,9 +5095,17 @@ def combine(self, other, func, fill_value=None, overwrite=True):
50955095
series[this_mask] = fill_value
50965096
otherSeries[other_mask] = fill_value
50975097

5098-
# if we have different dtypes, possibly promote
5099-
new_dtype = this_dtype
5100-
if not is_dtype_equal(this_dtype, other_dtype):
5098+
if col not in self.columns:
5099+
# If self DataFrame does not have col in other DataFrame,
5100+
# try to promote series, which is all NaN, as other_dtype.
5101+
new_dtype = other_dtype
5102+
try:
5103+
series = series.astype(new_dtype, copy=False)
5104+
except ValueError:
5105+
# e.g. new_dtype is integer types
5106+
pass
5107+
else:
5108+
# if we have different dtypes, possibly promote
51015109
new_dtype = find_common_type([this_dtype, other_dtype])
51025110
if not is_dtype_equal(this_dtype, new_dtype):
51035111
series = series.astype(new_dtype)
@@ -5176,6 +5184,11 @@ def combiner(x, y, needs_i8_conversion=False):
51765184
else:
51775185
mask = isna(x_values)
51785186

5187+
# If the column y in other DataFrame is not in first DataFrame,
5188+
# just return y_values.
5189+
if y.name not in self.columns:
5190+
return y_values
5191+
51795192
return expressions.where(mask, y_values, x_values)
51805193

51815194
return self.combine(other, combiner, overwrite=False)

pandas/tests/frame/test_combine_concat.py

+12
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from datetime import datetime
66

7+
import pytest
78
import numpy as np
89
from numpy import nan
910

@@ -750,6 +751,17 @@ def test_combine_first_int(self):
750751
tm.assert_frame_equal(res, df1)
751752
assert res['a'].dtype == 'int64'
752753

754+
@pytest.mark.parametrize("val", [1, 1.0])
755+
def test_combine_first_with_asymmetric_other(self, val):
756+
# see gh-20699
757+
df1 = pd.DataFrame({'isNum': [val]})
758+
df2 = pd.DataFrame({'isBool': [True]})
759+
760+
res = df1.combine_first(df2)
761+
exp = pd.DataFrame({'isBool': [True], 'isNum': [val]})
762+
763+
tm.assert_frame_equal(res, exp)
764+
753765
def test_concat_datetime_datetime64_frame(self):
754766
# #2624
755767
rows = []

0 commit comments

Comments
 (0)