Skip to content

Commit 992dfbc

Browse files
BUG: regression in DataFrame.combine_first with integer columns (GH14687) (#14886)
1 parent 5f777f4 commit 992dfbc

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

doc/source/whatsnew/v0.19.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ Bug Fixes
7878
- Bug in clipboard functions on linux with python2 with unicode and separators (:issue:`13747`)
7979
- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`)
8080
- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`)
81-
81+
- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`).
8282

8383
- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`)
8484
- Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`)

pandas/core/frame.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -3665,10 +3665,8 @@ def combine(self, other, func, fill_value=None, overwrite=True):
36653665
otherSeries[other_mask] = fill_value
36663666

36673667
# if we have different dtypes, possibly promote
3668-
if notnull(series).all():
3669-
new_dtype = this_dtype
3670-
otherSeries = otherSeries.astype(new_dtype)
3671-
else:
3668+
new_dtype = this_dtype
3669+
if not is_dtype_equal(this_dtype, other_dtype):
36723670
new_dtype = _find_common_type([this_dtype, other_dtype])
36733671
if not is_dtype_equal(this_dtype, new_dtype):
36743672
series = series.astype(new_dtype)

pandas/tests/frame/test_combine_concat.py

+10
Original file line numberDiff line numberDiff line change
@@ -725,3 +725,13 @@ def test_combine_first_period(self):
725725
exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7])
726726
tm.assert_frame_equal(res, exp)
727727
self.assertEqual(res['P'].dtype, 'object')
728+
729+
def test_combine_first_int(self):
730+
# GH14687 - integer series that do not align exactly
731+
732+
df1 = pd.DataFrame({'a': [0, 1, 3, 5]}, dtype='int64')
733+
df2 = pd.DataFrame({'a': [1, 4]}, dtype='int64')
734+
735+
res = df1.combine_first(df2)
736+
tm.assert_frame_equal(res, df1)
737+
self.assertEqual(res['a'].dtype, 'int64')

0 commit comments

Comments
 (0)