Skip to content

Commit 4e98a7b

Browse files
reidy-p authored and jreback committed
BUG: Keep float dtype in merge on int and float column (#18352)
1 parent 369df07 commit 4e98a7b

File tree

3 files changed

+69
-14
lines changed

3 files changed

+69
-14
lines changed

doc/source/whatsnew/v0.22.0.txt

+1 -1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Backwards incompatible API changes
5151
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5252

5353
- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
54-
-
54+
- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
5555
-
5656

5757

pandas/core/reshape/merge.py

+28 -12
Original file line numberDiff line numberDiff line change
@@ -906,16 +906,31 @@ def _maybe_coerce_merge_keys(self):
906906
continue
907907

908908
# if we are numeric, then allow differing
909-
# kinds to proceed, eg. int64 and int8
909+
# kinds to proceed, eg. int64 and int8, int and float
910910
# further if we are object, but we infer to
911911
# the same, then proceed
912912
if is_numeric_dtype(lk) and is_numeric_dtype(rk):
913913
if lk.dtype.kind == rk.dtype.kind:
914-
continue
914+
pass
915+
916+
# check whether ints and floats
917+
elif is_integer_dtype(rk) and is_float_dtype(lk):
918+
if not (lk == lk.astype(rk.dtype)).all():
919+
warnings.warn('You are merging on int and float '
920+
'columns where the float values '
921+
'are not equal to their int '
922+
'representation', UserWarning)
923+
924+
elif is_float_dtype(rk) and is_integer_dtype(lk):
925+
if not (rk == rk.astype(lk.dtype)).all():
926+
warnings.warn('You are merging on int and float '
927+
'columns where the float values '
928+
'are not equal to their int '
929+
'representation', UserWarning)
915930

916931
# let's infer and see if we are ok
917-
if lib.infer_dtype(lk) == lib.infer_dtype(rk):
918-
continue
932+
elif lib.infer_dtype(lk) == lib.infer_dtype(rk):
933+
pass
919934

920935
# Houston, we have a problem!
921936
# let's coerce to object if the dtypes aren't
@@ -924,14 +939,15 @@ def _maybe_coerce_merge_keys(self):
924939
# then we would lose type information on some
925940
# columns, and end up trying to merge
926941
# incompatible dtypes. See GH 16900.
927-
if name in self.left.columns:
928-
typ = lk.categories.dtype if lk_is_cat else object
929-
self.left = self.left.assign(
930-
**{name: self.left[name].astype(typ)})
931-
if name in self.right.columns:
932-
typ = rk.categories.dtype if rk_is_cat else object
933-
self.right = self.right.assign(
934-
**{name: self.right[name].astype(typ)})
942+
else:
943+
if name in self.left.columns:
944+
typ = lk.categories.dtype if lk_is_cat else object
945+
self.left = self.left.assign(
946+
**{name: self.left[name].astype(typ)})
947+
if name in self.right.columns:
948+
typ = rk.categories.dtype if rk_is_cat else object
949+
self.right = self.right.assign(
950+
**{name: self.right[name].astype(typ)})
935951

936952
def _validate_specification(self):
937953
# Hm, any way to make this logic less complicated??

pandas/tests/reshape/test_merge.py

+40 -1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
from pandas.core.reshape.merge import merge, MergeError
1414
from pandas.util.testing import assert_frame_equal, assert_series_equal
1515
from pandas.core.dtypes.dtypes import CategoricalDtype
16-
from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype
16+
from pandas.core.dtypes.common import (
17+
is_categorical_dtype,
18+
is_object_dtype,
19+
)
1720
from pandas import DataFrame, Index, MultiIndex, Series, Categorical
1821
import pandas.util.testing as tm
1922
from pandas.api.types import CategoricalDtype as CDT
@@ -1408,6 +1411,42 @@ def test_join_multi_dtypes(self, d1, d2):
14081411
expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True)
14091412
tm.assert_frame_equal(result, expected)
14101413

1414+
@pytest.mark.parametrize('int_vals, float_vals, exp_vals', [
1415+
([1, 2, 3], [1.0, 2.0, 3.0], {'X': [1, 2, 3], 'Y': [1.0, 2.0, 3.0]}),
1416+
([1, 2, 3], [1.0, 3.0], {'X': [1, 3], 'Y': [1.0, 3.0]}),
1417+
([1, 2], [1.0, 2.0, 3.0], {'X': [1, 2], 'Y': [1.0, 2.0]}),
1418+
])
1419+
def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals):
1420+
# GH 16572
1421+
# Check that float column is not cast to object if
1422+
# merging on float and int columns
1423+
A = DataFrame({'X': int_vals})
1424+
B = DataFrame({'Y': float_vals})
1425+
expected = DataFrame(exp_vals)
1426+
1427+
result = A.merge(B, left_on='X', right_on='Y')
1428+
assert_frame_equal(result, expected)
1429+
1430+
result = B.merge(A, left_on='Y', right_on='X')
1431+
assert_frame_equal(result, expected[['Y', 'X']])
1432+
1433+
def test_merge_on_ints_floats_warning(self):
1434+
# GH 16572
1435+
# merge will produce a warning when merging on int and
1436+
# float columns where the float values are not exactly
1437+
# equal to their int representation
1438+
A = DataFrame({'X': [1, 2, 3]})
1439+
B = DataFrame({'Y': [1.1, 2.5, 3.0]})
1440+
expected = DataFrame({'X': [3], 'Y': [3.0]})
1441+
1442+
with tm.assert_produces_warning(UserWarning):
1443+
result = A.merge(B, left_on='X', right_on='Y')
1444+
assert_frame_equal(result, expected)
1445+
1446+
with tm.assert_produces_warning(UserWarning):
1447+
result = B.merge(A, left_on='Y', right_on='X')
1448+
assert_frame_equal(result, expected[['Y', 'X']])
1449+
14111450

14121451
@pytest.fixture
14131452
def left():

0 commit comments

Comments
 (0)