|
6 | 6 | from numpy import nan
|
7 | 7 | import numpy as np
|
8 | 8 | import random
|
| 9 | +import re |
9 | 10 |
|
10 | 11 | import pandas as pd
|
11 | 12 | from pandas.compat import lrange, lzip
|
@@ -1385,14 +1386,27 @@ class TestMergeDtypes(object):
|
1385 | 1386 |
|
def test_different(self, df):
    """Merge ``df`` on 'A' against a one-column frame per column of ``df``.

    For every column of ``df`` a right-hand frame is built whose only
    column 'A' holds that column's values.  When the right-hand 'A' is
    object or categorical the merge must succeed and yield an object
    'A' column; for any other dtype the merge must raise ``ValueError``
    with the "incompatible dtypes" message.

    NOTE(review): ``df`` is a fixture defined outside this view; the
    branch comments below presume its 'A' column is object -- confirm.
    """
    left = df
    for col in df.columns:
        right = DataFrame({'A': df[col]})

        # GH 9780
        # We allow merging on object and categorical cols and cast
        # categorical cols to object
        if (is_categorical_dtype(right['A'].dtype) or
                is_object_dtype(right['A'].dtype)):
            result = pd.merge(left, right, on='A')
            assert is_object_dtype(result.A.dtype)

        # GH 9780
        # We raise for merging on object col and int/float col and
        # merging on categorical col and int/float col
        else:
            msg = ("You are trying to merge on "
                   "{lk_dtype} and {rk_dtype} columns. "
                   "If you wish to proceed you should use "
                   "pd.concat".format(lk_dtype=left['A'].dtype,
                                      rk_dtype=right['A'].dtype))
            # The message is used as a regex: escape it so '.' and any
            # metacharacters in a dtype name are matched literally, as
            # test_merge_incompat_dtypes already does.
            msg = re.escape(msg)
            with tm.assert_raises_regex(ValueError, msg):
                pd.merge(left, right, on='A')
1396 | 1410 |
|
1397 | 1411 | @pytest.mark.parametrize('d1', [np.int64, np.int32,
|
1398 | 1412 | np.int16, np.int8, np.uint8])
|
@@ -1462,6 +1476,42 @@ def test_merge_on_ints_floats_warning(self):
|
1462 | 1476 | result = B.merge(A, left_on='Y', right_on='X')
|
1463 | 1477 | assert_frame_equal(result, expected[['Y', 'X']])
|
1464 | 1478 |
|
@pytest.mark.parametrize('df1_vals, df2_vals', [
    ([0, 1, 2], ["0", "1", "2"]),
    ([0.0, 1.0, 2.0], ["0", "1", "2"]),
    ([0, 1, 2], [u"0", u"1", u"2"]),
    (pd.date_range('1/1/2011', periods=2, freq='D'), ['2011-01-01',
                                                      '2011-01-02']),
    (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]),
    (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]),
    ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')),
    ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')),
])
def test_merge_incompat_dtypes(self, df1_vals, df2_vals):
    """Merging on 'A' must raise ``ValueError`` for each dtype pairing.

    GH 9780: a ValueError is expected when a user tries to merge on
    dtypes that are incompatible (e.g., obj and int/float).  The check
    is performed with the frames in both orders, and the message must
    name the left dtype first and the right dtype second.
    """
    template = ("You are trying to merge on {lk_dtype} and "
                "{rk_dtype} columns. If you wish to proceed "
                "you should use pd.concat")

    first = DataFrame({'A': df1_vals})
    second = DataFrame({'A': df2_vals})

    # Original order first, then swapped, so the error is verified
    # regardless of which frame is on the left.
    for lhs, rhs in [(first, second), (second, first)]:
        expected = template.format(lk_dtype=lhs['A'].dtype,
                                   rk_dtype=rhs['A'].dtype)
        # Dtype names can contain regex metacharacters; match literally.
        pattern = re.escape(expected)
        with tm.assert_raises_regex(ValueError, pattern):
            pd.merge(lhs, rhs, on=['A'])
| 1514 | + |
1465 | 1515 |
|
1466 | 1516 | @pytest.fixture
|
1467 | 1517 | def left():
|
|
0 commit comments