|
6 | 6 | from numpy import nan
|
7 | 7 | import numpy as np
|
8 | 8 | import random
|
| 9 | +import re |
9 | 10 |
|
10 | 11 | import pandas as pd
|
11 | 12 | from pandas.compat import lrange, lzip
|
@@ -1370,30 +1371,47 @@ def f():
|
1370 | 1371 | pytest.raises(NotImplementedError, f)
|
1371 | 1372 |
|
1372 | 1373 |
|
1373 |
| -@pytest.fixture |
1374 |
| -def df(): |
1375 |
| - return DataFrame( |
1376 |
| - {'A': ['foo', 'bar'], |
1377 |
| - 'B': Series(['foo', 'bar']).astype('category'), |
1378 |
| - 'C': [1, 2], |
1379 |
| - 'D': [1.0, 2.0], |
1380 |
| - 'E': Series([1, 2], dtype='uint64'), |
1381 |
| - 'F': Series([1, 2], dtype='int32')}) |
1382 |
| - |
1383 |
| - |
1384 | 1374 | class TestMergeDtypes(object):
|
1385 | 1375 |
|
1386 |
| - def test_different(self, df): |
1387 |
| - |
1388 |
| - # we expect differences by kind |
1389 |
| - # to be ok, while other differences should return object |
1390 |
| - |
1391 |
| - left = df |
1392 |
| - for col in df.columns: |
1393 |
| - right = DataFrame({'A': df[col]}) |
@pytest.mark.parametrize('right_vals', [
    ['foo', 'bar'],
    Series(['foo', 'bar']).astype('category'),
    [1, 2],
    [1.0, 2.0],
    Series([1, 2], dtype='uint64'),
    Series([1, 2], dtype='int32')
])
def test_different(self, right_vals):
    """Merge a string-valued key column against keys of other dtypes.

    ``left['A']`` is built from strings and ``right['A']`` from the
    parametrized ``right_vals``.  When the right-hand key is categorical
    or object the merge must succeed and yield an object-dtype ``A``
    column; for every other parametrized dtype the merge must raise
    ``ValueError`` with the "use pd.concat" message.
    """
    left = DataFrame({'A': ['foo', 'bar'],
                      'B': Series(['foo', 'bar']).astype('category'),
                      'C': [1, 2],
                      'D': [1.0, 2.0],
                      'E': Series([1, 2], dtype='uint64'),
                      'F': Series([1, 2], dtype='int32')})
    right = DataFrame({'A': right_vals})

    # GH 9780
    # We allow merging on object and categorical cols and cast
    # categorical cols to object
    if (is_categorical_dtype(right['A'].dtype) or
            is_object_dtype(right['A'].dtype)):
        result = pd.merge(left, right, on='A')
        assert is_object_dtype(result.A.dtype)

    else:
        # GH 9780
        # We raise for merging on object col and int/float col and
        # merging on categorical col and int/float col
        msg = ("You are trying to merge on "
               "{lk_dtype} and {rk_dtype} columns. "
               "If you wish to proceed you should use "
               "pd.concat".format(lk_dtype=left['A'].dtype,
                                  rk_dtype=right['A'].dtype))
        # The message is used as a regular expression; escape it so the
        # '.' (and anything else special) must match literally.
        msg = re.escape(msg)
        with tm.assert_raises_regex(ValueError, msg):
            pd.merge(left, right, on='A')
| 1414 | + |
1397 | 1415 | @pytest.mark.parametrize('d1', [np.int64, np.int32,
|
1398 | 1416 | np.int16, np.int8, np.uint8])
|
1399 | 1417 | @pytest.mark.parametrize('d2', [np.int64, np.float64,
|
@@ -1462,6 +1480,42 @@ def test_merge_on_ints_floats_warning(self):
|
1462 | 1480 | result = B.merge(A, left_on='Y', right_on='X')
|
1463 | 1481 | assert_frame_equal(result, expected[['Y', 'X']])
|
1464 | 1482 |
|
@pytest.mark.parametrize('df1_vals, df2_vals', [
    ([0, 1, 2], ["0", "1", "2"]),
    ([0.0, 1.0, 2.0], ["0", "1", "2"]),
    ([0, 1, 2], [u"0", u"1", u"2"]),
    (pd.date_range('1/1/2011', periods=2, freq='D'),
     ['2011-01-01', '2011-01-02']),
    (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]),
    (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]),
    ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')),
    ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')),
])
def test_merge_incompat_dtypes(self, df1_vals, df2_vals):
    """Merging on key columns of incompatible dtypes raises ValueError.

    GH 9780: each parametrized pair (e.g. object vs. int/float) is
    merged on column ``A`` in both argument orders, and both attempts
    must raise with a message naming the left and right key dtypes.
    """
    template = ("You are trying to merge on {lk_dtype} and "
                "{rk_dtype} columns. If you wish to proceed "
                "you should use pd.concat")

    first = DataFrame({'A': df1_vals})
    second = DataFrame({'A': df2_vals})

    # Check the original order, then the swapped order: the error must
    # be raised regardless of which frame is on the left.
    for lhs, rhs in [(first, second), (second, first)]:
        expected = template.format(lk_dtype=lhs['A'].dtype,
                                   rk_dtype=rhs['A'].dtype)
        pattern = re.escape(expected)
        with tm.assert_raises_regex(ValueError, pattern):
            pd.merge(lhs, rhs, on=['A'])
| 1518 | + |
1465 | 1519 |
|
1466 | 1520 | @pytest.fixture
|
1467 | 1521 | def left():
|
|
0 commit comments