|
1 | 1 | # pylint: disable=E1103
|
2 | 2 |
|
3 |
| -import pytest |
4 |
| -from datetime import datetime, date |
5 |
| -from numpy.random import randn |
6 |
| -from numpy import nan |
7 |
| -import numpy as np |
8 | 3 | import random
|
9 | 4 | import re
|
| 5 | +from collections import OrderedDict |
| 6 | +from datetime import date, datetime |
| 7 | + |
| 8 | +import numpy as np |
| 9 | +import pytest |
| 10 | +from numpy import nan |
| 11 | +from numpy.random import randn |
10 | 12 |
|
11 | 13 | import pandas as pd
|
| 14 | +import pandas.util.testing as tm |
| 15 | +from pandas import (Categorical, CategoricalIndex, DataFrame, DatetimeIndex, |
| 16 | + Float64Index, Index, Int64Index, MultiIndex, RangeIndex, |
| 17 | + Series, UInt64Index) |
| 18 | +from pandas.api.types import CategoricalDtype as CDT |
12 | 19 | from pandas.compat import lrange, lzip
|
| 20 | +from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype |
| 21 | +from pandas.core.dtypes.dtypes import CategoricalDtype |
13 | 22 | from pandas.core.reshape.concat import concat
|
14 |
| -from pandas.core.reshape.merge import merge, MergeError |
| 23 | +from pandas.core.reshape.merge import MergeError, merge |
15 | 24 | from pandas.util.testing import assert_frame_equal, assert_series_equal
|
16 |
| -from pandas.core.dtypes.dtypes import CategoricalDtype |
17 |
| -from pandas.core.dtypes.common import ( |
18 |
| - is_categorical_dtype, |
19 |
| - is_object_dtype, |
20 |
| -) |
21 |
| -from pandas import DataFrame, Index, MultiIndex, Series, Categorical |
22 |
| -import pandas.util.testing as tm |
23 |
| -from pandas.api.types import CategoricalDtype as CDT |
24 | 25 |
|
25 | 26 | N = 50
|
26 | 27 | NGROUPS = 8
|
@@ -813,7 +814,7 @@ def test_validation(self):
|
813 | 814 |
|
814 | 815 | # Dups on right
|
815 | 816 | right_w_dups = right.append(pd.DataFrame({'a': ['e'], 'c': ['moo']},
|
816 |
| - index=[4])) |
| 817 | + index=[4])) |
817 | 818 | merge(left, right_w_dups, left_index=True, right_index=True,
|
818 | 819 | validate='one_to_many')
|
819 | 820 |
|
@@ -1388,17 +1389,24 @@ def test_merge_datetime_index(self, klass):
|
1388 | 1389 | if klass is not None:
|
1389 | 1390 | on_vector = klass(on_vector)
|
1390 | 1391 |
|
1391 |
| - expected = DataFrame({"a": [1, 2, 3]}) |
1392 |
| - |
1393 |
| - if klass == np.asarray: |
1394 |
| - # The join key is added for ndarray. |
1395 |
| - expected["key_1"] = [2016, 2017, 2018] |
| 1392 | + expected = DataFrame( |
| 1393 | + OrderedDict([ |
| 1394 | + ("a", [1, 2, 3]), |
| 1395 | + ("key_1", [2016, 2017, 2018]), |
| 1396 | + ]) |
| 1397 | + ) |
1396 | 1398 |
|
1397 | 1399 | result = df.merge(df, on=["a", on_vector], how="inner")
|
1398 | 1400 | tm.assert_frame_equal(result, expected)
|
1399 | 1401 |
|
1400 |
| - expected = DataFrame({"a_x": [1, 2, 3], |
1401 |
| - "a_y": [1, 2, 3]}) |
| 1402 | + expected = DataFrame( |
| 1403 | + OrderedDict([ |
| 1404 | + ("key_0", [2016, 2017, 2018]), |
| 1405 | + ("a_x", [1, 2, 3]), |
| 1406 | + ("a_y", [1, 2, 3]), |
| 1407 | + ]) |
| 1408 | + ) |
| 1409 | + |
1402 | 1410 | result = df.merge(df, on=[df.index.year], how="inner")
|
1403 | 1411 | tm.assert_frame_equal(result, expected)
|
1404 | 1412 |
|
@@ -1427,7 +1435,7 @@ def test_different(self, right_vals):
|
1427 | 1435 | # We allow merging on object and categorical cols and cast
|
1428 | 1436 | # categorical cols to object
|
1429 | 1437 | if (is_categorical_dtype(right['A'].dtype) or
|
1430 |
| - is_object_dtype(right['A'].dtype)): |
| 1438 | + is_object_dtype(right['A'].dtype)): |
1431 | 1439 | result = pd.merge(left, right, on='A')
|
1432 | 1440 | assert is_object_dtype(result.A.dtype)
|
1433 | 1441 |
|
@@ -1826,3 +1834,26 @@ def test_merge_on_indexes(self, left_df, right_df, how, sort, expected):
|
1826 | 1834 | how=how,
|
1827 | 1835 | sort=sort)
|
1828 | 1836 | tm.assert_frame_equal(result, expected)
|
| 1837 | + |
| 1838 | + |
| 1839 | +@pytest.mark.parametrize( |
| 1840 | + 'index', [ |
| 1841 | + CategoricalIndex(['A', 'B'], categories=['A', 'B'], name='index_col'), |
| 1842 | + Float64Index([1.0, 2.0], name='index_col'), |
| 1843 | + Int64Index([1, 2], name='index_col'), |
| 1844 | + UInt64Index([1, 2], name='index_col'), |
| 1845 | + RangeIndex(start=0, stop=2, name='index_col'), |
| 1846 | + DatetimeIndex(["2018-01-01", "2018-01-02"], name='index_col'), |
| 1847 | + ], ids=lambda x: type(x).__name__) |
| 1848 | +def test_merge_index_types(index): |
| 1849 | + # gh-20777 |
| 1850 | + # assert key access is consistent across index types |
| 1851 | + left = DataFrame({"left_data": [1, 2]}, index=index) |
| 1852 | + right = DataFrame({"right_data": [1.0, 2.0]}, index=index) |
| 1853 | + |
| 1854 | + result = left.merge(right, on=['index_col']) |
| 1855 | + |
| 1856 | + expected = DataFrame( |
| 1857 | + OrderedDict([('left_data', [1, 2]), ('right_data', [1.0, 2.0])]), |
| 1858 | + index=index) |
| 1859 | + assert_frame_equal(result, expected) |
0 commit comments