|
30 | 30 | from pandas.core.reshape.concat import concat
|
31 | 31 | from pandas.core.reshape.merge import MergeError, merge
|
32 | 32 | import pandas.util.testing as tm
|
33 |
| -from pandas.util.testing import assert_frame_equal, assert_series_equal |
| 33 | +from pandas.util.testing import ( |
| 34 | + assert_frame_equal, |
| 35 | + assert_index_equal, |
| 36 | + assert_series_equal, |
| 37 | +) |
34 | 38 |
|
35 | 39 | N = 50
|
36 | 40 | NGROUPS = 8
|
@@ -2094,3 +2098,179 @@ def test_merge_equal_cat_dtypes2():
|
2094 | 2098 |
|
2095 | 2099 | # Categorical is unordered, so don't check ordering.
|
2096 | 2100 | tm.assert_frame_equal(result, expected, check_categorical=False)
|
| 2101 | + |
| 2102 | + |
@pytest.fixture(
    params=[
        {"domain": pd.Index(["A", "B", "C"])},
        {"domain": CategoricalIndex(["A", "B", "C"])},
        {"domain": DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"])},
        {"domain": Float64Index([1, 2, 3])},
        {"domain": Int64Index([1, 2, 3])},
        {"domain": IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)])},
        {"domain": TimedeltaIndex(["1d", "2d", "3d"])},
        {"domain": PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D")},
    ]
)
def fix_GH_28220_(request):
    """
    Provide merge inputs built from one three-element index per parametrization.

    The returned object exposes:

    * ``domain`` -- the index supplied by the current fixture parameter
    * ``X`` -- frame with a "count" column, indexed by domain positions 0 and 1
    * ``Y`` -- frame whose "name" column holds domain positions 0 and 2
    * ``Z`` -- frame whose "name" column holds domain positions 0, 0 and 2
    * ``E`` -- frame with "name" and "value" columns and no rows
    """

    class Data:
        def __init__(self):
            domain = request.param["domain"]
            self.domain = domain

            # Left-hand frame: keyed through its index.
            self.X = pd.DataFrame({"count": [1, 2]}, index=domain.take([0, 1]))

            # Right-hand frames: keyed through the "name" column.
            names_y = domain.take([0, 2])
            self.Y = pd.DataFrame({"name": names_y, "value": [100, 200]})
            names_z = domain.take([0, 0, 2])
            self.Z = pd.DataFrame({"name": names_z, "value": [100, 200, 300]})
            self.E = pd.DataFrame(columns=["name", "value"])

            # Sanity check: building X must keep the index class of the domain.
            assert isinstance(self.X.index, type(domain))

    return Data()
| 2131 | + |
| 2132 | + |
@pytest.mark.parametrize(
    "how,expected",
    [
        ("left", ([0, -255], [0, 1, -255], [0, 1])),
        ("inner", ([0], [0, 1], [])),
        ("outer", ([0, -255, 1], [0, 1, -255, 2], [0, 1])),
    ],
)
def test_left_index_merge_with_missing_by_right_on(fix_GH_28220_, how, expected):
    """
    Check the result index when merging the left index against a right column.

    ``X`` is merged (``left_index=True``, ``right_on=["name"]``) with ``Y``,
    ``Z`` and the empty frame ``E`` in turn; ``expected`` carries the
    anticipated result index for each of the three merges.
    """
    # GH 28220
    data = fix_GH_28220_
    want_y, want_z, want_e = (pd.Index(labels) for labels in expected)
    # For the empty right frame the expectation is given as positions into
    # the domain, so translate it into actual domain values.
    want_e = data.domain.take(want_e)

    # NOTE(review): -255 presumably marks result rows without an index label
    # from either side -- confirm against the merge implementation.
    merge_kwargs = dict(
        left_index=True, right_on=["name"], how=how, index_na_value=-255
    )

    got_y = pd.merge(data.X, data.Y, **merge_kwargs)
    assert_index_equal(got_y.index, want_y)

    got_z = pd.merge(data.X, data.Z, **merge_kwargs)
    assert_index_equal(got_z.index, want_z)

    got_e = pd.merge(data.X, data.E, **merge_kwargs)

    # special case when result is empty, dtype is object
    if got_e.empty:
        want_e = pd.Index([], dtype=object, name=want_e.name)

    assert_index_equal(got_e.index, want_e)
| 2181 | + |
| 2182 | + |
@pytest.mark.parametrize(
    "how,expected",
    [
        ("right", ([0, -255], [0, 0, -255], [0, 1, 2])),
        ("inner", ([0], [0, 0], [])),
        ("outer", ([0, 1, -255], [0, 0, 1, -255], [0, 1])),
    ],
)
def test_left_on_merge_with_missing_by_right_index(fix_GH_28220_, how, expected):
    """
    Check the result index when merging a left column against the right index.

    ``X`` (with its index moved into the "index" column) is merged
    (``left_on=["index"]``, ``right_index=True``) with ``Y``, ``Z`` and the
    empty frame ``E``, each re-indexed by "name"; ``expected`` carries the
    anticipated result index for each of the three merges.
    """
    # GH 28220
    data = fix_GH_28220_
    want_y, want_z, want_e = (pd.Index(labels) for labels in expected)

    # NOTE(review): -255 presumably marks result rows without an index label
    # from either side -- confirm against the merge implementation.
    merge_kwargs = dict(
        left_on=["index"], right_index=True, how=how, index_na_value=-255
    )

    got_y = pd.merge(data.X.reset_index(), data.Y.set_index("name"), **merge_kwargs)
    assert_index_equal(got_y.index, want_y)

    got_z = pd.merge(data.X.reset_index(), data.Z.set_index("name"), **merge_kwargs)
    assert_index_equal(got_z.index, want_z)

    got_e = pd.merge(data.X.reset_index(), data.E.set_index("name"), **merge_kwargs)

    # special case when result is empty, dtype is object
    if got_e.empty:
        want_e = pd.Index([], dtype=object, name=want_e.name)

    assert_index_equal(got_e.index, want_e)
| 2230 | + |
| 2231 | + |
@pytest.mark.parametrize(
    "how,expected",
    [
        ("left", ([0, 1], [0, 1, 2], [0, 1])),
        ("right", ([0, 1], [0, 1, 2], [0, 2])),
        ("inner", ([0], [0, 1], [])),
        ("outer", ([0, 1, 2], [0, 1, 2, 3], [0, 1])),
    ],
)
def test_left_on_merge_with_missing_by_right_on(fix_GH_28220_, how, expected):
    """
    Check the result index when merging column against column.

    ``X`` (with its index moved into the "index" column) is merged
    (``left_on=["index"]``, ``right_on=["name"]``) with ``Y``, ``Z`` and the
    empty frame ``E``; ``expected`` carries the anticipated result index for
    each of the three merges.
    """
    # GH 28220
    data = fix_GH_28220_
    want_y, want_z, want_e = (pd.Index(labels) for labels in expected)

    merge_kwargs = dict(left_on=["index"], right_on=["name"], how=how)

    got_y = pd.merge(data.X.reset_index(), data.Y, **merge_kwargs)
    assert_index_equal(got_y.index, want_y)

    got_z = pd.merge(data.X.reset_index(), data.Z, **merge_kwargs)
    assert_index_equal(got_z.index, want_z)

    got_e = pd.merge(data.X.reset_index(), data.E, **merge_kwargs)

    # special case when result is empty, dtype is object
    if got_e.empty:
        want_e = pd.Index([], dtype=object, name=want_e.name)

    assert_index_equal(got_e.index, want_e)
0 commit comments