|
29 | 29 | from pandas.core.reshape.concat import concat
|
30 | 30 | from pandas.core.reshape.merge import MergeError, merge
|
31 | 31 | import pandas.util.testing as tm
|
32 |
| -from pandas.util.testing import assert_frame_equal, assert_series_equal |
| 32 | +from pandas.util.testing import ( |
| 33 | + assert_frame_equal, |
| 34 | + assert_index_equal, |
| 35 | + assert_series_equal, |
| 36 | +) |
33 | 37 |
|
34 | 38 | N = 50
|
35 | 39 | NGROUPS = 8
|
@@ -2088,7 +2092,6 @@ def test_merge_equal_cat_dtypes2():
|
2088 | 2092 | # Categorical is unordered, so don't check ordering.
|
2089 | 2093 | tm.assert_frame_equal(result, expected, check_categorical=False)
|
2090 | 2094 |
|
2091 |
| - |
2092 | 2095 | def test_merge_on_cat_and_ext_array():
|
2093 | 2096 | # GH 28668
|
2094 | 2097 | right = DataFrame(
|
@@ -2131,3 +2134,178 @@ def test_merge_multiindex_columns():
|
2131 | 2134 | expected["id"] = ""
|
2132 | 2135 |
|
2133 | 2136 | tm.assert_frame_equal(result, expected)
|
| 2137 | + |
@pytest.fixture(
    params=[
        {"domain": pd.Index(["A", "B", "C"])},
        {"domain": CategoricalIndex(["A", "B", "C"])},
        {"domain": DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"])},
        {"domain": Float64Index([1, 2, 3])},
        {"domain": Int64Index([1, 2, 3])},
        {"domain": IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)])},
        {"domain": TimedeltaIndex(["1d", "2d", "3d"])},
        {
            "domain": PeriodIndex(
                ["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"
            )
        },
    ]
)
def fix_GH_28220_(request):
    """
    Provide merge inputs built from a three-element index of each listed type.

    The returned object exposes:

    * ``domain`` - the parametrized three-element index
    * ``X`` - frame with a ``count`` column, indexed by ``domain`` entries 0, 1
    * ``Y`` - frame whose ``name`` column holds ``domain`` entries 0, 2
    * ``Z`` - frame whose ``name`` column holds ``domain`` entries 0, 0, 2
    * ``E`` - empty frame with ``name`` and ``value`` columns
    """

    class Data:
        def __init__(self):
            domain = request.param["domain"]
            left_labels = domain.take([0, 1])
            unique_names = domain.take([0, 2])
            repeated_names = domain.take([0, 0, 2])

            self.domain = domain
            self.X = pd.DataFrame({"count": [1, 2]}, index=left_labels)
            self.Y = pd.DataFrame({"name": unique_names, "value": [100, 200]})
            self.Z = pd.DataFrame(
                {"name": repeated_names, "value": [100, 200, 300]}
            )
            self.E = pd.DataFrame(columns=["name", "value"])

            # the left frame has to keep the index type under test
            assert isinstance(self.X.index, type(domain))

    return Data()
| 2166 | + |
| 2167 | + |
@pytest.mark.parametrize(
    "how,expected",
    [
        ("left", ([0, -255], [0, 1, -255], [0, 1])),
        ("inner", ([0], [0, 1], [])),
        ("outer", ([0, -255, 1], [0, 1, -255, 2], [0, 1])),
    ],
)
def test_left_index_merge_with_missing_by_right_on(fix_GH_28220_, how, expected):
    """
    Merge ``X`` by its index against the ``name`` column of ``Y``, ``Z`` and
    ``E`` and compare each result index with the parametrized expectation.

    The third entry of ``expected`` holds positions into ``domain``, not
    final labels.
    """
    # GH 28220
    data = fix_GH_28220_

    def merge_on_name(right):
        # NOTE(review): index_na_value looks like the label given to rows
        # with no index match - confirm against the merge implementation
        return pd.merge(
            data.X,
            right,
            left_index=True,
            right_on=["name"],
            how=how,
            index_na_value=-255,
        )

    exp_y, exp_z, exp_e = (pd.Index(labels) for labels in expected)
    exp_e = data.domain.take(exp_e)

    assert_index_equal(merge_on_name(data.Y).index, exp_y)
    assert_index_equal(merge_on_name(data.Z).index, exp_z)

    res_e = merge_on_name(data.E)

    # special case when result is empty, dtype is object
    if res_e.empty:
        exp_e = pd.Index([], dtype=object, name=exp_e.name)

    assert_index_equal(res_e.index, exp_e)
| 2216 | + |
| 2217 | + |
@pytest.mark.parametrize(
    "how,expected",
    [
        ("right", ([0, -255], [0, 0, -255], [0, 1, 2])),
        ("inner", ([0], [0, 0], [])),
        ("outer", ([0, 1, -255], [0, 0, 1, -255], [0, 1])),
    ],
)
def test_left_on_merge_with_missing_by_right_index(fix_GH_28220_, how, expected):
    """
    Merge ``X`` (index moved into an ``index`` column) against ``Y``, ``Z``
    and ``E`` re-indexed by ``name`` and compare each result index with the
    parametrized expectation.
    """
    # GH 28220
    data = fix_GH_28220_

    def merge_on_right_index(right):
        # both operands are rebuilt on every call, left one first
        # NOTE(review): index_na_value looks like the label given to rows
        # with no index match - confirm against the merge implementation
        return pd.merge(
            data.X.reset_index(),
            right.set_index("name"),
            left_on=["index"],
            right_index=True,
            how=how,
            index_na_value=-255,
        )

    exp_y, exp_z, exp_e = (pd.Index(labels) for labels in expected)

    assert_index_equal(merge_on_right_index(data.Y).index, exp_y)
    assert_index_equal(merge_on_right_index(data.Z).index, exp_z)

    res_e = merge_on_right_index(data.E)

    # special case when result is empty, dtype is object
    if res_e.empty:
        exp_e = pd.Index([], dtype=object, name=exp_e.name)

    assert_index_equal(res_e.index, exp_e)
| 2265 | + |
| 2266 | + |
@pytest.mark.parametrize(
    "how,expected",
    [
        ("left", ([0, 1], [0, 1, 2], [0, 1])),
        ("right", ([0, 1], [0, 1, 2], [0, 2])),
        ("inner", ([0], [0, 1], [])),
        ("outer", ([0, 1, 2], [0, 1, 2, 3], [0, 1])),
    ],
)
def test_left_on_merge_with_missing_by_right_on(fix_GH_28220_, how, expected):
    """
    Merge ``X`` (index moved into an ``index`` column) against the ``name``
    column of ``Y``, ``Z`` and ``E`` and compare each result index with the
    parametrized expectation.
    """
    # GH 28220
    data = fix_GH_28220_

    def merge_on_columns(right):
        # column-to-column merge; the left operand is rebuilt on every call
        return pd.merge(
            data.X.reset_index(),
            right,
            left_on=["index"],
            right_on=["name"],
            how=how,
        )

    exp_y, exp_z, exp_e = (pd.Index(labels) for labels in expected)

    assert_index_equal(merge_on_columns(data.Y).index, exp_y)
    assert_index_equal(merge_on_columns(data.Z).index, exp_z)

    res_e = merge_on_columns(data.E)

    # special case when result is empty, dtype is object
    if res_e.empty:
        exp_e = pd.Index([], dtype=object, name=exp_e.name)

    assert_index_equal(res_e.index, exp_e)
0 commit comments