|
1 | 1 | import numpy as np
|
2 | 2 | import pytest
|
3 | 3 |
|
| 4 | +import pandas.util._test_decorators as td |
| 5 | + |
4 | 6 | import pandas as pd
|
5 | 7 | from pandas import (
|
6 | 8 | DataFrame,
|
|
9 | 11 | RangeIndex,
|
10 | 12 | Series,
|
11 | 13 | Timestamp,
|
| 14 | + option_context, |
12 | 15 | )
|
13 | 16 | import pandas._testing as tm
|
14 | 17 | from pandas.core.reshape.concat import concat
|
@@ -88,67 +91,70 @@ def test_merge_on_multikey(self, left, right, join_type):
|
88 | 91 |
|
89 | 92 | tm.assert_frame_equal(result, expected)
|
90 | 93 |
|
91 |
| - @pytest.mark.parametrize("sort", [False, True]) |
92 |
| - def test_left_join_multi_index(self, sort): |
93 |
| - icols = ["1st", "2nd", "3rd"] |
| 94 | + @pytest.mark.parametrize( |
| 95 | + "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] |
| 96 | + ) |
| 97 | + def test_left_join_multi_index(self, sort, infer_string): |
| 98 | + with option_context("future.infer_string", infer_string): |
| 99 | + icols = ["1st", "2nd", "3rd"] |
94 | 100 |
|
95 |
| - def bind_cols(df): |
96 |
| - iord = lambda a: 0 if a != a else ord(a) |
97 |
| - f = lambda ts: ts.map(iord) - ord("a") |
98 |
| - return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 10 |
| 101 | + def bind_cols(df): |
| 102 | + iord = lambda a: 0 if a != a else ord(a) |
| 103 | + f = lambda ts: ts.map(iord) - ord("a") |
| 104 | + return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 10 |
99 | 105 |
|
100 |
| - def run_asserts(left, right, sort): |
101 |
| - res = left.join(right, on=icols, how="left", sort=sort) |
| 106 | + def run_asserts(left, right, sort): |
| 107 | + res = left.join(right, on=icols, how="left", sort=sort) |
102 | 108 |
|
103 |
| - assert len(left) < len(res) + 1 |
104 |
| - assert not res["4th"].isna().any() |
105 |
| - assert not res["5th"].isna().any() |
| 109 | + assert len(left) < len(res) + 1 |
| 110 | + assert not res["4th"].isna().any() |
| 111 | + assert not res["5th"].isna().any() |
106 | 112 |
|
107 |
| - tm.assert_series_equal(res["4th"], -res["5th"], check_names=False) |
108 |
| - result = bind_cols(res.iloc[:, :-2]) |
109 |
| - tm.assert_series_equal(res["4th"], result, check_names=False) |
110 |
| - assert result.name is None |
| 113 | + tm.assert_series_equal(res["4th"], -res["5th"], check_names=False) |
| 114 | + result = bind_cols(res.iloc[:, :-2]) |
| 115 | + tm.assert_series_equal(res["4th"], result, check_names=False) |
| 116 | + assert result.name is None |
111 | 117 |
|
112 |
| - if sort: |
113 |
| - tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort")) |
| 118 | + if sort: |
| 119 | + tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort")) |
114 | 120 |
|
115 |
| - out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") |
| 121 | + out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") |
116 | 122 |
|
117 |
| - res.index = RangeIndex(len(res)) |
118 |
| - tm.assert_frame_equal(out, res) |
| 123 | + res.index = RangeIndex(len(res)) |
| 124 | + tm.assert_frame_equal(out, res) |
119 | 125 |
|
120 |
| - lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) |
121 |
| - left = DataFrame( |
122 |
| - np.random.default_rng(2).choice(lc, (50, 2)), columns=["1st", "3rd"] |
123 |
| - ) |
124 |
| - # Explicit cast to float to avoid implicit cast when setting nan |
125 |
| - left.insert( |
126 |
| - 1, |
127 |
| - "2nd", |
128 |
| - np.random.default_rng(2).integers(0, 10, len(left)).astype("float"), |
129 |
| - ) |
| 126 | + lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) |
| 127 | + left = DataFrame( |
| 128 | + np.random.default_rng(2).choice(lc, (50, 2)), columns=["1st", "3rd"] |
| 129 | + ) |
| 130 | + # Explicit cast to float to avoid implicit cast when setting nan |
| 131 | + left.insert( |
| 132 | + 1, |
| 133 | + "2nd", |
| 134 | + np.random.default_rng(2).integers(0, 10, len(left)).astype("float"), |
| 135 | + ) |
130 | 136 |
|
131 |
| - i = np.random.default_rng(2).permutation(len(left)) |
132 |
| - right = left.iloc[i].copy() |
| 137 | + i = np.random.default_rng(2).permutation(len(left)) |
| 138 | + right = left.iloc[i].copy() |
133 | 139 |
|
134 |
| - left["4th"] = bind_cols(left) |
135 |
| - right["5th"] = -bind_cols(right) |
136 |
| - right.set_index(icols, inplace=True) |
| 140 | + left["4th"] = bind_cols(left) |
| 141 | + right["5th"] = -bind_cols(right) |
| 142 | + right.set_index(icols, inplace=True) |
137 | 143 |
|
138 |
| - run_asserts(left, right, sort) |
| 144 | + run_asserts(left, right, sort) |
139 | 145 |
|
140 |
| - # inject some nulls |
141 |
| - left.loc[1::4, "1st"] = np.nan |
142 |
| - left.loc[2::5, "2nd"] = np.nan |
143 |
| - left.loc[3::6, "3rd"] = np.nan |
144 |
| - left["4th"] = bind_cols(left) |
| 146 | + # inject some nulls |
| 147 | + left.loc[1::4, "1st"] = np.nan |
| 148 | + left.loc[2::5, "2nd"] = np.nan |
| 149 | + left.loc[3::6, "3rd"] = np.nan |
| 150 | + left["4th"] = bind_cols(left) |
145 | 151 |
|
146 |
| - i = np.random.default_rng(2).permutation(len(left)) |
147 |
| - right = left.iloc[i, :-1] |
148 |
| - right["5th"] = -bind_cols(right) |
149 |
| - right.set_index(icols, inplace=True) |
| 152 | + i = np.random.default_rng(2).permutation(len(left)) |
| 153 | + right = left.iloc[i, :-1] |
| 154 | + right["5th"] = -bind_cols(right) |
| 155 | + right.set_index(icols, inplace=True) |
150 | 156 |
|
151 |
| - run_asserts(left, right, sort) |
| 157 | + run_asserts(left, right, sort) |
152 | 158 |
|
153 | 159 | @pytest.mark.parametrize("sort", [False, True])
|
154 | 160 | def test_merge_right_vs_left(self, left, right, sort):
|
|
0 commit comments