|
1 | 1 | import numpy as np
|
2 | 2 | import pytest
|
3 | 3 |
|
| 4 | +import pandas.util._test_decorators as td |
| 5 | + |
4 | 6 | import pandas as pd
|
5 | 7 | from pandas import (
|
6 | 8 | DataFrame,
|
|
9 | 11 | RangeIndex,
|
10 | 12 | Series,
|
11 | 13 | Timestamp,
|
| 14 | + option_context, |
12 | 15 | )
|
13 | 16 | import pandas._testing as tm
|
14 | 17 | from pandas.core.reshape.concat import concat
|
@@ -88,64 +91,68 @@ def test_merge_on_multikey(self, left, right, join_type):
|
88 | 91 |
|
89 | 92 | tm.assert_frame_equal(result, expected)
|
90 | 93 |
|
91 |
| - def test_left_join_multi_index(self, sort): |
92 |
| - icols = ["1st", "2nd", "3rd"] |
| 94 | + @pytest.mark.parametrize( |
| 95 | + "infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))] |
| 96 | + ) |
| 97 | + def test_left_join_multi_index(self, sort, infer_string): |
| 98 | + with option_context("future.infer_string", infer_string): |
| 99 | + icols = ["1st", "2nd", "3rd"] |
93 | 100 |
|
94 |
| - def bind_cols(df): |
95 |
| - iord = lambda a: 0 if a != a else ord(a) |
96 |
| - f = lambda ts: ts.map(iord) - ord("a") |
97 |
| - return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 10 |
| 101 | + def bind_cols(df): |
| 102 | + iord = lambda a: 0 if a != a else ord(a) |
| 103 | + f = lambda ts: ts.map(iord) - ord("a") |
| 104 | + return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 10 |
98 | 105 |
|
99 |
| - def run_asserts(left, right, sort): |
100 |
| - res = left.join(right, on=icols, how="left", sort=sort) |
| 106 | + def run_asserts(left, right, sort): |
| 107 | + res = left.join(right, on=icols, how="left", sort=sort) |
101 | 108 |
|
102 |
| - assert len(left) < len(res) + 1 |
103 |
| - assert not res["4th"].isna().any() |
104 |
| - assert not res["5th"].isna().any() |
| 109 | + assert len(left) < len(res) + 1 |
| 110 | + assert not res["4th"].isna().any() |
| 111 | + assert not res["5th"].isna().any() |
105 | 112 |
|
106 |
| - tm.assert_series_equal(res["4th"], -res["5th"], check_names=False) |
107 |
| - result = bind_cols(res.iloc[:, :-2]) |
108 |
| - tm.assert_series_equal(res["4th"], result, check_names=False) |
109 |
| - assert result.name is None |
| 113 | + tm.assert_series_equal(res["4th"], -res["5th"], check_names=False) |
| 114 | + result = bind_cols(res.iloc[:, :-2]) |
| 115 | + tm.assert_series_equal(res["4th"], result, check_names=False) |
| 116 | + assert result.name is None |
110 | 117 |
|
111 |
| - if sort: |
112 |
| - tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort")) |
| 118 | + if sort: |
| 119 | + tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort")) |
113 | 120 |
|
114 |
| - out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") |
| 121 | + out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") |
115 | 122 |
|
116 |
| - res.index = RangeIndex(len(res)) |
117 |
| - tm.assert_frame_equal(out, res) |
| 123 | + res.index = RangeIndex(len(res)) |
| 124 | + tm.assert_frame_equal(out, res) |
118 | 125 |
|
119 |
| - lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) |
120 |
| - left = DataFrame( |
121 |
| - np.random.default_rng(2).choice(lc, (50, 2)), columns=["1st", "3rd"] |
122 |
| - ) |
123 |
| - # Explicit cast to float to avoid implicit cast when setting nan |
124 |
| - left.insert( |
125 |
| - 1, |
126 |
| - "2nd", |
127 |
| - np.random.default_rng(2).integers(0, 10, len(left)).astype("float"), |
128 |
| - ) |
129 |
| - right = left.sample(frac=1, random_state=np.random.default_rng(2)) |
| 126 | + lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) |
| 127 | + left = DataFrame( |
| 128 | + np.random.default_rng(2).choice(lc, (50, 2)), columns=["1st", "3rd"] |
| 129 | + ) |
| 130 | + # Explicit cast to float to avoid implicit cast when setting nan |
| 131 | + left.insert( |
| 132 | + 1, |
| 133 | + "2nd", |
| 134 | + np.random.default_rng(2).integers(0, 10, len(left)).astype("float"), |
| 135 | + ) |
| 136 | + right = left.sample(frac=1, random_state=np.random.default_rng(2)) |
130 | 137 |
|
131 |
| - left["4th"] = bind_cols(left) |
132 |
| - right["5th"] = -bind_cols(right) |
133 |
| - right.set_index(icols, inplace=True) |
| 138 | + left["4th"] = bind_cols(left) |
| 139 | + right["5th"] = -bind_cols(right) |
| 140 | + right.set_index(icols, inplace=True) |
134 | 141 |
|
135 |
| - run_asserts(left, right, sort) |
| 142 | + run_asserts(left, right, sort) |
136 | 143 |
|
137 |
| - # inject some nulls |
138 |
| - left.loc[1::4, "1st"] = np.nan |
139 |
| - left.loc[2::5, "2nd"] = np.nan |
140 |
| - left.loc[3::6, "3rd"] = np.nan |
141 |
| - left["4th"] = bind_cols(left) |
| 144 | + # inject some nulls |
| 145 | + left.loc[1::4, "1st"] = np.nan |
| 146 | + left.loc[2::5, "2nd"] = np.nan |
| 147 | + left.loc[3::6, "3rd"] = np.nan |
| 148 | + left["4th"] = bind_cols(left) |
142 | 149 |
|
143 |
| - i = np.random.default_rng(2).permutation(len(left)) |
144 |
| - right = left.iloc[i, :-1] |
145 |
| - right["5th"] = -bind_cols(right) |
146 |
| - right.set_index(icols, inplace=True) |
| 150 | + i = np.random.default_rng(2).permutation(len(left)) |
| 151 | + right = left.iloc[i, :-1] |
| 152 | + right["5th"] = -bind_cols(right) |
| 153 | + right.set_index(icols, inplace=True) |
147 | 154 |
|
148 |
| - run_asserts(left, right, sort) |
| 155 | + run_asserts(left, right, sort) |
149 | 156 |
|
150 | 157 | def test_merge_right_vs_left(self, left, right, sort):
|
151 | 158 | # compare left vs right merge with multikey
|
|
0 commit comments