|
23 | 23 | from pandas.core.groupby.groupby import DataError
|
24 | 24 |
|
25 | 25 |
|
| 26 | +@pytest.fixture |
| 27 | +def df_for_transformation_func(): |
| 28 | + return DataFrame( |
| 29 | + { |
| 30 | + "A": [121, 121, 121, 121, 231, 231, 676], |
| 31 | + "B": [1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0], |
| 32 | + } |
| 33 | + ) |
| 34 | + |
| 35 | + |
26 | 36 | def assert_fp_equal(a, b):
|
27 | 37 | assert (np.abs(a - b) < 1e-12).all()
|
28 | 38 |
|
@@ -1198,46 +1208,66 @@ def test_transform_lambda_indexing():
|
1198 | 1208 | tm.assert_frame_equal(result, expected)
|
1199 | 1209 |
|
1200 | 1210 |
|
1201 |
| -def test_transform_nan_tshift_corrwith(transformation_func): |
| 1211 | +def test_groupby_corrwith(transformation_func, df_for_transformation_func): |
1202 | 1212 |
|
1203 |
| - df1 = DataFrame( |
1204 |
| - { |
1205 |
| - "A": [121, 121, 121, 121, 231, 231, 676], |
1206 |
| - "B": [1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0], |
1207 |
| - } |
1208 |
| - ) |
1209 |
| - g1 = df1.groupby("A") |
| 1213 | + # GH 27905 |
| 1214 | + df = df_for_transformation_func.copy() |
| 1215 | + g = df.groupby("A") |
1210 | 1216 |
|
1211 | 1217 | if transformation_func == "corrwith":
|
1212 |
| - result = g1.corrwith(df1) |
| 1218 | + op = lambda x: getattr(x, transformation_func)(df) |
| 1219 | + result = op(g) |
1213 | 1220 | expected = pd.DataFrame(dict(B=[1, np.nan, np.nan], A=[np.nan] * 3))
|
1214 | 1221 | expected.index = pd.Index([121, 231, 676], name="A")
|
1215 | 1222 | tm.assert_frame_equal(result, expected)
|
1216 | 1223 |
|
| 1224 | + |
| 1225 | +def test_groupby_transform_nan(transformation_func, df_for_transformation_func): |
| 1226 | + |
| 1227 | + # GH 27905 |
| 1228 | + df = df_for_transformation_func.copy() |
| 1229 | + g = df.groupby("A") |
| 1230 | + |
1217 | 1231 | if transformation_func == "fillna":
|
1218 |
| - df3 = df1.copy() |
1219 |
| - df3["B"] = [1, np.nan, np.nan, 3, np.nan, 3, 4] |
1220 |
| - result = df3.groupby("A").transform(lambda x: x.fillna(x.mean())) |
1221 |
| - expected = pd.DataFrame({"B": [1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0]}) |
1222 |
| - tm.assert_frame_equal(result, expected) |
1223 | 1232 |
|
1224 |
| - result = df3.groupby("A").transform(transformation_func, value=1) |
| 1233 | + df["B"] = [1, np.nan, np.nan, 3, np.nan, 3, 4] |
| 1234 | + result = g.transform(transformation_func, value=1) |
1225 | 1235 | expected = pd.DataFrame({"B": [1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 4.0]})
|
1226 | 1236 | tm.assert_frame_equal(result, expected)
|
| 1237 | + op = lambda x: getattr(x, transformation_func)(1) |
| 1238 | + result = op(g) |
| 1239 | + tm.assert_frame_equal(result, expected) |
| 1240 | + |
| 1241 | + |
| 1242 | +def test_groupby_transform_tshift(transformation_func, df_for_transformation_func): |
| 1243 | + |
| 1244 | + # GH 27905 |
| 1245 | + df = df_for_transformation_func.copy() |
| 1246 | + dt_periods = pd.date_range("2013-11-03", periods=7, freq="D") |
| 1247 | + df["C"] = dt_periods |
| 1248 | + g = df.set_index("C").groupby("A") |
1227 | 1249 |
|
1228 | 1250 | if transformation_func == "tshift":
|
1229 |
| - df2 = df1.copy() |
1230 |
| - dt_periods = pd.date_range("2013-11-03", periods=7, freq="D") |
1231 |
| - df2["C"] = dt_periods |
1232 |
| - result = df2.set_index("C").groupby("A").tshift(2, "D") |
1233 |
| - df2["C"] = dt_periods + dt_periods.freq * 2 |
1234 |
| - expected = df2 |
| 1251 | + |
| 1252 | + op = lambda x: getattr(x, transformation_func)(2, "D") |
| 1253 | + result = op(g) |
| 1254 | + df["C"] = dt_periods + dt_periods.freq * 2 |
| 1255 | + expected = df |
1235 | 1256 | tm.assert_frame_equal(
|
1236 | 1257 | result.reset_index().reindex(columns=["A", "B", "C"]), expected
|
1237 | 1258 | )
|
1238 | 1259 |
|
1239 | 1260 |
|
1240 | 1261 | def test_check_original_and_transformed_index(transformation_func):
|
| 1262 | + |
| 1263 | + # GH 27905 |
| 1264 | + df = DataFrame( |
| 1265 | + { |
| 1266 | + "A": [121, 121, 121, 121, 231, 231, 676], |
| 1267 | + "B": [1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0], |
| 1268 | + } |
| 1269 | + ) |
| 1270 | + |
1241 | 1271 | df = DataFrame({"A": [0, 0, 0, 1, 1, 1], "B": [0, 1, 2, 3, 4, 5]})
|
1242 | 1272 | g = df.groupby("A")
|
1243 | 1273 |
|
|
0 commit comments