|
12 | 12 | from pandas import (
|
13 | 13 | Categorical,
|
14 | 14 | DataFrame,
|
| 15 | + Index, |
15 | 16 | MultiIndex,
|
16 | 17 | Series,
|
17 | 18 | Timestamp,
|
@@ -1083,10 +1084,12 @@ def test_any_all_bool_only(self):
|
1083 | 1084 | pytest.param(np.any, {"A": Series([0, 1], dtype="m8[ns]")}, True),
|
1084 | 1085 | pytest.param(np.all, {"A": Series([1, 2], dtype="m8[ns]")}, True),
|
1085 | 1086 | pytest.param(np.any, {"A": Series([1, 2], dtype="m8[ns]")}, True),
|
1086 |
| - (np.all, {"A": Series([0, 1], dtype="category")}, False), |
1087 |
| - (np.any, {"A": Series([0, 1], dtype="category")}, True), |
| 1087 | + # np.all on Categorical raises, so the reduction drops the |
| 1088 | + # column, so all is being done on an empty Series, so is True |
| 1089 | + (np.all, {"A": Series([0, 1], dtype="category")}, True), |
| 1090 | + (np.any, {"A": Series([0, 1], dtype="category")}, False), |
1088 | 1091 | (np.all, {"A": Series([1, 2], dtype="category")}, True),
|
1089 |
| - (np.any, {"A": Series([1, 2], dtype="category")}, True), |
| 1092 | + (np.any, {"A": Series([1, 2], dtype="category")}, False), |
1090 | 1093 | # Mix GH#21484
|
1091 | 1094 | pytest.param(
|
1092 | 1095 | np.all,
|
@@ -1308,6 +1311,114 @@ def test_frame_any_with_timedelta(self):
|
1308 | 1311 | tm.assert_series_equal(result, expected)
|
1309 | 1312 |
|
1310 | 1313 |
|
| 1314 | +class TestNuisanceColumns: |
| 1315 | + @pytest.mark.parametrize("method", ["any", "all"]) |
| 1316 | + def test_any_all_categorical_dtype_nuisance_column(self, method): |
| 1317 | + # GH#36076 DataFrame should match Series behavior |
| 1318 | + ser = Series([0, 1], dtype="category", name="A") |
| 1319 | + df = ser.to_frame() |
| 1320 | + |
| 1321 | + # Double-check the Series behavior is to raise |
| 1322 | + with pytest.raises(TypeError, match="does not implement reduction"): |
| 1323 | + getattr(ser, method)() |
| 1324 | + |
| 1325 | + with pytest.raises(TypeError, match="does not implement reduction"): |
| 1326 | + getattr(np, method)(ser) |
| 1327 | + |
| 1328 | + with pytest.raises(TypeError, match="does not implement reduction"): |
| 1329 | + getattr(df, method)(bool_only=False) |
| 1330 | + |
| 1331 | + # With bool_only=None, operating on this column raises and is ignored, |
| 1332 | + # so we expect an empty result. |
| 1333 | + result = getattr(df, method)(bool_only=None) |
| 1334 | + expected = Series([], index=Index([]), dtype=bool) |
| 1335 | + tm.assert_series_equal(result, expected) |
| 1336 | + |
| 1337 | + result = getattr(np, method)(df, axis=0) |
| 1338 | + tm.assert_series_equal(result, expected) |
| 1339 | + |
| 1340 | + def test_median_categorical_dtype_nuisance_column(self): |
| 1341 | + # GH#21020 DataFrame.median should match Series.median |
| 1342 | + df = DataFrame({"A": Categorical([1, 2, 2, 2, 3])}) |
| 1343 | + ser = df["A"] |
| 1344 | + |
| 1345 | + # Double-check the Series behavior is to raise |
| 1346 | + with pytest.raises(TypeError, match="does not implement reduction"): |
| 1347 | + ser.median() |
| 1348 | + |
| 1349 | + with pytest.raises(TypeError, match="does not implement reduction"): |
| 1350 | + df.median(numeric_only=False) |
| 1351 | + |
| 1352 | + result = df.median() |
| 1353 | + expected = Series([], index=Index([]), dtype=np.float64) |
| 1354 | + tm.assert_series_equal(result, expected) |
| 1355 | + |
| 1356 | + # same thing, but with an additional non-categorical column |
| 1357 | + df["B"] = df["A"].astype(int) |
| 1358 | + |
| 1359 | + with pytest.raises(TypeError, match="does not implement reduction"): |
| 1360 | + df.median(numeric_only=False) |
| 1361 | + |
| 1362 | + result = df.median() |
| 1363 | + expected = Series([2.0], index=["B"]) |
| 1364 | + tm.assert_series_equal(result, expected) |
| 1365 | + |
| 1366 | + # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead |
| 1367 | + # of expected.values |
| 1368 | + |
| 1369 | + @pytest.mark.parametrize("method", ["min", "max"]) |
| 1370 | + def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): |
| 1371 | + # GH#28949 DataFrame.min should behave like Series.min |
| 1372 | + cat = Categorical(["a", "b", "c", "b"], ordered=False) |
| 1373 | + ser = Series(cat) |
| 1374 | + df = ser.to_frame("A") |
| 1375 | + |
| 1376 | + # Double-check the Series behavior |
| 1377 | + with pytest.raises(TypeError, match="is not ordered for operation"): |
| 1378 | + getattr(ser, method)() |
| 1379 | + |
| 1380 | + with pytest.raises(TypeError, match="is not ordered for operation"): |
| 1381 | + getattr(np, method)(ser) |
| 1382 | + |
| 1383 | + with pytest.raises(TypeError, match="is not ordered for operation"): |
| 1384 | + getattr(df, method)(numeric_only=False) |
| 1385 | + |
| 1386 | + result = getattr(df, method)() |
| 1387 | + expected = Series([], index=Index([]), dtype=np.float64) |
| 1388 | + tm.assert_series_equal(result, expected) |
| 1389 | + |
| 1390 | + result = getattr(np, method)(df) |
| 1391 | + tm.assert_series_equal(result, expected) |
| 1392 | + |
| 1393 | + # same thing, but with an additional non-categorical column |
| 1394 | + df["B"] = df["A"].astype(object) |
| 1395 | + result = getattr(df, method)() |
| 1396 | + if method == "min": |
| 1397 | + expected = Series(["a"], index=["B"]) |
| 1398 | + else: |
| 1399 | + expected = Series(["c"], index=["B"]) |
| 1400 | + tm.assert_series_equal(result, expected) |
| 1401 | + |
| 1402 | + result = getattr(np, method)(df) |
| 1403 | + tm.assert_series_equal(result, expected) |
| 1404 | + |
| 1405 | + def test_reduction_object_block_splits_nuisance_columns(self): |
| 1406 | + # GH#37827 |
| 1407 | + df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", "c"]}, dtype=object) |
| 1408 | + |
| 1409 | + # We should only exclude "B", not "A" |
| 1410 | + result = df.mean() |
| 1411 | + expected = Series([1.0], index=["A"]) |
| 1412 | + tm.assert_series_equal(result, expected) |
| 1413 | + |
| 1414 | + # Same behavior but heterogeneous dtype |
| 1415 | + df["C"] = df["A"].astype(int) + 4 |
| 1416 | + |
| 1417 | + result = df.mean() |
| 1418 | + expected = Series([1.0, 5.0], index=["A", "C"]) |
| 1419 | + tm.assert_series_equal(result, expected) |
| 1420 | + |
| 1421 | + |
1311 | 1422 | def test_sum_timedelta64_skipna_false():
|
1312 | 1423 | # GH#17235
|
1313 | 1424 | arr = np.arange(8).astype(np.int64).view("m8[s]").reshape(4, 2)
|
@@ -1352,6 +1463,6 @@ def test_minmax_extensionarray(method, numeric_only):
|
1352 | 1463 | df = DataFrame({"Int64": ser})
|
1353 | 1464 | result = getattr(df, method)(numeric_only=numeric_only)
|
1354 | 1465 | expected = Series(
|
1355 |
| - [getattr(int64_info, method)], index=pd.Index(["Int64"], dtype="object") |
| 1466 | + [getattr(int64_info, method)], index=Index(["Int64"], dtype="object") |
1356 | 1467 | )
|
1357 | 1468 | tm.assert_series_equal(result, expected)
|
0 commit comments