|
10 | 10 |
|
11 | 11 | import pandas as pd
|
12 | 12 | from pandas import (
|
| 13 | + Categorical, |
13 | 14 | DataFrame,
|
14 | 15 | Grouper,
|
15 | 16 | Index,
|
|
18 | 19 | Timestamp,
|
19 | 20 | date_range,
|
20 | 21 | read_csv,
|
| 22 | + to_datetime, |
21 | 23 | )
|
22 | 24 | import pandas._testing as tm
|
23 | 25 | from pandas.core.base import SpecificationError
|
@@ -1716,15 +1718,48 @@ def test_pivot_table_values_key_error():
|
1716 | 1718 | )
|
1717 | 1719 |
|
1718 | 1720 |
|
1719 |
| -def test_empty_dataframe_groupby(): |
1720 |
| - # GH8093: grouping an empty frame by "A" and summing should give an empty float64 frame with columns B and C and an index named "A" |
1721 |
| - df = DataFrame(columns=["A", "B", "C"]) |
1722 |
| - |
1723 |
| - result = df.groupby("A").sum() |
1724 |
| - expected = DataFrame(columns=["B", "C"], dtype=np.float64) |
1725 |
| - expected.index.name = "A" |
1726 |
| - |
1727 |
| - tm.assert_frame_equal(result, expected) |
| 1721 | +@pytest.mark.parametrize("columns", ["C", ["C"]]) |
| 1722 | +@pytest.mark.parametrize("keys", [["A"], ["A", "B"]]) |
| 1723 | +@pytest.mark.parametrize( |
| 1724 | + "values", |
| 1725 | + [ |
| 1726 | + [True], |
| 1727 | + [0], |
| 1728 | + [0.0], |
| 1729 | + ["a"], |
| 1730 | + [Categorical([0])], |
| 1731 | + [to_datetime(0)], |
| 1732 | + [date_range(0, 1, 1, tz="US/Eastern")], |
| 1733 | + [pd.array([0], dtype="Int64")], |
| 1734 | + ], |
| 1735 | +) |
| 1736 | +@pytest.mark.parametrize("method", ["attr", "agg", "apply"]) |
| 1737 | +@pytest.mark.parametrize( |
| 1738 | + "op", ["idxmax", "idxmin", "mad", "min", "max", "sum", "prod", "skew"] |
| 1739 | +) |
| 1740 | +def test_empty_groupby(columns, keys, values, method, op): |
| 1741 | + # GH8093 & GH26411: applying `op` to a groupby over a zero-row frame should equal the empty frame re-indexed by `keys` and restricted to `columns`, with the column dtype kept unless overridden below |
| 1742 | + |
| 1743 | + override_dtype = None |
| 1744 | + if isinstance(values[0], bool) and op in ("prod", "sum") and method != "apply": |
| 1745 | + # sum/product of bools is an integer, so the expected result is cast to int64 (the "apply" path is excluded from this override) |
| 1746 | + override_dtype = "int64" |
| 1747 | + |
| 1748 | + df = DataFrame([3 * values], columns=list("ABC")) |
| 1749 | + df = df.iloc[:0] |
| 1750 | + |
| 1751 | + gb = df.groupby(keys)[columns] |
| 1752 | + if method == "attr": |
| 1753 | + result = getattr(gb, op)() |
| 1754 | + else: |
| 1755 | + result = getattr(gb, method)(op) |
| 1756 | + |
| 1757 | + expected = df.set_index(keys)[columns] |
| 1758 | + if override_dtype is not None: |
| 1759 | + expected = expected.astype(override_dtype) |
| 1760 | + if len(keys) == 1: |
| 1761 | + expected.index.name = keys[0] |
| 1762 | + tm.assert_equal(result, expected) |
1728 | 1763 |
|
1729 | 1764 |
|
1730 | 1765 | def test_tuple_as_grouping():
|
|
0 commit comments