Skip to content

Commit ba47001

Browse files
committed
Implementation
1 parent ccf4b84 commit ba47001

File tree

1 file changed

+127
-3
lines changed

1 file changed

+127
-3
lines changed

pandas/core/groupby/generic.py

+127-3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
all_indexes_same,
7676
default_index,
7777
)
78+
from pandas.core.reshape.concat import concat
7879
from pandas.core.series import Series
7980
from pandas.core.sorting import get_group_index
8081
from pandas.core.util.numba_ import maybe_use_numba
@@ -1863,15 +1864,138 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
18631864
3 5 9
18641865
4 5 8
18651866
5 5 9
1867+
1868+
Using list-like arguments
1869+
1870+
>>> df = pd.DataFrame({"col": list("aab"), "val": range(3)})
1871+
>>> df.groupby("col").transform(["sum", "min"])
1872+
val
1873+
sum min
1874+
0 1 0
1875+
1 1 0
1876+
2 2 2
1877+
1878+
.. versionchanged:: 3.0.0
1879+
1880+
Named aggregation
1881+
1882+
>>> df = pd.DataFrame({"A": list("aaabbbccc"), "B": range(9), "D": range(9, 18)})
1883+
>>> df.groupby("A").transform(
1884+
... b_min=pd.NamedAgg(column="B", aggfunc="min"),
1885+
... c_sum=pd.NamedAgg(column="D", aggfunc="sum")
1886+
... )
1887+
b_min c_sum
1888+
0 0 30
1889+
1 0 30
1890+
2 0 30
1891+
3 3 39
1892+
4 3 39
1893+
5 3 39
1894+
6 6 48
1895+
7 6 48
1896+
8 6 48
1897+
1898+
.. versionchanged:: 3.0.0
18661899
"""
18671900
)
18681901

18691902
@Substitution(klass="DataFrame", example=__examples_dataframe_doc)
18701903
@Appender(_transform_template)
1871-
def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
1872-
return self._transform(
1873-
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
1904+
def transform(
1905+
self,
1906+
func: None
1907+
| (Callable | str | list[Callable | str] | dict[str, NamedAgg]) = None,
1908+
*args,
1909+
engine: str | None = None,
1910+
engine_kwargs: dict | None = None,
1911+
**kwargs,
1912+
) -> DataFrame:
1913+
if func is None:
1914+
# Convert named aggregations to dictionary format
1915+
transformed_func = {
1916+
name: aggfunc
1917+
for name, aggfunc in kwargs.items()
1918+
if not isinstance(aggfunc[1], DataFrame)
1919+
}
1920+
kwargs = {}
1921+
return self._transform_multiple_funcs(
1922+
transformed_func,
1923+
*args,
1924+
engine=engine,
1925+
engine_kwargs=engine_kwargs,
1926+
**kwargs,
1927+
)
1928+
else:
1929+
if isinstance(func, list):
1930+
func = maybe_mangle_lambdas(func)
1931+
return self._transform_multiple_funcs(
1932+
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
1933+
)
1934+
else:
1935+
return self._transform(
1936+
func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
1937+
)
1938+
1939+
def _transform_multiple_funcs(
1940+
self,
1941+
func: Any,
1942+
*args,
1943+
engine: str | None = None,
1944+
engine_kwargs: dict | None = None,
1945+
**kwargs,
1946+
) -> DataFrame:
1947+
results = []
1948+
if isinstance(func, dict):
1949+
for name, named_agg in func.items():
1950+
column_name = named_agg.column
1951+
agg_func = named_agg.aggfunc
1952+
result = self._transform_single_column(
1953+
column_name,
1954+
agg_func,
1955+
*args,
1956+
engine=engine,
1957+
engine_kwargs=engine_kwargs,
1958+
**kwargs,
1959+
)
1960+
result.name = name
1961+
results.append(result)
1962+
output = concat(results, axis=1)
1963+
elif isinstance(func, list):
1964+
col_names = []
1965+
columns = [com.get_callable_name(f) or f for f in func]
1966+
func_pairs = zip(columns, func)
1967+
for idx, (name, func_item) in enumerate(func_pairs):
1968+
result = self._transform(
1969+
func_item,
1970+
*args,
1971+
engine=engine,
1972+
engine_kwargs=engine_kwargs,
1973+
**kwargs,
1974+
)
1975+
results.append(result)
1976+
col_names.extend([(col, name) for col in result.columns])
1977+
output = concat(results, ignore_index=True, axis=1)
1978+
arrays = [list(x) for x in zip(*col_names)]
1979+
output.columns = MultiIndex.from_arrays(arrays)
1980+
output = output.sort_index(axis=1, level=[0], sort_remaining=False)
1981+
1982+
return output
1983+
1984+
def _transform_single_column(
1985+
self,
1986+
column_name: Hashable,
1987+
agg_func: Callable | str,
1988+
*args,
1989+
engine: str | None = None,
1990+
engine_kwargs: dict | None = None,
1991+
**kwargs,
1992+
) -> Series:
1993+
data = self._obj_with_exclusions[column_name]
1994+
groupings = self._grouper.groupings
1995+
result = data.groupby(groupings).transform(
1996+
agg_func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs
18741997
)
1998+
return result
18751999

18762000
def _define_paths(self, func, *args, **kwargs):
18772001
if isinstance(func, str):

0 commit comments

Comments
 (0)