diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a269580bc4453..b77e301547731 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -420,6 +420,7 @@ Reshaping - Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`) - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) +- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Sparse @@ -442,7 +443,6 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) - Fixed metadata propagation in the :class:`Series.dt` accessor (:issue:`28283`) -- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index f2eb282d1e498..ad69e9f31e065 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -17,9 +17,17 @@ Sequence, Tuple, Union, + cast, ) -from pandas._typing import AggFuncType, Axis, FrameOrSeries, Label +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + Axis, + FrameOrSeries, + FrameOrSeriesUnion, + Label, +) from pandas.core.dtypes.common import is_dict_like, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -391,7 +399,7 @@ def validate_func_kwargs( def transform( obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs -) -> FrameOrSeries: +) -> FrameOrSeriesUnion: """ Transform a DataFrame or Series @@ -424,16 +432,20 @@ def transform( assert not is_series return transform(obj.T, func, 0, *args, **kwargs).T - if isinstance(func, list): + if is_list_like(func) and not is_dict_like(func): + func = cast(List[AggFuncTypeBase], func) + # Convert func equivalent dict if is_series: func = {com.get_callable_name(v) or v: v for v in func} else: func = {col: func for col in obj} - if isinstance(func, dict): + if is_dict_like(func): + func = cast(Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]], func) return transform_dict_like(obj, func, *args, **kwargs) # func is either str or callable + func = cast(AggFuncTypeBase, func) try: result = transform_str_or_callable(obj, func, *args, **kwargs) except Exception: @@ -451,29 +463,42 @@ def transform( return result -def transform_dict_like(obj, func, *args, **kwargs): +def transform_dict_like( + obj: FrameOrSeries, + func: Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]], + *args, + **kwargs, +): """ Compute transform in the case of a dict-like func """ from pandas.core.reshape.concat import concat + if len(func) == 0: + raise ValueError("No transform functions were provided") + if obj.ndim != 1: + # Check for missing columns on a frame cols = sorted(set(func.keys()) - set(obj.columns)) if len(cols) > 0: raise SpecificationError(f"Column(s) {cols} do not exist") - if any(isinstance(v, dict) for v in func.values()): + # Can't use func.values(); wouldn't work for a Series + if any(is_dict_like(v) for _, v in func.items()): # GH 15931 - deprecation of renaming keys raise SpecificationError("nested renamer is not supported") - results = {} + results: Dict[Label, FrameOrSeriesUnion] = {} for name, how in func.items(): colg = obj._gotitem(name, ndim=1) try: results[name] = transform(colg, how, 0, *args, **kwargs) - except Exception as e: - if str(e) == "Function did not transform": - raise e + except Exception as err: + if ( + str(err) == "Function did not transform" + or str(err) == "No transform functions were provided" + ): + raise err # combine results if len(results) == 0: @@ -481,7 +506,9 @@ def transform_dict_like(obj, func, *args, **kwargs): return concat(results, axis=1) -def transform_str_or_callable(obj, func, *args, **kwargs): +def transform_str_or_callable( + obj: FrameOrSeries, func: AggFuncTypeBase, *args, **kwargs +) -> FrameOrSeriesUnion: """ Compute transform in the case of a string or callable func """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1f9987d9d3f5b..28a6f6b8c6621 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7270,7 +7270,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: def _gotitem( self, - key: Union[str, List[str]], + key: Union[Label, List[Label]], ndim: int, subset: Optional[FrameOrSeriesUnion] = None, ) -> FrameOrSeriesUnion: diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 14363dabfcdf3..d595b03a535ed 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -265,16 +265,17 @@ Parameters ---------- -func : function, str, list or dict +func : function, str, list-like or dict-like Function to use for transforming the data. If a function, must either - work when passed a {klass} or when passed to {klass}.apply. + work when passed a {klass} or when passed to {klass}.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. Accepted combinations are: - function - string function name - - list of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` - - dict of axis labels -> functions, function names or list of such. + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, function names or list-like of such. {axis} *args Positional arguments to pass to `func`. diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py index 346e60954fc13..01c6fd4ec08f0 100644 --- a/pandas/tests/frame/apply/test_frame_transform.py +++ b/pandas/tests/frame/apply/test_frame_transform.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels @@ -41,9 +41,15 @@ def test_transform_groupby_kernel(axis, float_frame, op): @pytest.mark.parametrize( - "ops, names", [([np.sqrt], ["sqrt"]), ([np.abs, np.sqrt], ["absolute", "sqrt"])] + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], ) -def test_transform_list(axis, float_frame, ops, names): +def test_transform_listlike(axis, float_frame, ops, names): # GH 35964 other_axis = 1 if axis in {0, "index"} else 0 with np.errstate(all="ignore"): @@ -56,7 +62,14 @@ def test_transform_list(axis, float_frame, ops, names): tm.assert_frame_equal(result, expected) -def test_transform_dict(axis, float_frame): +@pytest.mark.parametrize("ops", [[], np.array([])]) +def test_transform_empty_listlike(float_frame, ops): + with pytest.raises(ValueError, match="No transform functions were provided"): + float_frame.transform(ops) + + +@pytest.mark.parametrize("box", [dict, Series]) +def test_transform_dictlike(axis, float_frame, box): # GH 35964 if axis == 0 or axis == "index": e = float_frame.columns[0] @@ -64,10 +77,26 @@ def test_transform_dict(axis, float_frame): else: e = float_frame.index[0] expected = float_frame.iloc[[0]].transform(np.abs) - result = float_frame.transform({e: np.abs}, axis=axis) + result = float_frame.transform(box({e: np.abs}), axis=axis) tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "ops", + [ + {}, + {"A": []}, + {"A": [], "B": "cumsum"}, + {"A": "cumsum", "B": []}, + {"A": [], "B": ["cumsum"]}, + {"A": ["cumsum"], "B": []}, + ], +) +def test_transform_empty_dictlike(float_frame, ops): + with pytest.raises(ValueError, match="No transform functions were provided"): + float_frame.transform(ops) + + @pytest.mark.parametrize("use_apply", [True, False]) def test_transform_udf(axis, float_frame, use_apply): # GH 35964 diff --git a/pandas/tests/series/apply/test_series_transform.py b/pandas/tests/series/apply/test_series_transform.py index 0e200709f60cf..67b271f757cfb 100644 --- a/pandas/tests/series/apply/test_series_transform.py +++ b/pandas/tests/series/apply/test_series_transform.py @@ -34,9 +34,15 @@ def test_transform_groupby_kernel(string_series, op): @pytest.mark.parametrize( - "ops, names", [([np.sqrt], ["sqrt"]), ([np.abs, np.sqrt], ["absolute", "sqrt"])] + "ops, names", + [ + ([np.sqrt], ["sqrt"]), + ([np.abs, np.sqrt], ["absolute", "sqrt"]), + (np.array([np.sqrt]), ["sqrt"]), + (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), + ], ) -def test_transform_list(string_series, ops, names): +def test_transform_listlike(string_series, ops, names): # GH 35964 with np.errstate(all="ignore"): expected = concat([op(string_series) for op in ops], axis=1) @@ -45,15 +51,38 @@ def test_transform_list(string_series, ops, names): tm.assert_frame_equal(result, expected) -def test_transform_dict(string_series): +@pytest.mark.parametrize("ops", [[], np.array([])]) +def test_transform_empty_listlike(string_series, ops): + with pytest.raises(ValueError, match="No transform functions were provided"): + string_series.transform(ops) + + +@pytest.mark.parametrize("box", [dict, Series]) +def test_transform_dictlike(string_series, box): # GH 35964 with np.errstate(all="ignore"): expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1) expected.columns = ["foo", "bar"] - result = string_series.transform({"foo": np.sqrt, "bar": np.abs}) + result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs})) tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "ops", + [ + {}, + {"A": []}, + {"A": [], "B": ["cumsum"]}, + {"A": ["cumsum"], "B": []}, + {"A": [], "B": "cumsum"}, + {"A": "cumsum", "B": []}, + ], +) +def test_transform_empty_dictlike(string_series, ops): + with pytest.raises(ValueError, match="No transform functions were provided"): + string_series.transform(ops) + + def test_transform_udf(axis, string_series): # GH 35964 # via apply