diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index dfa87e3cd4574..f6f6aa51be91a 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -44,6 +44,7 @@ Other enhancements
 - Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`)
 - Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`)
 - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`)
+- :meth:`DataFrame.assign` now accepts a dict of ``{value: condition}`` pairs to assign a column based on multiple conditions (:issue:`46285`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9a554537896ab..8de61093da2de 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4468,13 +4468,25 @@ def assign(self, **kwargs) -> DataFrame:
 
         Parameters
         ----------
-        **kwargs : dict of {str: callable or Series}
+        **kwargs : dict of {str: callable or Series or dict}
             The column names are keywords. If the values are
             callable, they are computed on the DataFrame and
-            assigned to the new columns. The callable must not
-            change input DataFrame (though pandas doesn't check it).
-            If the values are not callable, (e.g. a Series, scalar, or array),
-            they are simply assigned.
+            assigned to the new columns.
+
+            A value can also be a multiple-condition dict whose keys
+            are the values to assign and whose values are the
+            conditions: boolean Series, array-likes, or callables
+            (see examples). A callable condition is computed on the
+            DataFrame and should return a boolean Series or array.
+            When several conditions hold for a row, the first one
+            wins; rows matching no condition are assigned ``None``.
+            This is based on :func:`numpy.select`.
+
+            Callables must not change the input DataFrame (though
+            pandas doesn't check it).
+
+            If the values are neither callable nor a dict (e.g. a
+            Series, scalar, or array), they are simply assigned.
 
         Returns
         -------
@@ -4520,11 +4532,37 @@ def assign(self, **kwargs) -> DataFrame:
                   temp_c  temp_f  temp_k
         Portland    17.0    62.6  290.15
         Berkeley    25.0    77.0  298.15
+
+        If you want to assign a column based on multiple conditions, you can
+        pass a multiple-condition dict as follows:
+
+        >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [5, 4, 6]})
+        >>> df
+           a  b
+        0  1  5
+        1  2  4
+        2  3  6
+        >>> df.assign(
+        ...     new_column={
+        ...         "case 1": lambda x: (x.a < 2) & (x.b == 5),
+        ...         "case 2": lambda x: (x.a == 2) & (x.b < 5),
+        ...         "case 3": lambda x: (x.a > 2) & (x.b > 5),
+        ...     }
+        ... )
+           a  b new_column
+        0  1  5     case 1
+        1  2  4     case 2
+        2  3  6     case 3
         """
         data = self.copy()
 
         for k, v in kwargs.items():
-            data[k] = com.apply_if_callable(v, data)
+            if isinstance(v, dict):
+                # keys are the values to assign; dict values are the conditions
+                conditions = [com.apply_if_callable(c, data) for c in v.values()]
+                data[k] = np.select(conditions, list(v), default=None)
+            else:
+                data[k] = com.apply_if_callable(v, data)
         return data
 
     def _sanitize_column(self, value) -> ArrayLike:
diff --git a/pandas/tests/frame/methods/test_assign.py b/pandas/tests/frame/methods/test_assign.py
index 0ae501d43e742..b613cdd08b8ae 100644
--- a/pandas/tests/frame/methods/test_assign.py
+++ b/pandas/tests/frame/methods/test_assign.py
@@ -82,3 +82,45 @@ def test_assign_dependent(self):
         result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"])
         expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD"))
         tm.assert_frame_equal(result, expected)
+
+    def test_assign_multiple_conditions_lambda(self):
+        df = DataFrame({"A": [1, 2, 3]})
+
+        # conditions cover all cases
+        result = df.assign(
+            A_status={
+                "less than 2": lambda x: x["A"] < 2,
+                "equals 2": lambda x: x["A"] == 2,
+                "bigger than 2": lambda x: x["A"] > 2,
+            }
+        )
+        expected = DataFrame(
+            {"A": [1, 2, 3], "A_status": ["less than 2", "equals 2", "bigger than 2"]}
+        )
+        tm.assert_frame_equal(result, expected)
+
+        # conditions do not cover all cases
+        result = df.assign(
+            A_status={
+                "less than 2": lambda x: x["A"] < 2,
+                "equals 2": lambda x: x["A"] == 2,
+            }
+        )
+        expected = DataFrame(
+            {"A": [1, 2, 3], "A_status": ["less than 2", "equals 2", None]}
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_assign_multiple_conditions_non_callable(self):
+        # conditions may also be boolean Series or arrays instead of callables
+        df = DataFrame({"A": [1, 2, 3]})
+        result = df.assign(
+            A_status={
+                "less than 2": df["A"] < 2,
+                "equals 2": (df["A"] == 2).to_numpy(),
+            }
+        )
+        expected = DataFrame(
+            {"A": [1, 2, 3], "A_status": ["less than 2", "equals 2", None]}
+        )
+        tm.assert_frame_equal(result, expected)