Skip to content

Commit 148011b

Browse files
rhshadrach authored and pmhatre1 committed
CLN: Enforce deprecation of using alias for builtin/NumPy funcs (pandas-dev#57444)
* CLN: Enforce deprecation of using alias for builtin/NumPy funcs
* GH# and whatsnew
* Fixup docs
* More tests
* Restore docstring
* Test fixes
* Test fixups
* Test fixes
* Test fixup
* Test fixes
1 parent f40172e commit 148011b

26 files changed

+151
-413
lines changed

doc/source/whatsnew/v0.15.1.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ API changes
9292

9393
.. code-block:: ipython
9494
95-
In [4]: gr.apply(sum)
95+
In [4]: gr.apply("sum")
9696
Out[4]:
9797
joe
9898
jim
@@ -102,9 +102,8 @@ API changes
102102
current behavior:
103103

104104
.. ipython:: python
105-
:okwarning:
106105
107-
gr.apply(sum)
106+
gr.apply("sum")
108107
109108
- Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
110109
not found in the index (:issue:`7860`):

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Removal of prior version deprecations/changes
199199
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
200200
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
201201
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
202+
- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`)
202203
- Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`)
203204
- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`)
204205
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)

pandas/_testing/__init__.py

-8
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,6 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
398398
return pytest.raises(expected_exception, match=None)
399399

400400

401-
cython_table = pd.core.common._cython_table.items()
402-
403-
404401
def get_cython_table_params(ndframe, func_names_and_expected):
405402
"""
406403
Combine frame, functions from com._cython_table
@@ -421,11 +418,6 @@ def get_cython_table_params(ndframe, func_names_and_expected):
421418
results = []
422419
for func_name, expected in func_names_and_expected:
423420
results.append((ndframe, func_name, expected))
424-
results += [
425-
(ndframe, func, expected)
426-
for func, name in cython_table
427-
if name == func_name
428-
]
429421
return results
430422

431423

pandas/core/apply.py

-15
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,7 @@ def agg(self) -> DataFrame | Series | None:
175175
Result of aggregation, or None if agg cannot be performed by
176176
this method.
177177
"""
178-
obj = self.obj
179178
func = self.func
180-
args = self.args
181-
kwargs = self.kwargs
182179

183180
if isinstance(func, str):
184181
return self.apply_str()
@@ -189,12 +186,6 @@ def agg(self) -> DataFrame | Series | None:
189186
# we require a list, but not a 'str'
190187
return self.agg_list_like()
191188

192-
if callable(func):
193-
f = com.get_cython_func(func)
194-
if f and not args and not kwargs:
195-
warn_alias_replacement(obj, func, f)
196-
return getattr(obj, f)()
197-
198189
# caller can react
199190
return None
200191

@@ -300,12 +291,6 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
300291
if isinstance(func, str):
301292
return self._apply_str(obj, func, *args, **kwargs)
302293

303-
if not args and not kwargs:
304-
f = com.get_cython_func(func)
305-
if f:
306-
warn_alias_replacement(obj, func, f)
307-
return getattr(obj, f)()
308-
309294
# Two possible ways to use a UDF - apply or call directly
310295
try:
311296
return obj.apply(func, args=args, **kwargs)

pandas/core/common.py

-24
Original file line numberDiff line numberDiff line change
@@ -608,22 +608,6 @@ def require_length_match(data, index: Index) -> None:
608608
)
609609

610610

611-
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
612-
# whereas np.min and np.max (which directly call obj.min and obj.max)
613-
# default to axis=None.
614-
_builtin_table = {
615-
builtins.sum: np.sum,
616-
builtins.max: np.maximum.reduce,
617-
builtins.min: np.minimum.reduce,
618-
}
619-
620-
# GH#53425: Only for deprecation
621-
_builtin_table_alias = {
622-
builtins.sum: "np.sum",
623-
builtins.max: "np.maximum.reduce",
624-
builtins.min: "np.minimum.reduce",
625-
}
626-
627611
_cython_table = {
628612
builtins.sum: "sum",
629613
builtins.max: "max",
@@ -660,14 +644,6 @@ def get_cython_func(arg: Callable) -> str | None:
660644
return _cython_table.get(arg)
661645

662646

663-
def is_builtin_func(arg):
664-
"""
665-
if we define a builtin function for this argument, return it,
666-
otherwise return the arg
667-
"""
668-
return _builtin_table.get(arg, arg)
669-
670-
671647
def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
672648
"""
673649
If a name is missing then replace it by level_n, where n is the count

pandas/core/groupby/generic.py

-16
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@
5959
maybe_mangle_lambdas,
6060
reconstruct_func,
6161
validate_func_kwargs,
62-
warn_alias_replacement,
6362
)
6463
import pandas.core.common as com
6564
from pandas.core.frame import DataFrame
@@ -357,11 +356,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
357356
return ret
358357

359358
else:
360-
cyfunc = com.get_cython_func(func)
361-
if cyfunc and not args and not kwargs:
362-
warn_alias_replacement(self, func, cyfunc)
363-
return getattr(self, cyfunc)()
364-
365359
if maybe_use_numba(engine):
366360
return self._aggregate_with_numba(
367361
func, *args, engine_kwargs=engine_kwargs, **kwargs
@@ -409,11 +403,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
409403
agg = aggregate
410404

411405
def _python_agg_general(self, func, *args, **kwargs):
412-
orig_func = func
413-
func = com.is_builtin_func(func)
414-
if orig_func != func:
415-
alias = com._builtin_table_alias[func]
416-
warn_alias_replacement(self, orig_func, alias)
417406
f = lambda x: func(x, *args, **kwargs)
418407

419408
obj = self._obj_with_exclusions
@@ -1656,11 +1645,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
16561645
agg = aggregate
16571646

16581647
def _python_agg_general(self, func, *args, **kwargs):
1659-
orig_func = func
1660-
func = com.is_builtin_func(func)
1661-
if orig_func != func:
1662-
alias = com._builtin_table_alias[func]
1663-
warn_alias_replacement(self, orig_func, alias)
16641648
f = lambda x: func(x, *args, **kwargs)
16651649

16661650
if self.ngroups == 0:

pandas/core/groupby/groupby.py

-12
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ class providing the base-class of operations.
9494
sample,
9595
)
9696
from pandas.core._numba import executor
97-
from pandas.core.apply import warn_alias_replacement
9897
from pandas.core.arrays import (
9998
ArrowExtensionArray,
10099
BaseMaskedArray,
@@ -1647,12 +1646,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
16471646
b 2
16481647
dtype: int64
16491648
"""
1650-
orig_func = func
1651-
func = com.is_builtin_func(func)
1652-
if orig_func != func:
1653-
alias = com._builtin_table_alias[orig_func]
1654-
warn_alias_replacement(self, orig_func, alias)
1655-
16561649
if isinstance(func, str):
16571650
if hasattr(self, func):
16581651
res = getattr(self, func)
@@ -1868,11 +1861,6 @@ def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
18681861
@final
18691862
def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
18701863
# optimized transforms
1871-
orig_func = func
1872-
func = com.get_cython_func(func) or func
1873-
if orig_func != func:
1874-
warn_alias_replacement(self, orig_func, func)
1875-
18761864
if not isinstance(func, str):
18771865
return self._transform_general(func, engine, engine_kwargs, *args, **kwargs)
18781866

pandas/core/resample.py

+1-13
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,12 @@
4545
)
4646

4747
import pandas.core.algorithms as algos
48-
from pandas.core.apply import (
49-
ResamplerWindowApply,
50-
warn_alias_replacement,
51-
)
48+
from pandas.core.apply import ResamplerWindowApply
5249
from pandas.core.arrays import ArrowExtensionArray
5350
from pandas.core.base import (
5451
PandasObject,
5552
SelectionMixin,
5653
)
57-
import pandas.core.common as com
5854
from pandas.core.generic import (
5955
NDFrame,
6056
_shared_docs,
@@ -1609,10 +1605,6 @@ def _downsample(self, how, **kwargs):
16091605
how : string / cython mapped function
16101606
**kwargs : kw args passed to how function
16111607
"""
1612-
orig_how = how
1613-
how = com.get_cython_func(how) or how
1614-
if orig_how != how:
1615-
warn_alias_replacement(self, orig_how, how)
16161608
ax = self.ax
16171609

16181610
# Excludes `on` column when provided
@@ -1775,10 +1767,6 @@ def _downsample(self, how, **kwargs):
17751767
if self.kind == "timestamp":
17761768
return super()._downsample(how, **kwargs)
17771769

1778-
orig_how = how
1779-
how = com.get_cython_func(how) or how
1780-
if orig_how != how:
1781-
warn_alias_replacement(self, orig_how, how)
17821770
ax = self.ax
17831771

17841772
if is_subperiod(ax.freq, self.freq):

pandas/tests/apply/test_frame_apply.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1699,13 +1699,11 @@ def foo2(x, b=2, c=0):
16991699
def test_agg_std():
17001700
df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])
17011701

1702-
with tm.assert_produces_warning(FutureWarning, match="using DataFrame.std"):
1703-
result = df.agg(np.std)
1702+
result = df.agg(np.std, ddof=1)
17041703
expected = Series({"A": 2.0, "B": 2.0}, dtype=float)
17051704
tm.assert_series_equal(result, expected)
17061705

1707-
with tm.assert_produces_warning(FutureWarning, match="using Series.std"):
1708-
result = df.agg([np.std])
1706+
result = df.agg([np.std], ddof=1)
17091707
expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"])
17101708
tm.assert_frame_equal(result, expected)
17111709

pandas/tests/apply/test_frame_apply_relabeling.py

+9-13
Original file line numberDiff line numberDiff line change
@@ -49,24 +49,20 @@ def test_agg_relabel_multi_columns_multi_methods():
4949
def test_agg_relabel_partial_functions():
5050
# GH 26513, test on partial, functools or more complex cases
5151
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
52-
msg = "using Series.[mean|min]"
53-
with tm.assert_produces_warning(FutureWarning, match=msg):
54-
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
52+
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
5553
expected = pd.DataFrame(
5654
{"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
5755
)
5856
tm.assert_frame_equal(result, expected)
5957

60-
msg = "using Series.[mean|min|max|sum]"
61-
with tm.assert_produces_warning(FutureWarning, match=msg):
62-
result = df.agg(
63-
foo=("A", min),
64-
bar=("A", np.min),
65-
cat=("B", max),
66-
dat=("C", "min"),
67-
f=("B", np.sum),
68-
kk=("B", lambda x: min(x)),
69-
)
58+
result = df.agg(
59+
foo=("A", min),
60+
bar=("A", np.min),
61+
cat=("B", max),
62+
dat=("C", "min"),
63+
f=("B", np.sum),
64+
kk=("B", lambda x: min(x)),
65+
)
7066
expected = pd.DataFrame(
7167
{
7268
"A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],

pandas/tests/apply/test_series_apply.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -547,10 +547,7 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
547547
# GH 39140
548548
expected = Series({name: op(string_series) for name, op in zip(names, ops)})
549549
expected.name = "series"
550-
warn = FutureWarning if how == "agg" else None
551-
msg = f"using Series.[{'|'.join(names)}]"
552-
with tm.assert_produces_warning(warn, match=msg):
553-
result = getattr(string_series, how)(ops, **kwargs)
550+
result = getattr(string_series, how)(ops, **kwargs)
554551
tm.assert_series_equal(result, expected)
555552

556553

@@ -571,10 +568,7 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
571568
# GH 39140
572569
expected = Series({name: op(string_series) for name, op in ops.items()})
573570
expected.name = string_series.name
574-
warn = FutureWarning if how == "agg" else None
575-
msg = "using Series.[sum|mean]"
576-
with tm.assert_produces_warning(warn, match=msg):
577-
result = getattr(string_series, how)(ops, **kwargs)
571+
result = getattr(string_series, how)(ops, **kwargs)
578572
tm.assert_series_equal(result, expected)
579573

580574

pandas/tests/apply/test_series_apply_relabeling.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@ def test_relabel_no_duplicated_method():
1414
expected = df["B"].agg({"foo": "min", "bar": "max"})
1515
tm.assert_series_equal(result, expected)
1616

17-
msg = "using Series.[sum|min|max]"
18-
with tm.assert_produces_warning(FutureWarning, match=msg):
19-
result = df["B"].agg(foo=sum, bar=min, cat="max")
20-
msg = "using Series.[sum|min|max]"
21-
with tm.assert_produces_warning(FutureWarning, match=msg):
22-
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
17+
result = df["B"].agg(foo=sum, bar=min, cat="max")
18+
expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
2319
tm.assert_series_equal(result, expected)
2420

2521

@@ -32,8 +28,6 @@ def test_relabel_duplicated_method():
3228
expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
3329
tm.assert_series_equal(result, expected)
3430

35-
msg = "using Series.min"
36-
with tm.assert_produces_warning(FutureWarning, match=msg):
37-
result = df["B"].agg(foo=min, bar="min")
31+
result = df["B"].agg(foo=min, bar="min")
3832
expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
3933
tm.assert_series_equal(result, expected)

pandas/tests/groupby/aggregate/test_aggregate.py

+4-15
Original file line numberDiff line numberDiff line change
@@ -289,9 +289,7 @@ def func(ser):
289289
def test_agg_multiple_functions_maintain_order(df):
290290
# GH #610
291291
funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)]
292-
msg = "is currently using SeriesGroupBy.mean"
293-
with tm.assert_produces_warning(FutureWarning, match=msg):
294-
result = df.groupby("A")["C"].agg(funcs)
292+
result = df.groupby("A")["C"].agg(funcs)
295293
exp_cols = Index(["mean", "max", "min"])
296294

297295
tm.assert_index_equal(result.columns, exp_cols)
@@ -881,11 +879,9 @@ def test_agg_relabel_multiindex_column(
881879
expected = DataFrame({"a_max": [1, 3]}, index=idx)
882880
tm.assert_frame_equal(result, expected)
883881

884-
msg = "is currently using SeriesGroupBy.mean"
885-
with tm.assert_produces_warning(FutureWarning, match=msg):
886-
result = df.groupby(("x", "group")).agg(
887-
col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
888-
)
882+
result = df.groupby(("x", "group")).agg(
883+
col_1=agg_col1, col_2=agg_col2, col_3=agg_col3
884+
)
889885
expected = DataFrame(
890886
{"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx
891887
)
@@ -1036,13 +1032,6 @@ def test_groupby_as_index_agg(df):
10361032
gr = df.groupby(ts)
10371033
gr.nth(0) # invokes set_selection_from_grouper internally
10381034

1039-
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
1040-
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
1041-
res = gr.apply(sum)
1042-
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
1043-
alt = df.groupby(ts).apply(sum)
1044-
tm.assert_frame_equal(res, alt)
1045-
10461035
for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
10471036
gr = df.groupby(ts, as_index=False)
10481037
left = getattr(gr, attr)()

0 commit comments

Comments
 (0)