Skip to content

Commit d8d0218

Browse files
committed
Merge remote-tracking branch 'upstream/main' into perf/ri/reindex_return
2 parents 7d6fc28 + 47cd690 commit d8d0218

29 files changed

+197
-415
lines changed

ci/code_checks.sh

-2
Original file line numberDiff line numberDiff line change
@@ -145,15 +145,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
145145
MSG='Partially validate docstrings (GL08)' ; echo $MSG
146146
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL08 --ignore_functions \
147147
pandas.ExcelFile.book\
148-
pandas.ExcelFile.sheet_names\
149148
pandas.Index.empty\
150149
pandas.Index.names\
151150
pandas.Index.view\
152151
pandas.IntervalIndex.left\
153152
pandas.IntervalIndex.length\
154153
pandas.IntervalIndex.mid\
155154
pandas.IntervalIndex.right\
156-
pandas.MultiIndex.codes\
157155
pandas.Period.freq\
158156
pandas.Period.ordinal\
159157
pandas.PeriodIndex.freq\

doc/source/whatsnew/v0.15.1.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ API changes
9292

9393
.. code-block:: ipython
9494
95-
In [4]: gr.apply(sum)
95+
In [4]: gr.apply("sum")
9696
Out[4]:
9797
joe
9898
jim
@@ -102,9 +102,8 @@ API changes
102102
current behavior:
103103

104104
.. ipython:: python
105-
:okwarning:
106105
107-
gr.apply(sum)
106+
gr.apply("sum")
108107
109108
- Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
110109
not found in the index (:issue:`7860`):

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Removal of prior version deprecations/changes
199199
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
200200
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
201201
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
202+
- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`)
202203
- Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`)
203204
- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`)
204205
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)

pandas/_testing/__init__.py

-8
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,6 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager
398398
return pytest.raises(expected_exception, match=None)
399399

400400

401-
cython_table = pd.core.common._cython_table.items()
402-
403-
404401
def get_cython_table_params(ndframe, func_names_and_expected):
405402
"""
406403
Combine frame, functions from com._cython_table
@@ -421,11 +418,6 @@ def get_cython_table_params(ndframe, func_names_and_expected):
421418
results = []
422419
for func_name, expected in func_names_and_expected:
423420
results.append((ndframe, func_name, expected))
424-
results += [
425-
(ndframe, func, expected)
426-
for func, name in cython_table
427-
if name == func_name
428-
]
429421
return results
430422

431423

pandas/core/apply.py

-15
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,7 @@ def agg(self) -> DataFrame | Series | None:
175175
Result of aggregation, or None if agg cannot be performed by
176176
this method.
177177
"""
178-
obj = self.obj
179178
func = self.func
180-
args = self.args
181-
kwargs = self.kwargs
182179

183180
if isinstance(func, str):
184181
return self.apply_str()
@@ -189,12 +186,6 @@ def agg(self) -> DataFrame | Series | None:
189186
# we require a list, but not a 'str'
190187
return self.agg_list_like()
191188

192-
if callable(func):
193-
f = com.get_cython_func(func)
194-
if f and not args and not kwargs:
195-
warn_alias_replacement(obj, func, f)
196-
return getattr(obj, f)()
197-
198189
# caller can react
199190
return None
200191

@@ -300,12 +291,6 @@ def transform_str_or_callable(self, func) -> DataFrame | Series:
300291
if isinstance(func, str):
301292
return self._apply_str(obj, func, *args, **kwargs)
302293

303-
if not args and not kwargs:
304-
f = com.get_cython_func(func)
305-
if f:
306-
warn_alias_replacement(obj, func, f)
307-
return getattr(obj, f)()
308-
309294
# Two possible ways to use a UDF - apply or call directly
310295
try:
311296
return obj.apply(func, args=args, **kwargs)

pandas/core/common.py

-24
Original file line numberDiff line numberDiff line change
@@ -608,22 +608,6 @@ def require_length_match(data, index: Index) -> None:
608608
)
609609

610610

611-
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
612-
# whereas np.min and np.max (which directly call obj.min and obj.max)
613-
# default to axis=None.
614-
_builtin_table = {
615-
builtins.sum: np.sum,
616-
builtins.max: np.maximum.reduce,
617-
builtins.min: np.minimum.reduce,
618-
}
619-
620-
# GH#53425: Only for deprecation
621-
_builtin_table_alias = {
622-
builtins.sum: "np.sum",
623-
builtins.max: "np.maximum.reduce",
624-
builtins.min: "np.minimum.reduce",
625-
}
626-
627611
_cython_table = {
628612
builtins.sum: "sum",
629613
builtins.max: "max",
@@ -660,14 +644,6 @@ def get_cython_func(arg: Callable) -> str | None:
660644
return _cython_table.get(arg)
661645

662646

663-
def is_builtin_func(arg):
664-
"""
665-
if we define a builtin function for this argument, return it,
666-
otherwise return the arg
667-
"""
668-
return _builtin_table.get(arg, arg)
669-
670-
671647
def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
672648
"""
673649
If a name is missing then replace it by level_n, where n is the count

pandas/core/groupby/generic.py

-16
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@
5959
maybe_mangle_lambdas,
6060
reconstruct_func,
6161
validate_func_kwargs,
62-
warn_alias_replacement,
6362
)
6463
import pandas.core.common as com
6564
from pandas.core.frame import DataFrame
@@ -357,11 +356,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
357356
return ret
358357

359358
else:
360-
cyfunc = com.get_cython_func(func)
361-
if cyfunc and not args and not kwargs:
362-
warn_alias_replacement(self, func, cyfunc)
363-
return getattr(self, cyfunc)()
364-
365359
if maybe_use_numba(engine):
366360
return self._aggregate_with_numba(
367361
func, *args, engine_kwargs=engine_kwargs, **kwargs
@@ -409,11 +403,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
409403
agg = aggregate
410404

411405
def _python_agg_general(self, func, *args, **kwargs):
412-
orig_func = func
413-
func = com.is_builtin_func(func)
414-
if orig_func != func:
415-
alias = com._builtin_table_alias[func]
416-
warn_alias_replacement(self, orig_func, alias)
417406
f = lambda x: func(x, *args, **kwargs)
418407

419408
obj = self._obj_with_exclusions
@@ -1656,11 +1645,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
16561645
agg = aggregate
16571646

16581647
def _python_agg_general(self, func, *args, **kwargs):
1659-
orig_func = func
1660-
func = com.is_builtin_func(func)
1661-
if orig_func != func:
1662-
alias = com._builtin_table_alias[func]
1663-
warn_alias_replacement(self, orig_func, alias)
16641648
f = lambda x: func(x, *args, **kwargs)
16651649

16661650
if self.ngroups == 0:

pandas/core/groupby/groupby.py

-12
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ class providing the base-class of operations.
9494
sample,
9595
)
9696
from pandas.core._numba import executor
97-
from pandas.core.apply import warn_alias_replacement
9897
from pandas.core.arrays import (
9998
ArrowExtensionArray,
10099
BaseMaskedArray,
@@ -1647,12 +1646,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
16471646
b 2
16481647
dtype: int64
16491648
"""
1650-
orig_func = func
1651-
func = com.is_builtin_func(func)
1652-
if orig_func != func:
1653-
alias = com._builtin_table_alias[orig_func]
1654-
warn_alias_replacement(self, orig_func, alias)
1655-
16561649
if isinstance(func, str):
16571650
if hasattr(self, func):
16581651
res = getattr(self, func)
@@ -1868,11 +1861,6 @@ def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
18681861
@final
18691862
def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
18701863
# optimized transforms
1871-
orig_func = func
1872-
func = com.get_cython_func(func) or func
1873-
if orig_func != func:
1874-
warn_alias_replacement(self, orig_func, func)
1875-
18761864
if not isinstance(func, str):
18771865
return self._transform_general(func, engine, engine_kwargs, *args, **kwargs)
18781866

pandas/core/indexes/multi.py

+23
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,29 @@ def levshape(self) -> Shape:
10781078

10791079
@property
10801080
def codes(self) -> tuple:
1081+
"""
1082+
Codes of the MultiIndex.
1083+
1084+
Codes are the position of the index value in the list of level values
1085+
for each level.
1086+
1087+
Returns
1088+
-------
1089+
tuple of numpy.ndarray
1090+
The codes of the MultiIndex. Each array in the tuple corresponds
1091+
to a level in the MultiIndex.
1092+
1093+
See Also
1094+
--------
1095+
MultiIndex.set_codes : Set new codes on MultiIndex.
1096+
1097+
Examples
1098+
--------
1099+
>>> arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
1100+
>>> mi = pd.MultiIndex.from_arrays(arrays, names=("number", "color"))
1101+
>>> mi.codes
1102+
(array([0, 0, 1, 1], dtype=int8), array([1, 0, 1, 0], dtype=int8))
1103+
"""
10811104
return self._codes
10821105

10831106
def _set_codes(

pandas/core/resample.py

+1-13
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,12 @@
4545
)
4646

4747
import pandas.core.algorithms as algos
48-
from pandas.core.apply import (
49-
ResamplerWindowApply,
50-
warn_alias_replacement,
51-
)
48+
from pandas.core.apply import ResamplerWindowApply
5249
from pandas.core.arrays import ArrowExtensionArray
5350
from pandas.core.base import (
5451
PandasObject,
5552
SelectionMixin,
5653
)
57-
import pandas.core.common as com
5854
from pandas.core.generic import (
5955
NDFrame,
6056
_shared_docs,
@@ -1609,10 +1605,6 @@ def _downsample(self, how, **kwargs):
16091605
how : string / cython mapped function
16101606
**kwargs : kw args passed to how function
16111607
"""
1612-
orig_how = how
1613-
how = com.get_cython_func(how) or how
1614-
if orig_how != how:
1615-
warn_alias_replacement(self, orig_how, how)
16161608
ax = self.ax
16171609

16181610
# Excludes `on` column when provided
@@ -1775,10 +1767,6 @@ def _downsample(self, how, **kwargs):
17751767
if self.kind == "timestamp":
17761768
return super()._downsample(how, **kwargs)
17771769

1778-
orig_how = how
1779-
how = com.get_cython_func(how) or how
1780-
if orig_how != how:
1781-
warn_alias_replacement(self, orig_how, how)
17821770
ax = self.ax
17831771

17841772
if is_subperiod(ax.freq, self.freq):

pandas/io/excel/_base.py

+23
Original file line numberDiff line numberDiff line change
@@ -1630,6 +1630,29 @@ def book(self):
16301630

16311631
@property
16321632
def sheet_names(self):
1633+
"""
1634+
Names of the sheets in the document.
1635+
1636+
This is particularly useful for loading a specific sheet into a DataFrame when
1637+
you do not know the sheet names beforehand.
1638+
1639+
Returns
1640+
-------
1641+
list of str
1642+
List of sheet names in the document.
1643+
1644+
See Also
1645+
--------
1646+
ExcelFile.parse : Parse a sheet into a DataFrame.
1647+
read_excel : Read an Excel file into a pandas DataFrame. If you know the sheet
1648+
names, it may be easier to specify them directly to read_excel.
1649+
1650+
Examples
1651+
--------
1652+
>>> file = pd.ExcelFile("myfile.xlsx") # doctest: +SKIP
1653+
>>> file.sheet_names # doctest: +SKIP
1654+
['Sheet1', 'Sheet2']
1655+
"""
16331656
return self._reader.sheet_names
16341657

16351658
def close(self) -> None:

pandas/tests/apply/test_frame_apply.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1699,13 +1699,11 @@ def foo2(x, b=2, c=0):
16991699
def test_agg_std():
17001700
df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])
17011701

1702-
with tm.assert_produces_warning(FutureWarning, match="using DataFrame.std"):
1703-
result = df.agg(np.std)
1702+
result = df.agg(np.std, ddof=1)
17041703
expected = Series({"A": 2.0, "B": 2.0}, dtype=float)
17051704
tm.assert_series_equal(result, expected)
17061705

1707-
with tm.assert_produces_warning(FutureWarning, match="using Series.std"):
1708-
result = df.agg([np.std])
1706+
result = df.agg([np.std], ddof=1)
17091707
expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"])
17101708
tm.assert_frame_equal(result, expected)
17111709

pandas/tests/apply/test_frame_apply_relabeling.py

+9-13
Original file line numberDiff line numberDiff line change
@@ -49,24 +49,20 @@ def test_agg_relabel_multi_columns_multi_methods():
4949
def test_agg_relabel_partial_functions():
5050
# GH 26513, test on partial, functools or more complex cases
5151
df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
52-
msg = "using Series.[mean|min]"
53-
with tm.assert_produces_warning(FutureWarning, match=msg):
54-
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
52+
result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
5553
expected = pd.DataFrame(
5654
{"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
5755
)
5856
tm.assert_frame_equal(result, expected)
5957

60-
msg = "using Series.[mean|min|max|sum]"
61-
with tm.assert_produces_warning(FutureWarning, match=msg):
62-
result = df.agg(
63-
foo=("A", min),
64-
bar=("A", np.min),
65-
cat=("B", max),
66-
dat=("C", "min"),
67-
f=("B", np.sum),
68-
kk=("B", lambda x: min(x)),
69-
)
58+
result = df.agg(
59+
foo=("A", min),
60+
bar=("A", np.min),
61+
cat=("B", max),
62+
dat=("C", "min"),
63+
f=("B", np.sum),
64+
kk=("B", lambda x: min(x)),
65+
)
7066
expected = pd.DataFrame(
7167
{
7268
"A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],

pandas/tests/apply/test_series_apply.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -547,10 +547,7 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs):
547547
# GH 39140
548548
expected = Series({name: op(string_series) for name, op in zip(names, ops)})
549549
expected.name = "series"
550-
warn = FutureWarning if how == "agg" else None
551-
msg = f"using Series.[{'|'.join(names)}]"
552-
with tm.assert_produces_warning(warn, match=msg):
553-
result = getattr(string_series, how)(ops, **kwargs)
550+
result = getattr(string_series, how)(ops, **kwargs)
554551
tm.assert_series_equal(result, expected)
555552

556553

@@ -571,10 +568,7 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row):
571568
# GH 39140
572569
expected = Series({name: op(string_series) for name, op in ops.items()})
573570
expected.name = string_series.name
574-
warn = FutureWarning if how == "agg" else None
575-
msg = "using Series.[sum|mean]"
576-
with tm.assert_produces_warning(warn, match=msg):
577-
result = getattr(string_series, how)(ops, **kwargs)
571+
result = getattr(string_series, how)(ops, **kwargs)
578572
tm.assert_series_equal(result, expected)
579573

580574

0 commit comments

Comments
 (0)