Skip to content

Commit 340a55d

Browse files
authored
Merge branch 'main' into groupby.expanding_doc
2 parents 7daa3c0 + 183b327 commit 340a55d

File tree

19 files changed

+269
-305
lines changed

19 files changed

+269
-305
lines changed

doc/source/reference/groupby.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ Function application
7979
DataFrameGroupBy.cumsum
8080
DataFrameGroupBy.describe
8181
DataFrameGroupBy.diff
82+
DataFrameGroupBy.ewm
8283
DataFrameGroupBy.expanding
8384
DataFrameGroupBy.ffill
8485
DataFrameGroupBy.first
@@ -131,6 +132,7 @@ Function application
131132
SeriesGroupBy.cumsum
132133
SeriesGroupBy.describe
133134
SeriesGroupBy.diff
135+
SeriesGroupBy.ewm
134136
SeriesGroupBy.expanding
135137
SeriesGroupBy.ffill
136138
SeriesGroupBy.first

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,7 @@ Other Deprecations
421421
- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
422422
- Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`)
423423
- Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`)
424+
- Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`)
424425
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
425426

426427
.. ---------------------------------------------------------------------------
@@ -622,6 +623,7 @@ Performance improvements
622623
- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
623624
- Performance improvement in :meth:`DataFrame.__getitem__` when ``key`` is a :class:`DataFrame` with many columns (:issue:`61010`)
624625
- Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)
626+
- Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`)
625627
- Performance improvement in :meth:`DataFrame.where` when ``cond`` is a :class:`DataFrame` with many columns (:issue:`61010`)
626628
- Performance improvement in :meth:`to_hdf`, avoiding unnecessary reopenings of the HDF5 file to speed up data addition to files with a very large number of groups (:issue:`58248`)
627629
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
@@ -637,6 +639,7 @@ Bug fixes
637639
Categorical
638640
^^^^^^^^^^^
639641
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
642+
- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
640643
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
641644
-
642645

@@ -649,6 +652,7 @@ Datetimelike
649652
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
650653
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
651654
- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
655+
- Bug in :meth:`DataFrame.fillna` raising an ``AssertionError`` instead of ``OutOfBoundsDatetime`` when filling a ``datetime64[ns]`` column with an out-of-bounds timestamp. Now correctly raises ``OutOfBoundsDatetime``. (:issue:`61208`)
652656
- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`)
653657
- Bug in :meth:`DataFrame.agg` with a DataFrame containing missing values resulting in an ``IndexError`` (:issue:`58810`)
654658
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on custom business day frequencies bigger than ``1C`` (:issue:`58664`)

pandas/__init__.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,17 @@
44

55
# Let users know if they're missing any of our hard dependencies
66
_hard_dependencies = ("numpy", "dateutil")
7-
_missing_dependencies = []
87

98
for _dependency in _hard_dependencies:
109
try:
1110
__import__(_dependency)
1211
except ImportError as _e: # pragma: no cover
13-
_missing_dependencies.append(f"{_dependency}: {_e}")
12+
raise ImportError(
13+
f"Unable to import required dependency {_dependency}. "
14+
"Please see the traceback for details."
15+
) from _e
1416

15-
if _missing_dependencies: # pragma: no cover
16-
raise ImportError(
17-
"Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
18-
)
19-
del _hard_dependencies, _dependency, _missing_dependencies
17+
del _hard_dependencies, _dependency
2018

2119
try:
2220
# numpy compat

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ def __init__(
452452
if isinstance(values, Index):
453453
arr = values._data._pa_array.combine_chunks()
454454
else:
455-
arr = values._pa_array.combine_chunks()
455+
arr = extract_array(values)._pa_array.combine_chunks()
456456
categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype)
457457
codes = arr.indices.to_numpy()
458458
dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)

pandas/core/groupby/groupby.py

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3866,15 +3866,79 @@ def expanding(self, *args, **kwargs) -> ExpandingGroupby:
38663866
)
38673867

38683868
@final
3869-
@Substitution(name="groupby")
3870-
@Appender(_common_see_also)
38713869
def ewm(self, *args, **kwargs) -> ExponentialMovingWindowGroupby:
38723870
"""
38733871
Return an ewm grouper, providing ewm functionality per group.
38743872
3873+
Parameters
3874+
----------
3875+
*args : tuple
3876+
Positional arguments passed to the EWM window constructor.
3877+
**kwargs : dict
3878+
Keyword arguments passed to the EWM window constructor, such as:
3879+
3880+
com : float, optional
3881+
Specify decay in terms of center of mass.
3882+
``span``, ``halflife``, and ``alpha`` are alternative ways to specify
3883+
decay.
3884+
span : float, optional
3885+
Specify decay in terms of span.
3886+
halflife : float, optional
3887+
Specify decay in terms of half-life.
3888+
alpha : float, optional
3889+
Specify smoothing factor directly.
3890+
min_periods : int, default 0
3891+
Minimum number of observations in the window required to have a value;
3892+
otherwise, result is ``np.nan``.
3893+
adjust : bool, default True
3894+
Divide by decaying adjustment factor to account for imbalance in
3895+
relative weights.
3896+
ignore_na : bool, default False
3897+
Ignore missing values when calculating weights.
3898+
times : str or array-like of datetime64, optional
3899+
Times corresponding to the observations.
3900+
axis : {0 or 'index', 1 or 'columns'}, default 0
3901+
Axis along which the EWM function is applied.
3902+
38753903
Returns
38763904
-------
38773905
pandas.api.typing.ExponentialMovingWindowGroupby
3906+
An object that supports exponentially weighted moving transformations over
3907+
each group.
3908+
3909+
See Also
3910+
--------
3911+
Series.ewm : EWM transformations for Series.
3912+
DataFrame.ewm : EWM transformations for DataFrames.
3913+
Series.groupby : Apply a function groupby to a Series.
3914+
DataFrame.groupby : Apply a function groupby.
3915+
3916+
Examples
3917+
--------
3918+
>>> df = pd.DataFrame(
3919+
... {
3920+
... "Class": ["A", "A", "A", "B", "B", "B"],
3921+
... "Value": [10, 20, 30, 40, 50, 60],
3922+
... }
3923+
... )
3924+
>>> df
3925+
Class Value
3926+
0 A 10
3927+
1 A 20
3928+
2 A 30
3929+
3 B 40
3930+
4 B 50
3931+
5 B 60
3932+
3933+
>>> df.groupby("Class").ewm(com=0.5).mean()
3934+
Value
3935+
Class
3936+
A 0 10.000000
3937+
1 17.500000
3938+
2 26.153846
3939+
B 3 40.000000
3940+
4 47.500000
3941+
5 56.153846
38783942
"""
38793943
from pandas.core.window import ExponentialMovingWindowGroupby
38803944

pandas/core/internals/blocks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,6 +1679,8 @@ def where(self, other, cond) -> list[Block]:
16791679

16801680
try:
16811681
res_values = arr._where(cond, other).T
1682+
except OutOfBoundsDatetime:
1683+
raise
16821684
except (ValueError, TypeError):
16831685
if self.ndim == 1 or self.shape[0] == 1:
16841686
if isinstance(self.dtype, (IntervalDtype, StringDtype)):
@@ -1746,6 +1748,8 @@ def putmask(self, mask, new) -> list[Block]:
17461748
try:
17471749
# Caller is responsible for ensuring matching lengths
17481750
values._putmask(mask, new)
1751+
except OutOfBoundsDatetime:
1752+
raise
17491753
except (TypeError, ValueError):
17501754
if self.ndim == 1 or self.shape[0] == 1:
17511755
if isinstance(self.dtype, IntervalDtype):

pandas/core/reshape/reshape.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -936,7 +936,20 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
936936
[k for k in range(frame.columns.nlevels - 1, -1, -1) if k not in set_levels]
937937
)
938938

939-
result = stack_reshape(frame, level, set_levels, stack_cols)
939+
result: Series | DataFrame
940+
if not isinstance(frame.columns, MultiIndex):
941+
# GH#58817 Fast path when we're stacking the columns of a non-MultiIndex.
942+
# When columns are homogeneous EAs, we pass through object
943+
# dtype but this is still slightly faster than the normal path.
944+
if len(frame.columns) > 0 and frame._is_homogeneous_type:
945+
dtype = frame._mgr.blocks[0].dtype
946+
else:
947+
dtype = None
948+
result = frame._constructor_sliced(
949+
frame._values.reshape(-1, order="F"), dtype=dtype
950+
)
951+
else:
952+
result = stack_reshape(frame, level, set_levels, stack_cols)
940953

941954
# Construct the correct MultiIndex by combining the frame's index and
942955
# stacked columns.
@@ -1018,6 +1031,8 @@ def stack_reshape(
10181031
-------
10191032
The data of behind the stacked DataFrame.
10201033
"""
1034+
# non-MultIndex takes a fast path.
1035+
assert isinstance(frame.columns, MultiIndex)
10211036
# If we need to drop `level` from columns, it needs to be in descending order
10221037
drop_levnums = sorted(level, reverse=True)
10231038

@@ -1027,18 +1042,14 @@ def stack_reshape(
10271042
if len(frame.columns) == 1:
10281043
data = frame.copy(deep=False)
10291044
else:
1030-
if not isinstance(frame.columns, MultiIndex) and not isinstance(idx, tuple):
1031-
# GH#57750 - if the frame is an Index with tuples, .loc below will fail
1032-
column_indexer = idx
1033-
else:
1034-
# Take the data from frame corresponding to this idx value
1035-
if len(level) == 1:
1036-
idx = (idx,)
1037-
gen = iter(idx)
1038-
column_indexer = tuple(
1039-
next(gen) if k in set_levels else slice(None)
1040-
for k in range(frame.columns.nlevels)
1041-
)
1045+
# Take the data from frame corresponding to this idx value
1046+
if len(level) == 1:
1047+
idx = (idx,)
1048+
gen = iter(idx)
1049+
column_indexer = tuple(
1050+
next(gen) if k in set_levels else slice(None)
1051+
for k in range(frame.columns.nlevels)
1052+
)
10421053
data = frame.loc[:, column_indexer]
10431054

10441055
if len(level) < frame.columns.nlevels:

pandas/core/series.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@
5252
doc,
5353
set_module,
5454
)
55+
from pandas.util._exceptions import (
56+
find_stack_level,
57+
)
5558
from pandas.util._validators import (
5659
validate_ascending,
5760
validate_bool_kwarg,
@@ -4320,7 +4323,7 @@ def unstack(
43204323

43214324
def map(
43224325
self,
4323-
arg: Callable | Mapping | Series,
4326+
func: Callable | Mapping | Series | None = None,
43244327
na_action: Literal["ignore"] | None = None,
43254328
**kwargs,
43264329
) -> Series:
@@ -4333,8 +4336,8 @@ def map(
43334336
43344337
Parameters
43354338
----------
4336-
arg : function, collections.abc.Mapping subclass or Series
4337-
Mapping correspondence.
4339+
func : function, collections.abc.Mapping subclass or Series
4340+
Function or mapping correspondence.
43384341
na_action : {None, 'ignore'}, default None
43394342
If 'ignore', propagate NaN values, without passing them to the
43404343
mapping correspondence.
@@ -4404,9 +4407,22 @@ def map(
44044407
3 I am a rabbit
44054408
dtype: object
44064409
"""
4407-
if callable(arg):
4408-
arg = functools.partial(arg, **kwargs)
4409-
new_values = self._map_values(arg, na_action=na_action)
4410+
if func is None:
4411+
if "arg" in kwargs:
4412+
# `.map(arg=my_func)`
4413+
func = kwargs.pop("arg")
4414+
warnings.warn(
4415+
"The parameter `arg` has been renamed to `func`, and it "
4416+
"will stop being supported in a future version of pandas.",
4417+
FutureWarning,
4418+
stacklevel=find_stack_level(),
4419+
)
4420+
else:
4421+
raise ValueError("The `func` parameter is required")
4422+
4423+
if callable(func):
4424+
func = functools.partial(func, **kwargs)
4425+
new_values = self._map_values(func, na_action=na_action)
44104426
return self._constructor(new_values, index=self.index, copy=False).__finalize__(
44114427
self, method="map"
44124428
)

pandas/tests/extension/base/reshaping.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.core.dtypes.dtypes import NumpyEADtype
7+
68
import pandas as pd
79
import pandas._testing as tm
810
from pandas.api.extensions import ExtensionArray
@@ -266,7 +268,13 @@ def test_stack(self, data, columns, future_stack):
266268
expected = expected.astype(object)
267269

268270
if isinstance(expected, pd.Series):
269-
assert result.dtype == df.iloc[:, 0].dtype
271+
if future_stack and isinstance(data.dtype, NumpyEADtype):
272+
# GH#58817 future_stack=True constructs the result specifying the dtype
273+
# using the dtype of the input; we thus get the underlying
274+
# NumPy dtype as the result instead of the NumpyExtensionArray
275+
assert result.dtype == df.iloc[:, 0].to_numpy().dtype
276+
else:
277+
assert result.dtype == df.iloc[:, 0].dtype
270278
else:
271279
assert all(result.dtypes == df.iloc[:, 0].dtype)
272280

pandas/tests/frame/methods/test_fillna.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import OutOfBoundsDatetime
5+
46
from pandas import (
57
Categorical,
68
DataFrame,
@@ -781,3 +783,15 @@ def test_fillna_with_none_object(test_frame, dtype):
781783
if test_frame:
782784
expected = expected.to_frame()
783785
tm.assert_equal(result, expected)
786+
787+
788+
def test_fillna_out_of_bounds_datetime():
789+
# GH#61208
790+
df = DataFrame(
791+
{"datetime": date_range("1/1/2011", periods=3, freq="h"), "value": [1, 2, 3]}
792+
)
793+
df.iloc[0, 0] = None
794+
795+
msg = "Cannot cast 0001-01-01 00:00:00 to unit='ns' without overflow"
796+
with pytest.raises(OutOfBoundsDatetime, match=msg):
797+
df.fillna(Timestamp("0001-01-01"))

pandas/tests/reshape/test_pivot.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import pandas as pd
1717
from pandas import (
18+
ArrowDtype,
1819
Categorical,
1920
DataFrame,
2021
Grouper,
@@ -2851,3 +2852,31 @@ def test_pivot_margins_with_none_index(self):
28512852
),
28522853
)
28532854
tm.assert_frame_equal(result, expected)
2855+
2856+
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
2857+
def test_pivot_with_pyarrow_categorical(self):
2858+
# GH#53051
2859+
pa = pytest.importorskip("pyarrow")
2860+
2861+
df = DataFrame(
2862+
{"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]}
2863+
).astype(
2864+
{
2865+
"string_column": ArrowDtype(pa.dictionary(pa.int32(), pa.string())),
2866+
"number_column": "float[pyarrow]",
2867+
}
2868+
)
2869+
2870+
df = df.pivot(columns=["string_column"], values=["number_column"])
2871+
2872+
multi_index = MultiIndex.from_arrays(
2873+
[["number_column", "number_column", "number_column"], ["A", "B", "C"]],
2874+
names=(None, "string_column"),
2875+
)
2876+
df_expected = DataFrame(
2877+
[[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]],
2878+
columns=multi_index,
2879+
)
2880+
tm.assert_frame_equal(
2881+
df, df_expected, check_dtype=False, check_column_type=False
2882+
)

0 commit comments

Comments
 (0)