From 3dfb779b85d3e5af870ddcd909c4558590537432 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 6 Sep 2021 11:16:42 -0400 Subject: [PATCH 01/41] ENH: new .agg for list-likes --- pandas/core/apply.py | 74 +++++++++- pandas/core/config_init.py | 17 +++ pandas/core/groupby/generic.py | 36 ++++- pandas/core/groupby/groupby.py | 2 +- pandas/tests/apply/test_frame_apply.py | 129 +++++++++++++----- .../tests/groupby/aggregate/test_aggregate.py | 20 ++- pandas/tests/groupby/aggregate/test_other.py | 23 +++- pandas/tests/groupby/test_function.py | 6 +- pandas/tests/groupby/test_groupby.py | 29 ++-- pandas/tests/resample/test_deprecated.py | 11 +- pandas/tests/resample/test_resample_api.py | 32 ++++- pandas/tests/reshape/test_pivot.py | 12 +- pandas/tests/window/test_api.py | 14 +- 13 files changed, 333 insertions(+), 72 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7555fb50f16af..f9199ac35643c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -21,7 +21,10 @@ import numpy as np -from pandas._config import option_context +from pandas._config import ( + get_option, + option_context, +) from pandas._libs import lib from pandas._typing import ( @@ -167,7 +170,10 @@ def agg(self) -> DataFrame | Series | None: return self.agg_dict_like() elif is_list_like(arg): # we require a list, but not a 'str' - return self.agg_list_like() + if get_option("new_udf_methods"): + return self.new_list_like("agg") + else: + return self.agg_list_like() if callable(arg): f = com.get_cython_func(arg) @@ -408,6 +414,70 @@ def agg_list_like(self) -> DataFrame | Series: ) return concatenated.reindex(full_ordered_index, copy=False) + def new_list_like(self, method: str) -> DataFrame | Series: + """ + Compute aggregation in the case of a list-like argument. + + Returns + ------- + Result of aggregation. + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + arg = cast(List[AggFuncTypeBase], self.f) + + results = [] + keys = [] + result_dim = None + + for a in arg: + name = None + try: + if isinstance(a, (tuple, list)): + # Handle (name, value) pairs + name, a = a + new_res = getattr(obj, method)(a) + if result_dim is None: + result_dim = getattr(new_res, "ndim", 0) + elif getattr(new_res, "ndim", 0) != result_dim: + raise ValueError( + "cannot combine transform and aggregation operations" + ) + except TypeError: + pass + else: + results.append(new_res) + + # make sure we find a good name + if name is None: + name = com.get_callable_name(a) or a + keys.append(name) + + # if we are empty + if not len(results): + raise ValueError("no results") + + try: + concatenated = concat(results, keys=keys, axis=1, sort=False) + except TypeError: + # we are concatting non-NDFrame objects, + # e.g. a list of scalars + from pandas import Series + + result = Series(results, index=keys, name=obj.name) + return result + else: + # Concat uses the first index to determine the final indexing order. + # The union of a shorter first index with the other indices causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + full_ordered_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(full_ordered_index, copy=False) + def agg_dict_like(self) -> DataFrame | Series: """ Compute aggregation in the case of a dict-like argument. 
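For orientation, a minimal sketch of what the new_list_like path added above computes, assuming this patch is applied and the experimental mode.new_udf_methods option (registered in the config_init.py hunk that follows) is enabled. Each aggregator in the list is applied to the whole object via getattr(obj, method)(a), and the per-function results are concatenated along axis=1 with the function names as keys, so the functions land on the columns rather than on the row index; the frame below is hypothetical:

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})

    with pd.option_context("mode.new_udf_methods", True):
        result = df.agg(["sum", "min"])

    # Roughly concat([df.agg("sum"), df.agg("min")], keys=["sum", "min"], axis=1):
    #
    #        sum  min
    #    A     3    1
    #    B     7    3
    #
    # i.e. the transpose of the legacy layout, whose index is ["sum", "min"].
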
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index cf41bcff3d0c8..2df98a59cb184 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -511,6 +511,23 @@ def use_inf_as_na_cb(key): validator=is_one_of_factory(["block", "array"]), ) +new_udf_methods = """ +: boolean + Whether to use the new UDF method implementations. Currently experimental. + Defaults to False. +""" + + +with cf.config_prefix("mode"): + cf.register_option( + "new_udf_methods", + # Get the default from an environment variable, if set, otherwise defaults + # to "block". This environment variable can be set for testing. + os.environ.get("PANDAS_NEW_UDF_METHODS", "false").lower() == "true", + new_udf_methods, + validator=is_bool, + ) + # user warnings chained_assignment = """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 38f1d41494fd2..9d0bcf81f3e9c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -26,6 +26,8 @@ import numpy as np +from pandas._config import get_option + from pandas._libs import reduction as libreduction from pandas._typing import ( ArrayLike, @@ -37,6 +39,7 @@ Substitution, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, @@ -886,8 +889,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) relabeling, func, columns, order = reconstruct_func(func, **kwargs) func = maybe_mangle_lambdas(func) - op = GroupByApply(self, func, args, kwargs) - result = op.agg() + with group_selection_context(self): + op = GroupByApply(self, func, args, kwargs) + result = op.agg() if not is_dict_like(func) and result is not None: return result elif relabeling and result is not None: @@ -897,6 +901,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result.columns = columns if result is None: + if get_option("new_udf_methods"): + return self._new_agg(func, args, kwargs) # grouper specific aggregations if self.grouper.nkeys > 1: @@ -947,6 +953,28 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) return result + def _new_agg(self, func, args, kwargs): + if args or kwargs: + # test_pass_args_kwargs gets here (with and without as_index) + # can't return early + result = self._aggregate_frame(func, *args, **kwargs) + + elif self.axis == 1 and self.grouper.nkeys == 1: + # _aggregate_multiple_funcs does not allow self.axis == 1 + # Note: axis == 1 precludes 'not self.as_index', see __init__ + result = self._aggregate_frame(func) + return result + else: + # test_groupby_as_index_series_scalar gets here + # with 'not self.as_index' + return self._python_agg_general(func, *args, **kwargs) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = Index(range(len(result))) + + return result + agg = aggregate def _iterate_slices(self) -> Iterable[Series]: @@ -999,7 +1027,7 @@ def array_func(values: ArrayLike) -> ArrayLike: f"Before calling .{how}, select only columns which should be " "valid for the function.", FutureWarning, - stacklevel=4, + stacklevel=find_stack_level(), ) return self._wrap_agged_manager(new_mgr) @@ -1195,7 +1223,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: f"Before calling .{how}, select only columns which should be " "valid for the transforming function.", FutureWarning, - stacklevel=4, + stacklevel=find_stack_level(), ) res_df = self.obj._constructor(res_mgr) diff --git a/pandas/core/groupby/groupby.py 
b/pandas/core/groupby/groupby.py index 1ea16939603f9..18d750dd27ed5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1428,7 +1428,7 @@ def _python_agg_general(self, func, *args, **kwargs): "Before calling .agg, select only columns which should be " "valid for the aggregating function.", FutureWarning, - stacklevel=3, + stacklevel=find_stack_level(), ) continue diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 62983b5327a26..2c2376d163002 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -13,6 +13,7 @@ Series, Timestamp, date_range, + get_option, ) import pandas._testing as tm from pandas.tests.frame.common import zip_frames @@ -639,6 +640,8 @@ def test_apply_dup_names_multi_agg(): # GH 21063 df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + if get_option("mode.new_udf_methods"): + expected = expected.T result = df.agg(["min"]) tm.assert_frame_equal(result, expected) @@ -1010,25 +1013,46 @@ def test_agg_transform(axis, float_frame): # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if axis in {0, "index"}: - expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]]) + if get_option("mode.new_udf_methods"): + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [["sqrt"], float_frame.columns] + ) + else: + expected.index = MultiIndex.from_product([["sqrt"], float_frame.index]) else: - expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["sqrt"]] + ) + else: + expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]]) tm.assert_frame_equal(result, expected) # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting result = float_frame.apply([np.abs, np.sqrt], axis=axis) - expected = zip_frames([f_abs, f_sqrt], axis=other_axis) - if axis in {0, "index"}: - expected.columns = MultiIndex.from_product( - [float_frame.columns, ["absolute", "sqrt"]] - ) + if get_option("mode.new_udf_methods"): + expected = pd.concat([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [["absolute", "sqrt"], float_frame.columns] + ) + else: + expected.index = MultiIndex.from_product( + [["absolute", "sqrt"], float_frame.index] + ) else: - expected.index = MultiIndex.from_product( - [float_frame.index, ["absolute", "sqrt"]] - ) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) tm.assert_frame_equal(result, expected) @@ -1040,6 +1064,8 @@ def test_demo(): expected = DataFrame( {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] ) + if get_option("mode.new_udf_methods"): + expected = expected.T tm.assert_frame_equal(result, expected) result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]}) @@ -1086,18 +1112,29 @@ def test_agg_multiple_mixed_no_warning(): }, index=["min", "sum"], ) + klass, match = None, None + if get_option("mode.new_udf_methods"): + expected = expected.T + klass, match = FutureWarning, "Dropping of nuisance columns" # sorted index - 
with tm.assert_produces_warning(None): + with tm.assert_produces_warning(klass, match=match, check_stacklevel=False): result = mdf.agg(["min", "sum"]) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(None): + klass, match = None, None + if get_option("mode.new_udf_methods"): + klass, match = FutureWarning, "Dropping of nuisance columns" + + with tm.assert_produces_warning(klass, match=match, check_stacklevel=False): result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. - expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) + if get_option("mode.new_udf_methods"): + expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]] + else: + expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) @@ -1116,6 +1153,8 @@ def test_agg_reduce(axis, float_frame): ) expected.columns = ["mean", "max", "sum"] expected = expected.T if axis in {0, "index"} else expected + if get_option("mode.new_udf_methods"): + expected = expected.T result = float_frame.agg(["mean", "max", "sum"], axis=axis) tm.assert_frame_equal(result, expected) @@ -1192,6 +1231,8 @@ def test_nuiscance_columns(): index=["min"], columns=df.columns, ) + if get_option("mode.new_udf_methods"): + expected = expected.T tm.assert_frame_equal(result, expected) with tm.assert_produces_warning( @@ -1205,6 +1246,8 @@ def test_nuiscance_columns(): expected = DataFrame( [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] ) + if get_option("mode.new_udf_methods"): + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1244,8 +1287,12 @@ def test_non_callable_aggregates(how): } ) - tm.assert_frame_equal(result1, result2, check_like=True) - tm.assert_frame_equal(result2, expected, check_like=True) + if get_option("new_udf_methods"): + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, expected.T) + else: + tm.assert_frame_equal(result1, result2, check_like=True) + tm.assert_frame_equal(result2, expected, check_like=True) # Just functional string arg is same as calling df.arg() result = getattr(df, how)("count") @@ -1282,7 +1329,9 @@ def func(group_col): tm.assert_series_equal(result, expected) result = df.agg([func]) - expected = expected.to_frame("func").T + expected = expected.to_frame("func") + if not get_option("mode.new_udf_methods"): + expected = expected.T tm.assert_frame_equal(result, expected) @@ -1395,14 +1444,20 @@ def test_apply_empty_list_reduce(): tm.assert_series_equal(result, expected) -def test_apply_no_suffix_index(): +def test_apply_no_suffix_index(request): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) - result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) - expected = DataFrame( - {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "", ""] - ) - + result = pdf.apply([np.square, lambda x: x, lambda x: x]) + if get_option("mode.new_udf_methods"): + columns = MultiIndex.from_product( + [["A", "B"], ["square", "", ""]] + ) + expected = DataFrame([[16, 4, 4, 81, 9, 9]], columns=columns) + else: + columns = MultiIndex.from_product( + [["A", "B"], ["square", "", ""]] + ) + expected = DataFrame(3 * [[16, 4, 4, 81, 9, 9]], columns=columns) tm.assert_frame_equal(result, expected) @@ -1434,15 +1489,25 @@ def foo(s): aggs = ["sum", foo, "count", "min"] result = df.agg(aggs) - expected = DataFrame( - { - "item": ["123456", np.nan, 6, "1"], - "att1": [21.0, 10.5, 6.0, 
1.0], - "att2": [18.0, 9.0, 6.0, 0.0], - "att3": [17.0, 8.5, 6.0, 0.0], - }, - index=["sum", "foo", "count", "min"], - ) + if get_option("mode.new_udf_methods"): + expected = DataFrame( + { + "sum": ["123456", 21, 18, 17], + "count": [6, 6, 6, 6], + "min": ["1", 1, 0, 0], + }, + index=["item", "att1", "att2", "att3"], + ) + else: + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + index=["sum", "foo", "count", "min"], + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 4bda0e6ef9872..78194a806f456 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -20,6 +20,7 @@ MultiIndex, Series, concat, + get_option, to_datetime, ) import pandas._testing as tm @@ -499,12 +500,18 @@ def test_order_aggregate_multiple_funcs(): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) - res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) - result = res.columns.levels[1] + if get_option("new_udf_methods"): + # TODO (GH 35725): This will not raise when agg-must-agg is implemented + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + else: + res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + result = res.columns.levels[1] - expected = Index(["sum", "max", "mean", "ohlc", "min"]) + expected = Index(["sum", "max", "mean", "ohlc", "min"]) - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("dtype", [np.int64, np.uint64]) @@ -1207,7 +1214,10 @@ def test_nonagg_agg(): g = df.groupby("a") result = g.agg(["cumsum"]) - result.columns = result.columns.droplevel(-1) + if get_option("new_udf_methods"): + result.columns = result.columns.droplevel(0) + else: + result.columns = result.columns.droplevel(-1) expected = g.agg("cumsum") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 79990deed261d..d34538a4f5935 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import get_option + import pandas.util._test_decorators as td import pandas as pd @@ -201,13 +203,21 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) - expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) + if get_option("new_udf_methods"): + expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]]) + else: + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped[["D", "C"]].agg([np.sum, np.mean]) - expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) - expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) + if get_option("new_udf_methods"): + expected = pd.concat([d_sum, c_sum, d_mean, c_mean], 
axis=1) + expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]]) + else: + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg({"C": "mean", "D": "sum"}) @@ -393,7 +403,10 @@ def P1(a): g = df.groupby("date") expected = g.agg([P1]) - expected.columns = expected.columns.levels[0] + if get_option("new_udf_methods"): + expected.columns = expected.columns.levels[1] + else: + expected.columns = expected.columns.levels[0] result = g.agg(P1) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 3ae11847cc06b..caa04d7994223 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -14,6 +14,7 @@ Series, Timestamp, date_range, + get_option, ) import pandas._testing as tm import pandas.core.nanops as nanops @@ -1138,7 +1139,10 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) result = groups.agg([function]) - expected.columns = MultiIndex.from_tuples([("b", function)]) + if get_option("new_udf_methods"): + expected.columns = MultiIndex.from_tuples([(function, "b")]) + else: + expected.columns = MultiIndex.from_tuples([("b", function)]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index b9a6730996a02..1cf36ddbb1772 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -20,6 +20,7 @@ Timedelta, Timestamp, date_range, + get_option, read_csv, to_datetime, ) @@ -584,11 +585,18 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] agged = grouped.agg(funcs) - expected = pd.concat( - [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], - keys=["D", "E", "F"], - axis=1, - ) + if get_option("new_udf_methods"): + expected = pd.concat( + [grouped.agg(funcs[0]), grouped.agg(funcs[1])], + keys=["mean", "std"], + axis=1, + ) + else: + expected = pd.concat( + [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], + keys=["D", "E", "F"], + axis=1, + ) assert isinstance(agged.index, MultiIndex) assert isinstance(expected.index, MultiIndex) tm.assert_frame_equal(agged, expected) @@ -1985,9 +1993,14 @@ def test_groupby_agg_ohlc_non_first(): index=date_range("2018-01-01", periods=2, freq="D", name="dti"), ) - result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) - - tm.assert_frame_equal(result, expected) + if get_option("new_udf_methods"): + # TODO (GH 35725): This will not raise when agg-must-agg is implemented + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + else: + result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) + tm.assert_frame_equal(result, expected) def test_groupby_multiindex_nat(): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index 359c3cea62f9c..ff3cb8d873bb9 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -10,6 +10,7 @@ from pandas import ( DataFrame, Series, + get_option, ) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range @@ -97,7 +98,10 @@ def 
test_resample_loffset_arg_type(frame, create_index, arg): result_agg = df.resample("2D", loffset="2H").agg(arg) if isinstance(arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + if get_option("new_udf_methods"): + expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) + else: + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) tm.assert_frame_equal(result_agg, expected) @@ -216,7 +220,10 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg): with tm.assert_produces_warning(FutureWarning): result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) if isinstance(agg_arg, list): - expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + if get_option("new_udf_methods"): + expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) + else: + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) tm.assert_frame_equal(result_agg, expected) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 3b3bd402e4cc7..3566ed42b7133 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -8,6 +8,7 @@ DataFrame, NamedAgg, Series, + get_option, ) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range @@ -347,10 +348,16 @@ def test_agg(): b_std = r["B"].std() b_sum = r["B"].sum() - expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + if get_option("new_udf_methods"): + expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]]) + else: + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: warn = FutureWarning if t in cases[1:3] else None + if get_option("new_udf_methods"): + warn = None with tm.assert_produces_warning( warn, match="Dropping invalid columns", check_stacklevel=False ): @@ -628,11 +635,22 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - expected = DataFrame( - [47.5, 143.5, 195.5], - index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"), - columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), - ) + if get_option("new_udf_methods"): + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[["mean"], [col_name]], codes=[[0], [0]]), + ) + else: + expected = DataFrame( + [47.5, 143.5, 195.5], + index=date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 88607f4b036a0..4c13c9733cf68 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import get_option + import pandas as pd from pandas import ( Categorical, @@ -1905,8 +1907,14 @@ def test_pivot_margins_name_unicode(self): frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - expected = DataFrame(index=index) - 
tm.assert_frame_equal(table, expected) + + if get_option("new_udf_methods"): + expected = Series([1, 1, 1, 3], index=index) + expected.index.name = None + tm.assert_series_equal(table, expected) + else: + expected = DataFrame(index=index) + tm.assert_frame_equal(table, expected) def test_pivot_string_as_func(self): # GH #18713 diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 5cc22249c26f0..0089f092dd439 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -10,6 +10,7 @@ Timestamp, concat, date_range, + get_option, timedelta_range, ) import pandas._testing as tm @@ -90,8 +91,12 @@ def test_agg(): b_std = r["B"].std() result = r.aggregate([np.mean, np.std]) - expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + if get_option("new_udf_methods"): + expected = concat([a_mean, b_mean, a_std, b_std], axis=1) + expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]]) + else: + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) tm.assert_frame_equal(result, expected) result = r.aggregate({"A": np.mean, "B": np.std}) @@ -147,7 +152,10 @@ def test_agg_consistency(): r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + if get_option("new_udf_methods"): + expected = MultiIndex.from_product([["sum", "mean"], list("AB")]) + else: + expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) tm.assert_index_equal(result, expected) result = r["A"].agg([np.sum, np.mean]).columns From 9ef1eb0ca453d9d886f01d1292ab9e828b7373d7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 24 Sep 2021 17:40:55 -0400 Subject: [PATCH 02/41] Refactor single arg computation, test fixup --- pandas/core/apply.py | 43 ++++++++++++++------------ pandas/tests/apply/test_frame_apply.py | 4 +-- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f9199ac35643c..495f944012ebb 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -414,6 +414,28 @@ def agg_list_like(self) -> DataFrame | Series: ) return concatenated.reindex(full_ordered_index, copy=False) + def new_list_single_arg( + self, method: str, a: AggFuncTypeBase, result_dim: int | None + ) -> tuple[int | None, AggFuncTypeBase | None, DataFrame | Series | None]: + name = None + result = None + try: + if isinstance(a, (tuple, list)): + # Handle (name, value) pairs + name, a = a + result = getattr(self.obj, method)(a) + if result_dim is None: + result_dim = getattr(result, "ndim", 0) + elif getattr(result, "ndim", 0) != result_dim: + raise ValueError("cannot combine transform and aggregation operations") + except TypeError: + pass + else: + # make sure we find a good name + if name is None: + name = com.get_callable_name(a) or a + return result_dim, name, result + def new_list_like(self, method: str) -> DataFrame | Series: """ Compute aggregation in the case of a list-like argument. 
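The new_list_single_arg helper extracted above returns a (result_dim, name, result) triple: result is None when applying the function raised TypeError, in which case the caller skips it; result_dim enforces that every result has the same dimensionality, so mixing transforms with aggregations raises; and name is taken from a ("label", func) pair when one is given, falling back to com.get_callable_name otherwise. A rough sketch of the labeled-pair handling, again assuming the experimental mode.new_udf_methods option is on and using a hypothetical frame (the pair form is exercised mainly through Series in the existing tests, so treat this as illustrative):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": [1.0, 4.0, 9.0]})

    with pd.option_context("mode.new_udf_methods", True):
        # A list element may be a plain function/name or a ("label", func)
        # pair; new_list_single_arg unpacks the pair, so the result columns
        # are labeled "total" and "smallest" rather than "sum" and "min".
        result = df.agg([("total", np.sum), ("smallest", "min")])
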
@@ -432,26 +454,9 @@ def new_list_like(self, method: str) -> DataFrame | Series: result_dim = None for a in arg: - name = None - try: - if isinstance(a, (tuple, list)): - # Handle (name, value) pairs - name, a = a - new_res = getattr(obj, method)(a) - if result_dim is None: - result_dim = getattr(new_res, "ndim", 0) - elif getattr(new_res, "ndim", 0) != result_dim: - raise ValueError( - "cannot combine transform and aggregation operations" - ) - except TypeError: - pass - else: + result_dim, name, new_res = self.new_list_single_arg(method, a, result_dim) + if new_res is not None: results.append(new_res) - - # make sure we find a good name - if name is None: - name = com.get_callable_name(a) or a keys.append(name) # if we are empty diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 2c2376d163002..d79317e48bd5b 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1450,9 +1450,9 @@ def test_apply_no_suffix_index(request): result = pdf.apply([np.square, lambda x: x, lambda x: x]) if get_option("mode.new_udf_methods"): columns = MultiIndex.from_product( - [["A", "B"], ["square", "", ""]] + [["square", "", ""], ["A", "B"]] ) - expected = DataFrame([[16, 4, 4, 81, 9, 9]], columns=columns) + expected = DataFrame(3 * [[16, 81, 4, 9, 4, 9]], columns=columns) else: columns = MultiIndex.from_product( [["A", "B"], ["square", "", ""]] From 1974e07853df015f92f1c9a5e103723bf3620ef6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 24 Sep 2021 20:30:36 -0400 Subject: [PATCH 03/41] Revert change to GroupBy.agg --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9d0bcf81f3e9c..c6d9c45485ee2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -889,9 +889,9 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) relabeling, func, columns, order = reconstruct_func(func, **kwargs) func = maybe_mangle_lambdas(func) - with group_selection_context(self): - op = GroupByApply(self, func, args, kwargs) - result = op.agg() + # with group_selection_context(self): + op = GroupByApply(self, func, args, kwargs) + result = op.agg() if not is_dict_like(func) and result is not None: return result elif relabeling and result is not None: From d7b6c7f886b864ec5fe613b643c2b7632131878f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 25 Sep 2021 18:02:42 -0400 Subject: [PATCH 04/41] Rename option and methods --- pandas/core/apply.py | 12 ++++--- pandas/core/config_init.py | 12 +++---- pandas/core/groupby/generic.py | 7 ++-- pandas/tests/apply/test_frame_apply.py | 34 +++++++++++-------- .../tests/groupby/aggregate/test_aggregate.py | 4 +-- pandas/tests/groupby/aggregate/test_other.py | 6 ++-- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/groupby/test_groupby.py | 4 +-- pandas/tests/resample/test_deprecated.py | 4 +-- pandas/tests/resample/test_resample_api.py | 4 +-- pandas/tests/reshape/test_pivot.py | 2 +- pandas/tests/window/test_api.py | 4 +-- 12 files changed, 50 insertions(+), 45 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f5f2bbf9bebbe..c9c813159568d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -170,8 +170,8 @@ def agg(self) -> DataFrame | Series | None: return self.agg_dict_like() elif is_list_like(arg): # we require a list, but not a 'str' - if 
get_option("new_udf_methods"): - return self.new_list_like("agg") + if get_option("future_udf_behavior"): + return self.future_list_like("agg") else: return self.agg_list_like() @@ -414,7 +414,7 @@ def agg_list_like(self) -> DataFrame | Series: ) return concatenated.reindex(full_ordered_index, copy=False) - def new_list_single_arg( + def future_list_single_arg( self, method: str, a: AggFuncTypeBase, result_dim: int | None ) -> tuple[int | None, AggFuncTypeBase | None, DataFrame | Series | None]: name = None @@ -436,7 +436,7 @@ def new_list_single_arg( name = com.get_callable_name(a) or a return result_dim, name, result - def new_list_like(self, method: str) -> DataFrame | Series: + def future_list_like(self, method: str) -> DataFrame | Series: """ Compute aggregation in the case of a list-like argument. @@ -454,7 +454,9 @@ def new_list_like(self, method: str) -> DataFrame | Series: result_dim = None for a in arg: - result_dim, name, new_res = self.new_list_single_arg(method, a, result_dim) + result_dim, name, new_res = self.future_list_single_arg( + method, a, result_dim + ) if new_res is not None: results.append(new_res) keys.append(name) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5d444fa35a46a..99aff44f75029 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -511,20 +511,20 @@ def use_inf_as_na_cb(key): validator=is_one_of_factory(["block", "array"]), ) -new_udf_methods = """ +future_udf_behavior = """ : boolean - Whether to use the new UDF method implementations. Currently experimental. + Whether to use the future UDF method implementations. Currently experimental. Defaults to False. """ with cf.config_prefix("mode"): cf.register_option( - "new_udf_methods", + "future_udf_behavior", # Get the default from an environment variable, if set, otherwise defaults - # to "block". This environment variable can be set for testing. - os.environ.get("PANDAS_NEW_UDF_METHODS", "false").lower() == "true", - new_udf_methods, + # to False. This environment variable can be set for testing. 
+ os.environ.get("PANDAS_FUTURE_UDF_BEHAVIOR", "false").lower() == "true", + future_udf_behavior, validator=is_bool, ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 821da94daaf5c..b5cfc01b4f2f9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -895,7 +895,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) relabeling, func, columns, order = reconstruct_func(func, **kwargs) func = maybe_mangle_lambdas(func) - # with group_selection_context(self): op = GroupByApply(self, func, args, kwargs) result = op.agg() if not is_dict_like(func) and result is not None: @@ -907,8 +906,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) result.columns = columns if result is None: - if get_option("new_udf_methods"): - return self._new_agg(func, args, kwargs) + if get_option("future_udf_behavior"): + return self._future_agg(func, args, kwargs) # grouper specific aggregations if self.grouper.nkeys > 1: @@ -959,7 +958,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) return result - def _new_agg(self, func, args, kwargs): + def _future_agg(self, func, args, kwargs): if args or kwargs: # test_pass_args_kwargs gets here (with and without as_index) # can't return early diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index d79317e48bd5b..8978d78ed9e2e 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -640,7 +640,7 @@ def test_apply_dup_names_multi_agg(): # GH 21063 df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.T result = df.agg(["min"]) @@ -1013,7 +1013,7 @@ def test_agg_transform(axis, float_frame): # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): if axis in {0, "index"}: expected.columns = MultiIndex.from_product( [["sqrt"], float_frame.columns] @@ -1033,7 +1033,7 @@ def test_agg_transform(axis, float_frame): # these are in the order as if we are applying both # functions per series and then concatting result = float_frame.apply([np.abs, np.sqrt], axis=axis) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = pd.concat([f_abs, f_sqrt], axis=other_axis) if axis in {0, "index"}: expected.columns = MultiIndex.from_product( @@ -1064,7 +1064,7 @@ def test_demo(): expected = DataFrame( {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] ) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1113,7 +1113,7 @@ def test_agg_multiple_mixed_no_warning(): index=["min", "sum"], ) klass, match = None, None - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.T klass, match = FutureWarning, "Dropping of nuisance columns" # sorted index @@ -1123,7 +1123,7 @@ def test_agg_multiple_mixed_no_warning(): tm.assert_frame_equal(result, expected) klass, match = None, None - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): klass, match = FutureWarning, "Dropping of nuisance columns" with tm.assert_produces_warning(klass, match=match, check_stacklevel=False): 
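The renamed toggle these test hunks consult can be flipped in two ways; a sketch, assuming a build with this patch series applied. pandas option lookup falls back to a case-insensitive regex over registered keys, so any unambiguous fragment of "mode.future_udf_behavior" resolves; this is why the tests can write get_option("future_udf_behavior"), and why a later fixup's get_option("FUTURE_UDF_BEHAVIOR") still works:

    import os

    # Read once at import time by the register_option call above,
    # so this must be set before pandas is imported.
    os.environ["PANDAS_FUTURE_UDF_BEHAVIOR"] = "true"

    import pandas as pd

    # Or flip it at runtime:
    pd.set_option("mode.future_udf_behavior", True)
    assert pd.get_option("future_udf_behavior")
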
@@ -1131,7 +1131,7 @@ def test_agg_multiple_mixed_no_warning(): # GH40420: the result of .agg should have an index that is sorted # according to the arguments provided to agg. - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.loc[["D", "C", "B", "A"], ["sum", "min"]] else: expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) @@ -1153,7 +1153,7 @@ def test_agg_reduce(axis, float_frame): ) expected.columns = ["mean", "max", "sum"] expected = expected.T if axis in {0, "index"} else expected - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.T result = float_frame.agg(["mean", "max", "sum"], axis=axis) @@ -1231,7 +1231,7 @@ def test_nuiscance_columns(): index=["min"], columns=df.columns, ) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1242,11 +1242,15 @@ def test_nuiscance_columns(): expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) tm.assert_series_equal(result, expected) - result = df.agg(["sum"]) + warn = FutureWarning if get_option("future_udf_behavior") else None + with tm.assert_produces_warning( + warn, match="Select only valid", check_stacklevel=False + ): + result = df.agg(["sum"]) expected = DataFrame( [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] ) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1287,7 +1291,7 @@ def test_non_callable_aggregates(how): } ) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): tm.assert_frame_equal(result2, expected) tm.assert_frame_equal(result1, expected.T) else: @@ -1330,7 +1334,7 @@ def func(group_col): result = df.agg([func]) expected = expected.to_frame("func") - if not get_option("mode.new_udf_methods"): + if not get_option("future_udf_behavior"): expected = expected.T tm.assert_frame_equal(result, expected) @@ -1448,7 +1452,7 @@ def test_apply_no_suffix_index(request): # GH36189 pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"]) result = pdf.apply([np.square, lambda x: x, lambda x: x]) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): columns = MultiIndex.from_product( [["square", "", ""], ["A", "B"]] ) @@ -1489,7 +1493,7 @@ def foo(s): aggs = ["sum", foo, "count", "min"] result = df.agg(aggs) - if get_option("mode.new_udf_methods"): + if get_option("future_udf_behavior"): expected = DataFrame( { "sum": ["123456", 21, 18, 17], diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 78194a806f456..129fd3419743f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -500,7 +500,7 @@ def test_order_aggregate_multiple_funcs(): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): # TODO (GH 35725): This will not raise when agg-must-agg is implemented msg = "Cannot concat indices that do not have the same number of levels" with pytest.raises(AssertionError, match=msg): @@ -1214,7 +1214,7 @@ def test_nonagg_agg(): g = df.groupby("a") result = g.agg(["cumsum"]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): result.columns = result.columns.droplevel(0) else: result.columns = result.columns.droplevel(-1) diff --git 
a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index d34538a4f5935..1720c293cfff3 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -203,7 +203,7 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped.agg([np.sum, np.mean]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = pd.concat([c_sum, d_sum, c_mean, d_mean], axis=1) expected.columns = MultiIndex.from_product([["sum", "mean"], ["C", "D"]]) else: @@ -212,7 +212,7 @@ def test_aggregate_api_consistency(): tm.assert_frame_equal(result, expected, check_like=True) result = grouped[["D", "C"]].agg([np.sum, np.mean]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1) expected.columns = MultiIndex.from_product([["sum", "mean"], ["D", "C"]]) else: @@ -403,7 +403,7 @@ def P1(a): g = df.groupby("date") expected = g.agg([P1]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected.columns = expected.columns.levels[1] else: expected.columns = expected.columns.levels[0] diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index caa04d7994223..4539b2ef67e8d 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1139,7 +1139,7 @@ def test_apply_to_nullable_integer_returns_float(values, function): tm.assert_frame_equal(result, expected) result = groups.agg([function]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected.columns = MultiIndex.from_tuples([(function, "b")]) else: expected.columns = MultiIndex.from_tuples([("b", function)]) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 1cf36ddbb1772..45c10a53c9c0f 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -585,7 +585,7 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] agged = grouped.agg(funcs) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = pd.concat( [grouped.agg(funcs[0]), grouped.agg(funcs[1])], keys=["mean", "std"], @@ -1993,7 +1993,7 @@ def test_groupby_agg_ohlc_non_first(): index=date_range("2018-01-01", periods=2, freq="D", name="dti"), ) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): # TODO (GH 35725): This will not raise when agg-must-agg is implemented msg = "Cannot concat indices that do not have the same number of levels" with pytest.raises(AssertionError, match=msg): diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index ff3cb8d873bb9..3de3694f1eb52 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -98,7 +98,7 @@ def test_resample_loffset_arg_type(frame, create_index, arg): result_agg = df.resample("2D", loffset="2H").agg(arg) if isinstance(arg, list): - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) else: expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) @@ -220,7 +220,7 @@ def test_loffset_returns_datetimeindex(frame, kind, agg_arg): with tm.assert_produces_warning(FutureWarning): result_agg = df.resample("2D", loffset="2H", 
kind=kind).agg(agg_arg) if isinstance(agg_arg, list): - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected.columns = pd.MultiIndex.from_tuples([("mean", "value")]) else: expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index a8df3b9b81c12..d9e44ba6625ec 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -348,7 +348,7 @@ def test_agg(): b_std = r["B"].std() b_sum = r["B"].sum() - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = pd.concat([a_mean, b_mean, a_std, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["mean", "std"], ["A", "B"]]) else: @@ -632,7 +632,7 @@ def test_agg_with_datetime_index_list_agg_func(col_name): columns=[col_name], ) result = df.resample("1d").aggregate(["mean"]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = DataFrame( [47.5, 143.5, 195.5], index=date_range( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 4c13c9733cf68..080eb7dd9cd29 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1908,7 +1908,7 @@ def test_pivot_margins_name_unicode(self): ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = Series([1, 1, 1, 3], index=index) expected.index.name = None tm.assert_series_equal(table, expected) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index 0089f092dd439..d24399cb8a83b 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -91,7 +91,7 @@ def test_agg(): b_std = r["B"].std() result = r.aggregate([np.mean, np.std]) - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = concat([a_mean, b_mean, a_std, b_std], axis=1) expected.columns = MultiIndex.from_product([["mean", "std"], ["A", "B"]]) else: @@ -152,7 +152,7 @@ def test_agg_consistency(): r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - if get_option("new_udf_methods"): + if get_option("future_udf_behavior"): expected = MultiIndex.from_product([["sum", "mean"], list("AB")]) else: expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) From d412b4f1158c660c65fc4ac86050d3b214e4200e Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 9 Oct 2021 16:36:42 -0400 Subject: [PATCH 05/41] Merge fixups --- pandas/tests/groupby/aggregate/test_other.py | 2 +- pandas/tests/groupby/test_groupby.py | 8 +++++--- pandas/tests/resample/test_resample_api.py | 5 ++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 885de13ae853d..3ec47870a4fcb 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -49,7 +49,7 @@ def peak_to_peak(arr): if get_option("future_udf_behavior"): match = "Dropping invalid columns in DataFrameGroupBy.agg" else: - match = (r"\['key2'\] did not aggregate successfully",) + match = r"\['key2'\] did not aggregate successfully" with tm.assert_produces_warning( FutureWarning, diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 537220707f3f8..820a18d59d62e 100644 --- a/pandas/tests/groupby/test_groupby.py 
+++ b/pandas/tests/groupby/test_groupby.py @@ -584,9 +584,11 @@ def test_frame_multi_key_function_list(): grouped = data.groupby(["A", "B"]) funcs = [np.mean, np.std] - with tm.assert_produces_warning( - FutureWarning, match=r"\['C'\] did not aggregate successfully" - ): + if get_option("future_udf_behavior"): + klass, msg = None, None + else: + klass, msg = FutureWarning, r"\['C'\] did not aggregate successfully" + with tm.assert_produces_warning(klass, match=msg): agged = grouped.agg(funcs) if get_option("future_udf_behavior"): expected = pd.concat( diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 4be47eaa3c25d..e51396d380b6a 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -355,7 +355,10 @@ def test_agg(): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - warn = FutureWarning if t in cases[1:3] else None + if t in cases[1:3] and not get_option("FUTURE_UDF_BEHAVIOR"): + warn = FutureWarning + else: + warn = None with tm.assert_produces_warning( warn, match=r"\['date'\] did not aggregate successfully", From 0cea15b4ab3c23e3ae215b90a66b4f059a42d0bd Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> Date: Sun, 3 Oct 2021 20:09:53 -0400 Subject: [PATCH 06/41] BUG/ERR: sparse array cmp methods mismatched len (#43863) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/arrays/sparse/array.py | 7 +++++-- pandas/tests/arrays/sparse/test_arithmetics.py | 8 ++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 381b0f39ff849..0c841078fe9b4 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -511,6 +511,7 @@ Sparse - Bug in :meth:`DataFrame.sparse.to_coo` raising ``AttributeError`` when column names are not unique (:issue:`29564`) - Bug in :meth:`SparseArray.max` and :meth:`SparseArray.min` raising ``ValueError`` for arrays with 0 non-null elements (:issue:`43527`) - Bug in :meth:`DataFrame.sparse.to_coo` silently converting non-zero fill values to zero (:issue:`24817`) +- Bug in :class:`SparseArray` comparison methods with an array-like operand of mismatched length raising ``AssertionError`` or unclear ``ValueError`` depending on the input (:issue:`43863`) - ExtensionArray diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 7c5f0578bda27..87fcf54ed684b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1642,11 +1642,14 @@ def _cmp_method(self, other, op) -> SparseArray: if isinstance(other, np.ndarray): # TODO: make this more flexible than just ndarray... - if len(self) != len(other): - raise AssertionError(f"length mismatch: {len(self)} vs. 
{len(other)}") other = SparseArray(other, fill_value=self.fill_value) if isinstance(other, SparseArray): + if len(self) != len(other): + raise ValueError( + f"operands have mismatched length {len(self)} and {len(other)}" + ) + op_name = op.__name__.strip("_") return _sparse_array_op(self, other, op, op_name) else: diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 2ae60a90fee60..d7c39c0e0708e 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -529,3 +529,11 @@ def test_unary_op(op, fill_value): result = op(sparray) expected = SparseArray(op(arr), fill_value=op(fill_value)) tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("cons", [list, np.array, SparseArray]) +def test_mismatched_length_cmp_op(cons): + left = SparseArray([True, True]) + right = cons([True, True, True]) + with pytest.raises(ValueError, match="operands have mismatched length"): + left & right From 665b304f71608d7ca3abdce6de809a3ef11e79be Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 4 Oct 2021 05:07:41 -0700 Subject: [PATCH 07/41] Add deprecation tag for passing a string for ewm(times=...) (#43873) --- pandas/core/window/ewm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 79102c2bc82ee..29a6704ae5092 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -205,6 +205,8 @@ class ExponentialMovingWindow(BaseWindow): If str, the name of the column in the DataFrame representing the times. + .. deprecated:: 1.4.0 + If 1-D array like, a sequence with the same shape as the observations. Only applicable to ``mean()``. From 214ba4aff27a474f161e52963eca17b48921b998 Mon Sep 17 00:00:00 2001 From: Julian Fleischer Date: Mon, 4 Oct 2021 14:10:46 +0200 Subject: [PATCH 08/41] Make components of Suffixes Optional (#42544) --- pandas/_typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 9c20eb12dc7fc..68ec331c2781f 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -111,7 +111,7 @@ IndexLabel = Union[Hashable, Sequence[Hashable]] Level = Union[Hashable, int] Shape = Tuple[int, ...] 
-Suffixes = Tuple[str, str] +Suffixes = Tuple[Optional[str], Optional[str]] Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, List, Dict]] Frequency = Union[str, "DateOffset"] From 9d6da6d4383ad91ae6eb9f23e200dc37ef14813a Mon Sep 17 00:00:00 2001 From: Robin Raymond Date: Mon, 4 Oct 2021 21:13:40 +0200 Subject: [PATCH 09/41] BUG: Fix dtypes for read_json (#42819) * Fix dtypes for read_json * Address comments * Add whatsnew entry * Update doc/source/whatsnew/v1.4.0.rst Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> * Linting Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/json/_json.py | 9 +-------- pandas/tests/io/json/test_pandas.py | 30 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 0c841078fe9b4..8113ac97a3a37 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -465,6 +465,7 @@ I/O - Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`) - Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`) - Bug in :func:`read_csv`, changed exception class when expecting a file path name or file-like object from ``OSError`` to ``TypeError`` (:issue:`43366`) +- Bug in :func:`read_json` not handling non-numpy dtypes correctly (especially ``category``) (:issue:`21892`, :issue:`33205`) - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) - diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index f92fc65f55df6..b9bdfb91ca154 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -876,11 +876,8 @@ def check_keys_split(self, decoded): def parse(self): - # try numpy - numpy = self.numpy - if numpy: + if self.numpy: self._parse_numpy() - else: self._parse_no_numpy() @@ -941,10 +938,6 @@ def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): ) if dtype is not None: try: - # error: Argument 1 to "dtype" has incompatible type - # "Union[ExtensionDtype, str, dtype[Any], Type[object]]"; - # expected "Type[Any]" - dtype = np.dtype(dtype) # type: ignore[arg-type] return data.astype(dtype), True except (TypeError, ValueError): return data, False diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a856f031e20ba..747770ad78684 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1387,6 +1387,36 @@ def test_from_json_to_json_table_dtypes(self): result = read_json(dfjson, orient="table") tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("orient", ["split", "records", "index", "columns"]) + def test_to_json_from_json_columns_dtypes(self, orient): + # GH21892 GH33205 + expected = DataFrame.from_dict( + { + "Integer": Series([1, 2, 3], dtype="int64"), + "Float": Series([None, 2.0, 3.0], dtype="float64"), + "Object": Series([None, "", "c"], dtype="object"), + "Bool": Series([True, False, True], dtype="bool"), + "Category": Series(["a", "b", None], dtype="category"), + "Datetime": Series( + ["2020-01-01", None, "2020-01-03"], dtype="datetime64[ns]" + ), + } + 
) + dfjson = expected.to_json(orient=orient) + result = read_json( + dfjson, + orient=orient, + dtype={ + "Integer": "int64", + "Float": "float64", + "Object": "object", + "Bool": "bool", + "Category": "category", + "Datetime": "datetime64[ns]", + }, + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) def test_read_json_table_dtype_raises(self, dtype): # GH21345 From 005598c7b77e66b9bd1989b2d1197acacb232ab0 Mon Sep 17 00:00:00 2001 From: Horace Lai <44500643+horaceklai@users.noreply.github.com> Date: Tue, 5 Oct 2021 02:30:10 +0200 Subject: [PATCH 10/41] TST: dropping of nuisance columns for groupby ops #38815 (#43674) --- pandas/tests/groupby/test_groupby.py | 42 ++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 820a18d59d62e..c836ef0c6130c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -863,11 +863,6 @@ def test_groupby_multi_corner(df): def test_omit_nuisance(df): grouped = df.groupby("A") - - result = grouped.mean() - expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() - tm.assert_frame_equal(result, expected) - agged = grouped.agg(np.mean) exp = grouped.mean() tm.assert_frame_equal(agged, exp) @@ -886,14 +881,43 @@ def test_omit_nuisance(df): grouped.agg(lambda x: x.sum(0, numeric_only=False)) -def test_omit_nuisance_sem(df): - # GH 38774 - sem should work with nuisance columns +@pytest.mark.parametrize( + "agg_function", + ["max", "min"], +) +def test_keep_nuisance_agg(df, agg_function): + # GH 38815 + grouped = df.groupby("A") + result = getattr(grouped, agg_function)() + expected = result.copy() + expected.loc["bar", "B"] = getattr(df.loc[df["A"] == "bar", "B"], agg_function)() + expected.loc["foo", "B"] = getattr(df.loc[df["A"] == "foo", "B"], agg_function)() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "agg_function", + ["sum", "mean", "prod", "std", "var", "median"], +) +def test_omit_nuisance_agg(df, agg_function): + # GH 38774, GH 38815 grouped = df.groupby("A") - result = grouped.sem() - expected = df.loc[:, ["A", "C", "D"]].groupby("A").sem() + result = getattr(grouped, agg_function)() + expected = getattr(df.loc[:, ["A", "C", "D"]].groupby("A"), agg_function)() tm.assert_frame_equal(result, expected) +def test_omit_nuisance_warnings(df): + # GH 38815 + with tm.assert_produces_warning( + FutureWarning, filter_level="always", check_stacklevel=False + ): + grouped = df.groupby("A") + result = grouped.skew() + expected = df.loc[:, ["A", "C", "D"]].groupby("A").skew() + tm.assert_frame_equal(result, expected) + + def test_omit_nuisance_python_multiple(three_group): grouped = three_group.groupby(["A", "B"]) From 7afb062a217c018ee48f857fef256e16b59071d6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Oct 2021 17:32:09 -0700 Subject: [PATCH 11/41] BUG: retain EA dtypes in DataFrame __pos__, __neg__ (#43883) --- doc/source/whatsnew/v1.4.0.rst | 3 +- pandas/_libs/ops_dispatch.pyx | 41 ++++++++++++++---- pandas/core/arrays/numpy_.py | 9 ++++ pandas/core/generic.py | 47 +++++++++------------ pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arrays/test_numpy.py | 11 +++-- pandas/tests/frame/test_unary.py | 49 +++++++++++++++++++++- 7 files changed, 120 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 8113ac97a3a37..dcd31abaa8857 100644 
--- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -126,7 +126,8 @@ Other enhancements - Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory, the same is true for :class:`Series` counterparts (:issue:`24306`) - :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`) - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`) -- +- :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) + .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/ops_dispatch.pyx b/pandas/_libs/ops_dispatch.pyx index f6ecef2038cf3..c34504732ac32 100644 --- a/pandas/_libs/ops_dispatch.pyx +++ b/pandas/_libs/ops_dispatch.pyx @@ -18,6 +18,14 @@ DISPATCHED_UFUNCS = { "or", "xor", "and", + "neg", + "pos", + "abs", +} +UNARY_UFUNCS = { + "neg", + "pos", + "abs", } UFUNC_ALIASES = { "subtract": "sub", @@ -36,6 +44,9 @@ UFUNC_ALIASES = { "bitwise_or": "or", "bitwise_and": "and", "bitwise_xor": "xor", + "negative": "neg", + "absolute": "abs", + "positive": "pos", } # For op(., Array) -> Array.__r{op}__ @@ -80,15 +91,31 @@ def maybe_dispatch_ufunc_to_dunder_op( def not_implemented(*args, **kwargs): return NotImplemented - if (method == "__call__" - and op_name in DISPATCHED_UFUNCS - and kwargs.get("out") is None): - if isinstance(inputs[0], type(self)): + if kwargs or ufunc.nin > 2: + return NotImplemented + + if method == "__call__" and op_name in DISPATCHED_UFUNCS: + + if inputs[0] is self: name = f"__{op_name}__" - return getattr(self, name, not_implemented)(inputs[1]) - else: + meth = getattr(self, name, not_implemented) + + if op_name in UNARY_UFUNCS: + assert len(inputs) == 1 + return meth() + + return meth(inputs[1]) + + elif inputs[1] is self: name = REVERSED_NAMES.get(op_name, f"__r{op_name}__") - result = getattr(self, name, not_implemented)(inputs[0]) + + meth = getattr(self, name, not_implemented) + result = meth(inputs[0]) return result + + else: + # should not be reached, but covering our bases + return NotImplemented + else: return NotImplemented diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 410497d61c98b..8fe0c0114fb04 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -388,6 +388,15 @@ def to_numpy( def __invert__(self) -> PandasArray: return type(self)(~self._ndarray) + def __neg__(self) -> PandasArray: + return type(self)(-self._ndarray) + + def __pos__(self) -> PandasArray: + return type(self)(+self._ndarray) + + def __abs__(self) -> PandasArray: + return type(self)(abs(self._ndarray)) + def _cmp_method(self, other, op): if isinstance(other, PandasArray): other = other._ndarray diff --git a/pandas/core/generic.py b/pandas/core/generic.py index af8c64d5c0202..b235f120d98c8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -36,6 +36,7 @@ to_offset, ) from pandas._typing import ( + ArrayLike, Axis, CompressionOptions, Dtype, @@ -90,7 
+91,6 @@ is_list_like, is_number, is_numeric_dtype, - is_object_dtype, is_re_compilable, is_scalar, is_timedelta64_dtype, @@ -1495,36 +1495,27 @@ def equals(self, other: object) -> bool_t: @final def __neg__(self): - values = self._values - if is_bool_dtype(values): - arr = operator.inv(values) - elif ( - is_numeric_dtype(values) - or is_timedelta64_dtype(values) - or is_object_dtype(values) - ): - arr = operator.neg(values) - else: - raise TypeError(f"Unary negative expects numeric dtype, not {values.dtype}") - return self.__array_wrap__(arr) + def blk_func(values: ArrayLike): + if is_bool_dtype(values.dtype): + return operator.inv(values) + else: + return operator.neg(values) + + new_data = self._mgr.apply(blk_func) + res = self._constructor(new_data) + return res.__finalize__(self, method="__neg__") @final def __pos__(self): - values = self._values - if is_bool_dtype(values): - arr = values - elif ( - is_numeric_dtype(values) - or is_timedelta64_dtype(values) - or is_object_dtype(values) - ): - arr = operator.pos(values) - else: - raise TypeError( - "Unary plus expects bool, numeric, timedelta, " - f"or object dtype, not {values.dtype}" - ) - return self.__array_wrap__(arr) + def blk_func(values: ArrayLike): + if is_bool_dtype(values.dtype): + return values.copy() + else: + return operator.pos(values) + + new_data = self._mgr.apply(blk_func) + res = self._constructor(new_data) + return res.__finalize__(self, method="__pos__") @final def __invert__(self): diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index c0f38a1181026..60a58b7bbea78 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1878,7 +1878,7 @@ def test_datetime64_ops_nat(self): # subtraction tm.assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) - msg = "Unary negative expects" + msg = "bad operand type for unary -: 'DatetimeArray'" with pytest.raises(TypeError, match=msg): -single_nat_dtype_datetime + datetime_series diff --git a/pandas/tests/arrays/test_numpy.py b/pandas/tests/arrays/test_numpy.py index 753ec99e683e6..e8e9ee86e77dd 100644 --- a/pandas/tests/arrays/test_numpy.py +++ b/pandas/tests/arrays/test_numpy.py @@ -198,12 +198,17 @@ def test_validate_reduction_keyword_args(): # Ops -def test_ufunc(): +@pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive]) +def test_ufunc_unary(ufunc): arr = PandasArray(np.array([-1.0, 0.0, 1.0])) - result = np.abs(arr) - expected = PandasArray(np.abs(arr._ndarray)) + result = ufunc(arr) + expected = PandasArray(ufunc(arr._ndarray)) tm.assert_extension_array_equal(result, expected) + +def test_ufunc(): + arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + r1, r2 = np.divmod(arr, np.add(arr, 2)) e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2)) e1 = PandasArray(e1) diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index ea6243e2eae4a..2129586455333 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -49,7 +49,7 @@ def test_neg_object(self, df, expected): def test_neg_raises(self, df): msg = ( "bad operand type for unary -: 'str'|" - r"Unary negative expects numeric dtype, not datetime64\[ns\]" + r"bad operand type for unary -: 'DatetimeArray'" ) with pytest.raises(TypeError, match=msg): (-df) @@ -116,8 +116,53 @@ def test_pos_object(self, df): "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})] ) def test_pos_raises(self, df): - msg = "Unary plus 
expects .* dtype, not datetime64\\[ns\\]" + msg = r"bad operand type for unary \+: 'DatetimeArray'" with pytest.raises(TypeError, match=msg): (+df) with pytest.raises(TypeError, match=msg): (+df["a"]) + + def test_unary_nullable(self): + df = pd.DataFrame( + { + "a": pd.array([1, -2, 3, pd.NA], dtype="Int64"), + "b": pd.array([4.0, -5.0, 6.0, pd.NA], dtype="Float32"), + "c": pd.array([True, False, False, pd.NA], dtype="boolean"), + # include numpy bool to make sure bool-vs-boolean behavior + # is consistent in non-NA locations + "d": np.array([True, False, False, True]), + } + ) + + result = +df + res_ufunc = np.positive(df) + expected = df + # TODO: assert that we have copies? + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) + + result = -df + res_ufunc = np.negative(df) + expected = pd.DataFrame( + { + "a": pd.array([-1, 2, -3, pd.NA], dtype="Int64"), + "b": pd.array([-4.0, 5.0, -6.0, pd.NA], dtype="Float32"), + "c": pd.array([False, True, True, pd.NA], dtype="boolean"), + "d": np.array([False, True, True, False]), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) + + result = abs(df) + res_ufunc = np.abs(df) + expected = pd.DataFrame( + { + "a": pd.array([1, 2, 3, pd.NA], dtype="Int64"), + "b": pd.array([4.0, 5.0, 6.0, pd.NA], dtype="Float32"), + "c": pd.array([True, False, False, pd.NA], dtype="boolean"), + "d": np.array([True, False, False, True]), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(res_ufunc, expected) From 195f9cf098a91494be95cc82b2f2b1a8229b3192 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yoshiki=20V=C3=A1zquez=20Baeza?= Date: Mon, 4 Oct 2021 17:32:36 -0700 Subject: [PATCH 12/41] TST: Test Series' settitem with Interval and NaN (#43844) --- pandas/tests/indexing/test_iloc.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index b04a2c86a79d7..b8c53c7b59239 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -931,6 +931,17 @@ def test_iloc_setitem_td64_values_cast_na(self, value): expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") tm.assert_series_equal(series, expected) + @pytest.mark.parametrize("not_na", [Interval(0, 1), "a", 1.0]) + def test_setitem_mix_of_nan_and_interval(self, not_na, nulls_fixture): + # GH#27937 + dtype = CategoricalDtype(categories=[not_na]) + ser = Series( + [nulls_fixture, nulls_fixture, nulls_fixture, nulls_fixture], dtype=dtype + ) + ser.iloc[:3] = [nulls_fixture, not_na, nulls_fixture] + exp = Series([nulls_fixture, not_na, nulls_fixture, nulls_fixture], dtype=dtype) + tm.assert_series_equal(ser, exp) + def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): idx = Index([]) obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx) From 6021c0663f091b575bac1b1526cf801a6d73c854 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Oct 2021 17:33:02 -0700 Subject: [PATCH 13/41] PERF: tighter cython declarations, faster __iter__ (#43872) --- pandas/_libs/algos_common_helper.pxi.in | 6 ++--- pandas/_libs/algos_take_helper.pxi.in | 8 +++--- pandas/_libs/internals.pyx | 32 +++++++++++++++------- pandas/_libs/lib.pyx | 36 ++++++++++++++----------- pandas/_libs/testing.pyx | 5 ++-- 5 files changed, 51 insertions(+), 36 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 87130906ef28b..4242a76dcc3b7 100644 --- 
a/pandas/_libs/algos_common_helper.pxi.in
+++ b/pandas/_libs/algos_common_helper.pxi.in
@@ -8,18 +8,16 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
 # ensure_dtype
 # ----------------------------------------------------------------------

-cdef int PLATFORM_INT = (<ndarray>np.arange(0, dtype=np.intp)).descr.type_num
-

 def ensure_platform_int(object arr):
     # GH3033, GH1392
     # platform int is the size of the int pointer, e.g. np.intp
     if util.is_array(arr):
-        if (<ndarray>arr).descr.type_num == PLATFORM_INT:
+        if (<ndarray>arr).descr.type_num == cnp.NPY_INTP:
             return arr
         else:
             # equiv: arr.astype(np.intp)
-            return cnp.PyArray_Cast(<ndarray>arr, PLATFORM_INT)
+            return cnp.PyArray_Cast(<ndarray>arr, cnp.NPY_INTP)
     else:
         return np.array(arr, dtype=np.intp)

diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
index ec041c03b05e1..2a3858674af9e 100644
--- a/pandas/_libs/algos_take_helper.pxi.in
+++ b/pandas/_libs/algos_take_helper.pxi.in
@@ -103,7 +103,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[intp_t] indexer,
+                                    ndarray[intp_t, ndim=1] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):
     cdef:
@@ -158,7 +158,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[intp_t] indexer,
+                                    ndarray[intp_t, ndim=1] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):

@@ -195,8 +195,8 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
                                     fill_value=np.nan):
     cdef:
         Py_ssize_t i, j, k, n, idx
-        ndarray[intp_t] idx0 = indexer[0]
-        ndarray[intp_t] idx1 = indexer[1]
+        ndarray[intp_t, ndim=1] idx0 = indexer[0]
+        ndarray[intp_t, ndim=1] idx1 = indexer[1]
         {{c_type_out}} fv

     n = len(idx0)
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 87709ac6c33bf..2f0bcefefaaa1 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -227,7 +227,7 @@ cdef class BlockPlacement:
         cdef:
             slice nv, s = self._ensure_has_slice()
             Py_ssize_t other_int, start, stop, step, l
-            ndarray newarr
+            ndarray[intp_t, ndim=1] newarr

         if s is not None:
             # see if we are either all-above or all-below, each of which
@@ -260,7 +260,7 @@ cdef class BlockPlacement:
         cdef:
             slice slc = self._ensure_has_slice()
             slice new_slice
-            ndarray new_placement
+            ndarray[intp_t, ndim=1] new_placement

         if slc is not None and slc.step == 1:
             new_slc = slice(slc.start * factor, slc.stop * factor, 1)
@@ -345,7 +345,9 @@ cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -
     return length


-cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
+cdef (Py_ssize_t, Py_ssize_t, Py_ssize_t, Py_ssize_t) slice_get_indices_ex(
+    slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX
+):
     """
     Get (start, stop, step, length) tuple for a slice.

@@ -460,9 +462,11 @@ def get_blkno_indexers(
     # blockno handling.
cdef: int64_t cur_blkno - Py_ssize_t i, start, stop, n, diff, tot_len + Py_ssize_t i, start, stop, n, diff + cnp.npy_intp tot_len int64_t blkno object group_dict = defaultdict(list) + ndarray[int64_t, ndim=1] arr n = blknos.shape[0] result = list() @@ -495,7 +499,8 @@ def get_blkno_indexers( result.append((blkno, slice(slices[0][0], slices[0][1]))) else: tot_len = sum(stop - start for start, stop in slices) - arr = np.empty(tot_len, dtype=np.int64) + # equiv np.empty(tot_len, dtype=np.int64) + arr = cnp.PyArray_EMPTY(1, &tot_len, cnp.NPY_INT64, 0) i = 0 for start, stop in slices: @@ -526,8 +531,13 @@ def get_blkno_placements(blknos, group: bool = True): yield blkno, BlockPlacement(indexer) +@cython.boundscheck(False) +@cython.wraparound(False) cpdef update_blklocs_and_blknos( - ndarray[intp_t] blklocs, ndarray[intp_t] blknos, Py_ssize_t loc, intp_t nblocks + ndarray[intp_t, ndim=1] blklocs, + ndarray[intp_t, ndim=1] blknos, + Py_ssize_t loc, + intp_t nblocks, ): """ Update blklocs and blknos when a new column is inserted at 'loc'. @@ -535,7 +545,7 @@ cpdef update_blklocs_and_blknos( cdef: Py_ssize_t i cnp.npy_intp length = len(blklocs) + 1 - ndarray[intp_t] new_blklocs, new_blknos + ndarray[intp_t, ndim=1] new_blklocs, new_blknos # equiv: new_blklocs = np.empty(length, dtype=np.intp) new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) @@ -693,7 +703,7 @@ cdef class BlockManager: cnp.npy_intp length = self.shape[0] SharedBlock blk BlockPlacement bp - ndarray[intp_t] new_blknos, new_blklocs + ndarray[intp_t, ndim=1] new_blknos, new_blklocs # equiv: np.empty(length, dtype=np.intp) new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0) @@ -711,7 +721,11 @@ cdef class BlockManager: new_blknos[j] = blkno new_blklocs[j] = i - for blkno in new_blknos: + for i in range(length): + # faster than `for blkno in new_blknos` + # https://github.com/cython/cython/issues/4393 + blkno = new_blknos[i] + # If there are any -1s remaining, this indicates that our mgr_locs # are invalid. if blkno == -1: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7e49c7f1952c4..2c7b052917463 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -448,7 +448,7 @@ def fast_zip(list ndarrays) -> ndarray[object]: """ cdef: Py_ssize_t i, j, k, n - ndarray[object] result + ndarray[object, ndim=1] result flatiter it object val, tup @@ -507,7 +507,7 @@ def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: """ cdef: Py_ssize_t i, n = len(indexer) - ndarray[intp_t] rev_indexer + ndarray[intp_t, ndim=1] rev_indexer intp_t idx rev_indexer = np.empty(length, dtype=np.intp) @@ -540,7 +540,7 @@ def has_infs(floating[:] arr) -> bool: return ret -def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): +def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) int k, vstart, vlast, v @@ -579,7 +579,7 @@ def maybe_indices_to_slice(ndarray[intp_t] indices, int max_len): @cython.wraparound(False) @cython.boundscheck(False) -def maybe_booleans_to_slice(ndarray[uint8_t] mask): +def maybe_booleans_to_slice(ndarray[uint8_t, ndim=1] mask): cdef: Py_ssize_t i, n = len(mask) Py_ssize_t start = 0, end = 0 @@ -775,14 +775,14 @@ def is_all_arraylike(obj: list) -> bool: # is a general, O(max(len(values), len(binner))) method. 
@cython.boundscheck(False) @cython.wraparound(False) -def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner, +def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, object closed='left', bint hasnans=False): """ Int64 (datetime64) version of generic python version in ``groupby.py``. """ cdef: Py_ssize_t lenidx, lenbin, i, j, bc, vc - ndarray[int64_t] bins + ndarray[int64_t, ndim=1] bins int64_t l_bin, r_bin, nat_count bint right_closed = closed == 'right' @@ -931,7 +931,7 @@ def generate_slices(const intp_t[:] labels, Py_ssize_t ngroups): return np.asarray(starts), np.asarray(ends) -def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys, +def indices_fast(ndarray[intp_t, ndim=1] index, const int64_t[:] labels, list keys, list sorted_labels) -> dict: """ Parameters @@ -2067,7 +2067,9 @@ cdef bint is_period_array(ndarray[object] values): if len(values) == 0: return False - for val in values: + for i in range(n): + val = values[i] + if is_period_object(val): if dtype_code == -10000: dtype_code = val._dtype._dtype_code @@ -2102,7 +2104,9 @@ cpdef bint is_interval_array(ndarray values): if len(values) == 0: return False - for val in values: + for i in range(n): + val = values[i] + if is_interval(val): if closed is None: closed = val.closed @@ -2144,7 +2148,7 @@ cpdef bint is_interval_array(ndarray values): @cython.boundscheck(False) @cython.wraparound(False) def maybe_convert_numeric( - ndarray[object] values, + ndarray[object, ndim=1] values, set na_values, bint convert_empty=True, bint coerce_numeric=False, @@ -2205,12 +2209,12 @@ def maybe_convert_numeric( int status, maybe_int Py_ssize_t i, n = values.size Seen seen = Seen(coerce_numeric) - ndarray[float64_t] floats = np.empty(n, dtype='f8') - ndarray[complex128_t] complexes = np.empty(n, dtype='c16') - ndarray[int64_t] ints = np.empty(n, dtype='i8') - ndarray[uint64_t] uints = np.empty(n, dtype='u8') - ndarray[uint8_t] bools = np.empty(n, dtype='u1') - ndarray[uint8_t] mask = np.zeros(n, dtype="u1") + ndarray[float64_t, ndim=1] floats = np.empty(n, dtype='f8') + ndarray[complex128_t, ndim=1] complexes = np.empty(n, dtype='c16') + ndarray[int64_t, ndim=1] ints = np.empty(n, dtype='i8') + ndarray[uint64_t, ndim=1] uints = np.empty(n, dtype='u8') + ndarray[uint8_t, ndim=1] bools = np.empty(n, dtype='u1') + ndarray[uint8_t, ndim=1] mask = np.zeros(n, dtype="u1") float64_t fval bint allow_null_in_int = convert_to_masked_nullable diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index ff15a2c720c2c..cfe9f40f12452 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -7,10 +7,9 @@ from numpy cimport import_array import_array() -from pandas._libs.lib import is_complex - from pandas._libs.util cimport ( is_array, + is_complex_object, is_real_number_object, ) @@ -196,7 +195,7 @@ cpdef assert_almost_equal(a, b, f"with rtol={rtol}, atol={atol}") return True - if is_complex(a) and is_complex(b): + if is_complex_object(a) and is_complex_object(b): if array_equivalent(a, b, strict_nan=True): # inf comparison return True From aa0a1d6e6b3062978dbc4c6c95ff2ddf564bf1f9 Mon Sep 17 00:00:00 2001 From: michal-gh Date: Wed, 6 Oct 2021 03:02:47 +0200 Subject: [PATCH 14/41] PERF: read_csv with memory_map=True when file encoding is UTF-8 (#43787) (#43787) --- asv_bench/benchmarks/io/csv.py | 29 ++++++++++++++++++++++++ doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/common.py | 2 +- pandas/tests/io/parser/test_encoding.py | 30 +++++++++++++++++++++++++ 4 files changed, 
61 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 35058ba03ade8..153cad403dcc3 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -10,6 +10,7 @@ from pandas import ( Categorical, DataFrame, + concat, date_range, read_csv, to_datetime, @@ -459,6 +460,34 @@ def time_read_special_date(self, value, engine): ) +class ReadCSVMemMapUTF8: + + fname = "__test__.csv" + number = 5 + + def setup(self): + lines = [] + line_length = 128 + start_char = " " + end_char = "\U00010080" + # This for loop creates a list of 128-char strings + # consisting of consecutive Unicode chars + for lnum in range(ord(start_char), ord(end_char), line_length): + line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n" + try: + line.encode("utf-8") + except UnicodeEncodeError: + # Some 16-bit words are not valid Unicode chars and must be skipped + continue + lines.append(line) + df = DataFrame(lines) + df = concat([df for n in range(100)], ignore_index=True) + df.to_csv(self.fname, index=False, header=False, encoding="utf-8") + + def time_read_memmapped_utf8(self): + read_csv(self.fname, header=None, memory_map=True, encoding="utf-8", engine="c") + + class ParseDateComparison(StringIORewind): params = ([False, True],) param_names = ["cache_dates"] diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index dcd31abaa8857..83820ac25491d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -364,6 +364,7 @@ Performance improvements - Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`) - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`) - Performance improvement in :meth:`.Rolling.mean` and :meth:`.Expanding.mean` with ``engine="numba"`` (:issue:`43612`) +- Improved performance of :meth:`pandas.read_csv` with ``memory_map=True`` when file encoding is UTF-8 (:issue:`43787`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/common.py b/pandas/io/common.py index 6dfddd571b88f..be6577e646ac3 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -874,7 +874,7 @@ def __iter__(self) -> _MMapWrapper: def read(self, size: int = -1) -> str | bytes: # CSV c-engine uses read instead of iterating content: bytes = self.mmap.read(size) - if self.decode: + if self.decode and self.encoding != "utf-8": # memory mapping is applied before compression. Encoding should # be applied to the de-compressed data. 
final = size == -1 or len(content) < size diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 6ca3fdf9a6258..2573314f155cf 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -272,6 +272,36 @@ def test_chunk_splits_multibyte_char(all_parsers): tm.assert_frame_equal(dfr, df) +@skip_pyarrow +def test_readcsv_memmap_utf8(all_parsers): + """ + GH 43787 + + Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8 + """ + lines = [] + line_length = 128 + start_char = " " + end_char = "\U00010080" + # This for loop creates a list of 128-char strings + # consisting of consecutive Unicode chars + for lnum in range(ord(start_char), ord(end_char), line_length): + line = "".join([chr(c) for c in range(lnum, lnum + 0x80)]) + "\n" + try: + line.encode("utf-8") + except UnicodeEncodeError: + continue + lines.append(line) + parser = all_parsers + df = DataFrame(lines) + with tm.ensure_clean("utf8test.csv") as fname: + df.to_csv(fname, index=False, header=False, encoding="utf-8") + dfr = parser.read_csv( + fname, header=None, memory_map=True, engine="c", encoding="utf-8" + ) + tm.assert_frame_equal(df, dfr) + + def test_not_readable(all_parsers): # GH43439 parser = all_parsers From ef35a190d18cd28ca6ba127fb2a88b64a588d8c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 5 Oct 2021 21:04:57 -0400 Subject: [PATCH 15/41] TYP: enable reportMissingImports (#43790) --- pandas/_libs/reshape.pyi | 2 +- pandas/_libs/tslibs/timedeltas.pyi | 2 +- pandas/io/excel/_pyxlsb.py | 1 + pyproject.toml | 3 +-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/reshape.pyi b/pandas/_libs/reshape.pyi index 893826a35d41e..110687fcd0c31 100644 --- a/pandas/_libs/reshape.pyi +++ b/pandas/_libs/reshape.pyi @@ -1,6 +1,6 @@ import numpy as np -import pandas._tying as npt +from pandas._typing import npt def unstack( values: np.ndarray, # reshape_t[:, :] diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 8de02aa566456..7c0131cf28c9a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -7,12 +7,12 @@ from typing import ( ) import numpy as np -from pands._typing import npt from pandas._libs.tslibs import ( NaTType, Tick, ) +from pandas._typing import npt _S = TypeVar("_S") diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 52a67336aaa82..4b2b9f7a3a678 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -1,3 +1,4 @@ +# pyright: reportMissingImports=false from __future__ import annotations from pandas._typing import ( diff --git a/pyproject.toml b/pyproject.toml index 0223a1c035cbc..fe48a4d684cf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -159,14 +159,13 @@ skip = "pandas/__init__.py" pythonVersion = "3.8" typeCheckingMode = "strict" include = ["pandas"] -exclude = ["pandas/tests", "pandas/util/version"] +exclude = ["pandas/tests", "pandas/io/clipboard", "pandas/util/version"] reportGeneralTypeIssues = false reportConstantRedefinition = false reportFunctionMemberAccess = false reportImportCycles = false reportIncompatibleMethodOverride = false reportIncompatibleVariableOverride = false -reportMissingImports = false reportMissingModuleSource = false reportMissingTypeArgument = false reportMissingTypeStubs = false From eefd0f0da3e2957db8ed091d380b1c4a6da9cdb5 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Wed, 6 Oct 2021 02:07:56 
+0100 Subject: [PATCH 16/41] Don't suppress exception chaining for optional dependencies (#43882) --- doc/source/whatsnew/v1.4.0.rst | 3 ++- pandas/compat/_optional.py | 2 +- pandas/tests/test_optional_dependency.py | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 83820ac25491d..9ecd49ee31047 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -127,7 +127,8 @@ Other enhancements - :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`) - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`) - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) - +- The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`) +- .. --------------------------------------------------------------------------- diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 651729cd0ad44..adf20f3322a79 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -115,7 +115,7 @@ def import_optional_dependency( module = importlib.import_module(name) except ImportError: if errors == "raise": - raise ImportError(msg) from None + raise ImportError(msg) else: return None diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py index f75ee0d0ddd95..c1d1948d6c31a 100644 --- a/pandas/tests/test_optional_dependency.py +++ b/pandas/tests/test_optional_dependency.py @@ -13,8 +13,10 @@ def test_import_optional(): match = "Missing .*notapackage.* pip .* conda .* notapackage" - with pytest.raises(ImportError, match=match): + with pytest.raises(ImportError, match=match) as exc_info: import_optional_dependency("notapackage") + # The original exception should be there as context: + assert isinstance(exc_info.value.__context__, ImportError) result = import_optional_dependency("notapackage", errors="ignore") assert result is None From d3f5a4473dd2ecb10125d70aa97a9792ac43aa04 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 5 Oct 2021 18:08:43 -0700 Subject: [PATCH 17/41] BUG: DataFrame arithmetic with subclass where constructor is not the subclass itself (#43897) --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/frame.py | 4 +-- pandas/tests/frame/test_arithmetic.py | 37 +++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9ecd49ee31047..2a3049895a390 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -408,6 +408,8 @@ Numeric - Bug in :meth:`DataFrame.rank` raising ``ValueError`` with ``object`` columns and ``method="first"`` (:issue:`41931`) - Bug in :meth:`DataFrame.rank` treating missing values and extreme values as equal (for example ``np.nan`` and ``np.inf``), causing incorrect results when ``na_option="bottom"`` or ``na_option="top`` used (:issue:`41931`) - Bug in ``numexpr`` engine still being used when the option ``compute.use_numexpr`` is set to ``False`` (:issue:`32556`) +- Bug in :class:`DataFrame` arithmetic ops with a subclass whose :meth:`_constructor` attribute is a callable other than the 
subclass itself (:issue:`43201`) +- Conversion ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 69ba0904165f7..2eb66c7db0ba6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6955,7 +6955,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): # i.e. scalar, faster than checking np.ndim(right) == 0 with np.errstate(all="ignore"): bm = self._mgr.apply(array_op, right=right) - return type(self)(bm) + return self._constructor(bm) elif isinstance(right, DataFrame): assert self.index.equals(right.index) @@ -6976,7 +6976,7 @@ def _dispatch_frame_op(self, right, func: Callable, axis: int | None = None): right._mgr, # type: ignore[arg-type] array_op, ) - return type(self)(bm) + return self._constructor(bm) elif isinstance(right, Series) and axis == 1: # axis=1 means we want to operate row-by-row diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index afa9593807acc..1ddb18c218cc6 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1,5 +1,6 @@ from collections import deque from datetime import datetime +import functools import operator import re @@ -1845,3 +1846,39 @@ def test_bool_frame_mult_float(): result = df * 1.0 expected = DataFrame(np.ones((2, 2)), list("ab"), list("cd")) tm.assert_frame_equal(result, expected) + + +def test_frame_op_subclass_nonclass_constructor(): + # GH#43201 subclass._constructor is a function, not the subclass itself + + class SubclassedSeries(Series): + @property + def _constructor(self): + return SubclassedSeries + + @property + def _constructor_expanddim(self): + return SubclassedDataFrame + + class SubclassedDataFrame(DataFrame): + _metadata = ["my_extra_data"] + + def __init__(self, my_extra_data, *args, **kwargs): + self.my_extra_data = my_extra_data + super().__init__(*args, **kwargs) + + @property + def _constructor(self): + return functools.partial(type(self), self.my_extra_data) + + @property + def _constructor_sliced(self): + return SubclassedSeries + + sdf = SubclassedDataFrame("some_data", {"A": [1, 2, 3], "B": [4, 5, 6]}) + result = sdf * 2 + expected = SubclassedDataFrame("some_data", {"A": [2, 4, 6], "B": [8, 10, 12]}) + tm.assert_frame_equal(result, expected) + + result = sdf + sdf + tm.assert_frame_equal(result, expected) From 114621514a608a30130bf530fcef7df9d54f612f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 5 Oct 2021 18:09:29 -0700 Subject: [PATCH 18/41] REF: remove _get_attributes_dict (#43895) --- pandas/core/indexes/base.py | 30 ++++++++++-------------------- pandas/core/indexes/category.py | 2 -- pandas/core/indexes/datetimes.py | 7 +------ pandas/core/indexes/interval.py | 8 ++++++-- pandas/core/indexes/period.py | 1 - pandas/core/indexes/range.py | 5 ++--- 6 files changed, 19 insertions(+), 34 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c9e128ffc4289..2b49a88e27961 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -728,13 +728,6 @@ def _format_duplicate_message(self) -> DataFrame: # -------------------------------------------------------------------- # Index Internals Methods - @final - def _get_attributes_dict(self) -> dict[str_t, Any]: - """ - Return an attributes dict for my class. 
- """ - return {k: getattr(self, k, None) for k in self._attributes} - def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT: """ Create a new Index with the same class as the caller, don't copy the @@ -859,9 +852,7 @@ def __array_wrap__(self, result, context=None): if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1: return result - attrs = self._get_attributes_dict() - attrs.pop("freq", None) # For DatetimeIndex/TimedeltaIndex - return Index(result, **attrs) + return Index(result, name=self.name) @cache_readonly def dtype(self) -> DtypeObj: @@ -2493,8 +2484,7 @@ def _is_multi(self) -> bool: # Pickle Methods def __reduce__(self): - d = {"data": self._data} - d.update(self._get_attributes_dict()) + d = {"data": self._data, "name": self.name} return _new_Index, (type(self), d), None # -------------------------------------------------------------------- @@ -5820,29 +5810,29 @@ def map(self, mapper, na_action=None): new_values = self._map_values(mapper, na_action=na_action) - attributes = self._get_attributes_dict() - # we can return a MultiIndex if new_values.size and isinstance(new_values[0], tuple): if isinstance(self, MultiIndex): names = self.names - elif attributes.get("name"): - names = [attributes.get("name")] * len(new_values[0]) + elif self.name: + names = [self.name] * len(new_values[0]) else: names = None return MultiIndex.from_tuples(new_values, names=names) - attributes["copy"] = False + dtype = None if not new_values.size: # empty - attributes["dtype"] = self.dtype + dtype = self.dtype if self._is_backward_compat_public_numeric_index and is_numeric_dtype( new_values.dtype ): - return self._constructor(new_values, **attributes) + return self._constructor( + new_values, dtype=dtype, copy=False, name=self.name + ) - return Index._with_infer(new_values, **attributes) + return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) # TODO: De-duplicate with map, xref GH#32349 @final diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index c45543a9187bd..02bbfe69be1b8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -194,8 +194,6 @@ def _engine_type(self): np.int64: libindex.Int64Engine, }[self.codes.dtype.type] - _attributes = ["name"] - # -------------------------------------------------------------------- # Constructors diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index d556466554ea4..6078da3bedd8c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -357,12 +357,7 @@ def _is_dates_only(self) -> bool: return self.tz is None and is_dates_only(self._values) # type: ignore[arg-type] def __reduce__(self): - - # we use a special reduce here because we need - # to simply set the .tz (and not reinterpret it) - - d = {"data": self._data} - d.update(self._get_attributes_dict()) + d = {"data": self._data, "name": self.name} return _new_DatetimeIndex, (type(self), d), None def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f494638ba1aa4..165048e2a591a 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -354,8 +354,12 @@ def _multiindex(self) -> MultiIndex: return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) def __reduce__(self): - d = {"left": self.left, "right": self.right, "closed": self.closed} - d.update(self._get_attributes_dict()) + d = { + 
"left": self.left, + "right": self.right, + "closed": self.closed, + "name": self.name, + } return _new_IntervalIndex, (type(self), d), None @property diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 4c4902d3ce89f..e422f2bc3ff9a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -148,7 +148,6 @@ class PeriodIndex(DatetimeIndexOpsMixin): """ _typ = "periodindex" - _attributes = ["name"] _data: PeriodArray freq: BaseOffset diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 71bc4af78db6b..51d9f15390789 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -206,7 +206,7 @@ def _get_data_as_items(self): return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)] def __reduce__(self): - d = self._get_attributes_dict() + d = {"name": self.name} d.update(dict(self._get_data_as_items())) return ibase._new_Index, (type(self), d), None @@ -913,7 +913,6 @@ def _arith_method(self, other, op): # TODO: if other is a RangeIndex we may have more efficient options other = extract_array(other, extract_numpy=True, extract_range=True) - attrs = self._get_attributes_dict() left, right = self, other @@ -935,7 +934,7 @@ def _arith_method(self, other, op): rstart = op(left.start, right) rstop = op(left.stop, right) - result = type(self)(rstart, rstop, rstep, **attrs) + result = type(self)(rstart, rstop, rstep, name=self.name) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return From 58ff02d373e8c9b46d0f3125835133dab700d705 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Wed, 6 Oct 2021 04:10:03 +0300 Subject: [PATCH 19/41] Annotates `indexers/utils.py` functions that don't return anything with `None` (#43893) --- pandas/core/indexers/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index eacc7960a82aa..23d83343c96a2 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -363,7 +363,7 @@ def length_of_indexer(indexer, target=None) -> int: raise AssertionError("cannot find the length of the indexer") -def deprecate_ndim_indexing(result, stacklevel: int = 3): +def deprecate_ndim_indexing(result, stacklevel: int = 3) -> None: """ Helper function to raise the deprecation warning for multi-dimensional indexing on 1D Series/Index. @@ -409,7 +409,7 @@ def unpack_1tuple(tup): return tup -def check_key_length(columns: Index, key, value: DataFrame): +def check_key_length(columns: Index, key, value: DataFrame) -> None: """ Checks if a key used as indexer has the same length as the columns it is associated with. 
From c9b0a6d1bcb3d2363a3867a8bc6e5c66a56c556b Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Wed, 6 Oct 2021 05:27:58 -0700 Subject: [PATCH 20/41] CI: Test Python 3.10 on MacOS and Windows too (#43772) --- .github/workflows/python-dev.yml | 23 ++++++++++++++++++----- pandas/tests/frame/test_reductions.py | 8 +++----- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index 596c3b6df9d49..3a139936fbd22 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -21,12 +21,20 @@ env: jobs: build: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macOS-latest, windows-latest] + name: actions-310-dev timeout-minutes: 60 + env: + NUMPY_WHEELS_AVAILABLE: ${{ matrix.os == 'ubuntu-latest' }} + concurrency: - group: ${{ github.ref }}-dev + group: ${{ github.ref }}-${{ matrix.os }}-dev cancel-in-progress: ${{github.event_name == 'pull_request'}} steps: @@ -40,12 +48,16 @@ jobs: python-version: '3.10-dev' - name: Install dependencies + shell: bash run: | python -m pip install --upgrade pip setuptools wheel - pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy - pip install git+https://github.com/pytest-dev/pytest.git + if [[ "$NUMPY_WHEELS_AVAILABLE" == "true" ]]; then + pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy + else + pip install git+https://github.com/numpy/numpy.git + fi pip install git+https://github.com/nedbat/coveragepy.git - pip install cython python-dateutil pytz hypothesis pytest-xdist pytest-cov + pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov pip list - name: Build Pandas @@ -58,6 +70,7 @@ jobs: python -c "import pandas; pandas.show_versions();" - name: Test with pytest + shell: bash run: | ci/run_tests.sh # GH 41935 diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 886cdfb7d76b0..258e4e6eb0cc9 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1366,11 +1366,9 @@ def test_min_max_dt64_with_NaT_skipna_false(self, request, tz_naive_fixture): # GH#36907 tz = tz_naive_fixture if isinstance(tz, tzlocal) and is_platform_windows(): - request.node.add_marker( - pytest.mark.xfail( - reason="GH#37659 OSError raised within tzlocal bc Windows " - "chokes in times before 1970-01-01" - ) + pytest.skip( + "GH#37659 OSError raised within tzlocal bc Windows " + "chokes in times before 1970-01-01" ) df = DataFrame( From 28c28c76e19efb76297a536c800de6c1402919ff Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 6 Oct 2021 05:57:43 -0700 Subject: [PATCH 21/41] ENH: ExponentialMovingWindow.sum (#43871) --- doc/source/reference/window.rst | 1 + doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/_libs/window/aggregations.pyi | 3 +- pandas/_libs/window/aggregations.pyx | 57 +++++++------- pandas/core/window/ewm.py | 85 +++++++++++++++++--- pandas/core/window/numba_.py | 111 +++++++++++++++------------ pandas/tests/window/test_ewm.py | 19 +++++ pandas/tests/window/test_numba.py | 29 ++++--- 8 files changed, 207 insertions(+), 100 deletions(-) diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst index 5e230a533625f..0be3184a9356c 100644 --- a/doc/source/reference/window.rst +++ b/doc/source/reference/window.rst @@ -88,6 +88,7 @@ 
Exponentially-weighted window functions :toctree: api/ ExponentialMovingWindow.mean + ExponentialMovingWindow.sum ExponentialMovingWindow.std ExponentialMovingWindow.var ExponentialMovingWindow.corr diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2a3049895a390..daf0d0d000079 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -128,7 +128,7 @@ Other enhancements - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`) - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`) - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`) -- +- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/window/aggregations.pyi b/pandas/_libs/window/aggregations.pyi index 879809a259266..f3317ff5a60be 100644 --- a/pandas/_libs/window/aggregations.pyi +++ b/pandas/_libs/window/aggregations.pyi @@ -100,7 +100,7 @@ def roll_weighted_var( minp: int, # int64_t ddof: int, # unsigned int ) -> np.ndarray: ... # np.ndarray[np.float64] -def ewma( +def ewm( vals: np.ndarray, # const float64_t[:] start: np.ndarray, # const int64_t[:] end: np.ndarray, # const int64_t[:] @@ -109,6 +109,7 @@ def ewma( adjust: bool, ignore_na: bool, deltas: np.ndarray, # const float64_t[:] + normalize: bool, ) -> np.ndarray: ... # np.ndarray[np.float64] def ewmcov( input_x: np.ndarray, # const float64_t[:] diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 29fe20090875b..1941a3c4a37f0 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1604,13 +1604,13 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights, # ---------------------------------------------------------------------- -# Exponentially weighted moving average +# Exponentially weighted moving -def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, - int minp, float64_t com, bint adjust, bint ignore_na, - const float64_t[:] deltas=None) -> np.ndarray: +def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, + int minp, float64_t com, bint adjust, bint ignore_na, + const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray: """ - Compute exponentially-weighted moving average using center-of-mass. + Compute exponentially-weighted moving average or sum using center-of-mass. Parameters ---------- @@ -1623,6 +1623,8 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, ignore_na : bool deltas : ndarray (float64 type), optional. If None, implicitly assumes equally spaced points (used when `times` is not passed) + normalize : bool, optional. + If True, calculate the mean. If False, calculate the sum. 
Returns ------- @@ -1634,7 +1636,7 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, const float64_t[:] sub_vals const float64_t[:] sub_deltas=None ndarray[float64_t] sub_output, output = np.empty(N, dtype=np.float64) - float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + float64_t alpha, old_wt_factor, new_wt, weighted, old_wt, cur bint is_observation, use_deltas if N == 0: @@ -1657,10 +1659,10 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, win_size = len(sub_vals) sub_output = np.empty(win_size, dtype=np.float64) - weighted_avg = sub_vals[0] - is_observation = weighted_avg == weighted_avg + weighted = sub_vals[0] + is_observation = weighted == weighted nobs = int(is_observation) - sub_output[0] = weighted_avg if nobs >= minp else NaN + sub_output[0] = weighted if nobs >= minp else NaN old_wt = 1. with nogil: @@ -1668,37 +1670,38 @@ def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, cur = sub_vals[i] is_observation = cur == cur nobs += is_observation - if weighted_avg == weighted_avg: + if weighted == weighted: if is_observation or not ignore_na: - if use_deltas: - old_wt *= old_wt_factor ** sub_deltas[i - 1] + if normalize: + if use_deltas: + old_wt *= old_wt_factor ** sub_deltas[i - 1] + else: + old_wt *= old_wt_factor else: - old_wt *= old_wt_factor + weighted = old_wt_factor * weighted if is_observation: - - # avoid numerical errors on constant series - if weighted_avg != cur: - weighted_avg = ((old_wt * weighted_avg) + - (new_wt * cur)) / (old_wt + new_wt) - if adjust: - old_wt += new_wt + if normalize: + # avoid numerical errors on constant series + if weighted != cur: + weighted = old_wt * weighted + new_wt * cur + weighted /= (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1. else: - old_wt = 1. 
+ weighted += cur elif is_observation: - weighted_avg = cur + weighted = cur - sub_output[i] = weighted_avg if nobs >= minp else NaN + sub_output[i] = weighted if nobs >= minp else NaN output[s:e] = sub_output return output -# ---------------------------------------------------------------------- -# Exponentially weighted moving covariance - - def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end, int minp, const float64_t[:] input_y, float64_t com, bint adjust, bint ignore_na, bint bias) -> np.ndarray: diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 29a6704ae5092..d769f846b3bdc 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -46,8 +46,8 @@ window_agg_numba_parameters, ) from pandas.core.window.numba_ import ( - generate_ewma_numba_table_func, - generate_numba_ewma_func, + generate_numba_ewm_func, + generate_numba_ewm_table_func, ) from pandas.core.window.online import ( EWMMeanState, @@ -469,17 +469,21 @@ def aggregate(self, func, *args, **kwargs): def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): if maybe_use_numba(engine): if self.method == "single": - ewma_func = generate_numba_ewma_func( - engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas - ) - numba_cache_key = (lambda x: x, "ewma") + func = generate_numba_ewm_func + numba_cache_key = (lambda x: x, "ewm_mean") else: - ewma_func = generate_ewma_numba_table_func( - engine_kwargs, self._com, self.adjust, self.ignore_na, self._deltas - ) - numba_cache_key = (lambda x: x, "ewma_table") + func = generate_numba_ewm_table_func + numba_cache_key = (lambda x: x, "ewm_mean_table") + ewm_func = func( + engine_kwargs=engine_kwargs, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=self._deltas, + normalize=True, + ) return self._apply( - ewma_func, + ewm_func, numba_cache_key=numba_cache_key, ) elif engine in ("cython", None): @@ -489,11 +493,68 @@ def mean(self, *args, engine=None, engine_kwargs=None, **kwargs): deltas = None if self.times is None else self._deltas window_func = partial( - window_aggregations.ewma, + window_aggregations.ewm, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=deltas, + normalize=True, + ) + return self._apply(window_func) + else: + raise ValueError("engine must be either 'numba' or 'cython'") + + @doc( + template_header, + create_section_header("Parameters"), + args_compat, + window_agg_numba_parameters, + kwargs_compat, + create_section_header("Returns"), + template_returns, + create_section_header("See Also"), + template_see_also, + create_section_header("Notes"), + numba_notes.replace("\n", "", 1), + window_method="ewm", + aggregation_description="(exponential weighted moment) sum", + agg_method="sum", + ) + def sum(self, *args, engine=None, engine_kwargs=None, **kwargs): + if not self.adjust: + raise NotImplementedError("sum is not implemented with adjust=False") + if maybe_use_numba(engine): + if self.method == "single": + func = generate_numba_ewm_func + numba_cache_key = (lambda x: x, "ewm_sum") + else: + func = generate_numba_ewm_table_func + numba_cache_key = (lambda x: x, "ewm_sum_table") + ewm_func = func( + engine_kwargs=engine_kwargs, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=self._deltas, + normalize=False, + ) + return self._apply( + ewm_func, + numba_cache_key=numba_cache_key, + ) + elif engine in ("cython", None): + if engine_kwargs is not None: + raise ValueError("cython engine does not accept 
engine_kwargs") + nv.validate_window_func("sum", args, kwargs) + + deltas = None if self.times is None else self._deltas + window_func = partial( + window_aggregations.ewm, com=self._com, adjust=self.adjust, ignore_na=self.ignore_na, deltas=deltas, + normalize=False, ) return self._apply(window_func) else: diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index ab1eb9d3a2688..f41711a4d1f19 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -80,15 +80,16 @@ def roll_apply( return roll_apply -def generate_numba_ewma_func( +def generate_numba_ewm_func( engine_kwargs: dict[str, bool] | None, com: float, adjust: bool, ignore_na: bool, deltas: np.ndarray, + normalize: bool, ): """ - Generate a numba jitted ewma function specified by values + Generate a numba jitted ewm mean or sum function specified by values from engine_kwargs. Parameters @@ -99,6 +100,7 @@ def generate_numba_ewma_func( adjust : bool ignore_na : bool deltas : numpy.ndarray + normalize : bool Returns ------- @@ -106,14 +108,15 @@ def generate_numba_ewma_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs) - cache_key = (lambda x: x, "ewma") + str_key = "ewm_mean" if normalize else "ewm_sum" + cache_key = (lambda x: x, str_key) if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] numba = import_optional_dependency("numba") @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) - def ewma( + def ewm( values: np.ndarray, begin: np.ndarray, end: np.ndarray, @@ -130,43 +133,47 @@ def ewma( window = values[start:stop] sub_result = np.empty(len(window)) - weighted_avg = window[0] - nobs = int(not np.isnan(weighted_avg)) - sub_result[0] = weighted_avg if nobs >= minimum_periods else np.nan + weighted = window[0] + nobs = int(not np.isnan(weighted)) + sub_result[0] = weighted if nobs >= minimum_periods else np.nan old_wt = 1.0 for j in range(1, len(window)): cur = window[j] is_observation = not np.isnan(cur) nobs += is_observation - if not np.isnan(weighted_avg): + if not np.isnan(weighted): if is_observation or not ignore_na: - - # note that len(deltas) = len(vals) - 1 and deltas[i] is to be - # used in conjunction with vals[i+1] - old_wt *= old_wt_factor ** deltas[start + j - 1] + if normalize: + # note that len(deltas) = len(vals) - 1 and deltas[i] + # is to be used in conjunction with vals[i+1] + old_wt *= old_wt_factor ** deltas[start + j - 1] + else: + weighted = old_wt_factor * weighted if is_observation: - - # avoid numerical errors on constant series - if weighted_avg != cur: - weighted_avg = ( - (old_wt * weighted_avg) + (new_wt * cur) - ) / (old_wt + new_wt) - if adjust: - old_wt += new_wt + if normalize: + # avoid numerical errors on constant series + if weighted != cur: + weighted = old_wt * weighted + new_wt * cur + if normalize: + weighted = weighted / (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1.0 else: - old_wt = 1.0 + weighted += cur elif is_observation: - weighted_avg = cur + weighted = cur - sub_result[j] = weighted_avg if nobs >= minimum_periods else np.nan + sub_result[j] = weighted if nobs >= minimum_periods else np.nan result[start:stop] = sub_result return result - return ewma + return ewm def generate_numba_table_func( @@ -252,15 +259,16 @@ def nan_agg_with_axis(table): return nan_agg_with_axis -def generate_ewma_numba_table_func( +def generate_numba_ewm_table_func( engine_kwargs: dict[str, bool] | None, com: float, adjust: bool, ignore_na: bool, deltas: np.ndarray, + normalize: bool, ): """ - 
Generate a numba jitted ewma function applied table wise specified + Generate a numba jitted ewm mean or sum function applied table wise specified by values from engine_kwargs. Parameters @@ -271,6 +279,7 @@ def generate_ewma_numba_table_func( adjust : bool ignore_na : bool deltas : numpy.ndarray + normalize: bool Returns ------- @@ -278,14 +287,15 @@ def generate_ewma_numba_table_func( """ nopython, nogil, parallel = get_jit_arguments(engine_kwargs) - cache_key = (lambda x: x, "ewma_table") + str_key = "ewm_mean_table" if normalize else "ewm_sum_table" + cache_key = (lambda x: x, str_key) if cache_key in NUMBA_FUNC_CACHE: return NUMBA_FUNC_CACHE[cache_key] numba = import_optional_dependency("numba") @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) - def ewma_table( + def ewm_table( values: np.ndarray, begin: np.ndarray, end: np.ndarray, @@ -297,35 +307,42 @@ def ewma_table( old_wt = np.ones(values.shape[1]) result = np.empty(values.shape) - weighted_avg = values[0].copy() - nobs = (~np.isnan(weighted_avg)).astype(np.int64) - result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) + weighted = values[0].copy() + nobs = (~np.isnan(weighted)).astype(np.int64) + result[0] = np.where(nobs >= minimum_periods, weighted, np.nan) for i in range(1, len(values)): cur = values[i] is_observations = ~np.isnan(cur) nobs += is_observations.astype(np.int64) for j in numba.prange(len(cur)): - if not np.isnan(weighted_avg[j]): + if not np.isnan(weighted[j]): if is_observations[j] or not ignore_na: - - # note that len(deltas) = len(vals) - 1 and deltas[i] is to be - # used in conjunction with vals[i+1] - old_wt[j] *= old_wt_factor ** deltas[i - 1] + if normalize: + # note that len(deltas) = len(vals) - 1 and deltas[i] + # is to be used in conjunction with vals[i+1] + old_wt[j] *= old_wt_factor ** deltas[i - 1] + else: + weighted[j] = old_wt_factor * weighted[j] if is_observations[j]: - # avoid numerical errors on constant series - if weighted_avg[j] != cur[j]: - weighted_avg[j] = ( - (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j]) - ) / (old_wt[j] + new_wt) - if adjust: - old_wt[j] += new_wt + if normalize: + # avoid numerical errors on constant series + if weighted[j] != cur[j]: + weighted[j] = ( + old_wt[j] * weighted[j] + new_wt * cur[j] + ) + if normalize: + weighted[j] = weighted[j] / (old_wt[j] + new_wt) + if adjust: + old_wt[j] += new_wt + else: + old_wt[j] = 1.0 else: - old_wt[j] = 1.0 + weighted[j] += cur[j] elif is_observations[j]: - weighted_avg[j] = cur[j] + weighted[j] = cur[j] - result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan) + result[i] = np.where(nobs >= minimum_periods, weighted, np.nan) return result - return ewma_table + return ewm_table diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 5579444f99bbb..4cb5d0342572b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -241,3 +241,22 @@ def test_times_string_col_deprecated(): result = df.ewm(halflife="1 day", min_periods=0, times="time_col").mean() expected = df.ewm(halflife=1.0, min_periods=0).mean() tm.assert_frame_equal(result, expected) + + +def test_ewm_sum_adjust_false_notimplemented(): + data = Series(range(1)).ewm(com=1, adjust=False) + with pytest.raises(NotImplementedError, match="sum is not"): + data.sum() + + +@pytest.mark.parametrize( + "expected_data, ignore", + [[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]], +) +def test_ewm_sum(expected_data, ignore): + # xref from Numbagg tests + # 
https://github.com/numbagg/numbagg/blob/v0.2.1/numbagg/test/test_moving.py#L50
+    data = Series([10, 0, np.nan, 10])
+    result = data.ewm(alpha=0.5, ignore_na=ignore).sum()
+    expected = Series(expected_data)
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py
index af2ca7270c982..d47b3e856cb25 100644
--- a/pandas/tests/window/test_numba.py
+++ b/pandas/tests/window/test_numba.py
@@ -159,28 +159,31 @@ def add(values, x):


 @td.skip_if_no("numba")
-class TestEWMMean:
+class TestEWM:
     @pytest.mark.parametrize(
         "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
     )
-    def test_invalid_engine(self, grouper):
+    @pytest.mark.parametrize("method", ["mean", "sum"])
+    def test_invalid_engine(self, grouper, method):
         df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
         with pytest.raises(ValueError, match="engine must be either"):
-            grouper(df).ewm(com=1.0).mean(engine="foo")
+            getattr(grouper(df).ewm(com=1.0), method)(engine="foo")

     @pytest.mark.parametrize(
         "grouper", [lambda x: x, lambda x: x.groupby("A")], ids=["None", "groupby"]
     )
-    def test_invalid_engine_kwargs(self, grouper):
+    @pytest.mark.parametrize("method", ["mean", "sum"])
+    def test_invalid_engine_kwargs(self, grouper, method):
         df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
         with pytest.raises(ValueError, match="cython engine does not"):
-            grouper(df).ewm(com=1.0).mean(
+            getattr(grouper(df).ewm(com=1.0), method)(
                 engine="cython", engine_kwargs={"nopython": True}
             )

     @pytest.mark.parametrize("grouper", ["None", "groupby"])
+    @pytest.mark.parametrize("method", ["mean", "sum"])
     def test_cython_vs_numba(
-        self, grouper, nogil, parallel, nopython, ignore_na, adjust
+        self, grouper, method, nogil, parallel, nopython, ignore_na, adjust
     ):
         if grouper == "None":
             grouper = lambda x: x
@@ -188,15 +191,16 @@ def test_cython_vs_numba(
         else:
             grouper = lambda x: x.groupby("A")
         warn = None
-
+        if method == "sum":
+            adjust = True
         df = DataFrame({"A": ["a", "b", "a", "b"], "B": range(4)})
         ewm = grouper(df).ewm(com=1.0, adjust=adjust, ignore_na=ignore_na)
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}
         with tm.assert_produces_warning(warn, match="nuisance"):
             # GH#42738
-            result = ewm.mean(engine="numba", engine_kwargs=engine_kwargs)
-            expected = ewm.mean(engine="cython")
+            result = getattr(ewm, method)(engine="numba", engine_kwargs=engine_kwargs)
+            expected = getattr(ewm, method)(engine="cython")

         tm.assert_frame_equal(result, expected)

@@ -358,15 +362,16 @@ def test_table_method_expanding_methods(
         tm.assert_frame_equal(result, expected)

     @pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))])
-    def test_table_method_ewm(self, data, axis, nogil, parallel, nopython):
+    @pytest.mark.parametrize("method", ["mean", "sum"])
+    def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython):
         engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython}

         df = DataFrame(data)

-        result = df.ewm(com=1, method="table", axis=axis).mean(
+        result = getattr(df.ewm(com=1, method="table", axis=axis), method)(
             engine_kwargs=engine_kwargs, engine="numba"
         )
-        expected = df.ewm(com=1, method="single", axis=axis).mean(
+        expected = getattr(df.ewm(com=1, method="single", axis=axis), method)(
             engine_kwargs=engine_kwargs, engine="numba"
         )
         tm.assert_frame_equal(result, expected)
From f157d4d77f35ac0a9296e0a5ef6b5132b6300eed Mon Sep 17 00:00:00 2001
From: Matthew Zeitlin
<37011898+mzeitlin11@users.noreply.github.com>
Date: Wed, 6 Oct 2021 09:29:09 -0400
Subject: [PATCH 22/41] TST: slow collection in test_algos.py (#43898)

---
 pandas/tests/test_algos.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 5488c076554fd..9a9cd9fa4baaa 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1797,13 +1797,13 @@ def test_too_many_ndims(self):

     @pytest.mark.single
     @pytest.mark.high_memory
-    @pytest.mark.parametrize(
-        "values",
-        [np.arange(2 ** 24 + 1), np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2)],
-        ids=["1d", "2d"],
-    )
-    def test_pct_max_many_rows(self, values):
+    def test_pct_max_many_rows(self):
         # GH 18271
+        values = np.arange(2 ** 24 + 1)
+        result = algos.rank(values, pct=True).max()
+        assert result == 1
+
+        values = np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2)
         result = algos.rank(values, pct=True).max()
         assert result == 1

From cdc7b4a86a1e63c03eb437eecd11c1396525e79a Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Wed, 6 Oct 2021 11:32:42 -0700
Subject: [PATCH 23/41] ENH: implement ExtensionArray.__array_ufunc__ (#43899)

---
 doc/source/whatsnew/v1.4.0.rst            |  2 +-
 pandas/core/arraylike.py                  | 20 +++++++++++++-
 pandas/core/arrays/base.py                | 15 +++++++++++
 pandas/core/arrays/boolean.py             |  3 +++
 pandas/tests/arrays/boolean/test_ops.py   |  7 +++++
 pandas/tests/arrays/test_timedeltas.py    | 19 ++++++++++++++
 pandas/tests/extension/arrow/test_bool.py |  5 +++-
 pandas/tests/extension/base/ops.py        | 32 ++++++++++++++++++++---
 8 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index daf0d0d000079..22b49c35e0e68 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -522,7 +522,7 @@ Sparse

 ExtensionArray
 ^^^^^^^^^^^^^^
--
+- NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__``, ``__pos__``, and ``__neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
 -

 Styler
diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py
index f114278caf3ee..3d209189d97d8 100644
--- a/pandas/core/arraylike.py
+++ b/pandas/core/arraylike.py
@@ -371,6 +371,8 @@ def reconstruct(result):
         # * len(inputs) > 1 is doable when we know that we have
         #   aligned blocks / dtypes.
         inputs = tuple(np.asarray(x) for x in inputs)
+        # Note: we can't use default_array_ufunc here bc reindexing means
+        # that `self` may not be among `inputs`
         result = getattr(ufunc, method)(*inputs, **kwargs)
     elif self.ndim == 1:
         # ufunc(series, ...)
@@ -387,7 +389,7 @@ def reconstruct(result):
     else:
         # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
         # Those can have an axis keyword and thus can't be called block-by-block
-        result = getattr(ufunc, method)(np.asarray(inputs[0]), **kwargs)
+        result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)

     result = reconstruct(result)
     return result
@@ -452,3 +454,19 @@ def _assign_where(out, result, where) -> None:
         out[:] = result
     else:
         np.putmask(out, where, result)
+
+
+def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+    """
+    Fallback to the behavior we would get if we did not define __array_ufunc__.
+
+    Notes
+    -----
+    We are assuming that `self` is among `inputs`.
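+
+    For example, if an ExtensionArray ``arr`` appears among ``inputs``,
+    ``default_array_ufunc(arr, np.exp, "__call__", arr)`` behaves like
+    ``np.exp(np.asarray(arr))``.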
+ """ + if not any(x is self for x in inputs): + raise NotImplementedError + + new_inputs = [x if x is not self else np.asarray(x) for x in inputs] + + return getattr(ufunc, method)(*new_inputs, **kwargs) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8ee5a4a2d913a..b17f309e5f9fb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -65,6 +65,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import ( + arraylike, missing, ops, ) @@ -1366,6 +1367,20 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype): ) return result + def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + if any( + isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs + ): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) + class ExtensionOpsMixin: """ diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 69896a389102f..1df7c191bdb68 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -604,3 +604,6 @@ def _maybe_mask_result(self, result, mask, other, op_name: str): else: result[mask] = np.nan return result + + def __abs__(self): + return self.copy() diff --git a/pandas/tests/arrays/boolean/test_ops.py b/pandas/tests/arrays/boolean/test_ops.py index 52f602258a049..95ebe8528c2e5 100644 --- a/pandas/tests/arrays/boolean/test_ops.py +++ b/pandas/tests/arrays/boolean/test_ops.py @@ -18,3 +18,10 @@ def test_invert(self): {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] ) tm.assert_frame_equal(result, expected) + + def test_abs(self): + # matching numpy behavior, abs is the identity function + arr = pd.array([True, False, None], dtype="boolean") + result = abs(arr) + + tm.assert_extension_array_equal(result, arr) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 9e2b8e0f1603e..98329776242f1 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -90,6 +90,19 @@ def test_abs(self): result = abs(arr) tm.assert_timedelta_array_equal(result, expected) + result2 = np.abs(arr) + tm.assert_timedelta_array_equal(result2, expected) + + def test_pos(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + result = +arr + tm.assert_timedelta_array_equal(result, arr) + + result2 = np.positive(arr) + tm.assert_timedelta_array_equal(result2, arr) + def test_neg(self): vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") arr = TimedeltaArray(vals) @@ -100,6 +113,9 @@ def test_neg(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) + def test_neg_freq(self): tdi = pd.timedelta_range("2 Days", periods=4, freq="H") arr = TimedeltaArray(tdi, freq=tdi.freq) @@ -108,3 +124,6 @@ def test_neg_freq(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + + result2 = np.negative(arr) + tm.assert_timedelta_array_equal(result2, expected) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 6a16433aa0a32..d262f09182a9c 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -54,7 
+54,10 @@ def test_view(self, data): # __setitem__ does not work, so we only have a smoke-test data.view() - @pytest.mark.xfail(raises=AssertionError, reason="Not implemented yet") + @pytest.mark.xfail( + raises=AttributeError, + reason="__eq__ incorrectly returns bool instead of ndarray[bool]", + ) def test_contains(self, data, data_missing): super().test_contains(data, data_missing) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index ca22973d0b4d3..e9ceec3a3d7e6 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pytest import pandas as pd @@ -128,11 +129,13 @@ class BaseComparisonOpsTests(BaseOpsUtil): """Various Series and DataFrame comparison ops methods.""" def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) - if op_name == "__eq__": - assert not op(s, other).all() - elif op_name == "__ne__": - assert op(s, other).all() + if op_name in ["__eq__", "__ne__"]: + # comparison should match point-wise comparisons + result = op(s, other) + expected = s.combine(other, op) + self.assert_series_equal(result, expected) else: @@ -182,3 +185,24 @@ def test_invert(self, data): result = ~s expected = pd.Series(~data, name="name") self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ufunc", [np.positive, np.negative, np.abs]) + def test_unary_ufunc_dunder_equivalence(self, data, ufunc): + # the dunder __pos__ works if and only if np.positive works, + # same for __neg__/np.negative and __abs__/np.abs + attr = {np.positive: "__pos__", np.negative: "__neg__", np.abs: "__abs__"}[ + ufunc + ] + + exc = None + try: + result = getattr(data, attr)() + except Exception as err: + exc = err + + # if __pos__ raised, then so should the ufunc + with pytest.raises((type(exc), TypeError)): + ufunc(data) + else: + alt = ufunc(data) + self.assert_extension_array_equal(result, alt) From 2688ca8c226609a5d08f8d4e61e2570a3332bc04 Mon Sep 17 00:00:00 2001 From: realead Date: Thu, 7 Oct 2021 02:36:47 +0200 Subject: [PATCH 24/41] [ENH] introducing IntpHashMap and making unique_label_indices use intp (#40653) --- pandas/_libs/hashtable.pyi | 1 + pandas/_libs/hashtable.pyx | 47 ++++++---------------- pandas/_libs/hashtable_func_helper.pxi.in | 48 +++++++++++++++++++++++ pandas/core/sorting.py | 3 +- pandas/tests/libs/test_hashtable.py | 10 +++++ pandas/tests/test_algos.py | 2 +- 6 files changed, 73 insertions(+), 38 deletions(-) diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi index bf7df5776896b..9c1de67a7ba2a 100644 --- a/pandas/_libs/hashtable.pyi +++ b/pandas/_libs/hashtable.pyi @@ -192,6 +192,7 @@ class UInt16HashTable(HashTable): ... class UInt8HashTable(HashTable): ... class StringHashTable(HashTable): ... class PyObjectHashTable(HashTable): ... +class IntpHashTable(HashTable): ... 
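+
+# At runtime hashtable.pyx aliases IntpHashTable to Int64HashTable or
+# Int32HashTable, matching the width of np.intp on the platform.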
def duplicated_int64( values: np.ndarray, # const int64_t[:] values diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 3eb7bcc673cd4..6e97c13c644cf 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -65,6 +65,18 @@ cdef Py_ssize_t _INIT_VEC_CAP = 128 include "hashtable_class_helper.pxi" include "hashtable_func_helper.pxi" + +# map derived hash-map types onto basic hash-map types: +if np.dtype(np.intp) == np.dtype(np.int64): + IntpHashTable = Int64HashTable + unique_label_indices = _unique_label_indices_int64 +elif np.dtype(np.intp) == np.dtype(np.int32): + IntpHashTable = Int32HashTable + unique_label_indices = _unique_label_indices_int32 +else: + raise ValueError(np.dtype(np.intp)) + + cdef class Factorizer: cdef readonly: Py_ssize_t count @@ -168,38 +180,3 @@ cdef class Int64Factorizer(Factorizer): self.count = len(self.uniques) return labels - - -@cython.wraparound(False) -@cython.boundscheck(False) -def unique_label_indices(const int64_t[:] labels) -> ndarray: - """ - Indices of the first occurrences of the unique labels - *excluding* -1. equivalent to: - np.unique(labels, return_index=True)[1] - """ - cdef: - int ret = 0 - Py_ssize_t i, n = len(labels) - kh_int64_t *table = kh_init_int64() - Int64Vector idx = Int64Vector() - ndarray[int64_t, ndim=1] arr - Int64VectorData *ud = idx.data - - kh_resize_int64(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) - - with nogil: - for i in range(n): - kh_put_int64(table, labels[i], &ret) - if ret != 0: - if needs_resize(ud): - with gil: - idx.resize() - append_data_int64(ud, i) - - kh_destroy_int64(table) - - arr = idx.to_array() - arr = arr[np.asarray(labels)[arr].argsort()] - - return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index ceb473a0b06af..fb8ce79a924a4 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -470,3 +470,51 @@ cpdef mode(ndarray[htfunc_t] values, bint dropna): else: raise TypeError(values.dtype) + + +{{py: + +# name, dtype, ttype, c_type +dtypes = [('Int64', 'int64', 'int64', 'int64_t'), + ('Int32', 'int32', 'int32', 'int32_t'), ] + +}} + +{{for name, dtype, ttype, c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray: + """ + Indices of the first occurrences of the unique labels + *excluding* -1. 
equivalent to: + np.unique(labels, return_index=True)[1] + """ + cdef: + int ret = 0 + Py_ssize_t i, n = len(labels) + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + {{name}}Vector idx = {{name}}Vector() + ndarray[{{c_type}}, ndim=1] arr + {{name}}VectorData *ud = idx.data + + kh_resize_{{ttype}}(table, min(kh_needed_n_buckets(n), SIZE_HINT_LIMIT)) + + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, labels[i], &ret) + if ret != 0: + if needs_resize(ud): + with gil: + idx.resize() + append_data_{{ttype}}(ud, i) + + kh_destroy_{{ttype}}(table) + + arr = idx.to_array() + arr = arr[np.asarray(labels)[arr].argsort()] + + return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr + +{{endfor}} diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index ccb51a0ea2132..a8348b0c5773f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -261,8 +261,7 @@ def decons_obs_group_ids( out = decons_group_index(obs_ids, shape) return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)] - # TODO: unique_label_indices only used here, should take ndarray[np.intp] - indexer = unique_label_indices(ensure_int64(comp_ids)) + indexer = unique_label_indices(comp_ids) return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels] diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py index 5ff20051da8c0..8b7304a84c27b 100644 --- a/pandas/tests/libs/test_hashtable.py +++ b/pandas/tests/libs/test_hashtable.py @@ -44,6 +44,7 @@ def get_allocated_khash_memory(): (ht.UInt16HashTable, np.uint16), (ht.Int8HashTable, np.int8), (ht.UInt8HashTable, np.uint8), + (ht.IntpHashTable, np.intp), ], ) class TestHashTable: @@ -389,6 +390,7 @@ def get_ht_function(fun_name, type_suffix): (np.uint16, "uint16"), (np.int8, "int8"), (np.uint8, "uint8"), + (np.intp, "intp"), ], ) class TestHelpFunctions: @@ -471,6 +473,14 @@ def test_modes_with_nans(): assert np.isnan(modes[0]) +def test_unique_label_indices_intp(writable): + keys = np.array([1, 2, 2, 2, 1, 3], dtype=np.intp) + keys.flags.writeable = writable + result = ht.unique_label_indices(keys) + expected = np.array([0, 1, 5], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( "dtype, type_suffix", [ diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9a9cd9fa4baaa..4a0d6f2cccc32 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1741,7 +1741,7 @@ def test_quantile(): def test_unique_label_indices(): - a = np.random.randint(1, 1 << 10, 1 << 15).astype("int64") + a = np.random.randint(1, 1 << 10, 1 << 15).astype(np.intp) left = ht.unique_label_indices(a) right = np.unique(a, return_index=True)[1] From 5fe8d7df40f90ab9869a2e37472fa38a7f66419a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Oct 2021 05:55:37 -0700 Subject: [PATCH 25/41] ENH: implement Index.__array_ufunc__ (#43904) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/arraylike.py | 4 ++-- pandas/core/arrays/base.py | 5 +++++ pandas/core/arrays/datetimelike.py | 4 ++-- pandas/core/indexes/base.py | 20 ++++++++++++++++++++ pandas/core/indexes/datetimelike.py | 9 --------- pandas/tests/arithmetic/test_datetime64.py | 15 ++++++++++++++- 7 files changed, 44 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 22b49c35e0e68..722d0dcc10041 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -390,6 +390,7 @@ Datetimelike - Bug in 
:func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`) - :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`) - Bug in :meth:`date_range` and :meth:`bdate_range` do not return right bound when ``start`` = ``end`` and set is closed on one side (:issue:`43394`) +- Bug in inplace addition and subtraction of :class:`DatetimeIndex` or :class:`TimedeltaIndex` with :class:`DatetimeArray` or :class:`TimedeltaArray` (:issue:`43904`) - Timedelta diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 3d209189d97d8..fe09a044566f8 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -357,7 +357,7 @@ def reconstruct(result): return result if "out" in kwargs: - result = _dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) + result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) return reconstruct(result) # We still get here with kwargs `axis` for e.g. np.maximum.accumulate @@ -410,7 +410,7 @@ def _standardize_out_kwarg(**kwargs) -> dict: return kwargs -def _dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): +def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): """ If we have an `out` keyword, then call the ufunc without `out` and then set the result into the given `out`. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b17f309e5f9fb..46b0a6873986e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1379,6 +1379,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): if result is not NotImplemented: return result + if "out" in kwargs: + return arraylike.dispatch_ufunc_with_out( + self, ufunc, method, *inputs, **kwargs + ) + return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2c9796e826825..1f42463cb9f2d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1414,7 +1414,7 @@ def __iadd__(self, other): if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self._freq = result._freq + self._freq = result.freq return self def __isub__(self, other): @@ -1423,7 +1423,7 @@ def __isub__(self, other): if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self._freq = result._freq + self._freq = result.freq return self # -------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2b49a88e27961..da953fe46ef1d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -102,6 +102,7 @@ PeriodDtype, ) from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCDatetimeIndex, ABCMultiIndex, ABCPeriodIndex, @@ -116,6 +117,7 @@ ) from pandas.core import ( + arraylike, missing, ops, ) @@ -844,6 +846,24 @@ def __array__(self, dtype=None) -> np.ndarray: """ return np.asarray(self._data, dtype=dtype) + def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): + if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs): + return NotImplemented + + result = arraylike.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result 
is not NotImplemented: + return result + + new_inputs = [x if x is not self else x._values for x in inputs] + result = getattr(ufunc, method)(*new_inputs, **kwargs) + if ufunc.nout == 2: + # i.e. np.divmod, np.modf, np.frexp + return tuple(self.__array_wrap__(x) for x in result) + + return self.__array_wrap__(result) + def __array_wrap__(self, result, context=None): """ Gets called after a ufunc and other functions. diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 063bb4aafeb75..48171bdef24fd 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -672,15 +672,6 @@ def insert(self, loc: int, item): # -------------------------------------------------------------------- # NDArray-Like Methods - def __array_wrap__(self, result, context=None): - """ - Gets called after a ufunc and other functions. - """ - out = super().__array_wrap__(result, context=context) - if isinstance(out, DatetimeTimedeltaMixin) and self.freq is not None: - out = out._with_freq("infer") - return out - @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take((), kwargs) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 60a58b7bbea78..0d3f7dcaaf65b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2163,6 +2163,15 @@ def test_dti_isub_tdi(self, tz_naive_fixture): result -= tdi tm.assert_index_equal(result, expected) + # DTA.__isub__ GH#43904 + dta = dti._data.copy() + dta -= tdi + tm.assert_datetime_array_equal(dta, expected._data) + + out = dti._data.copy() + np.subtract(out, tdi, out=out) + tm.assert_datetime_array_equal(out, expected._data) + msg = "cannot subtract .* from a TimedeltaArray" with pytest.raises(TypeError, match=msg): tdi -= dti @@ -2172,10 +2181,14 @@ def test_dti_isub_tdi(self, tz_naive_fixture): result -= tdi.values tm.assert_index_equal(result, expected) - msg = "cannot subtract a datelike from a TimedeltaArray" + msg = "cannot subtract DatetimeArray from ndarray" with pytest.raises(TypeError, match=msg): tdi.values -= dti + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi._values -= dti + # ------------------------------------------------------------- # Binary Operations DatetimeIndex and datetime-like # TODO: A couple other tests belong in this section. 
Move them in From a49977c7097e244832d808dbde7b069edad2cf82 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Oct 2021 05:57:06 -0700 Subject: [PATCH 26/41] TST/REF: share/split index tests (#43905) --- pandas/tests/indexes/datetimelike.py | 12 ++++++++ .../tests/indexes/datetimes/test_datetime.py | 12 -------- .../tests/indexes/datetimes/test_indexing.py | 18 ++++++----- pandas/tests/indexes/datetimes/test_misc.py | 9 ++++++ pandas/tests/indexes/period/test_period.py | 10 ------- .../tests/indexes/timedeltas/test_pickle.py | 11 +++++++ .../indexes/timedeltas/test_timedelta.py | 30 ++++--------------- 7 files changed, 48 insertions(+), 54 deletions(-) create mode 100644 pandas/tests/indexes/timedeltas/test_pickle.py diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 70156092eeabe..ecdbf01fd41c1 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -9,6 +9,18 @@ class DatetimeLike(Base): + def test_isin(self, simple_index): + index = simple_index[:4] + result = index.isin(index) + assert result.all() + + result = index.isin(list(index)) + assert result.all() + + result = index.isin([index[2], 5]) + expected = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + def test_argsort_matches_array(self, simple_index): idx = simple_index idx = idx.insert(1, pd.NaT) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 17b80fbc0afc2..b220ce486f80b 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -154,18 +154,6 @@ def test_groupby_function_tuple_1677(self): result = monthly_group.mean() assert isinstance(result.index[0], tuple) - def test_isin(self): - index = tm.makeDateIndex(4) - result = index.isin(index) - assert result.all() - - result = index.isin(list(index)) - assert result.all() - - tm.assert_almost_equal( - index.isin([index[2], 5]), np.array([False, False, True, False]) - ) - def assert_index_parameters(self, index): assert index.freq == "40960N" assert index.inferred_freq == "40960N" diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 6eaf799ae2779..4ad85f7d4e30f 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -485,19 +485,23 @@ def test_get_loc(self): with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): idx.get_loc(slice(2)) - idx = pd.to_datetime(["2000-01-01", "2000-01-04"]) + idx = DatetimeIndex(["2000-01-01", "2000-01-04"]) assert idx.get_loc("2000-01-02", method="nearest") == 0 assert idx.get_loc("2000-01-03", method="nearest") == 1 assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) + def test_get_loc_time_obj(self): # time indexing idx = date_range("2000-01-01", periods=24, freq="H") - tm.assert_numpy_array_equal( - idx.get_loc(time(12)), np.array([12]), check_dtype=False - ) - tm.assert_numpy_array_equal( - idx.get_loc(time(12, 30)), np.array([]), check_dtype=False - ) + + result = idx.get_loc(time(12)) + expected = np.array([12]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + result = idx.get_loc(time(12, 30)) + expected = np.array([]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + msg = "cannot yet lookup inexact labels when key is a time object" with pytest.raises(NotImplementedError, match=msg): with 
tm.assert_produces_warning(FutureWarning, match="deprecated"): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 408ed2db316ca..647f7739b482a 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -156,6 +156,15 @@ def test_range_edges9(self): class TestDatetime64: + def test_no_millisecond_field(self): + msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + DatetimeIndex.millisecond + + msg = "'DatetimeIndex' object has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + DatetimeIndex([]).millisecond + def test_datetimeindex_accessors(self): dti_naive = date_range(freq="D", start=datetime(1998, 1, 1), periods=365) # GH#13303 diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 83c82c18f3d1e..e0f794a188ba3 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -4,7 +4,6 @@ from pandas._libs.tslibs.period import IncompatibleFrequency from pandas import ( - DatetimeIndex, Index, NaT, Period, @@ -49,15 +48,6 @@ def test_where(self): # This is handled in test_indexing pass - def test_no_millisecond_field(self): - msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" - with pytest.raises(AttributeError, match=msg): - DatetimeIndex.millisecond - - msg = "'DatetimeIndex' object has no attribute 'millisecond'" - with pytest.raises(AttributeError, match=msg): - DatetimeIndex([]).millisecond - def test_make_time_series(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") series = Series(1, index=index) diff --git a/pandas/tests/indexes/timedeltas/test_pickle.py b/pandas/tests/indexes/timedeltas/test_pickle.py new file mode 100644 index 0000000000000..befe709728bdd --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_pickle.py @@ -0,0 +1,11 @@ +from pandas import timedelta_range +import pandas._testing as tm + + +class TestPickle: + def test_pickle_after_set_freq(self): + tdi = timedelta_range("1 day", periods=4, freq="s") + tdi = tdi._with_freq(None) + + res = tm.round_trip_pickle(tdi) + tm.assert_index_equal(res, tdi) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 33f0565c0b23b..952036428d3c9 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -7,10 +7,10 @@ from pandas import ( Index, Int64Index, + NaT, Series, Timedelta, TimedeltaIndex, - date_range, timedelta_range, ) import pandas._testing as tm @@ -42,26 +42,6 @@ def test_numeric_compat(self): def test_shift(self): pass # this is handled in test_arithmetic.py - def test_pickle_after_set_freq(self): - tdi = timedelta_range("1 day", periods=4, freq="s") - tdi = tdi._with_freq(None) - - res = tm.round_trip_pickle(tdi) - tm.assert_index_equal(res, tdi) - - def test_isin(self): - - index = tm.makeTimedeltaIndex(4) - result = index.isin(index) - assert result.all() - - result = index.isin(list(index)) - assert result.all() - - tm.assert_almost_equal( - index.isin([index[2], 5]), np.array([False, False, True, False]) - ) - def test_misc_coverage(self): rng = timedelta_range("1 day", periods=5) @@ -140,11 +120,11 @@ def test_freq_conversion(self): # doc example # series - td = Series(date_range("20130101", periods=4)) - Series( - date_range("20121201", periods=4) + 
scalar = Timedelta(days=31) + td = Series( + [scalar, scalar, scalar + timedelta(minutes=5, seconds=3), NaT], + dtype="m8[ns]", ) - td[2] += timedelta(minutes=5, seconds=3) - td[3] = np.nan result = td / np.timedelta64(1, "D") expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) From 47791717a25654355235fe85cd19bdbf158591c1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Oct 2021 05:57:46 -0700 Subject: [PATCH 27/41] TST/REF: misplaced Index.putmask tests (#43906) --- pandas/tests/indexes/interval/test_base.py | 27 ---------------- .../tests/indexes/interval/test_indexing.py | 26 ++++++++++++++++ pandas/tests/indexes/multi/test_indexing.py | 31 +++++++++++++------ pandas/tests/indexes/multi/test_putmask.py | 17 ---------- pandas/tests/indexes/numeric/test_indexing.py | 2 +- pandas/tests/indexes/ranges/test_indexing.py | 14 +++++++++ pandas/tests/indexes/ranges/test_setops.py | 14 --------- 7 files changed, 63 insertions(+), 68 deletions(-) delete mode 100644 pandas/tests/indexes/multi/test_putmask.py diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index 3589fe726b3bb..aa88bca2faec9 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -4,7 +4,6 @@ from pandas import ( IntervalIndex, Series, - date_range, ) import pandas._testing as tm from pandas.tests.indexes.common import Base @@ -66,29 +65,3 @@ def test_getitem_2d_deprecated(self, simple_index): with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): with tm.assert_produces_warning(FutureWarning): idx[:, None] - - -class TestPutmask: - @pytest.mark.parametrize("tz", ["US/Pacific", None]) - def test_putmask_dt64(self, tz): - # GH#37968 - dti = date_range("2016-01-01", periods=9, tz=tz) - idx = IntervalIndex.from_breaks(dti) - mask = np.zeros(idx.shape, dtype=bool) - mask[0:3] = True - - result = idx.putmask(mask, idx[-1]) - expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:])) - tm.assert_index_equal(result, expected) - - def test_putmask_td64(self): - # GH#37968 - dti = date_range("2016-01-01", periods=9) - tdi = dti - dti[0] - idx = IntervalIndex.from_breaks(tdi) - mask = np.zeros(idx.shape, dtype=bool) - mask[0:3] = True - - result = idx.putmask(mask, idx[-1]) - expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:])) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index aa3359d775c5a..8df8eef69e9c9 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -497,3 +497,29 @@ def test_slice_locs_with_ints_and_floats_errors(self, tuples, query): ), ): index.slice_locs(start, stop) + + +class TestPutmask: + @pytest.mark.parametrize("tz", ["US/Pacific", None]) + def test_putmask_dt64(self, tz): + # GH#37968 + dti = date_range("2016-01-01", periods=9, tz=tz) + idx = IntervalIndex.from_breaks(dti) + mask = np.zeros(idx.shape, dtype=bool) + mask[0:3] = True + + result = idx.putmask(mask, idx[-1]) + expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:])) + tm.assert_index_equal(result, expected) + + def test_putmask_td64(self): + # GH#37968 + dti = date_range("2016-01-01", periods=9) + tdi = dti - dti[0] + idx = IntervalIndex.from_breaks(tdi) + mask = np.zeros(idx.shape, dtype=bool) + mask[0:3] = True + + result = idx.putmask(mask, idx[-1]) + expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:])) + 
tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index e142cbf89f1bd..405b41c829a2f 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -136,18 +136,31 @@ def test_slice_locs_with_missing_value( assert result == expected -def test_putmask_with_wrong_mask(idx): - # GH18368 +class TestPutmask: + def test_putmask_with_wrong_mask(self, idx): + # GH18368 - msg = "putmask: mask and data must be the same size" - with pytest.raises(ValueError, match=msg): - idx.putmask(np.ones(len(idx) + 1, np.bool_), 1) + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) + 1, np.bool_), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) - 1, np.bool_), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask("foo", 1) + + def test_putmask_multiindex_other(self): + # GH#43212 `value` is also a MultiIndex + + left = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)]) + right = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)]) + mask = np.array([True, True, False]) - with pytest.raises(ValueError, match=msg): - idx.putmask(np.ones(len(idx) - 1, np.bool_), 1) + result = left.putmask(mask, right) - with pytest.raises(ValueError, match=msg): - idx.putmask("foo", 1) + expected = MultiIndex.from_tuples([right[0], right[1], left[2]]) + tm.assert_index_equal(result, expected) class TestGetIndexer: diff --git a/pandas/tests/indexes/multi/test_putmask.py b/pandas/tests/indexes/multi/test_putmask.py deleted file mode 100644 index 2a24be9003302..0000000000000 --- a/pandas/tests/indexes/multi/test_putmask.py +++ /dev/null @@ -1,17 +0,0 @@ -import numpy as np - -from pandas import MultiIndex -import pandas._testing as tm - - -def test_putmask_multiindex_other(): - # GH#43212 `value` is also a MultiIndex - - left = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)]) - right = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)]) - mask = np.array([True, True, False]) - - result = left.putmask(mask, right) - - expected = MultiIndex.from_tuples([right[0], right[1], left[2]]) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 4621cbcb9d462..be05d5d8a9cae 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -406,7 +406,7 @@ def test_where(self, klass, index): result = index.where(klass(cond)) tm.assert_index_equal(result, expected) - def test_where_uin64(self): + def test_where_uint64(self): idx = UInt64Index([0, 6, 2]) mask = np.array([False, True, False]) other = np.array([1], dtype=np.int64) diff --git a/pandas/tests/indexes/ranges/test_indexing.py b/pandas/tests/indexes/ranges/test_indexing.py index b46354939f3c5..f8c3eff0ab80a 100644 --- a/pandas/tests/indexes/ranges/test_indexing.py +++ b/pandas/tests/indexes/ranges/test_indexing.py @@ -77,3 +77,17 @@ def test_take_fill_value(self): msg = "index -5 is out of bounds for (axis 0 with )?size 3" with pytest.raises(IndexError, match=msg): idx.take(np.array([1, -5])) + + +class TestWhere: + def test_where_putmask_range_cast(self): + # GH#43240 + idx = RangeIndex(0, 5, name="test") + + mask = np.array([True, True, False, False, False]) + result = idx.putmask(mask, 10) + expected = Int64Index([10, 10, 2, 3, 4], name="test") + 
tm.assert_index_equal(result, expected)
+
+        result = idx.where(~mask, 10)
+        tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py
index 210bcd300b1b0..ba938f82e9d89 100644
--- a/pandas/tests/indexes/ranges/test_setops.py
+++ b/pandas/tests/indexes/ranges/test_setops.py
@@ -354,17 +354,3 @@ def test_symmetric_difference(self):
         result = left.symmetric_difference(right[1:])
         expected = Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14])
         tm.assert_index_equal(result, expected)
-
-    def test_putmask_range_cast(self):
-        # GH#43240
-        idx = RangeIndex(0, 5, name="test")
-        result = idx.putmask(np.array([True, True, False, False, False]), 10)
-        expected = Index([10, 10, 2, 3, 4], name="test")
-        tm.assert_index_equal(result, expected)
-
-    def test_where_range_cast(self):
-        # GH#43240
-        idx = RangeIndex(0, 5, name="test")
-        result = idx.where(np.array([False, False, True, True, True]), 10)
-        expected = Index([10, 10, 2, 3, 4], name="test")
-        tm.assert_index_equal(result, expected)
From d4ae657dfe22cde2825eb595d71db9436beb8635 Mon Sep 17 00:00:00 2001
From: Jernej Makovsek
Date: Thu, 7 Oct 2021 14:58:24 +0200
Subject: [PATCH 28/41] Add clarifications to the docs regarding `to_feather` (#43866)

---
 pandas/core/frame.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2eb66c7db0ba6..4db99c5d7f074 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2565,6 +2565,13 @@ def to_feather(self, path: FilePathOrBuffer[AnyStr], **kwargs) -> None:
             `compression_level`, `chunksize` and `version` keywords.

             .. versionadded:: 1.1.0
+
+        Notes
+        -----
+        This function writes the dataframe as a `feather file
+        <https://arrow.apache.org/docs/python/feather.html>`_. Requires a default
+        index. For saving the DataFrame with your custom index, use a method that
+        supports custom indices, e.g. ``to_parquet``.
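+
+        For example, ``df.reset_index().to_feather(path)`` first moves the
+        index into an ordinary column, leaving a default index that feather
+        can store.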
""" from pandas.io.feather_format import to_feather From bde9b111a0ba9def1fb64064de898cfde4682255 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Oct 2021 15:02:09 -0700 Subject: [PATCH 29/41] TST/REF: collect/de-dup index tests (#43914) --- pandas/tests/frame/methods/test_reindex.py | 32 +++++ .../tests/indexes/categorical/test_reindex.py | 42 ------ .../indexes/datetimes/test_date_range.py | 37 ++++- pandas/tests/indexes/datetimes/test_misc.py | 136 ------------------ .../indexes/period/test_partial_slicing.py | 26 ++++ pandas/tests/indexes/period/test_period.py | 25 ---- .../indexes/timedeltas/test_timedelta.py | 41 ++---- pandas/tests/series/methods/test_reindex.py | 8 ++ 8 files changed, 115 insertions(+), 232 deletions(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index c6b19547904ec..bee8025275b42 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1078,3 +1078,35 @@ def test_reindex_datetimelike_to_object(self, dtype): assert res.iloc[-1, 0] is fv assert res.iloc[-1, 1] is fv tm.assert_frame_equal(res, expected) + + @pytest.mark.parametrize( + "index_df,index_res,index_exp", + [ + ( + CategoricalIndex([], categories=["A"]), + Index(["A"]), + Index(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + Index(["B"]), + Index(["B"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["A"]), + CategoricalIndex(["A"]), + ), + ( + CategoricalIndex([], categories=["A"]), + CategoricalIndex(["B"]), + CategoricalIndex(["B"]), + ), + ], + ) + def test_reindex_not_category(self, index_df, index_res, index_exp): + # GH#28690 + df = DataFrame(index=index_df) + result = df.reindex(index=index_res) + expected = DataFrame(index=index_exp) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index 0b81d4f88eaf8..72130ef9e4627 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -1,13 +1,10 @@ import numpy as np -import pytest from pandas import ( Categorical, CategoricalIndex, - DataFrame, Index, Interval, - Series, ) import pandas._testing as tm @@ -66,45 +63,6 @@ def test_reindex_empty_index(self): tm.assert_index_equal(res, Index(["a", "b"]), exact=True) tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) - def test_reindex_missing_category(self): - # GH: 18185 - ser = Series([1, 2, 3, 1], dtype="category") - msg = r"Cannot setitem on a Categorical with a new category \(-1\)" - with pytest.raises(TypeError, match=msg): - ser.reindex([1, 2, 3, 4, 5], fill_value=-1) - - @pytest.mark.parametrize( - "index_df,index_res,index_exp", - [ - ( - CategoricalIndex([], categories=["A"]), - Index(["A"]), - Index(["A"]), - ), - ( - CategoricalIndex([], categories=["A"]), - Index(["B"]), - Index(["B"]), - ), - ( - CategoricalIndex([], categories=["A"]), - CategoricalIndex(["A"]), - CategoricalIndex(["A"]), - ), - ( - CategoricalIndex([], categories=["A"]), - CategoricalIndex(["B"]), - CategoricalIndex(["B"]), - ), - ], - ) - def test_reindex_not_category(self, index_df, index_res, index_exp): - # GH: 28690 - df = DataFrame(index=index_df) - result = df.reindex(index=index_res) - expected = DataFrame(index=index_exp) - tm.assert_frame_equal(result, expected) - def test_reindex_categorical_added_category(self): # GH 42424 ci = CategoricalIndex( diff --git 
a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 7559d7ce645e0..80c86e0103436 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -121,6 +121,41 @@ def test_date_range_timestamp_equiv_preserve_frequency(self): class TestDateRanges: + @pytest.mark.parametrize("freq", ["N", "U", "L", "T", "S", "H", "D"]) + def test_date_range_edges(self, freq): + # GH#13672 + td = Timedelta(f"1{freq}") + ts = Timestamp("1970-01-01") + + idx = date_range( + start=ts + td, + end=ts + 4 * td, + freq=freq, + ) + exp = DatetimeIndex( + [ts + n * td for n in range(1, 5)], + freq=freq, + ) + tm.assert_index_equal(idx, exp) + + # start after end + idx = date_range( + start=ts + 4 * td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([], freq=freq) + tm.assert_index_equal(idx, exp) + + # start matches end + idx = date_range( + start=ts + td, + end=ts + td, + freq=freq, + ) + exp = DatetimeIndex([ts + td], freq=freq) + tm.assert_index_equal(idx, exp) + def test_date_range_near_implementation_bound(self): # GH#??? freq = Timedelta(1) @@ -717,7 +752,7 @@ def test_timezone_comparaison_bug(self): result = date_range(start, periods=2, tz="US/Eastern") assert len(result) == 2 - def test_timezone_comparaison_assert(self): + def test_timezone_comparison_assert(self): start = Timestamp("20130220 10:00", tz="US/Eastern") msg = "Inferred time zone not equal to passed time zone" with pytest.raises(AssertionError, match=msg): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 647f7739b482a..f0757d0ba555e 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -19,142 +19,6 @@ from pandas.core.arrays import DatetimeArray -class TestTimeSeries: - def test_range_edges(self): - # GH#13672 - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000000001"), - end=Timestamp("1970-01-01 00:00:00.000000004"), - freq="N", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:00.000000001", - "1970-01-01 00:00:00.000000002", - "1970-01-01 00:00:00.000000003", - "1970-01-01 00:00:00.000000004", - ], - freq="N", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges2(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000000004"), - end=Timestamp("1970-01-01 00:00:00.000000001"), - freq="N", - ) - exp = DatetimeIndex([], freq="N") - tm.assert_index_equal(idx, exp) - - def test_range_edges3(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000000001"), - end=Timestamp("1970-01-01 00:00:00.000000001"), - freq="N", - ) - exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"], freq="N") - tm.assert_index_equal(idx, exp) - - def test_range_edges4(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.000001"), - end=Timestamp("1970-01-01 00:00:00.000004"), - freq="U", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:00.000001", - "1970-01-01 00:00:00.000002", - "1970-01-01 00:00:00.000003", - "1970-01-01 00:00:00.000004", - ], - freq="U", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges5(self): - - idx = date_range( - start=Timestamp("1970-01-01 00:00:00.001"), - end=Timestamp("1970-01-01 00:00:00.004"), - freq="L", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:00.001", - "1970-01-01 00:00:00.002", - "1970-01-01 00:00:00.003", - "1970-01-01 00:00:00.004", - ], - freq="L", - ) - tm.assert_index_equal(idx, exp) - - def 
test_range_edges6(self): - idx = date_range( - start=Timestamp("1970-01-01 00:00:01"), - end=Timestamp("1970-01-01 00:00:04"), - freq="S", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:00:01", - "1970-01-01 00:00:02", - "1970-01-01 00:00:03", - "1970-01-01 00:00:04", - ], - freq="S", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges7(self): - idx = date_range( - start=Timestamp("1970-01-01 00:01"), - end=Timestamp("1970-01-01 00:04"), - freq="T", - ) - exp = DatetimeIndex( - [ - "1970-01-01 00:01", - "1970-01-01 00:02", - "1970-01-01 00:03", - "1970-01-01 00:04", - ], - freq="T", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges8(self): - idx = date_range( - start=Timestamp("1970-01-01 01:00"), - end=Timestamp("1970-01-01 04:00"), - freq="H", - ) - exp = DatetimeIndex( - [ - "1970-01-01 01:00", - "1970-01-01 02:00", - "1970-01-01 03:00", - "1970-01-01 04:00", - ], - freq="H", - ) - tm.assert_index_equal(idx, exp) - - def test_range_edges9(self): - idx = date_range( - start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D" - ) - exp = DatetimeIndex( - ["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"], freq="D" - ) - tm.assert_index_equal(idx, exp) - - class TestDatetime64: def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 148999d90d554..c565902d080c3 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + PeriodIndex, Series, date_range, period_range, @@ -11,6 +12,31 @@ class TestPeriodIndex: + def test_getitem_periodindex_duplicates_string_slice(self): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + assert (ts[1:3] == 1).all() + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[idx == "2007"] + tm.assert_series_equal(result, expected) + + def test_getitem_periodindex_quarter_string(self): + pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") + ser = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! 
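+        # ("05Q4" is year-first notation for 2005Q4, the same period as
+        # pi[2] == Period("4Q05", freq="Q"))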
+ assert ser["05Q4"] == ser[2] + def test_pindex_slice_index(self): pi = period_range(start="1/1/10", end="12/31/12", freq="M") s = Series(np.random.rand(len(pi)), index=pi) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e0f794a188ba3..e6c31d22e626f 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -245,25 +245,6 @@ def test_is_(self): assert not index.is_(index - 2) assert not index.is_(index - 0) - def test_index_duplicate_periods(self): - # monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts["2007"] - expected = ts[1:3] - tm.assert_series_equal(result, expected) - result[:] = 1 - assert (ts[1:3] == 1).all() - - # not monotonic - idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts["2007"] - expected = ts[idx == "2007"] - tm.assert_series_equal(result, expected) - def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") @@ -292,12 +273,6 @@ def test_pindex_fieldaccessor_nat(self): exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") tm.assert_index_equal(idx.month, exp) - def test_pindex_qaccess(self): - pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") - s = Series(np.random.rand(len(pi)), index=pi).cumsum() - # Todo: fix these accessors! - assert s["05Q4"] == s[2] - def test_pindex_multiples(self): expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 952036428d3c9..9672929ecc06b 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -115,46 +115,31 @@ def test_freq_conversion_always_floating(self): res = tdi.to_series().astype("m8[s]") tm.assert_numpy_array_equal(res._values, expected._values) - def test_freq_conversion(self): + def test_freq_conversion(self, index_or_series): # doc example - # series scalar = Timedelta(days=31) - td = Series( + td = index_or_series( [scalar, scalar, scalar + timedelta(minutes=5, seconds=3), NaT], dtype="m8[ns]", ) result = td / np.timedelta64(1, "D") - expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) - tm.assert_series_equal(result, expected) - - result = td.astype("timedelta64[D]") - expected = Series([31, 31, 31, np.nan]) - tm.assert_series_equal(result, expected) - - result = td / np.timedelta64(1, "s") - expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan]) - tm.assert_series_equal(result, expected) - - result = td.astype("timedelta64[s]") - tm.assert_series_equal(result, expected) - - # tdi - td = TimedeltaIndex(td) - - result = td / np.timedelta64(1, "D") - expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) - tm.assert_index_equal(result, expected) + expected = index_or_series( + [31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan] + ) + tm.assert_equal(result, expected) result = td.astype("timedelta64[D]") - expected = Index([31, 31, 31, np.nan]) - tm.assert_index_equal(result, expected) + expected = index_or_series([31, 31, 31, np.nan]) + tm.assert_equal(result, expected) result = td / np.timedelta64(1, "s") - expected = Index([31 * 86400, 31 * 86400, 31 
* 86400 + 5 * 60 + 3, np.nan]) - tm.assert_index_equal(result, expected) + expected = index_or_series( + [31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan] + ) + tm.assert_equal(result, expected) result = td.astype("timedelta64[s]") - tm.assert_index_equal(result, expected) + tm.assert_equal(result, expected) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 36d3971d10a3d..be9f96c8b509a 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -359,3 +359,11 @@ def test_reindex_empty_with_level(values): index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object" ) tm.assert_series_equal(result, expected) + + +def test_reindex_missing_category(): + # GH#18185 + ser = Series([1, 2, 3, 1], dtype="category") + msg = r"Cannot setitem on a Categorical with a new category \(-1\)" + with pytest.raises(TypeError, match=msg): + ser.reindex([1, 2, 3, 4, 5], fill_value=-1) From ecab3a24dc08848ecca0ca48578d919a41935e94 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Oct 2021 16:54:28 -0700 Subject: [PATCH 30/41] BENCH: indexing_engines (#43916) --- asv_bench/benchmarks/indexing_engines.py | 46 ++++++++++++++++++------ 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py index 0cbc300ee2fc4..60e07a9d1469c 100644 --- a/asv_bench/benchmarks/indexing_engines.py +++ b/asv_bench/benchmarks/indexing_engines.py @@ -35,25 +35,49 @@ class NumericEngineIndexing: params = [ _get_numeric_engines(), ["monotonic_incr", "monotonic_decr", "non_monotonic"], + [True, False], + [10 ** 5, 2 * 10 ** 6], # 2e6 is above SIZE_CUTOFF ] - param_names = ["engine_and_dtype", "index_type"] + param_names = ["engine_and_dtype", "index_type", "unique", "N"] - def setup(self, engine_and_dtype, index_type): + def setup(self, engine_and_dtype, index_type, unique, N): engine, dtype = engine_and_dtype - N = 10 ** 5 - values = list([1] * N + [2] * N + [3] * N) - arr = { - "monotonic_incr": np.array(values, dtype=dtype), - "monotonic_decr": np.array(list(reversed(values)), dtype=dtype), - "non_monotonic": np.array([1, 2, 3] * N, dtype=dtype), - }[index_type] + + if index_type == "monotonic_incr": + if unique: + arr = np.arange(N * 3, dtype=dtype) + else: + values = list([1] * N + [2] * N + [3] * N) + arr = np.array(values, dtype=dtype) + elif index_type == "monotonic_decr": + if unique: + arr = np.arange(N * 3, dtype=dtype)[::-1] + else: + values = list([1] * N + [2] * N + [3] * N) + arr = np.array(values, dtype=dtype)[::-1] + else: + assert index_type == "non_monotonic" + if unique: + arr = np.empty(N * 3, dtype=dtype) + arr[:N] = np.arange(N * 2, N * 3, dtype=dtype) + arr[N:] = np.arange(N * 2, dtype=dtype) + else: + arr = np.array([1, 2, 3] * N, dtype=dtype) self.data = engine(arr) # code belows avoids populating the mapping etc. while timing. 
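        # (the first get_loc call is what builds the hash table and related
        # metadata, so doing it once here keeps that one-time cost out of the
        # timed runs below)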
From adef17cfd4dc1614d818fe6214528de1cd463035 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 8 Oct 2021 05:31:13 -0700
Subject: [PATCH 31/41] TST: avoid re-running tests 14 times (#43922)

---
 pandas/tests/indexes/common.py      | 32 ----------------------------
 pandas/tests/indexes/test_common.py | 33 +++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index a8684ca4d3c25..50097ae3787b3 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -525,38 +525,6 @@ def test_format_empty(self):
         assert empty_idx.format() == []
         assert empty_idx.format(name=True) == [""]
 
-    def test_hasnans_isnans(self, index_flat):
-        # GH 11343, added tests for hasnans / isnans
-        index = index_flat
-
-        # cases in indices doesn't include NaN
-        idx = index.copy(deep=True)
-        expected = np.array([False] * len(idx), dtype=bool)
-        tm.assert_numpy_array_equal(idx._isnan, expected)
-        assert idx.hasnans is False
-
-        idx = index.copy(deep=True)
-        values = np.asarray(idx.values)
-
-        if len(index) == 0:
-            return
-        elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype):
-            return
-        elif isinstance(index, DatetimeIndexOpsMixin):
-            values[1] = iNaT
-        else:
-            values[1] = np.nan
-
-        if isinstance(index, PeriodIndex):
-            idx = type(index)(values, freq=index.freq)
-        else:
-            idx = type(index)(values)
-
-        expected = np.array([False] * len(idx), dtype=bool)
-        expected[1] = True
-        tm.assert_numpy_array_equal(idx._isnan, expected)
-        assert idx.hasnans is True
-
     def test_fillna(self, index):
         # GH 11343
         if len(index) == 0:
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
index 33aa8bbb942d5..604b68cfcc791 100644
--- a/pandas/tests/indexes/test_common.py
+++ b/pandas/tests/indexes/test_common.py
@@ -12,6 +12,7 @@
 from pandas.compat import IS64
 
 from pandas.core.dtypes.common import (
+    is_integer_dtype,
     is_period_dtype,
     needs_i8_conversion,
 )
@@ -366,6 +367,38 @@ def test_asi8_deprecation(self, index):
         with tm.assert_produces_warning(warn):
             index.asi8
 
+    def test_hasnans_isnans(self, index_flat):
+        # GH#11343, added tests for hasnans / isnans
+        index = index_flat
+
+        # cases in indices doesn't include NaN
+        idx = index.copy(deep=True)
+        expected = np.array([False] * len(idx), dtype=bool)
+        tm.assert_numpy_array_equal(idx._isnan, expected)
+        assert idx.hasnans is False
+
+        idx = index.copy(deep=True)
+        values = np.asarray(idx.values)
+
+        if len(index) == 0:
+            return
+        elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype):
+            return
+        elif needs_i8_conversion(index.dtype):
+            values[1] = iNaT
+        else:
+            values[1] = np.nan
+
+        if isinstance(index, PeriodIndex):
+            idx = type(index)(values, freq=index.freq)
+        else:
+            idx = type(index)(values)
+
+        expected = np.array([False] * len(idx), dtype=bool)
+        expected[1] = True
+        tm.assert_numpy_array_equal(idx._isnan, expected)
+        assert idx.hasnans is True
+
 
 @pytest.mark.parametrize("na_position", [None, "middle"])
 def test_sort_values_invalid_na_position(index_with_missing, na_position):

From d5716c7ae53c97afed2741d2485e0102b398cc32 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 8 Oct 2021 05:32:03 -0700
Subject: [PATCH 32/41] CLN: unnecessary warning-catching (#43919)

---
 pandas/core/arrays/datetimes.py                | 18 +++++++-----------
 pandas/core/frame.py                           | 10 +++++-----
 pandas/tests/indexes/numeric/test_astype.py    |  6 +++---
 pandas/tests/indexes/numeric/test_indexing.py  |  8 +++++---
 pandas/tests/indexes/numeric/test_join.py      |  4 ++--
 pandas/tests/indexes/numeric/test_numeric.py   |  8 +++++---
 pandas/tests/indexes/numeric/test_setops.py    |  4 ++--
 .../indexes/period/methods/test_astype.py      |  6 ++++--
 pandas/tests/indexes/ranges/test_join.py       |  2 +-
 pandas/tests/indexes/ranges/test_range.py      |  4 ++--
 pandas/tests/indexes/ranges/test_setops.py     |  4 ++--
 .../tests/indexes/timedeltas/test_timedelta.py |  2 +-
 pandas/tests/indexing/test_partial.py          |  6 +++---
 13 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index d9f9c07a4f645..eb7638df301f7 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -621,17 +621,13 @@ def __iter__(self):
             chunksize = 10000
             chunks = (length // chunksize) + 1
 
-            with warnings.catch_warnings():
-                # filter out warnings about Timestamp.freq
-                warnings.filterwarnings("ignore", category=FutureWarning)
-
-                for i in range(chunks):
-                    start_i = i * chunksize
-                    end_i = min((i + 1) * chunksize, length)
-                    converted = ints_to_pydatetime(
-                        data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp"
-                    )
-                    yield from converted
+            for i in range(chunks):
+                start_i = i * chunksize
+                end_i = min((i + 1) * chunksize, length)
+                converted = ints_to_pydatetime(
+                    data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp"
+                )
+                yield from converted
 
     def astype(self, dtype, copy: bool = True):
         # We handle
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4db99c5d7f074..077ac303b8327 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -9361,17 +9361,17 @@ def round(
         """
         from pandas.core.reshape.concat import concat
 
-        def _dict_round(df, decimals):
+        def _dict_round(df: DataFrame, decimals):
             for col, vals in df.items():
                 try:
                     yield _series_round(vals, decimals[col])
                 except KeyError:
                     yield vals
 
-        def _series_round(s, decimals):
-            if is_integer_dtype(s) or is_float_dtype(s):
-                return s.round(decimals)
-            return s
+        def _series_round(ser: Series, decimals: int):
+            if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype):
+                return ser.round(decimals)
+            return ser
 
         nv.validate_round(args, kwargs)
diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py
index bda66856fb57a..89f26e953400d 100644
--- a/pandas/tests/indexes/numeric/test_astype.py
+++ b/pandas/tests/indexes/numeric/test_astype.py
@@ -5,12 +5,12 @@
 
 from pandas.core.dtypes.common import pandas_dtype
 
-from pandas import (
+from pandas import Index
+import pandas._testing as tm
+from pandas.core.indexes.api import (
     Float64Index,
-    Index,
     Int64Index,
 )
-import pandas._testing as tm
 
 
 class TestAstype:
diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py
index be05d5d8a9cae..cb861aaab80f8 100644
--- a/pandas/tests/indexes/numeric/test_indexing.py
+++ b/pandas/tests/indexes/numeric/test_indexing.py
@@ -2,15 +2,17 @@
 import pytest
 
 from pandas import (
-    Float64Index,
     Index,
-    Int64Index,
     RangeIndex,
     Series,
     Timestamp,
-    UInt64Index,
 )
 import pandas._testing as tm
+from pandas.core.indexes.api import (
+    Float64Index,
+    Int64Index,
+    UInt64Index,
+)
 
 
 @pytest.fixture
diff --git a/pandas/tests/indexes/numeric/test_join.py b/pandas/tests/indexes/numeric/test_join.py
index 43d731f8c3142..2a47289b65aad 100644
--- a/pandas/tests/indexes/numeric/test_join.py
+++ b/pandas/tests/indexes/numeric/test_join.py
@@ -1,12 +1,12 @@
 import numpy as np
 import pytest
 
-from pandas import (
+import pandas._testing as tm
+from pandas.core.indexes.api import (
     Index,
     Int64Index,
     UInt64Index,
 )
-import pandas._testing as tm
 
 
 class TestJoinInt64Index:
diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py
index 6d35568b69fac..ec451ac13ec44 100644
--- a/pandas/tests/indexes/numeric/test_numeric.py
+++ b/pandas/tests/indexes/numeric/test_numeric.py
@@ -5,14 +5,16 @@
 
 import pandas as pd
 from pandas import (
-    Float64Index,
     Index,
-    Int64Index,
     NumericIndex,
     Series,
-    UInt64Index,
 )
 import pandas._testing as tm
+from pandas.core.indexes.api import (
+    Float64Index,
+    Int64Index,
+    UInt64Index,
+)
 from pandas.tests.indexes.common import NumericBase
diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py
index 5a7db9858dbad..4045cc0b91313 100644
--- a/pandas/tests/indexes/numeric/test_setops.py
+++ b/pandas/tests/indexes/numeric/test_setops.py
@@ -6,14 +6,14 @@
 import numpy as np
 import pytest
 
-from pandas import (
+import pandas._testing as tm
+from pandas.core.indexes.api import (
     Float64Index,
     Index,
     Int64Index,
     RangeIndex,
     UInt64Index,
 )
-import pandas._testing as tm
 
 
 @pytest.fixture
diff --git a/pandas/tests/indexes/period/methods/test_astype.py b/pandas/tests/indexes/period/methods/test_astype.py
index 74f627478a29c..e2340a2db02f7 100644
--- a/pandas/tests/indexes/period/methods/test_astype.py
+++ b/pandas/tests/indexes/period/methods/test_astype.py
@@ -5,15 +5,17 @@
     CategoricalIndex,
     DatetimeIndex,
     Index,
-    Int64Index,
     NaT,
     Period,
     PeriodIndex,
     Timedelta,
-    UInt64Index,
     period_range,
 )
 import pandas._testing as tm
+from pandas.core.indexes.api import (
+    Int64Index,
+    UInt64Index,
+)
 
 
 class TestPeriodIndexAsType:
diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py
index 6668a7c6a3d02..353605da91f94 100644
--- a/pandas/tests/indexes/ranges/test_join.py
+++ b/pandas/tests/indexes/ranges/test_join.py
@@ -2,10 +2,10 @@
 
 from pandas import (
     Index,
-    Int64Index,
     RangeIndex,
 )
 import pandas._testing as tm
+from pandas.core.indexes.api import Int64Index
 
 
 class TestJoin:
diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
index 1b98f3c8194b5..7dcdb627b9abb 100644
--- a/pandas/tests/indexes/ranges/test_range.py
+++ b/pandas/tests/indexes/ranges/test_range.py
@@ -4,13 +4,13 @@
 from pandas.core.dtypes.common import ensure_platform_int
 
 import pandas as pd
-from pandas import (
+import pandas._testing as tm
+from pandas.core.indexes.api import (
     Float64Index,
     Index,
     Int64Index,
     RangeIndex,
 )
-import pandas._testing as tm
 from pandas.tests.indexes.common import NumericBase
 
 # aliases to make some tests easier to read
diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py
index ba938f82e9d89..6dc47b7fef5ac 100644
--- a/pandas/tests/indexes/ranges/test_setops.py
+++ b/pandas/tests/indexes/ranges/test_setops.py
@@ -6,13 +6,13 @@
 import numpy as np
 import pytest
 
-from pandas import (
+import pandas._testing as tm
+from pandas.core.indexes.api import (
     Index,
     Int64Index,
     RangeIndex,
     UInt64Index,
 )
-import pandas._testing as tm
 
 
 class TestRangeIndexSetOps:
diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
index 9672929ecc06b..8ceef8186e4ea 100644
--- a/pandas/tests/indexes/timedeltas/test_timedelta.py
+++ b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -6,7 +6,6 @@
 import pandas as pd
 from pandas import (
     Index,
-    Int64Index,
     NaT,
     Series,
     Timedelta,
@@ -14,6 +13,7 @@
     timedelta_range,
 )
 import pandas._testing as tm
+from pandas.core.indexes.api import Int64Index
 from pandas.tests.indexes.datetimelike import DatetimeLike
 
 randn = np.random.randn
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index 172301a2fde84..7b2713ad274c6 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -540,9 +540,9 @@ def test_partial_set_empty_frame_empty_consistencies(self):
             date_range(start="2000", periods=20, freq="D"),
             ["2000-01-04", "2000-01-08", "2000-01-12"],
             [
-                Timestamp("2000-01-04", freq="D"),
-                Timestamp("2000-01-08", freq="D"),
-                Timestamp("2000-01-12", freq="D"),
+                Timestamp("2000-01-04"),
+                Timestamp("2000-01-08"),
+                Timestamp("2000-01-12"),
             ],
         ),
         (
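Aside: the `_dict_round` / `_series_round` helpers retyped above are the internals
behind ``DataFrame.round`` with a dict argument: columns named in the dict are
rounded, and any column missing from the dict hits the ``KeyError`` branch and is
passed through unchanged. A small illustration using only the public API:

    import pandas as pd

    df = pd.DataFrame({"a": [1.234, 2.345], "b": [3.456, 4.567]})
    # "a" is rounded to 1 decimal; "b" is not in the dict, so it is untouched
    print(df.round({"a": 1}))
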
From 505ed3f343e0bf4b66afae05494bbaf434f7b927 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 8 Oct 2021 05:32:56 -0700
Subject: [PATCH 33/41] TST/REF: fixturize (#43918)

---
 .../indexes/categorical/test_indexing.py       |  5 ++-
 pandas/tests/indexes/common.py                 |  5 ++-
 pandas/tests/indexes/conftest.py               | 24 +++++++++++
 .../tests/indexes/datetimes/test_datetime.py   | 40 -------------------
 .../tests/indexes/datetimes/test_indexing.py   | 26 +++++++++++-
 .../indexes/datetimes/test_partial_slicing.py  | 17 ++++++++
 pandas/tests/indexes/interval/test_base.py     | 10 ++---
 .../tests/indexes/interval/test_interval.py    |  7 ++--
 pandas/tests/indexes/multi/test_indexing.py    |  7 ++--
 pandas/tests/indexes/numeric/test_indexing.py  |  7 ++--
 pandas/tests/indexes/period/test_indexing.py   |  7 ++--
 .../tests/indexes/period/test_searchsorted.py  |  9 ++---
 .../indexes/timedeltas/test_searchsorted.py    |  9 ++---
 13 files changed, 94 insertions(+), 79 deletions(-)

diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py
index 5d89fd3bb4bc3..798aa7188cb9a 100644
--- a/pandas/tests/indexes/categorical/test_indexing.py
+++ b/pandas/tests/indexes/categorical/test_indexing.py
@@ -300,8 +300,9 @@ def test_get_indexer_same_categories_different_order(self):
 
 
 class TestWhere:
-    @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
-    def test_where(self, klass):
+    def test_where(self, listlike_box_with_tuple):
+        klass = listlike_box_with_tuple
+
        i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
         cond = [True] * len(i)
         expected = i
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index 50097ae3787b3..8357595fdaa40 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -378,8 +378,9 @@ def test_numpy_repeat(self, simple_index):
         with pytest.raises(ValueError, match=msg):
             np.repeat(idx, rep, axis=0)
 
-    @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
-    def test_where(self, klass, simple_index):
+    def test_where(self, listlike_box_with_tuple, simple_index):
+        klass = listlike_box_with_tuple
+
         idx = simple_index
         if isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
             # where does not preserve freq
diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py
index ac4477e60d5dc..2eae51c62aa0d 100644
--- a/pandas/tests/indexes/conftest.py
+++ b/pandas/tests/indexes/conftest.py
@@ -1,5 +1,11 @@
+import numpy as np
 import pytest
 
+from pandas import (
+    Series,
+    array,
+)
+
 
 @pytest.fixture(params=[None, False])
 def sort(request):
@@ -25,3 +31,21 @@ def freq_sample(request):
     timedelta_range..
     """
     return request.param
+
+
+@pytest.fixture(params=[list, np.array, array, Series])
+def listlike_box(request):
+    """
+    Types that may be passed as the indexer to searchsorted.
+    """
+    return request.param
+
+
+# TODO: not clear if this _needs_ to be different from listlike_box or
+#  if that is just a historical artifact
+@pytest.fixture(params=[list, tuple, np.array, Series])
+def listlike_box_with_tuple(request):
+    """
+    Types that may be passed as the indexer to searchsorted.
+    """
+    return request.param
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index b220ce486f80b..5c85221c5a753 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -17,29 +17,6 @@
 class TestDatetimeIndex:
-    def test_time_loc(self):  # GH8667
-        from datetime import time
-
-        from pandas._libs.index import _SIZE_CUTOFF
-
-        ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)
-        key = time(15, 11, 30)
-        start = key.hour * 3600 + key.minute * 60 + key.second
-        step = 24 * 3600
-
-        for n in ns:
-            idx = date_range("2014-11-26", periods=n, freq="S")
-            ts = pd.Series(np.random.randn(n), index=idx)
-            i = np.arange(start, n, step)
-
-            tm.assert_numpy_array_equal(ts.index.get_loc(key), i, check_dtype=False)
-            tm.assert_series_equal(ts[key], ts.iloc[i])
-
-            left, right = ts.copy(), ts.copy()
-            left[key] *= -10
-            right.iloc[i] *= -10
-            tm.assert_series_equal(left, right)
-
     def test_time_overflow_for_32bit_machines(self):
         # GH8943.  On some machines NumPy defaults to np.int32 (for example,
         # 32-bit Linux machines).  In the function _generate_regular_range
@@ -78,13 +55,6 @@ def test_week_of_month_frequency(self):
         expected = DatetimeIndex(dates, freq="WOM-1SAT")
         tm.assert_index_equal(result, expected)
 
-    def test_stringified_slice_with_tz(self):
-        # GH#2658
-        start = "2013-01-07"
-        idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
-        df = DataFrame(np.arange(10), index=idx)
-        df["2013-01-14 23:44:34.437768-05:00":]  # no exception here
-
     def test_append_nondatetimeindex(self):
         rng = date_range("1/1/2000", periods=10)
         idx = Index(["a", "b", "c", "d"])
@@ -137,16 +107,6 @@ def test_misc_coverage(self):
         result = rng.groupby(rng.day)
         assert isinstance(list(result.values())[0][0], Timestamp)
 
-    def test_string_index_series_name_converted(self):
-        # #1644
-        df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10))
-
-        result = df.loc["1/3/2000"]
-        assert result.name == df.index[2]
-
-        result = df.T["1/3/2000"]
-        assert result.name == df.index[2]
-
     def test_groupby_function_tuple_1677(self):
         df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100))
         monthly_group = df.groupby(lambda x: (x.year, x.month))
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index 4ad85f7d4e30f..c3152b77d39df 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -507,6 +507,30 @@ def test_get_loc_time_obj(self):
             with tm.assert_produces_warning(FutureWarning, match="deprecated"):
                 idx.get_loc(time(12, 30), method="pad")
 
+    def test_get_loc_time_obj2(self):
+        # GH#8667
+
+        from pandas._libs.index import _SIZE_CUTOFF
+
+        ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64)
+        key = time(15, 11, 30)
+        start = key.hour * 3600 + key.minute * 60 + key.second
+        step = 24 * 3600
+
+        for n in ns:
+            idx = date_range("2014-11-26", periods=n, freq="S")
+            ts = pd.Series(np.random.randn(n), index=idx)
+            locs = np.arange(start, n, step, dtype=np.intp)
+
+            result = ts.index.get_loc(key)
+            tm.assert_numpy_array_equal(result, locs)
+            tm.assert_series_equal(ts[key], ts.iloc[locs])
+
+            left, right = ts.copy(), ts.copy()
+            left[key] *= -10
+            right.iloc[locs] *= -10
+            tm.assert_series_equal(left, right)
+
     def test_get_loc_time_nat(self):
         # GH#35114
         # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
@@ -705,7 +729,7 @@ def test_maybe_cast_slice_duplicate_monotonic(self):
         assert result == expected
 
 
-class TestDatetimeIndex:
+class TestGetValue:
     def test_get_value(self):
         # specifically make sure we have test for np.datetime64 key
         dti = date_range("2016-01-01", periods=3)
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
index c5b47053471eb..896c43db5e356 100644
--- a/pandas/tests/indexes/datetimes/test_partial_slicing.py
+++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -19,6 +19,23 @@
 class TestSlicing:
+    def test_string_index_series_name_converted(self):
+        # GH#1644
+        df = DataFrame(np.random.randn(10, 4), index=date_range("1/1/2000", periods=10))
+
+        result = df.loc["1/3/2000"]
+        assert result.name == df.index[2]
+
+        result = df.T["1/3/2000"]
+        assert result.name == df.index[2]
+
+    def test_stringified_slice_with_tz(self):
+        # GH#2658
+        start = "2013-01-07"
+        idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
+        df = DataFrame(np.arange(10), index=idx)
+        df["2013-01-14 23:44:34.437768-05:00":]  # no exception here
+
     def test_return_type_doesnt_depend_on_monotonicity(self):
         # GH#24892 we get Series back regardless of whether our DTI is monotonic
         dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py
index aa88bca2faec9..411e76ca5d8b7 100644
--- a/pandas/tests/indexes/interval/test_base.py
+++ b/pandas/tests/indexes/interval/test_base.py
@@ -1,10 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas import (
-    IntervalIndex,
-    Series,
-)
+from pandas import IntervalIndex
 import pandas._testing as tm
 from pandas.tests.indexes.common import Base
 
@@ -46,8 +43,9 @@ def test_take(self, closed):
         expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
         tm.assert_index_equal(result, expected)
 
-    @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
-    def test_where(self, simple_index, klass):
+    def test_where(self, simple_index, listlike_box_with_tuple):
+        klass = listlike_box_with_tuple
+
         idx = simple_index
         cond = [True] * len(idx)
         expected = idx
diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
index ce8323199ce62..321d1aa34b9af 100644
--- a/pandas/tests/indexes/interval/test_interval.py
+++ b/pandas/tests/indexes/interval/test_interval.py
@@ -934,15 +934,14 @@ def test_dir():
     assert "str" not in result
 
 
-@pytest.mark.parametrize("klass", [list, np.array, pd.array, pd.Series])
-def test_searchsorted_different_argument_classes(klass):
+def test_searchsorted_different_argument_classes(listlike_box):
     # https://github.com/pandas-dev/pandas/issues/32762
     values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
-    result = values.searchsorted(klass(values))
+    result = values.searchsorted(listlike_box(values))
     expected = np.array([0, 1], dtype=result.dtype)
     tm.assert_numpy_array_equal(result, expected)
 
-    result = values._data.searchsorted(klass(values))
+    result = values._data.searchsorted(listlike_box(values))
     tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index 405b41c829a2f..99322f474dd9e 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -720,13 +720,12 @@ def test_where(self):
         with pytest.raises(NotImplementedError, match=msg):
             i.where(True)
 
-    @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
-    def test_where_array_like(self, klass):
-        i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
+    def test_where_array_like(self, listlike_box_with_tuple):
+        mi = MultiIndex.from_tuples([("A", 1), ("A", 2)])
         cond = [False, True]
         msg = r"\.where is not supported for MultiIndex operations"
         with pytest.raises(NotImplementedError, match=msg):
-            i.where(klass(cond))
+            mi.where(listlike_box_with_tuple(cond))
 
 
 class TestContains:
diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py
index cb861aaab80f8..cc309beef92d6 100644
--- a/pandas/tests/indexes/numeric/test_indexing.py
+++ b/pandas/tests/indexes/numeric/test_indexing.py
@@ -397,15 +397,14 @@ class TestWhere:
             UInt64Index(np.arange(5, dtype="uint64")),
         ],
     )
-    @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
-    def test_where(self, klass, index):
+    def test_where(self, listlike_box_with_tuple, index):
         cond = [True] * len(index)
         expected = index
-        result = index.where(klass(cond))
+        result = index.where(listlike_box_with_tuple(cond))
 
         cond = [False] + [True] * (len(index) - 1)
         expected = Float64Index([index._na_value] + index[1:].tolist())
-        result = index.where(klass(cond))
+        result = index.where(listlike_box_with_tuple(cond))
         tm.assert_index_equal(result, expected)
 
     def test_where_uint64(self):
diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py
index cef045766efcc..78afcf2fdc78a 100644
--- a/pandas/tests/indexes/period/test_indexing.py
+++ b/pandas/tests/indexes/period/test_indexing.py
@@ -602,17 +602,16 @@ def test_get_indexer2(self):
 
 
 class TestWhere:
-    @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
-    def test_where(self, klass):
+    def test_where(self, listlike_box_with_tuple):
         i = period_range("20130101", periods=5, freq="D")
         cond = [True] * len(i)
         expected = i
-        result = i.where(klass(cond))
+        result = i.where(listlike_box_with_tuple(cond))
         tm.assert_index_equal(result, expected)
 
         cond = [False] + [True] * (len(i) - 1)
         expected = PeriodIndex([NaT] + i[1:].tolist(), freq="D")
-        result = i.where(klass(cond))
+        result = i.where(listlike_box_with_tuple(cond))
         tm.assert_index_equal(result, expected)
 
     def test_where_other(self):
diff --git a/pandas/tests/indexes/period/test_searchsorted.py b/pandas/tests/indexes/period/test_searchsorted.py
index 27e998284c189..b9863d1bb019a 100644
--- a/pandas/tests/indexes/period/test_searchsorted.py
+++ b/pandas/tests/indexes/period/test_searchsorted.py
@@ -7,8 +7,6 @@
     NaT,
     Period,
     PeriodIndex,
-    Series,
-    array,
 )
 import pandas._testing as tm
 
@@ -37,17 +35,16 @@ def test_searchsorted(self, freq):
         with pytest.raises(IncompatibleFrequency, match=msg):
             pidx.searchsorted(Period("2014-01-01", freq="5D"))
 
-    @pytest.mark.parametrize("klass", [list, np.array, array, Series])
-    def test_searchsorted_different_argument_classes(self, klass):
+    def test_searchsorted_different_argument_classes(self, listlike_box):
         pidx = PeriodIndex(
             ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"],
             freq="D",
         )
-        result = pidx.searchsorted(klass(pidx))
+        result = pidx.searchsorted(listlike_box(pidx))
         expected = np.arange(len(pidx), dtype=result.dtype)
         tm.assert_numpy_array_equal(result, expected)
 
-        result = pidx._data.searchsorted(klass(pidx))
+        result = pidx._data.searchsorted(listlike_box(pidx))
         tm.assert_numpy_array_equal(result, expected)
 
     def test_searchsorted_invalid(self):
diff --git a/pandas/tests/indexes/timedeltas/test_searchsorted.py b/pandas/tests/indexes/timedeltas/test_searchsorted.py
index 8a48da91ef31d..710571ef38397 100644
--- a/pandas/tests/indexes/timedeltas/test_searchsorted.py
+++ b/pandas/tests/indexes/timedeltas/test_searchsorted.py
@@ -2,23 +2,20 @@
 import pytest
 
 from pandas import (
-    Series,
     TimedeltaIndex,
     Timestamp,
-    array,
 )
 import pandas._testing as tm
 
 
 class TestSearchSorted:
-    @pytest.mark.parametrize("klass", [list, np.array, array, Series])
-    def test_searchsorted_different_argument_classes(self, klass):
+    def test_searchsorted_different_argument_classes(self, listlike_box):
         idx = TimedeltaIndex(["1 day", "2 days", "3 days"])
-        result = idx.searchsorted(klass(idx))
+        result = idx.searchsorted(listlike_box(idx))
         expected = np.arange(len(idx), dtype=result.dtype)
         tm.assert_numpy_array_equal(result, expected)
 
-        result = idx._data.searchsorted(klass(idx))
+        result = idx._data.searchsorted(listlike_box(idx))
         tm.assert_numpy_array_equal(result, expected)
 
     @pytest.mark.parametrize(
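Aside: the patch above replaces repeated ``@pytest.mark.parametrize("klass", ...)``
decorators with shared conftest fixtures. A minimal self-contained sketch of the same
pattern; the fixture name ``box`` and the test body are illustrative, not taken from
the patch:

    import numpy as np
    import pytest

    import pandas as pd


    @pytest.fixture(params=[list, tuple, np.array, pd.Series])
    def box(request):
        # each test requesting this fixture runs once per container type
        return request.param


    def test_where_accepts_listlikes(box):
        idx = pd.Index([1.0, 2.0, 3.0])
        cond = [True, False, True]
        result = idx.where(box(cond))
        expected = pd.Index([1.0, np.nan, 3.0])
        pd.testing.assert_index_equal(result, expected)
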
From 9ee956b764768ab6c7d33ad4e50ecfcea924d470 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 8 Oct 2021 17:17:40 -0700
Subject: [PATCH 34/41] BUG: NumericIndex.insert (#43933)

---
 pandas/core/indexes/base.py    |  7 ++++---
 pandas/tests/indexes/common.py | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index da953fe46ef1d..2ff9b3973a526 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6329,10 +6329,11 @@ def insert(self, loc: int, item) -> Index:
 
         arr = np.asarray(self)
 
-        # Use Index constructor to ensure we get tuples cast correctly.
-        item = Index([item], dtype=self.dtype)._values
+        # Use constructor to ensure we get tuples cast correctly.
+        # Use self._constructor instead of Index to retain NumericIndex GH#43921
+        item = self._constructor([item], dtype=self.dtype)._values
         idx = np.concatenate((arr[:loc], item, arr[loc:]))
-        return Index._with_infer(idx, name=self.name)
+        return self._constructor._with_infer(idx, name=self.name)
 
     def drop(self, labels, errors: str_t = "raise") -> Index:
         """
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
index 8357595fdaa40..7e43664c6b3de 100644
--- a/pandas/tests/indexes/common.py
+++ b/pandas/tests/indexes/common.py
@@ -793,6 +793,20 @@ def test_format(self, simple_index):
     def test_numeric_compat(self):
         pass  # override Base method
 
+    def test_insert_non_na(self, simple_index):
+        # GH#43921 inserting an element that we know we can hold should
+        #  not change dtype or type (except for RangeIndex)
+        index = simple_index
+
+        result = index.insert(0, index[0])
+
+        cls = type(index)
+        if cls is RangeIndex:
+            cls = Int64Index
+
+        expected = cls([index[0]] + list(index), dtype=index.dtype)
+        tm.assert_index_equal(result, expected)
+
     def test_insert_na(self, nulls_fixture, simple_index):
         # GH 18295 (test missing)
         index = simple_index
@@ -800,6 +814,11 @@ def test_insert_na(self, nulls_fixture, simple_index):
 
         if na_val is pd.NaT:
             expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
+        elif type(index) is NumericIndex and index.dtype.kind == "f":
+            # GH#43921
+            expected = NumericIndex(
+                [index[0], np.nan] + list(index[1:]), dtype=index.dtype
+            )
         else:
             expected = Float64Index([index[0], np.nan] + list(index[1:]))
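Aside: the point of routing ``Index.insert`` through ``self._constructor`` above is
that the returned index keeps both the dtype and the Index subclass. A behavior
sketch using only the public API (the assertions hold on current releases as well,
since float data already inferred a float index):

    import pandas as pd

    idx = pd.Index([1.0, 2.0, 3.0])
    result = idx.insert(0, 4.0)

    # dtype and Index type are preserved on insert
    assert result.dtype == idx.dtype
    assert type(result) is type(idx)
    assert list(result) == [4.0, 1.0, 2.0, 3.0]
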
From 1e370aa138c6e0186cd959fe6816187ac9698a3b Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri, 8 Oct 2021 17:18:23 -0700
Subject: [PATCH 35/41] TST: Skip leaky test on Python 3.10 (#43910)

---
 .github/workflows/python-dev.yml                   | 2 --
 pandas/tests/io/parser/common/test_common_basic.py | 6 ++++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml
index 3a139936fbd22..b32b18b86e9df 100644
--- a/.github/workflows/python-dev.yml
+++ b/.github/workflows/python-dev.yml
@@ -73,8 +73,6 @@ jobs:
       shell: bash
       run: |
         ci/run_tests.sh
-      # GH 41935
-      continue-on-error: true
 
     - name: Publish test results
       uses: actions/upload-artifact@master
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index ff54a378806fa..6d958f46a49dd 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -12,6 +12,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat import PY310
 from pandas.errors import (
     EmptyDataError,
     ParserError,
@@ -674,6 +675,11 @@ def test_read_table_equivalency_to_read_csv(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.skipif(
+    PY310,
+    reason="GH41935 This test is leaking only on Python 3.10,"
+    "causing other tests to fail with a cryptic error.",
+)
 @pytest.mark.parametrize("read_func", ["read_csv", "read_table"])
 def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
     # GH#41069

From acb76509cfd87f23807bf408f21b217c4f2d3687 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 8 Oct 2021 17:19:13 -0700
Subject: [PATCH 36/41] ENH: EA.tolist (#43920)

---
 doc/source/reference/extensions.rst      |  1 +
 pandas/core/arrays/base.py               | 17 +++++++++++++++++
 pandas/core/arrays/categorical.py        | 13 +++----------
 pandas/core/base.py                      |  3 ---
 pandas/tests/extension/base/dim2.py      | 11 +++++++++++
 pandas/tests/extension/base/interface.py |  6 ++++++
 6 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst
index 7b451ed3bf296..e2e8c94ef8fc6 100644
--- a/doc/source/reference/extensions.rst
+++ b/doc/source/reference/extensions.rst
@@ -60,6 +60,7 @@ objects.
    api.extensions.ExtensionArray.nbytes
    api.extensions.ExtensionArray.ndim
    api.extensions.ExtensionArray.shape
+   api.extensions.ExtensionArray.tolist
 
 Additionally, we have some utility methods for ensuring your object behaves
 correctly.
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 46b0a6873986e..99c4944a1cfa7 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -130,6 +130,7 @@ class ExtensionArray:
     searchsorted
     shift
     take
+    tolist
     unique
     view
     _concat_same_type
@@ -1348,6 +1349,22 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
     # ------------------------------------------------------------------------
     # Non-Optimized Default Methods
 
+    def tolist(self) -> list:
+        """
+        Return a list of the values.
+
+        These are each a scalar type, which is a Python scalar
+        (for str, int, float) or a pandas scalar
+        (for Timestamp/Timedelta/Interval/Period)
+
+        Returns
+        -------
+        list
+        """
+        if self.ndim > 1:
+            return [x.tolist() for x in self]
+        return list(self)
+
     def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT:
         indexer = np.delete(np.arange(len(self)), loc)
         return self.take(indexer)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index c0fc172139149..7e3bf33f411bb 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -40,7 +40,6 @@
     Ordered,
     PositionalIndexer2D,
     PositionalIndexerTuple,
-    Scalar,
     ScalarIndexer,
     SequenceIndexer,
     Shape,
@@ -566,17 +565,11 @@ def itemsize(self) -> int:
         """
         return self.categories.itemsize
 
-    def tolist(self) -> list[Scalar]:
+    def to_list(self):
         """
-        Return a list of the values.
-
-        These are each a scalar type, which is a Python scalar
-        (for str, int, float) or a pandas scalar
-        (for Timestamp/Timedelta/Interval/Period)
+        Alias for tolist.
         """
-        return list(self)
-
-    to_list = tolist
+        return self.tolist()
 
     @classmethod
     def _from_inferred_categories(
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 24fa362eea9c3..a1bf448df18c4 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -740,9 +740,6 @@ def tolist(self):
         numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
             nested list of Python scalars.
         """
-        if not isinstance(self._values, np.ndarray):
-            # check for ndarray instead of dtype to catch DTA/TDA
-            return list(self._values)
         return self._values.tolist()
 
     to_list = tolist
diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
index b56ec23c63569..b80d2a3586b3b 100644
--- a/pandas/tests/extension/base/dim2.py
+++ b/pandas/tests/extension/base/dim2.py
@@ -97,6 +97,17 @@ def test_iter_2d(self, data):
             assert obj.ndim == 1
             assert len(obj) == arr2d.shape[1]
 
+    def test_tolist_2d(self, data):
+        arr2d = data.reshape(1, -1)
+
+        result = arr2d.tolist()
+        expected = [data.tolist()]
+
+        assert isinstance(result, list)
+        assert all(isinstance(x, list) for x in result)
+
+        assert result == expected
+
     def test_concat_2d(self, data):
         left = data.reshape(-1, 1)
         right = left.copy()
diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py
index f51f9f732bace..3e8a754c8c527 100644
--- a/pandas/tests/extension/base/interface.py
+++ b/pandas/tests/extension/base/interface.py
@@ -119,3 +119,9 @@ def test_view(self, data):
 
         # check specifically that the `dtype` kwarg is accepted
         data.view(dtype=None)
+
+    def test_tolist(self, data):
+        result = data.tolist()
+        expected = list(data)
+        assert isinstance(result, list)
+        assert result == expected
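Aside: with the base-class ``tolist`` added above, every ExtensionArray can
round-trip to a plain Python list. A short illustration with a nullable integer
array; this assumes a pandas build that includes this change:

    import pandas as pd

    arr = pd.array([1, 2, None], dtype="Int64")
    values = arr.tolist()

    # scalars come back as Python-level values, missing entries as pd.NA
    assert values[:2] == [1, 2]
    assert values[2] is pd.NA
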
""" - if not isinstance(self._values, np.ndarray): - # check for ndarray instead of dtype to catch DTA/TDA - return list(self._values) return self._values.tolist() to_list = tolist diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index b56ec23c63569..b80d2a3586b3b 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -97,6 +97,17 @@ def test_iter_2d(self, data): assert obj.ndim == 1 assert len(obj) == arr2d.shape[1] + def test_tolist_2d(self, data): + arr2d = data.reshape(1, -1) + + result = arr2d.tolist() + expected = [data.tolist()] + + assert isinstance(result, list) + assert all(isinstance(x, list) for x in result) + + assert result == expected + def test_concat_2d(self, data): left = data.reshape(-1, 1) right = left.copy() diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index f51f9f732bace..3e8a754c8c527 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -119,3 +119,9 @@ def test_view(self, data): # check specifically that the `dtype` kwarg is accepted data.view(dtype=None) + + def test_tolist(self, data): + result = data.tolist() + expected = list(data) + assert isinstance(result, list) + assert result == expected From e12643ea788a9bf23c323580c8dde1557287515b Mon Sep 17 00:00:00 2001 From: rosagold Date: Sat, 9 Oct 2021 03:10:01 +0200 Subject: [PATCH 37/41] fixed rolling for a decreasing index, added a test for that (#43928) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/_libs/window/indexers.pyx | 6 +++-- pandas/tests/apply/test_frame_apply.py | 2 -- pandas/tests/window/test_rolling.py | 33 ++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 722d0dcc10041..e638a24f830ef 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -502,6 +502,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`) - Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`) - Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`) +- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 197345b3ce6ac..3782b55bd19b3 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -81,9 +81,11 @@ def calculate_variable_window_bounds( if center: end_bound = index[0] + index_growth_sign * window_size / 2 for j in range(0, num_values): - if (index[j] < end_bound) or (index[j] == end_bound and right_closed): + if (index[j] - end_bound) * index_growth_sign < 0: end[0] = j + 1 - elif index[j] >= end_bound: + elif (index[j] - end_bound) * index_growth_sign == 0 and right_closed: + end[0] = j + 1 + elif (index[j] - end_bound) * index_growth_sign >= 0: end[0] = j break diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b51b66a0adda0..f1a93714b4c62 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ 
From 3bba3719f057586ab400a6d3d556d16b49d5c340 Mon Sep 17 00:00:00 2001
From: Richard Shadrach
Date: Mon, 18 Oct 2021 16:13:11 -0400
Subject: [PATCH 38/41] Added docs

---
 doc/source/user_guide/future_udf_behavior.rst | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 doc/source/user_guide/future_udf_behavior.rst

diff --git a/doc/source/user_guide/future_udf_behavior.rst b/doc/source/user_guide/future_udf_behavior.rst
new file mode 100644
index 0000000000000..294133846bca9
--- /dev/null
+++ b/doc/source/user_guide/future_udf_behavior.rst
@@ -0,0 +1,71 @@
+.. _future_udf_behavior:
+:orphan:
+
+{{ header }}
+
+*******************
+Future UDF Behavior
+*******************
+
+pandas is experimenting with improving the behavior of methods that take a
+user-defined function (UDF). These methods include ``.apply``, ``.agg``, ``.transform``,
+and ``.filter``. The goal is to make these methods behave in a more predictable
+and consistent manner, reducing the complexity of their implementation, and improving
+performance where possible. This page details the differences between the old and
+new behaviors, as well as providing some context behind each change that is being made.
+
+There are a great number of changes that are planned. In order to transition in a
+reasonable manner for users, all changes are behind an experimental "future_udf_behavior"
+option. This is currently experimental and subject to breaking changes without notice.
+Users can opt into the new behavior and provide feedback. Once the improvements have
+been made, this option will be declared no longer experimental. pandas will then raise
+a ``FutureWarning`` that the default value of this option will be set to ``True`` in
+a future version. Once the default is ``True``, users can still override it to ``False``.
+After a sufficient amount of time, pandas will remove this option altogether and only
+the future behavior will remain.
+
+``DataFrame.agg`` with list-likes
+---------------------------------
+
+Previously, using ``DataFrame.agg`` with a list-like argument would transpose the result when
+compared with just providing a single aggregation function.
+
+.. ipython:: python
+
+    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
+
+    df.agg('sum')
+    df.agg(['sum'])
+
+This transpose no longer occurs, making the result more consistent.
+
+.. ipython:: python
+
+    with pd.option_context('future_udf_behavior', True):
+        result = df.agg(['sum'])
+    result
+
+    with pd.option_context('future_udf_behavior', True):
+        result = df.agg(['sum', 'mean'])
+    result
+
+``DataFrame.groupby(...).agg`` with list-likes
+----------------------------------------------
+
+Previously, using ``DataFrame.groupby(...).agg`` with a list-like argument would put the
+columns as the first level of the resulting hierarchical columns. The result is
+that the columns for each aggregation function are separated, inconsistent with the result
+for a single aggregator.
+
+.. ipython:: python
+
+    df.groupby("a").agg('sum')
+    df.groupby("a").agg(["sum", "min"])
+
+Now the levels are swapped, so that the columns for each aggregation are together.
+
+.. ipython:: python
+
+    with pd.option_context('future_udf_behavior', True):
+        result = df.groupby("a").agg(["sum", "min"])
+    result
From 7abdff9caec9e03fde8d0341aa5857a41701cd4c Mon Sep 17 00:00:00 2001
From: Richard Shadrach
Date: Mon, 18 Oct 2021 16:14:37 -0400
Subject: [PATCH 39/41] Make quotes consistent

---
 doc/source/user_guide/future_udf_behavior.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/doc/source/user_guide/future_udf_behavior.rst b/doc/source/user_guide/future_udf_behavior.rst
index 294133846bca9..5ac78b19594ed 100644
--- a/doc/source/user_guide/future_udf_behavior.rst
+++ b/doc/source/user_guide/future_udf_behavior.rst
@@ -32,21 +32,21 @@ compared with just providing a single aggregation function.
 
 .. ipython:: python
 
-    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]})
+    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
 
-    df.agg('sum')
-    df.agg(['sum'])
+    df.agg("sum")
+    df.agg(["sum"])
 
 This transpose no longer occurs, making the result more consistent.
 
 .. ipython:: python
 
-    with pd.option_context('future_udf_behavior', True):
-        result = df.agg(['sum'])
+    with pd.option_context("future_udf_behavior", True):
+        result = df.agg(["sum"])
     result
 
-    with pd.option_context('future_udf_behavior', True):
-        result = df.agg(['sum', 'mean'])
+    with pd.option_context("future_udf_behavior", True):
+        result = df.agg(["sum", "mean"])
     result
 
 ``DataFrame.groupby(...).agg`` with list-likes
@@ -59,13 +59,13 @@ for a single aggregator.
 
 .. ipython:: python
 
-    df.groupby("a").agg('sum')
+    df.groupby("a").agg("sum")
     df.groupby("a").agg(["sum", "min"])
 
 Now the levels are swapped, so that the columns for each aggregation are together.
 
 .. ipython:: python
 
-    with pd.option_context('future_udf_behavior', True):
+    with pd.option_context("future_udf_behavior", True):
         result = df.groupby("a").agg(["sum", "min"])
     result

From a72a5eb3a68229b6b530133e0258c12aa1a248f3 Mon Sep 17 00:00:00 2001
From: Richard Shadrach
Date: Mon, 18 Oct 2021 23:42:13 -0400
Subject: [PATCH 40/41] Fixup docs

---
 doc/source/user_guide/future_udf_behavior.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/user_guide/future_udf_behavior.rst b/doc/source/user_guide/future_udf_behavior.rst
index 5ac78b19594ed..8871f767c9cb5 100644
--- a/doc/source/user_guide/future_udf_behavior.rst
+++ b/doc/source/user_guide/future_udf_behavior.rst
@@ -1,4 +1,5 @@
 .. _future_udf_behavior:
+
 :orphan:
 
 {{ header }}

From afc27ba4c379807358e7ec75d6ec30e8d954cb44 Mon Sep 17 00:00:00 2001
From: Richard Shadrach
Date: Sun, 7 Nov 2021 17:12:37 -0500
Subject: [PATCH 41/41] Merge cleanup

---
 pandas/core/frame.py                          |  5 ++--
 pandas/tests/apply/test_frame_apply.py        | 26 +++++++++++--------
 .../tests/groupby/aggregate/test_aggregate.py |  5 ++--
 pandas/tests/groupby/aggregate/test_other.py  |  8 ++++--
 pandas/tests/groupby/test_groupby.py          |  3 ++-
 pandas/tests/resample/test_resample_api.py    |  5 +++-
 6 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ebf3428020652..3f46669c61683 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -83,6 +83,7 @@
     doc,
     rewrite_axis_style_signature,
 )
+from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import (
     validate_ascending,
     validate_axis_style_args,
@@ -10016,7 +10017,7 @@ def _get_data() -> DataFrame:
                     "version this will raise TypeError. Select only valid "
                     "columns before calling the reduction.",
                     FutureWarning,
-                    stacklevel=5,
+                    stacklevel=find_stack_level(),
                 )
                 return out
 
@@ -10049,7 +10050,7 @@ def _get_data() -> DataFrame:
                     "version this will raise TypeError. Select only valid "
                     "columns before calling the reduction.",
                     FutureWarning,
-                    stacklevel=5,
+                    stacklevel=find_stack_level(),
                 )
 
             if hasattr(result, "dtype"):
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
index 2931be660ea6e..a12ca64a7a0eb 100644
--- a/pandas/tests/apply/test_frame_apply.py
+++ b/pandas/tests/apply/test_frame_apply.py
@@ -1112,21 +1112,23 @@ def test_agg_multiple_mixed_no_warning():
         },
         index=["min", "sum"],
     )
-    klass, match = None, None
     if get_option("future_udf_behavior"):
         expected = expected.T
-        klass, match = FutureWarning, "Dropping of nuisance columns"
+        match = "Dropping of nuisance columns"
+    else:
+        match = "did not aggregate successfully"
     # sorted index
-    with tm.assert_produces_warning(klass, match=match, check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, match=match):
         result = mdf.agg(["min", "sum"])
     tm.assert_frame_equal(result, expected)
 
-    klass, match = None, None
     if get_option("future_udf_behavior"):
-        klass, match = FutureWarning, "Dropping of nuisance columns"
+        match = "Dropping of nuisance columns"
+    else:
+        match = "did not aggregate successfully"
 
-    with tm.assert_produces_warning(klass, match=match, check_stacklevel=False):
+    with tm.assert_produces_warning(FutureWarning, match=match, check_stacklevel=False):
         result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
 
     # GH40420: the result of .agg should have an index that is sorted
@@ -1242,10 +1244,11 @@ def test_nuiscance_columns():
     expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
     tm.assert_series_equal(result, expected)
 
-    warn = FutureWarning if get_option("future_udf_behavior") else None
-    with tm.assert_produces_warning(
-        warn, match="Select only valid", check_stacklevel=False
-    ):
+    if get_option("future_udf_behavior"):
+        match = "Select only valid"
+    else:
+        match = "did not aggregate successfully"
+    with tm.assert_produces_warning(FutureWarning, match=match):
         result = df.agg(["sum"])
     expected = DataFrame(
         [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
@@ -1492,8 +1495,9 @@ def foo(s):
         return s.sum() / 2
 
     aggs = ["sum", foo, "count", "min"]
+    klass = None if get_option("future_udf_behavior") else FutureWarning
     with tm.assert_produces_warning(
-        FutureWarning, match=r"\['item'\] did not aggregate successfully"
+        klass, match=r"\['item'\] did not aggregate successfully"
    ):
         result = df.agg(aggs)
     if get_option("future_udf_behavior"):
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 18e17fd70216a..095c3fbaf10fb 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -390,12 +390,13 @@ def test_multiple_functions_tuples_and_non_tuples(df):
     expected = df.groupby("A")["C"].agg(ex_funcs)
     tm.assert_frame_equal(result, expected)
 
+    klass = None if get_option("future_udf_behavior") else FutureWarning
     with tm.assert_produces_warning(
-        FutureWarning, match=r"\['B'\] did not aggregate successfully"
+        klass, match=r"\['B'\] did not aggregate successfully"
     ):
         result = df.groupby("A").agg(funcs)
     with tm.assert_produces_warning(
-        FutureWarning, match=r"\['B'\] did not aggregate successfully"
+        klass, match=r"\['B'\] did not aggregate successfully"
     ):
         expected = df.groupby("A").agg(ex_funcs)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 35ac60eeac45f..c79878d71f5ae 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -46,16 +46,20 @@ def test_agg_api():
     def peak_to_peak(arr):
         return arr.max() - arr.min()
 
+    if get_option("future_udf_behavior"):
+        msg = "Dropping invalid columns"
+    else:
+        msg = r"\['key2'\] did not aggregate successfully"
     with tm.assert_produces_warning(
         FutureWarning,
-        match=r"\['key2'\] did not aggregate successfully",
+        match=msg,
     ):
         expected = grouped.agg([peak_to_peak])
     expected.columns = ["data1", "data2"]
 
     with tm.assert_produces_warning(
         FutureWarning,
-        match=r"\['key2'\] did not aggregate successfully",
+        match=msg,
     ):
         result = grouped.agg(peak_to_peak)
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 6cb2d6484ca4e..2f1fc1efa26c7 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -589,8 +589,9 @@ def test_frame_multi_key_function_list():
     grouped = data.groupby(["A", "B"])
     funcs = [np.mean, np.std]
+    klass = None if get_option("future_udf_behavior") else FutureWarning
     with tm.assert_produces_warning(
-        FutureWarning, match=r"\['C'\] did not aggregate successfully"
+        klass, match=r"\['C'\] did not aggregate successfully"
     ):
         agged = grouped.agg(funcs)
     if get_option("future_udf_behavior"):
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
index 4be47eaa3c25d..476b29217a8c0 100644
--- a/pandas/tests/resample/test_resample_api.py
+++ b/pandas/tests/resample/test_resample_api.py
@@ -355,7 +355,10 @@ def test_agg():
     expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
     expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
     for t in cases:
-        warn = FutureWarning if t in cases[1:3] else None
+        if t in cases[1:3] and not get_option("future_udf_behavior"):
+            warn = FutureWarning
+        else:
+            warn = None
         with tm.assert_produces_warning(
             warn,
             match=r"\['date'\] did not aggregate successfully",