From 3b265b0c4fc8b529d224c82875d03a0e0165cf73 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Feb 2021 07:51:08 -0800 Subject: [PATCH 01/10] BUG: raise on RangeIndex.array --- pandas/_testing/__init__.py | 14 ++++++++++++-- pandas/core/indexes/range.py | 22 +++++++++++++++++++--- pandas/core/reshape/merge.py | 12 +++++++++--- pandas/core/series.py | 17 ++++++++++++++--- pandas/core/sorting.py | 9 ++++++--- pandas/tests/arithmetic/test_numeric.py | 1 + 6 files changed, 61 insertions(+), 14 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0b2be53131af6..75df3c58f9bdf 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -99,7 +99,13 @@ use_numexpr, with_csv_dialect, ) -from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray, period_array +from pandas.core.arrays import ( + DatetimeArray, + PandasArray, + PeriodArray, + TimedeltaArray, + period_array, +) if TYPE_CHECKING: from pandas import PeriodIndex, TimedeltaIndex @@ -197,7 +203,11 @@ def box_expected(expected, box_cls, transpose=True): subclass of box_cls """ if box_cls is pd.array: - expected = pd.array(expected) + if isinstance(expected, pd.RangeIndex): + # pd.array would return an IntegerArray + expected = PandasArray(expected._values) + else: + expected = pd.array(expected) elif box_cls is pd.Index: expected = pd.Index(expected) elif box_cls is pd.Series: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ee0b49aac3f79..3d283f5a4d0b0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -171,6 +171,13 @@ def _data(self): """ return np.arange(self.start, self.stop, self.step, dtype=np.int64) + @property + def array(self): + raise ValueError( + f"{type(self).__name__} has no single backing array. Use " + f"'{type(self).__name__}.to_numpy()' to get a NumPy array." 
+ ) + @cache_readonly def _cached_int64index(self) -> Int64Index: return Int64Index._simple_new(self._data, name=self.name) @@ -485,12 +492,17 @@ def argsort(self, *args, **kwargs) -> np.ndarray: -------- numpy.ndarray.argsort """ + ascending = kwargs.pop("ascending", True) # EA compat nv.validate_argsort(args, kwargs) if self._range.step > 0: - return np.arange(len(self)) + result = np.arange(len(self)) else: - return np.arange(len(self) - 1, -1, -1) + result = np.arange(len(self) - 1, -1, -1) + + if not ascending: + result = result[::-1] + return result def factorize( self, sort: bool = False, na_sentinel: Optional[int] = -1 @@ -870,7 +882,11 @@ def _arith_method(self, other, op): if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: step = op - other = extract_array(other, extract_numpy=True) + if isinstance(other, RangeIndex): + # TODO: in some cases we can likely be more efficient, especially add/sub + other = other._values + else: + other = extract_array(other, extract_numpy=True) attrs = self._get_attributes_dict() left, right = self, other diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 8704d757c3289..127570bc5bf47 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -47,7 +47,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import isna, na_value_for_dtype -from pandas import Categorical, Index, MultiIndex +from pandas import Categorical, Index, MultiIndex, RangeIndex from pandas.core import groupby import pandas.core.algorithms as algos import pandas.core.common as com @@ -2032,8 +2032,14 @@ def _factorize_keys( (array([0, 1, 2]), array([0, 1]), 3) """ # Some pre-processing for non-ndarray lk / rk - lk = extract_array(lk, extract_numpy=True) - rk = extract_array(rk, extract_numpy=True) + if not isinstance(lk, RangeIndex): + lk = extract_array(lk, extract_numpy=True) + else: + lk = np.array(lk) # TODO: more efficient option? + if not isinstance(rk, RangeIndex): + rk = extract_array(rk, extract_numpy=True) + else: + rk = np.array(rk) # TODO: more efficient option? 
if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): # Extract the ndarray (UTC-localized) values diff --git a/pandas/core/series.py b/pandas/core/series.py index 7d97c9f6189f3..386d216ac9bdd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -94,6 +94,7 @@ Float64Index, Index, MultiIndex, + RangeIndex, ensure_index, ) import pandas.core.indexes.base as ibase @@ -4994,7 +4995,10 @@ def _cmp_method(self, other, op): raise ValueError("Can only compare identically-labeled Series objects") lvalues = extract_array(self, extract_numpy=True) - rvalues = extract_array(other, extract_numpy=True) + if isinstance(other, RangeIndex): + rvalues = other._values + else: + rvalues = extract_array(other, extract_numpy=True) res_values = ops.comparison_op(lvalues, rvalues, op) @@ -5005,7 +5009,10 @@ def _logical_method(self, other, op): self, other = ops.align_method_SERIES(self, other, align_asobject=True) lvalues = extract_array(self, extract_numpy=True) - rvalues = extract_array(other, extract_numpy=True) + if isinstance(other, RangeIndex): + rvalues = other._values + else: + rvalues = extract_array(other, extract_numpy=True) res_values = ops.logical_op(lvalues, rvalues, op) return self._construct_result(res_values, name=res_name) @@ -5015,7 +5022,11 @@ def _arith_method(self, other, op): self, other = ops.align_method_SERIES(self, other) lvalues = extract_array(self, extract_numpy=True) - rvalues = extract_array(other, extract_numpy=True) + if isinstance(other, RangeIndex): + rvalues = other._values + else: + rvalues = extract_array(other, extract_numpy=True) + result = ops.arithmetic_op(lvalues, rvalues, op) return self._construct_result(result, name=res_name) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index cfbabab491ae4..e722c2e434e0b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -25,7 +25,7 @@ ensure_platform_int, is_extension_array_dtype, ) -from pandas.core.dtypes.generic import ABCMultiIndex +from pandas.core.dtypes.generic import ABCMultiIndex, ABCRangeIndex from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms @@ -361,9 +361,12 @@ def nargsort( mask=mask, ) - items = extract_array(items) + if isinstance(items, ABCRangeIndex): + return items.argsort(ascending=ascending) # TODO: test coverage with key? + elif not isinstance(items, ABCMultiIndex): + items = extract_array(items) if mask is None: - mask = np.asarray(isna(items)) + mask = np.asarray(isna(items)) # TODO: does this exclude MultiIndex too? 
if is_extension_array_dtype(items): return items.argsort(ascending=ascending, kind=kind, na_position=na_position) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index f4f258b559939..44f214269a3fb 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -308,6 +308,7 @@ def test_add_sub_datetimelike_invalid(self, numeric_idx, other, box_with_array): "Concatenation operation is not implemented for NumPy arrays", # pd.array vs np.datetime64 case r"operand type\(s\) all returned NotImplemented from __array_ufunc__", + "can only perform ops with numeric values", ] ) with pytest.raises(TypeError, match=msg): From bd50f1854323e6e6508e97e222b233f3c969f3ce Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Feb 2021 13:35:01 -0800 Subject: [PATCH 02/10] make RangeIndex check part of extract_array --- pandas/core/construction.py | 10 +++++++++- pandas/core/indexes/range.py | 7 ++----- pandas/core/reshape/merge.py | 12 +++--------- pandas/core/series.py | 16 +++------------- 4 files changed, 17 insertions(+), 28 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 0c0084f2492d3..9aa1c620fe1d9 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -60,6 +60,7 @@ ABCExtensionArray, ABCIndex, ABCPandasArray, + ABCRangeIndex, ABCSeries, ) from pandas.core.dtypes.missing import isna @@ -368,7 +369,9 @@ def array( return PandasArray._from_sequence(data, dtype=dtype, copy=copy) -def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayLike]: +def extract_array( + obj: object, extract_numpy: bool = False, range_compat: bool = False +) -> Union[Any, ArrayLike]: """ Extract the ndarray or ExtensionArray from a Series or Index. @@ -383,6 +386,9 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayL extract_numpy : bool, default False Whether to extract the ndarray from a PandasArray + range_compat : bool, default False + If we have a RangeIndex, return range._values if True, otherwise raise. 
+ Returns ------- arr : object @@ -411,6 +417,8 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Union[Any, ArrayL array([1, 2, 3]) """ if isinstance(obj, (ABCIndex, ABCSeries)): + if range_compat and isinstance(obj, ABCRangeIndex): + return obj._values obj = obj.array if extract_numpy and isinstance(obj, ABCPandasArray): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f73707d36d478..3615d85273f99 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -896,11 +896,8 @@ def _arith_method(self, other, op): if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]: step = op - if isinstance(other, RangeIndex): - # TODO: in some cases we can likely be more efficient, especially add/sub - other = other._values - else: - other = extract_array(other, extract_numpy=True) + # TODO: if other is a RangeIndex we may have more efficient options + other = extract_array(other, extract_numpy=True, range_compat=True) attrs = self._get_attributes_dict() left, right = self, other diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 9ee3b0a21747f..ce06fc55ee8e6 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -70,7 +70,6 @@ Categorical, Index, MultiIndex, - RangeIndex, ) from pandas.core import groupby import pandas.core.algorithms as algos @@ -2059,14 +2058,9 @@ def _factorize_keys( (array([0, 1, 2]), array([0, 1]), 3) """ # Some pre-processing for non-ndarray lk / rk - if not isinstance(lk, RangeIndex): - lk = extract_array(lk, extract_numpy=True) - else: - lk = np.array(lk) # TODO: more efficient option? - if not isinstance(rk, RangeIndex): - rk = extract_array(rk, extract_numpy=True) - else: - rk = np.array(rk) # TODO: more efficient option? + lk = extract_array(lk, extract_numpy=True, range_compat=True) + rk = extract_array(rk, extract_numpy=True, range_compat=True) + # TODO: if either is a RangeIndex, we can likely factorize more efficiently? 
if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): # Extract the ndarray (UTC-localized) values diff --git a/pandas/core/series.py b/pandas/core/series.py index 6ec6b49995d10..3f43b27cd88ce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -116,7 +116,6 @@ Float64Index, Index, MultiIndex, - RangeIndex, ensure_index, ) import pandas.core.indexes.base as ibase @@ -5011,10 +5010,7 @@ def _cmp_method(self, other, op): raise ValueError("Can only compare identically-labeled Series objects") lvalues = extract_array(self, extract_numpy=True) - if isinstance(other, RangeIndex): - rvalues = other._values - else: - rvalues = extract_array(other, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True, range_compat=True) res_values = ops.comparison_op(lvalues, rvalues, op) @@ -5025,10 +5021,7 @@ def _logical_method(self, other, op): self, other = ops.align_method_SERIES(self, other, align_asobject=True) lvalues = extract_array(self, extract_numpy=True) - if isinstance(other, RangeIndex): - rvalues = other._values - else: - rvalues = extract_array(other, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True, range_compat=True) res_values = ops.logical_op(lvalues, rvalues, op) return self._construct_result(res_values, name=res_name) @@ -5038,10 +5031,7 @@ def _arith_method(self, other, op): self, other = ops.align_method_SERIES(self, other) lvalues = extract_array(self, extract_numpy=True) - if isinstance(other, RangeIndex): - rvalues = other._values - else: - rvalues = extract_array(other, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True, range_compat=True) result = ops.arithmetic_op(lvalues, rvalues, op) From 92ccdfef14ffc4f6904fe221bf0d3fd686560634 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 3 Mar 2021 14:46:56 -0800 Subject: [PATCH 03/10] merge master --- .github/workflows/ci.yml | 5 +- .pre-commit-config.yaml | 14 +- asv_bench/benchmarks/algorithms.py | 30 +- asv_bench/benchmarks/algos/isin.py | 6 + asv_bench/benchmarks/attrs_caching.py | 18 - asv_bench/benchmarks/dtypes.py | 22 - asv_bench/benchmarks/gil.py | 1 + asv_bench/benchmarks/groupby.py | 33 +- asv_bench/benchmarks/indexing_engines.py | 7 + asv_bench/benchmarks/inference.py | 216 +++ asv_bench/benchmarks/libs.py | 66 +- asv_bench/benchmarks/reindex.py | 20 +- asv_bench/benchmarks/rolling.py | 2 +- asv_bench/benchmarks/timedelta.py | 36 - asv_bench/benchmarks/timeseries.py | 157 +- asv_bench/benchmarks/tslibs/normalize.py | 5 + asv_bench/benchmarks/tslibs/period.py | 5 + asv_bench/benchmarks/tslibs/resolution.py | 31 +- asv_bench/benchmarks/tslibs/timestamp.py | 25 +- asv_bench/benchmarks/tslibs/tslib.py | 14 +- asv_bench/benchmarks/tslibs/tz_convert.py | 8 +- conda.recipe/bld.bat | 2 - conda.recipe/build.sh | 2 - conda.recipe/meta.yaml | 40 - doc/source/getting_started/install.rst | 9 + doc/source/reference/io.rst | 7 + doc/source/user_guide/io.rst | 456 ++++++ doc/source/whatsnew/index.rst | 1 + doc/source/whatsnew/v1.2.3.rst | 27 +- doc/source/whatsnew/v1.2.4.rst | 48 + doc/source/whatsnew/v1.3.0.rst | 82 +- environment.yml | 2 +- pandas/__init__.py | 1 + pandas/_libs/groupby.pyx | 14 +- pandas/_libs/lib.pyx | 4 +- pandas/_libs/tslib.pyx | 15 +- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 16 +- pandas/_libs/window/aggregations.pyx | 130 +- pandas/core/algorithms.py | 42 +- pandas/core/apply.py | 70 +- pandas/core/arrays/categorical.py | 37 +- pandas/core/arrays/datetimelike.py | 19 +- 
pandas/core/arrays/datetimes.py | 67 +- pandas/core/arrays/integer.py | 9 - pandas/core/arrays/interval.py | 4 +- pandas/core/arrays/numeric.py | 9 + pandas/core/arrays/string_arrow.py | 21 +- pandas/core/arrays/timedeltas.py | 13 +- pandas/core/common.py | 18 + pandas/core/construction.py | 9 +- pandas/core/dtypes/cast.py | 351 +++-- pandas/core/dtypes/concat.py | 4 +- pandas/core/frame.py | 284 +++- pandas/core/generic.py | 8 +- pandas/core/groupby/generic.py | 6 + pandas/core/groupby/groupby.py | 3 +- pandas/core/groupby/ops.py | 142 +- pandas/core/indexes/base.py | 25 +- pandas/core/indexes/datetimelike.py | 40 +- pandas/core/indexes/extension.py | 22 +- pandas/core/indexes/multi.py | 7 +- pandas/core/indexes/range.py | 4 +- pandas/core/indexing.py | 27 +- pandas/core/internals/array_manager.py | 70 +- pandas/core/internals/blocks.py | 90 +- pandas/core/internals/construction.py | 242 +-- pandas/core/internals/managers.py | 9 + pandas/core/missing.py | 66 +- pandas/core/nanops.py | 3 +- pandas/core/reshape/reshape.py | 5 +- pandas/core/series.py | 44 +- pandas/core/sorting.py | 9 +- pandas/core/tools/datetimes.py | 256 ++-- pandas/core/window/ewm.py | 73 +- pandas/io/api.py | 1 + pandas/io/formats/format.py | 135 +- pandas/io/formats/style.py | 45 +- pandas/io/formats/xml.py | 618 ++++++++ pandas/io/xml.py | 944 ++++++++++++ pandas/tests/api/test_api.py | 1 + pandas/tests/apply/conftest.py | 18 + pandas/tests/apply/test_frame_apply.py | 301 +--- .../apply/test_frame_apply_relabeling.py | 10 - pandas/tests/apply/test_frame_transform.py | 78 +- pandas/tests/apply/test_invalid_arg.py | 284 ++++ pandas/tests/apply/test_series_apply.py | 25 - pandas/tests/apply/test_series_transform.py | 37 +- .../arrays/categorical/test_constructors.py | 6 + .../tests/arrays/floating/test_arithmetic.py | 21 + .../tests/arrays/integer/test_arithmetic.py | 40 +- pandas/tests/arrays/masked/test_arithmetic.py | 12 +- pandas/tests/arrays/test_datetimelike.py | 20 +- pandas/tests/base/test_constructors.py | 4 +- .../dtypes/cast/test_construct_ndarray.py | 10 + pandas/tests/dtypes/test_dtypes.py | 2 +- pandas/tests/dtypes/test_inference.py | 34 +- pandas/tests/dtypes/test_missing.py | 28 +- pandas/tests/extension/base/groupby.py | 6 + pandas/tests/extension/test_boolean.py | 3 + pandas/tests/extension/test_string.py | 48 +- .../frame/constructors/test_from_records.py | 37 +- pandas/tests/frame/indexing/test_setitem.py | 13 +- pandas/tests/frame/methods/test_astype.py | 8 - pandas/tests/frame/methods/test_fillna.py | 11 +- pandas/tests/frame/methods/test_rename.py | 1 + pandas/tests/frame/methods/test_replace.py | 7 + pandas/tests/frame/methods/test_sort_index.py | 17 + pandas/tests/frame/test_constructors.py | 74 +- pandas/tests/groupby/test_allowlist.py | 5 +- pandas/tests/groupby/test_apply.py | 13 +- pandas/tests/groupby/test_categorical.py | 10 +- pandas/tests/groupby/test_function.py | 8 + pandas/tests/groupby/test_groupby.py | 3 + pandas/tests/groupby/test_quantile.py | 5 + pandas/tests/groupby/test_sample.py | 10 + .../tests/groupby/transform/test_transform.py | 11 +- .../tests/indexes/categorical/test_append.py | 62 + .../indexes/categorical/test_category.py | 67 +- .../tests/indexes/categorical/test_formats.py | 6 + .../datetimelike_/test_drop_duplicates.py | 80 + .../tests/indexes/datetimelike_/test_nat.py | 54 + .../indexes/datetimelike_/test_sort_values.py | 317 ++++ .../datetimelike_/test_value_counts.py | 103 ++ .../indexes/datetimes/methods/test_repeat.py | 78 + 
.../tests/indexes/datetimes/test_datetime.py | 22 - .../tests/indexes/datetimes/test_indexing.py | 7 + pandas/tests/indexes/datetimes/test_misc.py | 17 + pandas/tests/indexes/datetimes/test_ops.py | 254 ---- .../indexes/datetimes/test_partial_slicing.py | 6 - .../indexes/period/methods/test_is_full.py | 23 + .../indexes/period/methods/test_repeat.py | 26 + pandas/tests/indexes/period/test_join.py | 2 +- pandas/tests/indexes/period/test_ops.py | 276 ---- pandas/tests/indexes/period/test_period.py | 106 -- .../tests/indexes/period/test_period_range.py | 8 + pandas/tests/indexes/test_common.py | 4 +- pandas/tests/indexes/test_index_new.py | 7 + .../indexes/timedeltas/methods/test_repeat.py | 34 + .../tests/indexes/timedeltas/test_indexing.py | 28 +- pandas/tests/indexes/timedeltas/test_ops.py | 186 --- .../timedeltas/test_partial_slicing.py | 42 - .../indexes/timedeltas/test_timedelta.py | 25 - pandas/tests/indexing/test_categorical.py | 52 +- pandas/tests/indexing/test_iloc.py | 14 + pandas/tests/indexing/test_loc.py | 88 ++ pandas/tests/io/data/xml/baby_names.xml | 53 + pandas/tests/io/data/xml/books.xml | 21 + pandas/tests/io/data/xml/cta_rail_lines.kml | 92 ++ pandas/tests/io/data/xml/flatten_doc.xsl | 18 + pandas/tests/io/data/xml/row_field_output.xsl | 19 + pandas/tests/io/excel/test_writers.py | 4 +- pandas/tests/io/formats/style/test_style.py | 13 + pandas/tests/io/formats/test_format.py | 64 +- pandas/tests/io/formats/test_to_csv.py | 2 +- pandas/tests/io/formats/test_to_html.py | 4 +- pandas/tests/io/json/test_pandas.py | 70 +- pandas/tests/io/json/test_readlines.py | 26 +- .../tests/io/parser/common/test_chunksize.py | 2 +- pandas/tests/io/parser/test_parse_dates.py | 2 +- pandas/tests/io/parser/test_read_fwf.py | 5 +- pandas/tests/io/pytables/test_append.py | 4 +- pandas/tests/io/pytables/test_errors.py | 3 +- .../tests/io/pytables/test_file_handling.py | 9 +- pandas/tests/io/pytables/test_read.py | 8 +- pandas/tests/io/pytables/test_select.py | 2 +- pandas/tests/io/pytables/test_store.py | 24 +- pandas/tests/io/pytables/test_timezones.py | 4 +- pandas/tests/io/test_clipboard.py | 9 +- pandas/tests/io/test_common.py | 10 +- pandas/tests/io/test_feather.py | 8 +- pandas/tests/io/test_parquet.py | 8 +- pandas/tests/io/test_pickle.py | 2 +- pandas/tests/io/test_sql.py | 14 +- pandas/tests/io/test_stata.py | 27 +- pandas/tests/io/xml/test_to_xml.py | 1301 +++++++++++++++++ pandas/tests/io/xml/test_xml.py | 1097 ++++++++++++++ pandas/tests/resample/test_base.py | 3 + pandas/tests/reshape/concat/test_concat.py | 49 +- pandas/tests/reshape/concat/test_dataframe.py | 38 +- pandas/tests/reshape/concat/test_datetimes.py | 60 +- pandas/tests/reshape/concat/test_empty.py | 32 +- pandas/tests/reshape/concat/test_index.py | 14 +- pandas/tests/reshape/concat/test_series.py | 7 +- pandas/tests/reshape/merge/test_join.py | 8 +- pandas/tests/reshape/merge/test_merge.py | 110 +- pandas/tests/reshape/merge/test_merge_asof.py | 150 +- pandas/tests/reshape/merge/test_multi.py | 14 +- pandas/tests/reshape/test_crosstab.py | 19 +- pandas/tests/reshape/test_cut.py | 4 +- pandas/tests/reshape/test_melt.py | 10 +- pandas/tests/reshape/test_pivot.py | 75 +- .../tests/scalar/timedelta/test_arithmetic.py | 3 +- .../scalar/timedelta/test_constructors.py | 2 +- .../series/accessors/test_dt_accessor.py | 16 +- pandas/tests/series/indexing/test_datetime.py | 2 +- pandas/tests/series/indexing/test_getitem.py | 34 +- pandas/tests/series/indexing/test_indexing.py | 7 +- pandas/tests/series/indexing/test_where.py | 2 
+- .../tests/series/methods/test_interpolate.py | 6 +- pandas/tests/series/methods/test_shift.py | 4 +- .../tests/series/methods/test_sort_index.py | 14 + pandas/tests/series/test_constructors.py | 89 +- pandas/tests/series/test_repr.py | 2 +- pandas/tests/series/test_unary.py | 59 +- pandas/tests/tools/test_to_datetime.py | 17 +- pandas/tests/window/test_ewm.py | 23 + pandas/util/_exceptions.py | 2 +- pandas/util/_validators.py | 48 +- requirements-dev.txt | 2 +- setup.cfg | 2 +- 211 files changed, 9128 insertions(+), 3369 deletions(-) delete mode 100644 conda.recipe/bld.bat delete mode 100644 conda.recipe/build.sh delete mode 100644 conda.recipe/meta.yaml create mode 100644 doc/source/whatsnew/v1.2.4.rst create mode 100644 pandas/io/formats/xml.py create mode 100644 pandas/io/xml.py create mode 100644 pandas/tests/apply/conftest.py create mode 100644 pandas/tests/indexes/categorical/test_append.py create mode 100644 pandas/tests/indexes/datetimelike_/test_drop_duplicates.py create mode 100644 pandas/tests/indexes/datetimelike_/test_nat.py create mode 100644 pandas/tests/indexes/datetimelike_/test_sort_values.py create mode 100644 pandas/tests/indexes/datetimelike_/test_value_counts.py create mode 100644 pandas/tests/indexes/datetimes/methods/test_repeat.py create mode 100644 pandas/tests/indexes/period/methods/test_is_full.py create mode 100644 pandas/tests/indexes/period/methods/test_repeat.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_repeat.py delete mode 100644 pandas/tests/indexes/timedeltas/test_partial_slicing.py create mode 100644 pandas/tests/io/data/xml/baby_names.xml create mode 100644 pandas/tests/io/data/xml/books.xml create mode 100644 pandas/tests/io/data/xml/cta_rail_lines.kml create mode 100644 pandas/tests/io/data/xml/flatten_doc.xsl create mode 100644 pandas/tests/io/data/xml/row_field_output.xsl create mode 100644 pandas/tests/io/xml/test_to_xml.py create mode 100644 pandas/tests/io/xml/test_xml.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6bb9753fcea65..c03722e32fea9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -153,11 +153,14 @@ jobs: run: | source activate pandas-dev pytest pandas/tests/frame/methods --array-manager + pytest pandas/tests/frame/test_constructors.py --array-manager + pytest pandas/tests/frame/constructors/ --array-manager pytest pandas/tests/frame/test_reductions.py --array-manager pytest pandas/tests/reductions/ --array-manager pytest pandas/tests/generic/test_generic.py --array-manager pytest pandas/tests/arithmetic/ --array-manager - pytest pandas/tests/groupby/aggregate/ --array-manager + pytest pandas/tests/groupby/ --array-manager + pytest pandas/tests/resample/ --array-manager pytest pandas/tests/reshape/merge --array-manager # indexing subset (temporary since other tests don't pass yet) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 47a9ae592f940..3966e8931162c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,13 +29,15 @@ repos: - id: pyupgrade args: [--py37-plus, --keep-runtime-typing] - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.7.1 + rev: v1.8.0 hooks: - id: rst-backticks - id: rst-directive-colons - types: [text] + types: [text] # overwrite types: [rst] + types_or: [python, rst] - id: rst-inline-touching-normal - types: [text] + types: [text] # overwrite types: [rst] + types_or: [python, rst] - repo: local hooks: - id: pip_to_conda @@ -212,8 +214,8 @@ repos: rev: v0.1.7 hooks: - id: 
no-string-hints -- repo: https://github.com/MarcoGorelli/abs-imports - rev: v0.1.2 +- repo: https://github.com/MarcoGorelli/absolufy-imports + rev: v0.2.1 hooks: - - id: abs-imports + - id: absolufy-imports files: ^pandas/ diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 65e52e03c43c7..aecc609df574e 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -2,8 +2,6 @@ import numpy as np -from pandas._libs import lib - import pandas as pd from .pandas_vb_common import tm @@ -16,19 +14,6 @@ pass -class MaybeConvertObjects: - def setup(self): - N = 10 ** 5 - - data = list(range(N)) - data[0] = pd.NaT - data = np.array(data) - self.data = data - - def time_maybe_convert_objects(self): - lib.maybe_convert_objects(self.data) - - class Factorize: params = [ @@ -43,23 +28,36 @@ class Factorize: "datetime64[ns, tz]", "Int64", "boolean", + "string_arrow", ], ] param_names = ["unique", "sort", "dtype"] def setup(self, unique, sort, dtype): N = 10 ** 5 + string_index = tm.makeStringIndex(N) + try: + from pandas.core.arrays.string_arrow import ArrowStringDtype + + string_arrow = pd.array(string_index, dtype=ArrowStringDtype()) + except ImportError: + string_arrow = None + + if dtype == "string_arrow" and not string_arrow: + raise NotImplementedError + data = { "int": pd.Int64Index(np.arange(N)), "uint": pd.UInt64Index(np.arange(N)), "float": pd.Float64Index(np.random.randn(N)), - "string": tm.makeStringIndex(N), + "string": string_index, "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N), "datetime64[ns, tz]": pd.date_range( "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo" ), "Int64": pd.array(np.arange(N), dtype="Int64"), "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"), + "string_arrow": string_arrow, }[dtype] if not unique: data = data.repeat(5) diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index 75a96e5b691ca..a8b8a193dbcfc 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ b/asv_bench/benchmarks/algos/isin.py @@ -273,6 +273,7 @@ class IsInLongSeriesLookUpDominates: def setup(self, dtype, MaxNumber, series_type): N = 10 ** 7 + # https://github.com/pandas-dev/pandas/issues/39844 if not np_version_under1p20 and dtype in ("Int64", "Float64"): raise NotImplementedError @@ -303,6 +304,11 @@ class IsInLongSeriesValuesDominate: def setup(self, dtype, series_type): N = 10 ** 7 + + # https://github.com/pandas-dev/pandas/issues/39844 + if not np_version_under1p20 and dtype in ("Int64", "Float64"): + raise NotImplementedError + if series_type == "random": np.random.seed(42) vals = np.random.randint(0, 10 * N, N) diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py index 9c7b107b478d4..d4366c42f96aa 100644 --- a/asv_bench/benchmarks/attrs_caching.py +++ b/asv_bench/benchmarks/attrs_caching.py @@ -3,11 +3,6 @@ import pandas as pd from pandas import DataFrame -try: - from pandas.util import cache_readonly -except ImportError: - from pandas.util.decorators import cache_readonly - try: from pandas.core.construction import extract_array except ImportError: @@ -53,17 +48,4 @@ def time_extract_array_numpy(self, dtype): extract_array(self.series, extract_numpy=True) -class CacheReadonly: - def setup(self): - class Foo: - @cache_readonly - def prop(self): - return 5 - - self.obj = Foo() - - def time_cache_readonly(self): - self.obj.prop - - from .pandas_vb_common import setup # noqa: F401 
isort:skip diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py index 9209e851289bb..c561b80ed1ca6 100644 --- a/asv_bench/benchmarks/dtypes.py +++ b/asv_bench/benchmarks/dtypes.py @@ -13,7 +13,6 @@ from .pandas_vb_common import ( datetime_dtypes, extension_dtypes, - lib, numeric_dtypes, string_dtypes, ) @@ -49,27 +48,6 @@ def time_pandas_dtype_invalid(self, dtype): pass -class InferDtypes: - param_names = ["dtype"] - data_dict = { - "np-object": np.array([1] * 100000, dtype="O"), - "py-object": [1] * 100000, - "np-null": np.array([1] * 50000 + [np.nan] * 50000), - "py-null": [1] * 50000 + [None] * 50000, - "np-int": np.array([1] * 100000, dtype=int), - "np-floating": np.array([1.0] * 100000, dtype=float), - "empty": [], - "bytes": [b"a"] * 100000, - } - params = list(data_dict.keys()) - - def time_infer_skipna(self, dtype): - lib.infer_dtype(self.data_dict[dtype], skipna=True) - - def time_infer(self, dtype): - lib.infer_dtype(self.data_dict[dtype], skipna=False) - - class SelectDtypes: params = [ diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 410668ca3c7cf..459046d2decfb 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -125,6 +125,7 @@ def time_take1d(self, dtype): class ParallelKth: + # This depends exclusively on code in _libs/, could go in libs.py number = 1 repeat = 5 diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index fb08c6fdeaedf..9930c61e34b15 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -68,9 +68,18 @@ def time_groupby_apply_dict_return(self): class Apply: - def setup_cache(self): - N = 10 ** 4 - labels = np.random.randint(0, 2000, size=N) + + param_names = ["factor"] + params = [4, 5] + + def setup(self, factor): + N = 10 ** factor + # two cases: + # - small groups: small data (N**4) + many labels (2000) -> average group + # size of 5 (-> larger overhead of slicing method) + # - larger groups: larger data (N**5) + fewer labels (20) -> average group + # size of 5000 + labels = np.random.randint(0, 2000 if factor == 4 else 20, size=N) labels2 = np.random.randint(0, 3, size=N) df = DataFrame( { @@ -80,13 +89,13 @@ def setup_cache(self): "value2": ["foo", "bar", "baz", "qux"] * (N // 4), } ) - return df + self.df = df - def time_scalar_function_multi_col(self, df): - df.groupby(["key", "key2"]).apply(lambda x: 1) + def time_scalar_function_multi_col(self, factor): + self.df.groupby(["key", "key2"]).apply(lambda x: 1) - def time_scalar_function_single_col(self, df): - df.groupby("key").apply(lambda x: 1) + def time_scalar_function_single_col(self, factor): + self.df.groupby("key").apply(lambda x: 1) @staticmethod def df_copy_function(g): @@ -94,11 +103,11 @@ def df_copy_function(g): g.name return g.copy() - def time_copy_function_multi_col(self, df): - df.groupby(["key", "key2"]).apply(self.df_copy_function) + def time_copy_function_multi_col(self, factor): + self.df.groupby(["key", "key2"]).apply(self.df_copy_function) - def time_copy_overhead_single_col(self, df): - df.groupby("key").apply(self.df_copy_function) + def time_copy_overhead_single_col(self, factor): + self.df.groupby("key").apply(self.df_copy_function) class Groups: diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py index 44a22dfa77791..30ef7f63dc0dc 100644 --- a/asv_bench/benchmarks/indexing_engines.py +++ b/asv_bench/benchmarks/indexing_engines.py @@ -1,3 +1,10 @@ +""" +Benchmarks in this fiel depend exclusively on 
code in _libs/ + +If a PR does not edit anything in _libs, it is very unlikely that benchmarks +in this file will be affected. +""" + import numpy as np from pandas._libs import index as libindex diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index b6808ace629db..0aa924dabd469 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,8 +1,20 @@ +""" +The functions benchmarked in this file depend _almost_ exclusively on +_libs, but not in a way that is easy to formalize. + +If a PR does not change anything in pandas/_libs/ or pandas/core/tools/, then +it is likely that these benchmarks will be unaffected. +""" + import numpy as np from pandas import ( + NaT, Series, + date_range, + to_datetime, to_numeric, + to_timedelta, ) from .pandas_vb_common import ( @@ -69,6 +81,9 @@ def time_downcast(self, dtype, downcast): class MaybeConvertNumeric: + # maybe_convert_numeric depends _exclusively_ on _libs, could + # go in benchmarks/libs.py + def setup_cache(self): N = 10 ** 6 arr = np.repeat([2 ** 63], N) + np.arange(N).astype("uint64") @@ -81,4 +96,205 @@ def time_convert(self, data): lib.maybe_convert_numeric(data, set(), coerce_numeric=False) +class MaybeConvertObjects: + # maybe_convert_objects depends _almost_ exclusively on _libs, but + # does have some run-time imports from outside of _libs + + def setup(self): + N = 10 ** 5 + + data = list(range(N)) + data[0] = NaT + data = np.array(data) + self.data = data + + def time_maybe_convert_objects(self): + lib.maybe_convert_objects(self.data) + + +class ToDatetimeFromIntsFloats: + def setup(self): + self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64") + self.ts_sec_float = self.ts_sec.astype("float64") + + self.ts_nanosec = 1_000_000 * self.ts_sec + self.ts_nanosec_float = self.ts_nanosec.astype("float64") + + # speed of int64 and float64 paths should be comparable + + def time_nanosec_int64(self): + to_datetime(self.ts_nanosec, unit="ns") + + def time_nanosec_float64(self): + to_datetime(self.ts_nanosec_float, unit="ns") + + def time_sec_int64(self): + to_datetime(self.ts_sec, unit="s") + + def time_sec_float64(self): + to_datetime(self.ts_sec_float, unit="s") + + +class ToDatetimeYYYYMMDD: + def setup(self): + rng = date_range(start="1/1/2000", periods=10000, freq="D") + self.stringsD = Series(rng.strftime("%Y%m%d")) + + def time_format_YYYYMMDD(self): + to_datetime(self.stringsD, format="%Y%m%d") + + +class ToDatetimeCacheSmallCount: + + params = ([True, False], [50, 500, 5000, 100000]) + param_names = ["cache", "count"] + + def setup(self, cache, count): + rng = date_range(start="1/1/1971", periods=count) + self.unique_date_strings = rng.strftime("%Y-%m-%d").tolist() + + def time_unique_date_strings(self, cache, count): + to_datetime(self.unique_date_strings, cache=cache) + + +class ToDatetimeISO8601: + def setup(self): + rng = date_range(start="1/1/2000", periods=20000, freq="H") + self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() + self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist() + self.strings_tz_space = [ + x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng + ] + + def time_iso8601(self): + to_datetime(self.strings) + + def time_iso8601_nosep(self): + to_datetime(self.strings_nosep) + + def time_iso8601_format(self): + to_datetime(self.strings, format="%Y-%m-%d %H:%M:%S") + + def time_iso8601_format_no_sep(self): + to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S") + + def time_iso8601_tz_spaceformat(self): + 
to_datetime(self.strings_tz_space) + + +class ToDatetimeNONISO8601: + def setup(self): + N = 10000 + half = N // 2 + ts_string_1 = "March 1, 2018 12:00:00+0400" + ts_string_2 = "March 1, 2018 12:00:00+0500" + self.same_offset = [ts_string_1] * N + self.diff_offset = [ts_string_1] * half + [ts_string_2] * half + + def time_same_offset(self): + to_datetime(self.same_offset) + + def time_different_offset(self): + to_datetime(self.diff_offset) + + +class ToDatetimeFormatQuarters: + def setup(self): + self.s = Series(["2Q2005", "2Q05", "2005Q1", "05Q1"] * 10000) + + def time_infer_quarter(self): + to_datetime(self.s) + + +class ToDatetimeFormat: + def setup(self): + N = 100000 + self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N) + self.s2 = self.s.str.replace(":\\S+$", "") + + self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N + self.diff_offset = [ + f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10) + ] * (N // 10) + + def time_exact(self): + to_datetime(self.s2, format="%d%b%y") + + def time_no_exact(self): + to_datetime(self.s, format="%d%b%y", exact=False) + + def time_same_offset(self): + to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + + def time_different_offset(self): + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + + def time_same_offset_to_utc(self): + to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) + + def time_different_offset_to_utc(self): + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) + + +class ToDatetimeCache: + + params = [True, False] + param_names = ["cache"] + + def setup(self, cache): + N = 10000 + self.unique_numeric_seconds = list(range(N)) + self.dup_numeric_seconds = [1000] * N + self.dup_string_dates = ["2000-02-11"] * N + self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N + + def time_unique_seconds_and_unit(self, cache): + to_datetime(self.unique_numeric_seconds, unit="s", cache=cache) + + def time_dup_seconds_and_unit(self, cache): + to_datetime(self.dup_numeric_seconds, unit="s", cache=cache) + + def time_dup_string_dates(self, cache): + to_datetime(self.dup_string_dates, cache=cache) + + def time_dup_string_dates_and_format(self, cache): + to_datetime(self.dup_string_dates, format="%Y-%m-%d", cache=cache) + + def time_dup_string_tzoffset_dates(self, cache): + to_datetime(self.dup_string_with_tz, cache=cache) + + +class ToTimedelta: + def setup(self): + self.ints = np.random.randint(0, 60, size=10000) + self.str_days = [] + self.str_seconds = [] + for i in self.ints: + self.str_days.append(f"{i} days") + self.str_seconds.append(f"00:00:{i:02d}") + + def time_convert_int(self): + to_timedelta(self.ints, unit="s") + + def time_convert_string_days(self): + to_timedelta(self.str_days) + + def time_convert_string_seconds(self): + to_timedelta(self.str_seconds) + + +class ToTimedeltaErrors: + + params = ["coerce", "ignore"] + param_names = ["errors"] + + def setup(self, errors): + ints = np.random.randint(0, 60, size=10000) + self.arr = [f"{i} days" for i in ints] + self.arr[-1] = "apple" + + def time_convert(self, errors): + to_timedelta(self.arr, errors=errors) + + from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/libs.py b/asv_bench/benchmarks/libs.py index f5c2397945cea..4e3f938a33eb1 100644 --- a/asv_bench/benchmarks/libs.py +++ b/asv_bench/benchmarks/libs.py @@ -1,10 +1,14 @@ """ Benchmarks for code in pandas/_libs, excluding pandas/_libs/tslibs, -which has its own directory +which has its own directory. 
+ +If a PR does not edit anything in _libs/, then it is unlikely that thes +benchmarks will be affected. """ import numpy as np from pandas._libs.lib import ( + infer_dtype, is_list_like, is_scalar, ) @@ -14,6 +18,17 @@ NaT, ) +from .pandas_vb_common import ( + lib, + tm, +) + +try: + from pandas.util import cache_readonly +except ImportError: + from pandas.util.decorators import cache_readonly + + # TODO: share with something in pd._testing? scalars = [ 0, @@ -40,3 +55,52 @@ def time_is_list_like(self, param): def time_is_scalar(self, param): is_scalar(param) + + +class FastZip: + def setup(self): + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + col_array = np.vstack([key1, key2, np.random.randn(N * K)]) + col_array2 = col_array.copy() + col_array2[:, :10000] = np.nan + self.col_array_list = list(col_array) + + def time_lib_fast_zip(self): + lib.fast_zip(self.col_array_list) + + +class InferDtype: + param_names = ["dtype"] + data_dict = { + "np-object": np.array([1] * 100000, dtype="O"), + "py-object": [1] * 100000, + "np-null": np.array([1] * 50000 + [np.nan] * 50000), + "py-null": [1] * 50000 + [None] * 50000, + "np-int": np.array([1] * 100000, dtype=int), + "np-floating": np.array([1.0] * 100000, dtype=float), + "empty": [], + "bytes": [b"a"] * 100000, + } + params = list(data_dict.keys()) + + def time_infer_dtype_skipna(self, dtype): + infer_dtype(self.data_dict[dtype], skipna=True) + + def time_infer_dtype(self, dtype): + infer_dtype(self.data_dict[dtype], skipna=False) + + +class CacheReadonly: + def setup(self): + class Foo: + @cache_readonly + def prop(self): + return 5 + + self.obj = Foo() + + def time_cache_readonly(self): + self.obj.prop diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 65392f2cea65b..5181b983c9f7a 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -9,10 +9,7 @@ period_range, ) -from .pandas_vb_common import ( - lib, - tm, -) +from .pandas_vb_common import tm class Reindex: @@ -155,19 +152,4 @@ def time_align_series_irregular_string(self): self.x + self.y -class LibFastZip: - def setup(self): - N = 10000 - K = 10 - key1 = tm.makeStringIndex(N).values.repeat(K) - key2 = tm.makeStringIndex(N).values.repeat(K) - col_array = np.vstack([key1, key2, np.random.randn(N * K)]) - col_array2 = col_array.copy() - col_array2[:, :10000] = np.nan - self.col_array_list = list(col_array) - - def time_lib_fast_zip(self): - lib.fast_zip(self.col_array_list) - - from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 0c23aa59c4608..d35770b720f7a 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -114,7 +114,7 @@ def time_ewm(self, constructor, window, dtype, method): getattr(self.ewm, method)() def time_ewm_times(self, constructor, window, dtype, method): - self.ewm.mean() + self.ewm_times.mean() class VariableWindowMethods(Methods): diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 9e221ee030e6d..cb0e4455e1a56 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -3,49 +3,13 @@ benchmarks.tslibs.timedelta for benchmarks that rely only on tslibs. 
""" -import numpy as np - from pandas import ( DataFrame, Series, timedelta_range, - to_timedelta, ) -class ToTimedelta: - def setup(self): - self.ints = np.random.randint(0, 60, size=10000) - self.str_days = [] - self.str_seconds = [] - for i in self.ints: - self.str_days.append(f"{i} days") - self.str_seconds.append(f"00:00:{i:02d}") - - def time_convert_int(self): - to_timedelta(self.ints, unit="s") - - def time_convert_string_days(self): - to_timedelta(self.str_days) - - def time_convert_string_seconds(self): - to_timedelta(self.str_seconds) - - -class ToTimedeltaErrors: - - params = ["coerce", "ignore"] - param_names = ["errors"] - - def setup(self, errors): - ints = np.random.randint(0, 60, size=10000) - self.arr = [f"{i} days" for i in ints] - self.arr[-1] = "apple" - - def time_convert(self, errors): - to_timedelta(self.arr, errors=errors) - - class DatetimeAccessor: def setup_cache(self): N = 100000 diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 94498e54f0f06..5b123c7127c28 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -9,7 +9,6 @@ date_range, period_range, timedelta_range, - to_datetime, ) from pandas.tseries.frequencies import infer_freq @@ -97,12 +96,12 @@ def setup(self, tz): idx = date_range(start="1/1/2000", periods=1000, freq="H", tz=tz) self.df = DataFrame(np.random.randn(1000, 2), index=idx) - def time_reest_datetimeindex(self, tz): + def time_reset_datetimeindex(self, tz): self.df.reset_index() class InferFreq: - + # This depends mostly on code in _libs/, tseries/, and core.algos.unique params = [None, "D", "B"] param_names = ["freq"] @@ -273,158 +272,6 @@ def time_lookup_and_cleanup(self): self.ts.index._cleanup() -class ToDatetimeFromIntsFloats: - def setup(self): - self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64") - self.ts_sec_float = self.ts_sec.astype("float64") - - self.ts_nanosec = 1_000_000 * self.ts_sec - self.ts_nanosec_float = self.ts_nanosec.astype("float64") - - # speed of int64 and float64 paths should be comparable - - def time_nanosec_int64(self): - to_datetime(self.ts_nanosec, unit="ns") - - def time_nanosec_float64(self): - to_datetime(self.ts_nanosec_float, unit="ns") - - def time_sec_int64(self): - to_datetime(self.ts_sec, unit="s") - - def time_sec_float64(self): - to_datetime(self.ts_sec_float, unit="s") - - -class ToDatetimeYYYYMMDD: - def setup(self): - rng = date_range(start="1/1/2000", periods=10000, freq="D") - self.stringsD = Series(rng.strftime("%Y%m%d")) - - def time_format_YYYYMMDD(self): - to_datetime(self.stringsD, format="%Y%m%d") - - -class ToDatetimeCacheSmallCount: - - params = ([True, False], [50, 500, 5000, 100000]) - param_names = ["cache", "count"] - - def setup(self, cache, count): - rng = date_range(start="1/1/1971", periods=count) - self.unique_date_strings = rng.strftime("%Y-%m-%d").tolist() - - def time_unique_date_strings(self, cache, count): - to_datetime(self.unique_date_strings, cache=cache) - - -class ToDatetimeISO8601: - def setup(self): - rng = date_range(start="1/1/2000", periods=20000, freq="H") - self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() - self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist() - self.strings_tz_space = [ - x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng - ] - - def time_iso8601(self): - to_datetime(self.strings) - - def time_iso8601_nosep(self): - to_datetime(self.strings_nosep) - - def time_iso8601_format(self): - to_datetime(self.strings, format="%Y-%m-%d 
%H:%M:%S") - - def time_iso8601_format_no_sep(self): - to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S") - - def time_iso8601_tz_spaceformat(self): - to_datetime(self.strings_tz_space) - - -class ToDatetimeNONISO8601: - def setup(self): - N = 10000 - half = N // 2 - ts_string_1 = "March 1, 2018 12:00:00+0400" - ts_string_2 = "March 1, 2018 12:00:00+0500" - self.same_offset = [ts_string_1] * N - self.diff_offset = [ts_string_1] * half + [ts_string_2] * half - - def time_same_offset(self): - to_datetime(self.same_offset) - - def time_different_offset(self): - to_datetime(self.diff_offset) - - -class ToDatetimeFormatQuarters: - def setup(self): - self.s = Series(["2Q2005", "2Q05", "2005Q1", "05Q1"] * 10000) - - def time_infer_quarter(self): - to_datetime(self.s) - - -class ToDatetimeFormat: - def setup(self): - N = 100000 - self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N) - self.s2 = self.s.str.replace(":\\S+$", "") - - self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N - self.diff_offset = [ - f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10) - ] * (N // 10) - - def time_exact(self): - to_datetime(self.s2, format="%d%b%y") - - def time_no_exact(self): - to_datetime(self.s, format="%d%b%y", exact=False) - - def time_same_offset(self): - to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") - - def time_different_offset(self): - to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z") - - def time_same_offset_to_utc(self): - to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) - - def time_different_offset_to_utc(self): - to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) - - -class ToDatetimeCache: - - params = [True, False] - param_names = ["cache"] - - def setup(self, cache): - N = 10000 - self.unique_numeric_seconds = list(range(N)) - self.dup_numeric_seconds = [1000] * N - self.dup_string_dates = ["2000-02-11"] * N - self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N - - def time_unique_seconds_and_unit(self, cache): - to_datetime(self.unique_numeric_seconds, unit="s", cache=cache) - - def time_dup_seconds_and_unit(self, cache): - to_datetime(self.dup_numeric_seconds, unit="s", cache=cache) - - def time_dup_string_dates(self, cache): - to_datetime(self.dup_string_dates, cache=cache) - - def time_dup_string_dates_and_format(self, cache): - to_datetime(self.dup_string_dates, format="%Y-%m-%d", cache=cache) - - def time_dup_string_tzoffset_dates(self, cache): - to_datetime(self.dup_string_with_tz, cache=cache) - - class DatetimeAccessor: params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] diff --git a/asv_bench/benchmarks/tslibs/normalize.py b/asv_bench/benchmarks/tslibs/normalize.py index 292f57d7f5c77..f5f7adbf63995 100644 --- a/asv_bench/benchmarks/tslibs/normalize.py +++ b/asv_bench/benchmarks/tslibs/normalize.py @@ -14,6 +14,7 @@ from .tslib import ( _sizes, _tzs, + tzlocal_obj, ) @@ -30,6 +31,10 @@ def setup(self, size, tz): dti = pd.date_range("2016-01-01", periods=10, tz=tz).repeat(size // 10) self.i8data = dti.asi8 + if size == 10 ** 6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + def time_normalize_i8_timestamps(self, size, tz): normalize_i8_timestamps(self.i8data, tz) diff --git a/asv_bench/benchmarks/tslibs/period.py b/asv_bench/benchmarks/tslibs/period.py index f2efee33c6da7..15a922da7ee76 100644 --- a/asv_bench/benchmarks/tslibs/period.py +++ b/asv_bench/benchmarks/tslibs/period.py @@ -15,6 +15,7 @@ from 
.tslib import ( _sizes, _tzs, + tzlocal_obj, ) try: @@ -129,6 +130,10 @@ class TimeDT64ArrToPeriodArr: param_names = ["size", "freq", "tz"] def setup(self, size, freq, tz): + if size == 10 ** 6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + arr = np.arange(10, dtype="i8").repeat(size // 10) self.i8values = arr diff --git a/asv_bench/benchmarks/tslibs/resolution.py b/asv_bench/benchmarks/tslibs/resolution.py index 0d22ff77ee308..4b52efc188bf4 100644 --- a/asv_bench/benchmarks/tslibs/resolution.py +++ b/asv_bench/benchmarks/tslibs/resolution.py @@ -17,40 +17,33 @@ df.loc[key] = (val.average, val.stdev) """ -from datetime import ( - timedelta, - timezone, -) - -from dateutil.tz import ( - gettz, - tzlocal, -) import numpy as np -import pytz try: from pandas._libs.tslibs import get_resolution except ImportError: from pandas._libs.tslibs.resolution import get_resolution +from .tslib import ( + _sizes, + _tzs, + tzlocal_obj, +) + class TimeResolution: params = ( ["D", "h", "m", "s", "us", "ns"], - [1, 100, 10 ** 4, 10 ** 6], - [ - None, - timezone.utc, - timezone(timedelta(minutes=60)), - pytz.timezone("US/Pacific"), - gettz("Asia/Tokyo"), - tzlocal(), - ], + _sizes, + _tzs, ) param_names = ["unit", "size", "tz"] def setup(self, unit, size, tz): + if size == 10 ** 6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + arr = np.random.randint(0, 10, size=size, dtype="i8") arr = arr.view(f"M8[{unit}]").astype("M8[ns]").view("i8") self.i8data = arr diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 86c8d735bdb27..eda9bce89188c 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -1,30 +1,11 @@ -from datetime import ( - datetime, - timedelta, - timezone, -) - -from dateutil.tz import ( - gettz, - tzlocal, - tzutc, -) +from datetime import datetime + import numpy as np import pytz from pandas import Timestamp -# One case for each type of tzinfo object that has its own code path -# in tzconversion code. -_tzs = [ - None, - pytz.timezone("Europe/Amsterdam"), - gettz("US/Central"), - pytz.UTC, - tzutc(), - timezone(timedelta(minutes=60)), - tzlocal(), -] +from .tslib import _tzs class TimestampConstruction: diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py index 17beada916e46..180f95e7fbda5 100644 --- a/asv_bench/benchmarks/tslibs/tslib.py +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ -32,13 +32,14 @@ except ImportError: from pandas._libs.tslib import ints_to_pydatetime +tzlocal_obj = tzlocal() _tzs = [ None, timezone.utc, timezone(timedelta(minutes=60)), pytz.timezone("US/Pacific"), gettz("Asia/Tokyo"), - tzlocal(), + tzlocal_obj, ] _sizes = [0, 1, 100, 10 ** 4, 10 ** 6] @@ -53,12 +54,15 @@ class TimeIntsToPydatetime: # TODO: fold? freq? 
def setup(self, box, size, tz): + if box == "date" and tz is not None: + # tz is ignored, so avoid running redundant benchmarks + raise NotImplementedError # skip benchmark + if size == 10 ** 6 and tz is _tzs[-1]: + # This is cumbersomely-slow, so skip to trim runtime + raise NotImplementedError # skip benchmark + arr = np.random.randint(0, 10, size=size, dtype="i8") self.i8data = arr def time_ints_to_pydatetime(self, box, size, tz): - if box == "date": - # ints_to_pydatetime does not allow non-None tz with date; - # this will mean doing some duplicate benchmarks - tz = None ints_to_pydatetime(self.i8data, tz, box=box) diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index 89b39c1f8919f..793f43e9bbe35 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -6,6 +6,7 @@ from .tslib import ( _sizes, _tzs, + tzlocal_obj, ) try: @@ -24,6 +25,10 @@ class TimeTZConvert: param_names = ["size", "tz"] def setup(self, size, tz): + if size == 10 ** 6 and tz is tzlocal_obj: + # tzlocal is cumbersomely slow, so skip to keep runtime in check + raise NotImplementedError + arr = np.random.randint(0, 10, size=size, dtype="i8") self.i8data = arr @@ -31,9 +36,6 @@ def time_tz_convert_from_utc(self, size, tz): # effectively: # dti = DatetimeIndex(self.i8data, tz=tz) # dti.tz_localize(None) - if size >= 10 ** 6 and str(tz) == "tzlocal()": - # asv fill will because each call takes 8+seconds - return if old_sig: tz_convert_from_utc(self.i8data, UTC, tz) else: diff --git a/conda.recipe/bld.bat b/conda.recipe/bld.bat deleted file mode 100644 index 284926fae8c04..0000000000000 --- a/conda.recipe/bld.bat +++ /dev/null @@ -1,2 +0,0 @@ -@echo off -%PYTHON% setup.py install diff --git a/conda.recipe/build.sh b/conda.recipe/build.sh deleted file mode 100644 index f341bce6fcf96..0000000000000 --- a/conda.recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -$PYTHON setup.py install diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml deleted file mode 100644 index 53ee212360475..0000000000000 --- a/conda.recipe/meta.yaml +++ /dev/null @@ -1,40 +0,0 @@ -package: - name: pandas - version: {{ environ.get('GIT_DESCRIBE_TAG','').replace('v', '', 1) }} - -build: - number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }} - {% if GIT_DESCRIBE_NUMBER|int == 0 %}string: np{{ CONDA_NPY }}py{{ CONDA_PY }}_0 - {% else %}string: np{{ CONDA_NPY }}py{{ CONDA_PY }}_{{ GIT_BUILD_STR }}{% endif %} - -source: - git_url: ../ - -requirements: - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - host: - - python - - pip - - cython - - numpy - - setuptools >=38.6.0 - - python-dateutil >=2.7.3 - - pytz - run: - - python {{ python }} - - {{ pin_compatible('numpy') }} - - python-dateutil >=2.7.3 - - pytz - -test: - requires: - - pytest - commands: - - python -c "import pandas; pandas.test()" - - -about: - home: https://pandas.pydata.org - license: BSD diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 291799cfe521d..a9c3d637a41e3 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -326,6 +326,15 @@ top-level :func:`~pandas.read_html` function: .. _lxml: https://lxml.de .. 
_tabulate: https://github.com/astanin/python-tabulate

+XML
+^^^
+
+========================= ================== =============================================================
+Dependency                Minimum Version    Notes
+========================= ================== =============================================================
+lxml                      4.3.0              XML parser for read_xml and tree builder for to_xml
+========================= ================== =============================================================
+
 SQL databases
 ^^^^^^^^^^^^^

diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst
index e755ce94812bb..442631de50c7a 100644
--- a/doc/source/reference/io.rst
+++ b/doc/source/reference/io.rst
@@ -68,6 +68,13 @@ HTML

    read_html

+XML
+~~~~
+.. autosummary::
+   :toctree: api/
+
+   read_xml
+
 HDFStore: PyTables (HDF5)
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index d7c1ca8bca598..7e113c93baabe 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -22,6 +22,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
     text;Fixed-Width Text File;:ref:`read_fwf<io.fwf_reader>`
     text;`JSON <https://www.json.org/>`__;:ref:`read_json<io.json_reader>`;:ref:`to_json<io.json_writer>`
     text;`HTML <https://en.wikipedia.org/wiki/HTML>`__;:ref:`read_html<io.read_html>`;:ref:`to_html<io.html>`
+    text;`XML <https://www.w3.org/standards/xml/core/>`__;:ref:`read_xml<io.read_xml>`;:ref:`to_xml<io.xml>`
     text; Local clipboard;:ref:`read_clipboard<io.clipboard>`;:ref:`to_clipboard<io.clipboard>`
     binary;`MS Excel <https://en.wikipedia.org/wiki/Microsoft_Excel>`__;:ref:`read_excel<io.excel_reader>`;:ref:`to_excel<io.excel_writer>`
     binary;`OpenDocument <http://opendocumentformat.org>`__;:ref:`read_excel<io.ods>`;
@@ -2831,6 +2832,461 @@ parse HTML tables in the top-level pandas io function ``read_html``.

+XML
+---
+
+.. _io.read_xml:
+
+Reading XML
+'''''''''''
+
+.. versionadded:: 1.3.0
+
+The top-level :func:`~pandas.io.xml.read_xml` function can accept an XML
+string/file/URL and will parse nodes and attributes into a pandas ``DataFrame``.
+
+.. note::
+
+   Since there is no standard XML structure, and designs can vary in many
+   ways, ``read_xml`` works best with flatter, shallower documents. If an
+   XML document is deeply nested, use the ``stylesheet`` feature to
+   transform the XML into a flatter version.
+
+Let's look at a few examples.
+
+Read an XML string:
+
+.. ipython:: python
+
+    xml = """<?xml version='1.0' encoding='utf-8'?>
+    <bookstore>
+      <book category="cooking">
+        <title lang="en">Everyday Italian</title>
+        <author>Giada De Laurentiis</author>
+        <year>2005</year>
+        <price>30.00</price>
+      </book>
+      <book category="children">
+        <title lang="en">Harry Potter</title>
+        <author>J K. Rowling</author>
+        <year>2005</year>
+        <price>29.99</price>
+      </book>
+      <book category="web">
+        <title lang="en">Learning XML</title>
+        <author>Erik T. Ray</author>
+        <year>2003</year>
+        <price>39.95</price>
+      </book>
+    </bookstore>"""
+
+    df = pd.read_xml(xml)
+    df
+
+Read a URL with no options:
+
+.. ipython:: python
+
+    df = pd.read_xml("https://www.w3schools.com/xml/books.xml")
+    df
+
+Read in the content of the "books.xml" file and pass it to ``read_xml``
+as a string:
+
+.. ipython:: python
+   :suppress:
+
+    rel_path = os.path.join("..", "pandas", "tests", "io", "data", "xml",
+                            "books.xml")
+    file_path = os.path.abspath(rel_path)
+
+.. ipython:: python
+
+    with open(file_path, "r") as f:
+        df = pd.read_xml(f.read())
+    df
+
+Read in the content of the "books.xml" file as an instance of ``StringIO`` or
+``BytesIO`` and pass it to ``read_xml``:
+
+.. ipython:: python
+
+    with open(file_path, "r") as f:
+        sio = StringIO(f.read())
+
+    df = pd.read_xml(sio)
+    df
+
+.. ipython:: python
+
+    with open(file_path, "rb") as f:
+        bio = BytesIO(f.read())
+
+    df = pd.read_xml(bio)
+    df
+
+You can even read XML from AWS S3 buckets, such as the Python Software
+Foundation's IRS 990 form:
+
+.. ipython:: python
+
+    df = pd.read_xml(
+        "s3://irs-form-990/201923199349319487_public.xml",
+        xpath=".//irs:Form990PartVIISectionAGrp",
+        namespaces={"irs": "http://www.irs.gov/efile"}
+    )
+    df
+
+With `lxml`_ as the default ``parser``, you have access to a full-featured
+XML library that extends Python's ElementTree API. One powerful tool is the
+ability to query nodes selectively or conditionally with a more expressive
+XPath:
+
+.. _lxml: https://lxml.de
+
+.. ipython:: python
+
+    df = pd.read_xml(file_path, xpath="//book[year=2005]")
+    df
+
+Specify only elements or only attributes to parse:
+
+.. ipython:: python
+
+    df = pd.read_xml(file_path, elems_only=True)
+    df
+
+.. ipython:: python
+
+    df = pd.read_xml(file_path, attrs_only=True)
+    df
+
+XML documents can have namespaces with prefixes and default namespaces without
+prefixes, both of which are denoted with a special attribute ``xmlns``. In order
+to parse by a node under a namespace context, ``xpath`` must reference a prefix.
+
+For example, the below XML contains a namespace with the prefix ``doc`` and
+URI at ``https://example.com``. In order to parse ``doc:row`` nodes,
+``namespaces`` must be used.
+
+.. ipython:: python
+
+    xml = """<?xml version='1.0' encoding='utf-8'?>
+    <doc:data xmlns:doc="https://example.com">
+      <doc:row>
+        <doc:shape>square</doc:shape>
+        <doc:degrees>360</doc:degrees>
+        <doc:sides>4.0</doc:sides>
+      </doc:row>
+      <doc:row>
+        <doc:shape>circle</doc:shape>
+        <doc:degrees>360</doc:degrees>
+        <doc:sides/>
+      </doc:row>
+      <doc:row>
+        <doc:shape>triangle</doc:shape>
+        <doc:degrees>180</doc:degrees>
+        <doc:sides>3.0</doc:sides>
+      </doc:row>
+    </doc:data>"""
+
+    df = pd.read_xml(xml,
+                     xpath="//doc:row",
+                     namespaces={"doc": "https://example.com"})
+    df
+
+Similarly, an XML document can have a default namespace without a prefix.
+Failing to assign a temporary prefix will return no nodes and raise a
+``ValueError``. But assigning *any* temporary name to the correct URI
+allows parsing by nodes.
+
+.. ipython:: python
+
+    xml = """<?xml version='1.0' encoding='utf-8'?>
+    <data xmlns="https://example.com">
+      <row>
+        <shape>square</shape>
+        <degrees>360</degrees>
+        <sides>4.0</sides>
+      </row>
+      <row>
+        <shape>circle</shape>
+        <degrees>360</degrees>
+        <sides/>
+      </row>
+      <row>
+        <shape>triangle</shape>
+        <degrees>180</degrees>
+        <sides>3.0</sides>
+      </row>
+    </data>"""
+
+    df = pd.read_xml(xml,
+                     xpath="//pandas:row",
+                     namespaces={"pandas": "https://example.com"})
+    df
+
+However, if XPath does not reference node names, such as the default ``/*``,
+then ``namespaces`` is not required.
+
+With `lxml`_ as the parser, you can flatten nested XML documents with an XSLT
+script, which can also be a string/file/URL type. As background, `XSLT`_ is
+a special-purpose language written in a special XML file that can transform
+original XML documents into other XML, HTML, or even text (CSV, JSON, etc.)
+using an XSLT processor.
+
+.. _lxml: https://lxml.de
+.. _XSLT: https://www.w3.org/TR/xslt/
+
+For example, consider this somewhat nested structure of Chicago "L" rides,
+where the station and rides elements encapsulate data in their own sections.
+With the below XSLT, ``lxml`` can transform the original nested document into
+a flatter output (shown below for demonstration) for easier parsing into a
+``DataFrame``:
+
+.. ipython:: python
+
+    xml = """<?xml version='1.0' encoding='utf-8'?>
+    <response>
+      <row>
+        <station id="40850" name="Library"/>
+        <month>2020-09-01T00:00:00</month>
+        <rides>
+          <avg_weekday_rides>864.2</avg_weekday_rides>
+          <avg_saturday_rides>534</avg_saturday_rides>
+          <avg_sunday_holiday_rides>417.2</avg_sunday_holiday_rides>
+        </rides>
+      </row>
+      <row>
+        <station id="41700" name="Washington/Wabash"/>
+        <month>2020-09-01T00:00:00</month>
+        <rides>
+          <avg_weekday_rides>2707.4</avg_weekday_rides>
+          <avg_saturday_rides>1909.8</avg_saturday_rides>
+          <avg_sunday_holiday_rides>1438.6</avg_sunday_holiday_rides>
+        </rides>
+      </row>
+      <row>
+        <station id="40380" name="Clark/Lake"/>
+        <month>2020-09-01T00:00:00</month>
+        <rides>
+          <avg_weekday_rides>2949.6</avg_weekday_rides>
+          <avg_saturday_rides>1657</avg_saturday_rides>
+          <avg_sunday_holiday_rides>1453.8</avg_sunday_holiday_rides>
+        </rides>
+      </row>
+    </response>"""
+
+    xsl = """<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+       <xsl:output method="xml" omit-xml-declaration="no" indent="yes"/>
+       <xsl:strip-space elements="*"/>
+       <xsl:template match="/response">
+          <xsl:copy>
+            <xsl:apply-templates select="row"/>
+          </xsl:copy>
+       </xsl:template>
+       <xsl:template match="row">
+          <xsl:copy>
+            <station_id><xsl:value-of select="station/@id"/></station_id>
+            <station_name><xsl:value-of select="station/@name"/></station_name>
+            <xsl:copy-of select="month|rides/*"/>
+          </xsl:copy>
+       </xsl:template>
+    </xsl:stylesheet>"""
+
+    output = """<?xml version='1.0' encoding='utf-8'?>
+    <response>
+      <row>
+        <station_id>40850</station_id>
+        <station_name>Library</station_name>
+        <month>2020-09-01T00:00:00</month>
+        <avg_weekday_rides>864.2</avg_weekday_rides>
+        <avg_saturday_rides>534</avg_saturday_rides>
+        <avg_sunday_holiday_rides>417.2</avg_sunday_holiday_rides>
+      </row>
+      <row>
+        <station_id>41700</station_id>
+        <station_name>Washington/Wabash</station_name>
+        <month>2020-09-01T00:00:00</month>
+        <avg_weekday_rides>2707.4</avg_weekday_rides>
+        <avg_saturday_rides>1909.8</avg_saturday_rides>
+        <avg_sunday_holiday_rides>1438.6</avg_sunday_holiday_rides>
+      </row>
+      <row>
+        <station_id>40380</station_id>
+        <station_name>Clark/Lake</station_name>
+        <month>2020-09-01T00:00:00</month>
+        <avg_weekday_rides>2949.6</avg_weekday_rides>
+        <avg_saturday_rides>1657</avg_saturday_rides>
+        <avg_sunday_holiday_rides>1453.8</avg_sunday_holiday_rides>
+      </row>
+    </response>"""
+
+    df = pd.read_xml(xml, stylesheet=xsl)
+    df
+
+
+.. _io.xml:
+
+Writing XML
+'''''''''''
+
+.. versionadded:: 1.3.0
+
+``DataFrame`` objects have an instance method ``to_xml`` which renders the
+contents of the ``DataFrame`` as an XML document.
+
+.. note::
+
+   This method does not support special properties of XML including DTD,
+   CData, XSD schemas, processing instructions, comments, and others.
+   Only namespaces at the root level are supported. However, ``stylesheet``
+   allows design changes after the initial output.
+
+Let's look at a few examples.
+
+Write an XML without options:
+
+.. ipython:: python
+
+    geom_df = pd.DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4, np.nan, 3],
+        }
+    )
+
+    print(geom_df.to_xml())
+
+
+Write an XML with a new root and row name:
+
+.. ipython:: python
+
+    print(geom_df.to_xml(root_name="geometry", row_name="objects"))
+
+Write an attribute-centric XML:
+
+.. ipython:: python
+
+    print(geom_df.to_xml(attr_cols=geom_df.columns.tolist()))
+
+Write a mix of elements and attributes:
+
+.. ipython:: python
+
+    print(
+        geom_df.to_xml(
+            index=False,
+            attr_cols=['shape'],
+            elem_cols=['degrees', 'sides'])
+    )
+
+Any ``DataFrames`` with hierarchical columns will be flattened for XML element names
+with levels delimited by underscores:
+
+.. ipython:: python
+
+    ext_geom_df = pd.DataFrame(
+        {
+            "type": ["polygon", "other", "polygon"],
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4, np.nan, 3],
+        }
+    )
+
+    pvt_df = ext_geom_df.pivot_table(index='shape',
+                                     columns='type',
+                                     values=['degrees', 'sides'],
+                                     aggfunc='sum')
+    pvt_df
+
+    print(pvt_df.to_xml())
+
+Write an XML with a default namespace:
+
+.. ipython:: python
+
+    print(geom_df.to_xml(namespaces={"": "https://example.com"}))
+
+Write an XML with a namespace prefix:
+
+.. ipython:: python
+
+    print(
+        geom_df.to_xml(namespaces={"doc": "https://example.com"},
+                       prefix="doc")
+    )
+
+Write an XML without declaration or pretty print:
+
+.. ipython:: python
+
+    print(
+        geom_df.to_xml(xml_declaration=False,
+                       pretty_print=False)
+    )
+
+Write an XML and transform with a stylesheet:
+
+.. ipython:: python
+
+    xsl = """<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+       <xsl:output method="xml" omit-xml-declaration="no" indent="yes"/>
+       <xsl:strip-space elements="*"/>
+       <xsl:template match="/data">
+         <geometry>
+           <xsl:apply-templates select="row"/>
+         </geometry>
+       </xsl:template>
+       <xsl:template match="row">
+         <object index="{index}">
+           <xsl:if test="shape!='circle'">
+               <xsl:attribute name="type">polygon</xsl:attribute>
+           </xsl:if>
+           <xsl:copy-of select="shape"/>
+           <xsl:copy-of select="degrees"/>
+           <xsl:copy-of select="sides"/>
+         </object>
+       </xsl:template>
+    </xsl:stylesheet>"""
+
+    print(geom_df.to_xml(stylesheet=xsl))
+
+
+XML Final Notes
+'''''''''''''''
+
+* All XML documents adhere to `W3C specifications`_. Both ``etree`` and ``lxml``
+  parsers will fail to parse any markup document that is not well-formed or
+  does not follow XML syntax rules. Do be aware that HTML is not an XML document
+  unless it follows XHTML specs. However, other popular markup types including
+  KML, XAML, RSS, MusicML, and MathML are compliant `XML schemas`_.
+
+* For the above reason, if your application builds XML prior to pandas
+  operations, use appropriate DOM libraries like ``etree`` and ``lxml`` to
+  build the necessary document, rather than string concatenation or regex
+  adjustments. Always remember XML is a *special* text file with markup rules.
+
+* With very large XML files (several hundred MBs to GBs), XPath and XSLT
+  can become memory-intensive operations. Be sure to have enough available
+  RAM for reading and writing to large XML files (roughly about 5 times the
+  size of the text).
+
+* Because XSLT is a programming language, use it with caution since such
+  scripts can pose a security risk in your environment and can run large or
+  infinite recursive operations. Always test scripts on small fragments before
+  a full run.
+
+* The `etree`_ parser supports all functionality of both ``read_xml`` and
+  ``to_xml`` except for complex XPath and any XSLT. Though limited in features,
+  ``etree`` is still a reliable and capable parser and tree builder.
+  Its performance may trail ``lxml`` to a certain degree for larger files,
+  but the difference is relatively unnoticeable on small to medium sized files.
+
+.. _`W3C specifications`: https://www.w3.org/TR/xml/
+.. _`XML schemas`: https://en.wikipedia.org/wiki/List_of_types_of_XML_schemas
+.. _`etree`: https://docs.python.org/3/library/xml.etree.elementtree.html
+
+
 .. _io.excel:
diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst
index 71c9b0613b3ce..8697182f5ca6f 100644
--- a/doc/source/whatsnew/index.rst
+++ b/doc/source/whatsnew/index.rst
@@ -24,6 +24,7 @@ Version 1.2
 .. toctree::
    :maxdepth: 2

+   v1.2.4
    v1.2.3
    v1.2.2
    v1.2.1
diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst
index 28fc83459b69d..dec2d061504b4 100644
--- a/doc/source/whatsnew/v1.2.3.rst
+++ b/doc/source/whatsnew/v1.2.3.rst
@@ -1,6 +1,6 @@
 .. _whatsnew_123:

-What's new in 1.2.3 (March ??, 2021)
+What's new in 1.2.3 (March 02, 2021)
 ------------------------------------

 These are the changes in pandas 1.2.3. See :ref:`release` for a full changelog
@@ -19,27 +19,8 @@ Fixed regressions
 - Fixed regression in nullable integer unary ops propagating mask on assignment (:issue:`39943`)
 - Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`)
 - Fixed regression in :meth:`~DataFrame.to_json` failing to use ``compression`` with URL-like paths that are internally opened in binary mode or with user-provided file objects that are opened in binary mode (:issue:`39985`)
--
-
-.. ---------------------------------------------------------------------------
-
-.. _whatsnew_123.bug_fixes:
-
-Bug fixes
-~~~~~~~~~
-
--
--
-
-.. ---------------------------------------------------------------------------
-
-.. _whatsnew_123.other:
-
-Other
-~~~~~
-
--
--
+- Fixed regression in :meth:`Series.sort_index` and :meth:`DataFrame.sort_index`, which raised an unhelpful error when passed the kwarg ``ascending=None``. Passing ``ascending=None`` is still considered invalid, and the improved error message suggests the proper usage (``ascending`` must be a boolean or a list-like of booleans) (:issue:`39434`)
+- Fixed regression in :meth:`DataFrame.transform` and :meth:`Series.transform` giving incorrect column labels when passed a dictionary with a mix of list and non-list values (:issue:`40018`)

 .. ---------------------------------------------------------------------------

@@ -48,4 +29,4 @@ Other
 Contributors
 ~~~~~~~~~~~~

-.. contributors:: v1.2.2..v1.2.3|HEAD
+.. contributors:: v1.2.2..v1.2.3
diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst
new file mode 100644
index 0000000000000..790ff4c78cad6
--- /dev/null
+++ b/doc/source/whatsnew/v1.2.4.rst
@@ -0,0 +1,48 @@
+.. _whatsnew_124:
+
+What's new in 1.2.4 (April ??, 2021)
+------------------------------------
+
+These are the changes in pandas 1.2.4. See :ref:`release` for a full changelog
+including other versions of pandas.
+
+{{ header }}
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_124.regressions:
+
+Fixed regressions
+~~~~~~~~~~~~~~~~~
+
+-
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_124.bug_fixes:
+
+Bug fixes
+~~~~~~~~~
+
+-
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_124.other:
+
+Other
+~~~~~
+
+-
+-
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_124.contributors:
+
+Contributors
+~~~~~~~~~~~~
+
+.. contributors:: v1.2.3..v1.2.4|HEAD
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 8deeb3cfae1d3..9bb9f0c7a467a 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -43,6 +43,73 @@ For example:
         storage_options=headers
     )

+.. _whatsnew_130.read_to_xml:
+
+Read and write XML documents
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We added I/O support to read and render shallow versions of `XML`_ documents with
+:func:`pandas.read_xml` and :meth:`DataFrame.to_xml`. Using `lxml`_ as the parser,
+both XPath 1.0 and XSLT 1.0 are available. (:issue:`27554`)
+
+.. _XML: https://www.w3.org/standards/xml/core
+.. _lxml: https://lxml.de
+
+.. code-block:: ipython
+
+   In [1]: xml = """<?xml version='1.0' encoding='utf-8'?>
+      ...: <data>
+      ...:  <row>
+      ...:     <shape>square</shape>
+      ...:     <degrees>360</degrees>
+      ...:     <sides>4.0</sides>
+      ...:  </row>
+      ...:  <row>
+      ...:     <shape>circle</shape>
+      ...:     <degrees>360</degrees>
+      ...:     <sides/>
+      ...:  </row>
+      ...:  <row>
+      ...:     <shape>triangle</shape>
+      ...:     <degrees>180</degrees>
+      ...:     <sides>3.0</sides>
+      ...:  </row>
+      ...: </data>"""
+
+   In [2]: df = pd.read_xml(xml)
+
+   In [3]: df
+   Out[3]:
+         shape  degrees  sides
+   0    square      360    4.0
+   1    circle      360    NaN
+   2  triangle      180    3.0
+
+   In [4]: df.to_xml()
+   Out[4]:
+   <?xml version='1.0' encoding='utf-8'?>
+   <data>
+     <row>
+       <index>0</index>
+       <shape>square</shape>
+       <degrees>360</degrees>
+       <sides>4.0</sides>
+     </row>
+     <row>
+       <index>1</index>
+       <shape>circle</shape>
+       <degrees>360</degrees>
+       <sides/>
+     </row>
+     <row>
+       <index>2</index>
+       <shape>triangle</shape>
+       <degrees>180</degrees>
+       <sides>3.0</sides>
+     </row>
+   </data>
+
+For more, see :ref:`io.xml` in the user guide on IO tools.
+
 .. _whatsnew_130.enhancements.other:

 Other enhancements
@@ -61,6 +128,7 @@ Other enhancements
 - :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
 - :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
 - :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
+- Disallow :class:`DataFrame` indexer for ``iloc`` for :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` (:issue:`39004`)
 - :meth:`Series.apply` can now accept list-like or dictionary-like arguments that aren't lists or dictionaries, e.g. ``ser.apply(np.array(["sum", "mean"]))``, which was already the case for :meth:`DataFrame.apply` (:issue:`39140`)
 - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`)
 - :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`, :issue:`21266`, :issue:`39317`)
@@ -70,6 +138,7 @@ Other enhancements
 - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`)
 - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
 - Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`)
+- Add support for unary operators in :class:`FloatingArray` (:issue:`38749`)

 .. ---------------------------------------------------------------------------

@@ -250,6 +319,7 @@ Deprecations
 - Deprecated comparison of :class:`Timestamp` object with ``datetime.date`` objects. Instead of e.g.
   ``ts <= mydate`` use ``ts <= pd.Timestamp(mydate)`` or ``ts.date() <= mydate`` (:issue:`36131`)
 - Deprecated :attr:`Rolling.win_type` returning ``"freq"`` (:issue:`38963`)
 - Deprecated :attr:`Rolling.is_datetimelike` (:issue:`38963`)
+- Deprecated :class:`DataFrame` indexer for :meth:`Series.__setitem__` and :meth:`DataFrame.__setitem__` (:issue:`39004`)
 - Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`)
 - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`)
 - Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`)
@@ -270,6 +340,8 @@ Performance improvements
 - Performance improvement in :func:`unique` for object data type (:issue:`37615`)
 - Performance improvement in :class:`core.window.rolling.ExpandingGroupby` aggregation methods (:issue:`39664`)
 - Performance improvement in :class:`Styler` where render times are more than 50% reduced (:issue:`39972` :issue:`39952`)
+- Performance improvement in :meth:`core.window.ewm.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`)
+- Performance improvement in :meth:`.GroupBy.apply` when requiring the python fallback implementation (:issue:`40176`)

 .. ---------------------------------------------------------------------------

@@ -332,7 +404,8 @@ Conversion
 ^^^^^^^^^^
 - Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`)
 - Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`)
--
+- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
+- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
 -

 Strings
@@ -373,7 +446,7 @@ Indexing
 - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
 - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
 - Bug in setting numeric values into a boolean-dtype :class:`Series` using ``at`` or ``iat`` failing to cast to object-dtype (:issue:`39582`)
--
+- Bug in :meth:`DataFrame.loc.__setitem__` when setting-with-expansion incorrectly raising when the index in the expanding axis contains duplicates (:issue:`40096`)

 Missing
 ^^^^^^^
@@ -446,7 +519,9 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` if a :class:`core.window.rolling.RollingGroupby` object was created (:issue:`39732`)
 - Bug in :meth:`DataFrameGroupBy.sample` where error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`)
 - Bug in :meth:`DataFrameGroupBy.aggregate` and :meth:`.Resampler.aggregate` would sometimes raise ``SpecificationError`` when passed a dictionary and columns were missing; will now always raise a ``KeyError`` instead (:issue:`40004`)
--
+- Bug in :meth:`DataFrameGroupBy.sample` where column selection was not applied to sample result (:issue:`39928`)
+- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would incorrectly raise a ``ValueError`` when providing ``times`` (:issue:`40164`)
+- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`)

 Reshaping
 ^^^^^^^^^
@@ -492,6 +567,7 @@ Other
 - :class:`Styler` rendered HTML output minor alterations to support w3 good code standard (:issue:`39626`)
 - Bug in :class:`Styler` where rendered HTML was missing a column class identifier for certain header cells (:issue:`39716`)
 - Bug in :meth:`Styler.background_gradient` where text-color was not determined correctly (:issue:`39888`)
+- Bug in :class:`Styler` where multiple elements in CSS-selectors were not correctly added to ``table_styles`` (:issue:`39942`)
 - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
 - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
diff --git a/environment.yml b/environment.yml
index 113780ed0264a..f54bf41c14c75 100644
--- a/environment.yml
+++ b/environment.yml
@@ -23,7 +23,7 @@ dependencies:
   - flake8
   - flake8-comprehensions>=3.1.0  # used by flake8, linting of unnecessary comprehensions
   - isort>=5.2.1  # check that imports are in the right order
-  - mypy=0.800
+  - mypy=0.812
   - pre-commit>=2.9.2
   - pycodestyle  # used by flake8
   - pyupgrade
diff --git a/pandas/__init__.py b/pandas/__init__.py
index cc4c99efc4345..7cad3eded0585 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -167,6 +167,7 @@
     read_feather,
     read_gbq,
     read_html,
+    read_xml,
     read_json,
     read_stata,
     read_sas,
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 43bf6d9dd1fee..40e82798c0753 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -497,8 +497,9 @@ def _group_add(complexfloating_t[:, :] out,
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    sumx = np.zeros_like(out)
-    compensation = np.zeros_like(out)
+    # the below is equivalent to `np.zeros_like(out)` but faster
+    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
+    compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
@@ -555,7 +556,7 @@ def _group_prod(floating[:, :] out,
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    prodx = np.ones_like(out)
+    prodx = np.ones((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
@@ -608,7 +609,7 @@ def _group_var(floating[:, :] out,
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    mean = np.zeros_like(out)
+    mean = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
@@ -665,8 +666,9 @@ def _group_mean(floating[:, :] out,
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
-    sumx = np.zeros_like(out)
-    compensation = np.zeros_like(out)
+    # the below is equivalent to `np.zeros_like(out)` but faster
+    sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
+    compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

     N, K = (<object>values).shape
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index d2aa47f65d263..4e04425436af4 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1562,7 +1562,7 @@ def infer_datetimelike_array(arr: ndarray[object]) -> str:
                 seen_tz_aware = 
True if seen_tz_naive and seen_tz_aware: - return 'mixed' + return "mixed" elif util.is_datetime64_object(v): # np.datetime64 seen_datetime = True @@ -2250,7 +2250,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False, break elif is_timedelta(val): if convert_timedelta: - itimedeltas[i] = convert_to_timedelta64(val, 'ns') + itimedeltas[i] = convert_to_timedelta64(val, "ns").view("i8") seen.timedelta_ = True else: seen.object_ = True diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 605e2135edc9f..337e131f0a2c9 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -376,7 +376,8 @@ cpdef array_to_datetime( bint dayfirst=False, bint yearfirst=False, bint utc=False, - bint require_iso8601=False + bint require_iso8601=False, + bint allow_mixed=False, ): """ Converts a 1D array of date-like values to a numpy array of either: @@ -405,6 +406,8 @@ cpdef array_to_datetime( indicator whether the dates should be UTC require_iso8601 : bool, default False indicator whether the datetime string should be iso8601 + allow_mixed : bool, default False + Whether to allow mixed datetimes and integers. Returns ------- @@ -597,7 +600,7 @@ cpdef array_to_datetime( return ignore_errors_out_of_bounds_fallback(values), tz_out except TypeError: - return array_to_datetime_object(values, errors, dayfirst, yearfirst) + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime and seen_integer: # we have mixed datetimes & integers @@ -609,10 +612,12 @@ cpdef array_to_datetime( val = values[i] if is_integer_object(val) or is_float_object(val): result[i] = NPY_NAT + elif allow_mixed: + pass elif is_raise: raise ValueError("mixed datetimes and integers in passed array") else: - return array_to_datetime_object(values, errors, dayfirst, yearfirst) + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime_offset and not utc_convert: # GH#17697 @@ -623,7 +628,7 @@ cpdef array_to_datetime( # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return array_to_datetime_object(values, errors, dayfirst, yearfirst) + return _array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() tz_out = pytz.FixedOffset(tz_offset / 60.) @@ -670,7 +675,7 @@ cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values): @cython.wraparound(False) @cython.boundscheck(False) -cdef array_to_datetime_object( +cdef _array_to_datetime_object( ndarray[object] values, str errors, bint dayfirst=False, diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2d4704ad3bda6..4e6e5485b2ade 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -3579,7 +3579,7 @@ cpdef to_offset(freq): stride_sign = None try: - split = re.split(opattern, freq) + split = opattern.split(freq) if split[-1] != "" and not split[-1].isspace(): # the last element must be blank raise ValueError("last element must be blank") diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 76a5b6cc9de12..3cdb654642b9c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -178,11 +178,15 @@ cpdef int64_t delta_to_nanoseconds(delta) except? 
-1: if is_integer_object(delta): return delta if PyDelta_Check(delta): - return ( - delta.days * 24 * 60 * 60 * 1_000_000 - + delta.seconds * 1_000_000 - + delta.microseconds - ) * 1000 + try: + return ( + delta.days * 24 * 60 * 60 * 1_000_000 + + delta.seconds * 1_000_000 + + delta.microseconds + ) * 1000 + except OverflowError as err: + from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta + raise OutOfBoundsTimedelta(*err.args) from err raise TypeError(type(delta)) @@ -246,7 +250,7 @@ cdef object ensure_td64ns(object ts): td64_value = td64_value * mult except OverflowError as err: from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta - raise OutOfBoundsTimedelta(ts) + raise OutOfBoundsTimedelta(ts) from err return np.timedelta64(td64_value, "ns") diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 5a95b0ec4e08a..efacfad40ef82 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -116,9 +116,10 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, def roll_sum(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): cdef: + Py_ssize_t i, j float64_t sum_x = 0, compensation_add = 0, compensation_remove = 0 int64_t s, e - int64_t nobs = 0, i, j, N = len(values) + int64_t nobs = 0, N = len(values) ndarray[float64_t] output bint is_monotonic_increasing_bounds @@ -493,12 +494,13 @@ cdef inline void remove_skew(float64_t val, int64_t *nobs, def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): cdef: + Py_ssize_t i, j float64_t val, prev, min_val, mean_val, sum_val = 0 float64_t compensation_xxx_add = 0, compensation_xxx_remove = 0 float64_t compensation_xx_add = 0, compensation_xx_remove = 0 float64_t compensation_x_add = 0, compensation_x_remove = 0 float64_t x = 0, xx = 0, xxx = 0 - int64_t nobs = 0, i, j, N = len(values), nobs_mean = 0 + int64_t nobs = 0, N = len(values), nobs_mean = 0 int64_t s, e ndarray[float64_t] output, mean_array, values_copy bint is_monotonic_increasing_bounds @@ -674,13 +676,14 @@ cdef inline void remove_kurt(float64_t val, int64_t *nobs, def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): cdef: + Py_ssize_t i, j float64_t val, prev, mean_val, min_val, sum_val = 0 float64_t compensation_xxxx_add = 0, compensation_xxxx_remove = 0 float64_t compensation_xxx_remove = 0, compensation_xxx_add = 0 float64_t compensation_xx_remove = 0, compensation_xx_add = 0 float64_t compensation_x_remove = 0, compensation_x_add = 0 float64_t x = 0, xx = 0, xxx = 0, xxxx = 0 - int64_t nobs = 0, i, j, s, e, N = len(values), nobs_mean = 0 + int64_t nobs = 0, s, e, N = len(values), nobs_mean = 0 ndarray[float64_t] output, values_copy bint is_monotonic_increasing_bounds @@ -754,15 +757,13 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start, def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, ndarray[int64_t] end, int64_t minp): cdef: - float64_t val, res, prev - bint err = False - int ret = 0 - skiplist_t *sl Py_ssize_t i, j + bint err = False, is_monotonic_increasing_bounds + int midpoint, ret = 0 int64_t nobs = 0, N = len(values), s, e, win - int midpoint + float64_t val, res, prev + skiplist_t *sl ndarray[float64_t] output - bint is_monotonic_increasing_bounds is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds( start, end @@ -933,8 +934,8 @@ cdef _roll_min_max(ndarray[numeric] 
values, bint is_max): cdef: numeric ai - int64_t i, k, curr_win_size, start - Py_ssize_t nobs = 0, N = len(values) + int64_t curr_win_size, start + Py_ssize_t i, k, nobs = 0, N = len(values) deque Q[int64_t] # min/max always the front deque W[int64_t] # track the whole window for nobs compute ndarray[float64_t, ndim=1] output @@ -1017,14 +1018,14 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, O(N log(window)) implementation using skip list """ cdef: + Py_ssize_t i, j, s, e, N = len(values), idx + int ret = 0 + int64_t nobs = 0, win float64_t val, prev, midpoint, idx_with_fraction - skiplist_t *skiplist - int64_t nobs = 0, i, j, s, e, N = len(values), win - Py_ssize_t idx - ndarray[float64_t] output float64_t vlow, vhigh + skiplist_t *skiplist InterpolationType interpolation_type - int ret = 0 + ndarray[float64_t] output if quantile <= 0.0 or quantile >= 1.0: raise ValueError(f"quantile value {quantile} not in [0, 1]") @@ -1041,10 +1042,10 @@ def roll_quantile(const float64_t[:] values, ndarray[int64_t] start, # actual skiplist ops outweigh any window computation costs output = np.empty(N, dtype=float) - if (end - start).max() == 0: + win = (end - start).max() + if win == 0: output[:] = NaN return output - win = (end - start).max() skiplist = skiplist_init(win) if skiplist == NULL: raise MemoryError("skiplist_init failed") @@ -1473,66 +1474,9 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights, # ---------------------------------------------------------------------- # Exponentially weighted moving average -def ewma_time(const float64_t[:] vals, int64_t[:] start, int64_t[:] end, - int minp, ndarray[int64_t] times, int64_t halflife): - """ - Compute exponentially-weighted moving average using halflife and time - distances. - - Parameters - ---------- - vals : ndarray[float_64] - start: ndarray[int_64] - end: ndarray[int_64] - minp : int - times : ndarray[int64] - halflife : int64 - - Returns - ------- - ndarray - """ - cdef: - Py_ssize_t i, j, num_not_nan = 0, N = len(vals) - bint is_not_nan - float64_t last_result, weights_dot, weights_sum, weight, halflife_float - float64_t[:] times_float - float64_t[:] observations = np.zeros(N, dtype=float) - float64_t[:] times_masked = np.zeros(N, dtype=float) - ndarray[float64_t] output = np.empty(N, dtype=float) - - if N == 0: - return output - - halflife_float = halflife - times_float = times.astype(float) - last_result = vals[0] - - with nogil: - for i in range(N): - is_not_nan = vals[i] == vals[i] - num_not_nan += is_not_nan - if is_not_nan: - times_masked[num_not_nan-1] = times_float[i] - observations[num_not_nan-1] = vals[i] - - weights_sum = 0 - weights_dot = 0 - for j in range(num_not_nan): - weight = 0.5 ** ( - (times_float[i] - times_masked[j]) / halflife_float) - weights_sum += weight - weights_dot += weight * observations[j] - - last_result = weights_dot / weights_sum - - output[i] = last_result if num_not_nan >= minp else NaN - - return output - - -def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp, - float64_t com, bint adjust, bint ignore_na): +def ewma(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, + int minp, float64_t com, bint adjust, bint ignore_na, + const float64_t[:] deltas): """ Compute exponentially-weighted moving average using center-of-mass. 
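[Not part of the patch: a minimal pure-Python sketch of the recurrence the unified ``ewma`` kernel above implements. It assumes ``deltas`` holds the consecutive time gaps, pre-scaled so that evenly spaced observations have gaps of 1.0, and it omits the NaN/``ignore_na`` handling and the constant-series guard of the real Cython kernel.]

    import numpy as np

    def ewm_mean_sketch(vals, deltas, com, adjust=True):
        # Decay the accumulated weight by old_wt_factor ** deltas[i - 1]
        # before folding in the next observation -- the same recurrence
        # as the kernel above, minus missing-value handling.
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha
        out = np.empty(len(vals), dtype=float)
        out[0] = weighted_avg = vals[0]
        old_wt = 1.0
        for i in range(1, len(vals)):
            old_wt *= old_wt_factor ** deltas[i - 1]
            weighted_avg = (old_wt * weighted_avg + new_wt * vals[i]) / (old_wt + new_wt)
            old_wt = old_wt + new_wt if adjust else 1.0
            out[i] = weighted_avg
        return out

    # With unit gaps this reproduces ewm(com=0.5, adjust=True).mean()
    ewm_mean_sketch(np.array([1.0, 2.0, 3.0]), np.ones(2), com=0.5)

With ``deltas`` of all ones, the ``** deltas[i - 1]`` factor reduces to the old fixed-spacing update (``old_wt *= old_wt_factor``), which is why the separate ``ewma_time`` path removed above is no longer needed.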
@@ -1543,8 +1487,10 @@ def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp,
     end: ndarray (int64 type)
     minp : int
     com : float64
-    adjust : int
+    adjust : bool
     ignore_na : bool
+    deltas : ndarray (float64 type)

     Returns
     -------
@@ -1553,7 +1499,7 @@ def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp,

     cdef:
         Py_ssize_t i, j, s, e, nobs, win_size, N = len(vals), M = len(start)
-        float64_t[:] sub_vals
+        const float64_t[:] sub_vals
         ndarray[float64_t] sub_output, output = np.empty(N, dtype=float)
         float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur
         bint is_observation
@@ -1562,6 +1508,8 @@ def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp,
         return output

     alpha = 1. / (1. + com)
+    old_wt_factor = 1. - alpha
+    new_wt = 1. if adjust else alpha

     for j in range(M):
         s = start[j]
@@ -1570,9 +1518,6 @@ def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp,
         win_size = len(sub_vals)
         sub_output = np.empty(win_size, dtype=float)

-        old_wt_factor = 1. - alpha
-        new_wt = 1. if adjust else alpha
-
         weighted_avg = sub_vals[0]
         is_observation = weighted_avg == weighted_avg
         nobs = int(is_observation)
@@ -1587,8 +1532,7 @@ def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp,

             if weighted_avg == weighted_avg:
                 if is_observation or not ignore_na:
-
-                    old_wt *= old_wt_factor
+                    old_wt *= old_wt_factor ** deltas[i - 1]

                     if is_observation:
                         # avoid numerical errors on constant series
@@ -1613,8 +1557,9 @@ def ewma(float64_t[:] vals, int64_t[:] start, int64_t[:] end, int minp,
 # Exponentially weighted moving covariance


-def ewmcov(float64_t[:] input_x, int64_t[:] start, int64_t[:] end, int minp,
-           float64_t[:] input_y, float64_t com, bint adjust, bint ignore_na, bint bias):
+def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end,
+           int minp, const float64_t[:] input_y, float64_t com, bint adjust,
+           bint ignore_na, bint bias):
     """
     Compute exponentially-weighted moving variance using center-of-mass.

@@ -1626,9 +1571,9 @@ def ewmcov(float64_t[:] input_x, int64_t[:] start, int64_t[:] end, int minp,
     minp : int
     input_y : ndarray (float64 type)
     com : float64
-    adjust : int
+    adjust : bool
     ignore_na : bool
-    bias : int
+    bias : bool

     Returns
     -------
@@ -1641,7 +1586,7 @@ def ewmcov(float64_t[:] input_x, int64_t[:] start, int64_t[:] end, int minp,
         float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov
         float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y
         float64_t numerator, denominator
-        float64_t[:] sub_x_vals, sub_y_vals
+        const float64_t[:] sub_x_vals, sub_y_vals
         ndarray[float64_t] sub_out, output = np.empty(N, dtype=float)
         bint is_observation
@@ -1652,6 +1597,8 @@ def ewmcov(float64_t[:] input_x, int64_t[:] start, int64_t[:] end, int minp,
         return output

     alpha = 1. / (1. + com)
+    old_wt_factor = 1. - alpha
+    new_wt = 1. 
if adjust else alpha - mean_x = sub_x_vals[0] mean_y = sub_y_vals[0] is_observation = (mean_x == mean_x) and (mean_y == mean_y) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 819e5a1c32d9b..74fb0e2bd54fb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -84,7 +84,7 @@ from pandas.core.array_algos.take import take_nd from pandas.core.construction import ( - array, + array as pd_array, ensure_wrapped_if_datetimelike, extract_array, ) @@ -108,9 +108,7 @@ # --------------- # # dtype access # # --------------- # -def _ensure_data( - values: ArrayLike, dtype: Optional[DtypeObj] = None -) -> Tuple[np.ndarray, DtypeObj]: +def _ensure_data(values: ArrayLike) -> Tuple[np.ndarray, DtypeObj]: """ routine to ensure that our data is of the correct input dtype for lower-level routines @@ -126,8 +124,6 @@ def _ensure_data( Parameters ---------- values : array-like - dtype : pandas_dtype, optional - coerce to this dtype Returns ------- @@ -135,34 +131,26 @@ def _ensure_data( pandas_dtype : np.dtype or ExtensionDtype """ - if dtype is not None: - # We only have non-None dtype when called from `isin`, and - # both Datetimelike and Categorical dispatch before getting here. - assert not needs_i8_conversion(dtype) - assert not is_categorical_dtype(dtype) - if not isinstance(values, ABCMultiIndex): # extract_array would raise values = extract_array(values, extract_numpy=True) # we check some simple dtypes first - if is_object_dtype(dtype): - return ensure_object(np.asarray(values)), np.dtype("object") - elif is_object_dtype(values) and dtype is None: + if is_object_dtype(values): return ensure_object(np.asarray(values)), np.dtype("object") try: - if is_bool_dtype(values) or is_bool_dtype(dtype): + if is_bool_dtype(values): # we are actually coercing to uint64 # until our algos support uint8 directly (see TODO) return np.asarray(values).astype("uint64"), np.dtype("bool") - elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): + elif is_signed_integer_dtype(values): return ensure_int64(values), np.dtype("int64") - elif is_unsigned_integer_dtype(values) or is_unsigned_integer_dtype(dtype): + elif is_unsigned_integer_dtype(values): return ensure_uint64(values), np.dtype("uint64") - elif is_float_dtype(values) or is_float_dtype(dtype): + elif is_float_dtype(values): return ensure_float64(values), np.dtype("float64") - elif is_complex_dtype(values) or is_complex_dtype(dtype): + elif is_complex_dtype(values): # ignore the fact that we are casting to float # which discards complex parts @@ -177,12 +165,12 @@ def _ensure_data( return ensure_object(values), np.dtype("object") # datetimelike - if needs_i8_conversion(values.dtype) or needs_i8_conversion(dtype): - if is_period_dtype(values.dtype) or is_period_dtype(dtype): + if needs_i8_conversion(values.dtype): + if is_period_dtype(values.dtype): from pandas import PeriodIndex values = PeriodIndex(values)._data - elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype): + elif is_timedelta64_dtype(values.dtype): from pandas import TimedeltaIndex values = TimedeltaIndex(values)._data @@ -202,9 +190,7 @@ def _ensure_data( dtype = values.dtype return values.asi8, dtype - elif is_categorical_dtype(values.dtype) and ( - is_categorical_dtype(dtype) or dtype is None - ): + elif is_categorical_dtype(values.dtype): values = cast("Categorical", values) values = values.codes dtype = pandas_dtype("category") @@ -488,7 +474,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: elif 
needs_i8_conversion(comps.dtype): # Dispatch to DatetimeLikeArrayMixin.isin - return array(comps).isin(values) + return pd_array(comps).isin(values) elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype): # e.g. comps are integers and values are datetime64s return np.zeros(comps.shape, dtype=bool) @@ -1580,7 +1566,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: if is_scalar(value): value = dtype.type(value) else: - value = array(value, dtype=dtype) + value = pd_array(value, dtype=dtype) elif not ( is_object_dtype(arr) or is_numeric_dtype(arr) or is_categorical_dtype(arr) ): diff --git a/pandas/core/apply.py b/pandas/core/apply.py index db4203e5158ef..203a0c675282d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -98,22 +98,6 @@ def frame_apply( ) -def series_apply( - obj: Series, - func: AggFuncType, - convert_dtype: bool = True, - args=None, - kwargs=None, -) -> SeriesApply: - return SeriesApply( - obj, - func, - convert_dtype, - args, - kwargs, - ) - - class Apply(metaclass=abc.ABCMeta): axis: int @@ -280,7 +264,7 @@ def transform_dict_like(self, func): if len(func) == 0: raise ValueError("No transform functions were provided") - self.validate_dictlike_arg("transform", obj, func) + func = self.normalize_dictlike_arg("transform", obj, func) results: Dict[Hashable, FrameOrSeriesUnion] = {} for name, how in func.items(): @@ -421,32 +405,17 @@ def agg_dict_like(self, _axis: int) -> FrameOrSeriesUnion: ------- Result of aggregation. """ + from pandas.core.reshape.concat import concat + obj = self.obj arg = cast(AggFuncTypeDict, self.f) - is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) - if _axis != 0: # pragma: no cover raise ValueError("Can only pass dict with axis=0") selected_obj = obj._selected_obj - self.validate_dictlike_arg("agg", selected_obj, arg) - - # if we have a dict of any non-scalars - # eg. {'A' : ['mean']}, normalize all to - # be list-likes - # Cannot use arg.values() because arg may be a Series - if any(is_aggregator(x) for _, x in arg.items()): - new_arg: AggFuncTypeDict = {} - for k, v in arg.items(): - if not isinstance(v, (tuple, list, dict)): - new_arg[k] = [v] - else: - new_arg[k] = v - arg = new_arg - - from pandas.core.reshape.concat import concat + arg = self.normalize_dictlike_arg("agg", selected_obj, arg) if selected_obj.ndim == 1: # key only used for output @@ -540,14 +509,15 @@ def maybe_apply_multiple(self) -> Optional[FrameOrSeriesUnion]: return None return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs) - def validate_dictlike_arg( + def normalize_dictlike_arg( self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict - ) -> None: + ) -> AggFuncTypeDict: """ - Raise if dict-like argument is invalid. + Handler for dict-like argument. Ensures that necessary columns exist if obj is a DataFrame, and - that a nested renamer is not passed. + that a nested renamer is not passed. Also normalizes to all lists + when values consists of a mix of list and non-lists. """ assert how in ("apply", "agg", "transform") @@ -567,6 +537,23 @@ def validate_dictlike_arg( cols_sorted = list(safe_sort(list(cols))) raise KeyError(f"Column(s) {cols_sorted} do not exist") + is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) + + # if we have a dict of any non-scalars + # eg. 
{'A' : ['mean']}, normalize all to + # be list-likes + # Cannot use func.values() because arg may be a Series + if any(is_aggregator(x) for _, x in func.items()): + new_func: AggFuncTypeDict = {} + for k, v in func.items(): + if not is_aggregator(v): + # mypy can't realize v is not a list here + new_func[k] = [v] # type:ignore[list-item] + else: + new_func[k] = v + func = new_func + return func + class FrameApply(Apply): obj: DataFrame @@ -896,9 +883,8 @@ def series_generator(self): # of it. Kids: don't do this at home. ser = self.obj._ixs(0, axis=0) mgr = ser._mgr - blk = mgr.blocks[0] - if is_extension_array_dtype(blk.dtype): + if is_extension_array_dtype(ser.dtype): # values will be incorrect for this block # TODO(EA2D): special case would be unnecessary with 2D EAs obj = self.obj @@ -909,7 +895,7 @@ def series_generator(self): for (arr, name) in zip(values, self.index): # GH#35462 re-pin mgr in case setitem changed it ser._mgr = mgr - blk.values = arr + mgr.set_values(arr) ser.name = name yield ser diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 29a172dcdd2c7..7777cb4bf674e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -48,8 +48,6 @@ from pandas.core.dtypes.cast import ( coerce_indexer_dtype, maybe_cast_to_extension_array, - maybe_infer_to_datetimelike, - sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( ensure_int64, @@ -99,7 +97,7 @@ ) import pandas.core.common as com from pandas.core.construction import ( - array, + array as pd_array, extract_array, sanitize_array, ) @@ -396,20 +394,27 @@ def __init__( if dtype.categories is None: dtype = CategoricalDtype(values.categories, dtype.ordered) elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): - # sanitize_array coerces np.nan to a string under certain versions - # of numpy - values = maybe_infer_to_datetimelike(values) - if isinstance(values, np.ndarray): - values = sanitize_to_nanoseconds(values) - elif not isinstance(values, ExtensionArray): - values = com.convert_to_list_like(values) - + values = com.convert_to_list_like(values) + if isinstance(values, list) and len(values) == 0: # By convention, empty lists result in object dtype: - sanitize_dtype = np.dtype("O") if len(values) == 0 else None - null_mask = isna(values) + values = np.array([], dtype=object) + elif isinstance(values, np.ndarray): + if values.ndim > 1: + # preempt sanitize_array from raising ValueError + raise NotImplementedError( + "> 1 ndim Categorical are not supported at this time" + ) + values = sanitize_array(values, None) + else: + # i.e. must be a list + arr = sanitize_array(values, None) + null_mask = isna(arr) if null_mask.any(): - values = [values[idx] for idx in np.where(~null_mask)[0]] - values = sanitize_array(values, None, dtype=sanitize_dtype) + # We remove null values here, then below will re-insert + # them, grep "full_codes" + arr = [values[idx] for idx in np.where(~null_mask)[0]] + arr = sanitize_array(arr, None) + values = arr if dtype.categories is None: try: @@ -493,7 +498,7 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: # TODO: consolidate with ndarray case? 
elif is_extension_array_dtype(dtype): - result = array(self, dtype=dtype, copy=copy) + result = pd_array(self, dtype=dtype, copy=copy) elif is_integer_dtype(dtype) and self.isna().any(): raise ValueError("Cannot convert float NaN to integer") diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e476c3566c10f..633a20d6bed37 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -106,7 +106,7 @@ ) import pandas.core.common as com from pandas.core.construction import ( - array, + array as pd_array, extract_array, ) from pandas.core.indexers import ( @@ -465,15 +465,15 @@ def view(self, dtype: Optional[Dtype] = None) -> ArrayLike: dtype = pandas_dtype(dtype) if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): cls = dtype.construct_array_type() - return cls._simple_new(self.asi8, dtype=dtype) + return cls(self.asi8, dtype=dtype) elif dtype == "M8[ns]": from pandas.core.arrays import DatetimeArray - return DatetimeArray._simple_new(self.asi8, dtype=dtype) + return DatetimeArray(self.asi8, dtype=dtype) elif dtype == "m8[ns]": from pandas.core.arrays import TimedeltaArray - return TimedeltaArray._simple_new(self.asi8.view("m8[ns]"), dtype=dtype) + return TimedeltaArray(self.asi8, dtype=dtype) return self._ndarray.view(dtype=dtype) # ------------------------------------------------------------------ @@ -719,7 +719,7 @@ def _validate_listlike(self, value, allow_object: bool = False): # Do type inference if necessary up front # e.g. we passed PeriodIndex.values and got an ndarray of Periods - value = array(value) + value = pd_array(value) value = extract_array(value, extract_numpy=True) if is_dtype_equal(value.dtype, "string"): @@ -1102,10 +1102,10 @@ def _add_timedeltalike_scalar(self, other): return type(self)(new_values, dtype=self.dtype) inc = delta_to_nanoseconds(other) - new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view( - "i8" - ) + new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan) + new_values = new_values.view("i8") new_values = self._maybe_mask_results(new_values) + new_values = new_values.view(self._ndarray.dtype) new_freq = None if isinstance(self.freq, Tick) or is_period_dtype(self.dtype): @@ -1207,7 +1207,7 @@ def _addsub_object_array(self, other: np.ndarray, op): assert self.shape == other.shape, (self.shape, other.shape) res_values = op(self.astype("O"), np.asarray(other)) - result = array(res_values.ravel()) + result = pd_array(res_values.ravel()) result = extract_array(result, extract_numpy=True).reshape(self.shape) return result @@ -1700,6 +1700,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): nanos = to_offset(freq).nanos result = round_nsint64(values, mode, nanos) result = self._maybe_mask_results(result, fill_value=iNaT) + result = result.view(self._ndarray.dtype) return self._simple_new(result, dtype=self.dtype) @Appender((_round_doc + _round_example).format(op="round")) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 28e469547fe62..ce0ea7bca55cd 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -315,9 +315,7 @@ def _simple_new( cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE ) -> DatetimeArray: assert isinstance(values, np.ndarray) - if values.dtype != DT64NS_DTYPE: - assert values.dtype == "i8" - values = values.view(DT64NS_DTYPE) + assert values.dtype == DT64NS_DTYPE result = object.__new__(cls) result._ndarray = values @@ -439,6 +437,7 @@ def 
_generate_range( values = np.array([x.value for x in xdr], dtype=np.int64) _tz = start.tz if start is not None else end.tz + values = values.view("M8[ns]") index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz)) if tz is not None and index.tz is None: @@ -464,9 +463,8 @@ def _generate_range( + start.value ) dtype = tz_to_dtype(tz) - index = cls._simple_new( - arr.astype("M8[ns]", copy=False), freq=None, dtype=dtype - ) + arr = arr.astype("M8[ns]", copy=False) + index = cls._simple_new(arr, freq=None, dtype=dtype) if not left_closed and len(index) and index[0] == start: # TODO: overload DatetimeLikeArrayMixin.__getitem__ @@ -476,7 +474,7 @@ def _generate_range( index = cast(DatetimeArray, index[:-1]) dtype = tz_to_dtype(tz) - return cls._simple_new(index.asi8, freq=freq, dtype=dtype) + return cls._simple_new(index._ndarray, freq=freq, dtype=dtype) # ----------------------------------------------------------------- # DatetimeLike Interface @@ -710,7 +708,7 @@ def _add_offset(self, offset): values = self.tz_localize(None) else: values = self - result = offset._apply_array(values) + result = offset._apply_array(values).view("M8[ns]") result = DatetimeArray._simple_new(result) result = result.tz_localize(self.tz) @@ -833,7 +831,7 @@ def tz_convert(self, tz): # No conversion since timestamps are all UTC to begin with dtype = tz_to_dtype(tz) - return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) + return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) @dtl.ravel_compat def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): @@ -1906,6 +1904,26 @@ def std( # Constructor Helpers +def sequence_to_datetimes( + data, allow_object: bool = False, require_iso8601: bool = False +) -> Union[np.ndarray, DatetimeArray]: + """ + Parse/convert the passed data to either DatetimeArray or np.ndarray[object]. + """ + result, tz, freq = sequence_to_dt64ns( + data, + allow_object=allow_object, + allow_mixed=True, + require_iso8601=require_iso8601, + ) + if result.dtype == object: + return result + + dtype = tz_to_dtype(tz) + dta = DatetimeArray._simple_new(result, freq=freq, dtype=dtype) + return dta + + def sequence_to_dt64ns( data, dtype=None, @@ -1914,6 +1932,10 @@ def sequence_to_dt64ns( dayfirst=False, yearfirst=False, ambiguous="raise", + *, + allow_object: bool = False, + allow_mixed: bool = False, + require_iso8601: bool = False, ): """ Parameters @@ -1926,6 +1948,13 @@ def sequence_to_dt64ns( yearfirst : bool, default False ambiguous : str, bool, or arraylike, default 'raise' See pandas._libs.tslibs.tzconversion.tz_localize_to_utc. + allow_object : bool, default False + Whether to return an object-dtype ndarray instead of raising if the + data contains more than one timezone. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. + require_iso8601 : bool, default False + Only consider ISO-8601 formats when parsing strings. 
Returns ------- @@ -1989,7 +2018,12 @@ def sequence_to_dt64ns( # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times data, inferred_tz = objects_to_datetime64ns( - data, dayfirst=dayfirst, yearfirst=yearfirst + data, + dayfirst=dayfirst, + yearfirst=yearfirst, + allow_object=allow_object, + allow_mixed=allow_mixed, + require_iso8601=require_iso8601, ) if tz and inferred_tz: # two timezones: convert to intended from base UTC repr @@ -1997,6 +2031,9 @@ def sequence_to_dt64ns( data = data.view(DT64NS_DTYPE) elif inferred_tz: tz = inferred_tz + elif allow_object and data.dtype == object: + # We encountered mixed-timezones. + return data, None, None data_dtype = data.dtype @@ -2053,8 +2090,9 @@ def objects_to_datetime64ns( yearfirst, utc=False, errors="raise", - require_iso8601=False, - allow_object=False, + require_iso8601: bool = False, + allow_object: bool = False, + allow_mixed: bool = False, ): """ Convert data to array of timestamps. @@ -2071,6 +2109,8 @@ def objects_to_datetime64ns( allow_object : bool Whether to return an object-dtype ndarray instead of raising if the data contains more than one timezone. + allow_mixed : bool, default False + Interpret integers as timestamps when datetime objects are also present. Returns ------- @@ -2099,6 +2139,7 @@ def objects_to_datetime64ns( dayfirst=dayfirst, yearfirst=yearfirst, require_iso8601=require_iso8601, + allow_mixed=allow_mixed, ) result = result.reshape(data.shape, order=order) except ValueError as err: @@ -2135,7 +2176,7 @@ def objects_to_datetime64ns( raise TypeError(result) -def maybe_convert_dtype(data, copy): +def maybe_convert_dtype(data, copy: bool): """ Convert data based on dtype conventions, issuing deprecation warnings or errors where appropriate. 
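[Not part of the patch: a hypothetical usage sketch of the ``allow_mixed`` plumbing added above. ``sequence_to_datetimes`` is a private helper introduced by this patch, so the entry point and behavior may change; with mixed datetime objects and integers, the integers are kept as raw nanosecond (i8) timestamps instead of raising ``ValueError("mixed datetimes and integers in passed array")``.]

    from datetime import datetime

    import numpy as np

    from pandas.core.arrays.datetimes import sequence_to_datetimes

    # 1_609_459_200_000_000_000 ns since the epoch == 2021-01-01 00:00:00 UTC,
    # so both entries below resolve to the same timestamp.
    data = np.array([datetime(2021, 1, 1), 1_609_459_200_000_000_000], dtype=object)
    dta = sequence_to_datetimes(data)  # passes allow_mixed=True internally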
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index b16b4b3ae856a..61d63d2eed6e9 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -315,15 +315,6 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): ) super().__init__(values, mask, copy=copy) - def __neg__(self): - return type(self)(-self._data, self._mask.copy()) - - def __pos__(self): - return self - - def __abs__(self): - return type(self)(np.abs(self._data), self._mask.copy()) - @classmethod def _from_sequence( cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4d165dac40397..43c3a5e8bfd4c 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -76,7 +76,7 @@ from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.construction import ( - array, + array as pd_array, ensure_wrapped_if_datetimelike, extract_array, ) @@ -661,7 +661,7 @@ def _cmp_method(self, other, op): if is_list_like(other): if len(self) != len(other): raise ValueError("Lengths must match to compare") - other = array(other) + other = pd_array(other) elif not isinstance(other, Interval): # non-interval scalar -> no matches return invalid_comparison(self, other, op) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 57017e44a66e9..0dd98c5e3d3f2 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -199,3 +199,12 @@ def reconstruct(x): return tuple(reconstruct(x) for x in result) else: return reconstruct(result) + + def __neg__(self): + return type(self)(-self._data, self._mask.copy()) + + def __pos__(self): + return self + + def __abs__(self): + return type(self)(abs(self._data), self._mask.copy()) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 8441b324515f3..26fe6338118b6 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -6,6 +6,7 @@ Any, Optional, Sequence, + Tuple, Type, Union, ) @@ -20,6 +21,7 @@ Dtype, NpDtype, ) +from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.base import ExtensionDtype @@ -273,9 +275,22 @@ def __len__(self) -> int: """ return len(self._data) - @classmethod - def _from_factorized(cls, values, original): - return cls._from_sequence(values) + @doc(ExtensionArray.factorize) + def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]: + encoded = self._data.dictionary_encode() + indices = pa.chunked_array( + [c.indices for c in encoded.chunks], type=encoded.type.index_type + ).to_pandas() + if indices.dtype.kind == "f": + indices[np.isnan(indices)] = na_sentinel + indices = indices.astype(np.int64, copy=False) + + if encoded.num_chunks: + uniques = type(self)(encoded.chunk(0).dictionary) + else: + uniques = type(self)(pa.array([], type=encoded.type.value_type)) + + return indices.values, uniques @classmethod def _concat_same_type(cls, to_concat) -> ArrowStringArray: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index f7af1bb3da86b..c371e27eeceac 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -4,6 +4,7 @@ from typing import ( List, Optional, + Tuple, Union, ) @@ -229,13 +230,11 @@ def _simple_new( ) -> TimedeltaArray: assert dtype == TD64NS_DTYPE, dtype assert isinstance(values, 
np.ndarray), type(values) - if values.dtype != TD64NS_DTYPE: - assert values.dtype == "i8" - values = values.view(TD64NS_DTYPE) + assert values.dtype == TD64NS_DTYPE result = object.__new__(cls) result._ndarray = values - result._freq = to_offset(freq) + result._freq = freq result._dtype = TD64NS_DTYPE return result @@ -317,7 +316,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None): if not right_closed: index = index[:-1] - return cls._simple_new(index, freq=freq) + return cls._simple_new(index.view("m8[ns]"), freq=freq) # ---------------------------------------------------------------- # DatetimeLike Interface @@ -907,7 +906,9 @@ def f(x): # Constructor Helpers -def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"): +def sequence_to_td64ns( + data, copy=False, unit=None, errors="raise" +) -> Tuple[np.ndarray, Optional[Tick]]: """ Parameters ---------- diff --git a/pandas/core/common.py b/pandas/core/common.py index 8625c5063382f..871f5ac651cce 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -3,6 +3,7 @@ Note: pandas.core.common is *not* part of the public API. """ +from __future__ import annotations from collections import ( abc, @@ -12,6 +13,7 @@ from functools import partial import inspect from typing import ( + TYPE_CHECKING, Any, Callable, Collection, @@ -51,6 +53,9 @@ from pandas.core.dtypes.inference import iterable_not_string from pandas.core.dtypes.missing import isna +if TYPE_CHECKING: + from pandas import Index + class SettingWithCopyError(ValueError): pass @@ -512,3 +517,16 @@ def temp_setattr(obj, attr: str, value) -> Iterator[None]: setattr(obj, attr, value) yield obj setattr(obj, attr, old_value) + + +def require_length_match(data, index: Index): + """ + Check the length of data matches the length of the index. 
+ """ + if len(data) != len(index): + raise ValueError( + "Length of values " + f"({len(data)}) " + "does not match length of index " + f"({len(index)})" + ) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 9aa1c620fe1d9..0fd685e4f53f1 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -304,6 +304,7 @@ def array( raise ValueError(msg) if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ABCExtensionArray)): + # Note: we exclude np.ndarray here, will do type inference on it dtype = data.dtype data = extract_array(data, extract_numpy=True) @@ -525,9 +526,9 @@ def sanitize_array( elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: # TODO: deque, array.array - if isinstance(data, set): + if isinstance(data, (set, frozenset)): # Raise only for unordered sets, e.g., not for dict_keys - raise TypeError("Set type is unordered") + raise TypeError(f"'{type(data).__name__}' type is unordered") data = list(data) if dtype is not None: @@ -677,8 +678,10 @@ def _try_cast( subarr = arr else: subarr = maybe_cast_to_datetime(arr, dtype) + if dtype is not None and dtype.kind == "M": + return subarr - if not isinstance(subarr, (ABCExtensionArray, ABCIndex)): + if not isinstance(subarr, ABCExtensionArray): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b30dbe32eec4b..9f111282473c2 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -10,6 +10,7 @@ datetime, timedelta, ) +import inspect from typing import ( TYPE_CHECKING, Any, @@ -28,10 +29,7 @@ import numpy as np -from pandas._libs import ( - lib, - tslib, -) +from pandas._libs import lib from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -40,9 +38,9 @@ Timedelta, Timestamp, conversion, - iNaT, ints_to_pydatetime, ) +from pandas._libs.tslibs.timedeltas import array_to_timedelta64 from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -86,6 +84,7 @@ is_timedelta64_dtype, is_timedelta64_ns_dtype, is_unsigned_integer_dtype, + pandas_dtype, ) from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, @@ -96,7 +95,6 @@ from pandas.core.dtypes.generic import ( ABCDataFrame, ABCExtensionArray, - ABCIndex, ABCSeries, ) from pandas.core.dtypes.inference import is_list_like @@ -233,18 +231,17 @@ def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): raise TypeError(f"Cannot cast {repr(value)} to {dtype}") -def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): +def maybe_downcast_to_dtype( + result: ArrayLike, dtype: Union[str, np.dtype] +) -> ArrayLike: """ try to cast to the specified dtype (e.g. convert back to bool/int or could be an astype of float64->float32 """ - do_round = False - - if is_scalar(result): - return result - elif isinstance(result, ABCDataFrame): - # occurs in pivot_table doctest + if isinstance(result, ABCDataFrame): + # see test_pivot_table_doctest_case return result + do_round = False if isinstance(dtype, str): if dtype == "infer": @@ -265,6 +262,7 @@ def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): do_round = True else: + # TODO: complex? what if result is already non-object? 
dtype = "object" dtype = np.dtype(dtype) @@ -289,14 +287,17 @@ def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): i8values = result.astype("i8", copy=False) cls = dtype.construct_array_type() # equiv: DatetimeArray(i8values).tz_localize("UTC").tz_convert(dtype.tz) - result = cls._simple_new(i8values, dtype=dtype) + dt64values = i8values.view("M8[ns]") + result = cls._simple_new(dt64values, dtype=dtype) else: result = result.astype(dtype) return result -def maybe_downcast_numeric(result, dtype: DtypeObj, do_round: bool = False): +def maybe_downcast_numeric( + result: ArrayLike, dtype: DtypeObj, do_round: bool = False +) -> ArrayLike: """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. @@ -310,7 +311,7 @@ def maybe_downcast_numeric(result, dtype: DtypeObj, do_round: bool = False): ------- ndarray or ExtensionArray """ - if not isinstance(dtype, np.dtype): + if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype): # e.g. SparseDtype has no itemsize attr return result @@ -697,7 +698,7 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): return dtype, fill_value -def _ensure_dtype_type(value, dtype: DtypeObj): +def _ensure_dtype_type(value, dtype: np.dtype): """ Ensure that the given value is an instance of the given dtype. @@ -707,21 +708,17 @@ def _ensure_dtype_type(value, dtype: DtypeObj): Parameters ---------- value : object - dtype : np.dtype or ExtensionDtype + dtype : np.dtype Returns ------- object """ # Start with exceptions in which we do _not_ cast to numpy types - if is_extension_array_dtype(dtype): - return value - elif dtype == np.object_: - return value - elif isna(value): - # e.g. keep np.nan rather than try to cast to np.float32(np.nan) + if dtype == np.object_: return value + # Note: before we get here we have already excluded isna(value) return dtype.type(value) @@ -1138,7 +1135,7 @@ def astype_nansafe( if isinstance(dtype, ExtensionDtype): return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy) - elif not isinstance(dtype, np.dtype): + elif not isinstance(dtype, np.dtype): # pragma: no cover raise ValueError("dtype must be np.dtype or ExtensionDtype") if arr.dtype.kind in ["m", "M"] and ( @@ -1228,6 +1225,107 @@ def astype_nansafe( return arr.astype(dtype, copy=copy) +def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike: + """ + Cast array (ndarray or ExtensionArray) to the new dtype. 
+
+    Parameters
+    ----------
+    values : ndarray or ExtensionArray
+    dtype : dtype object
+    copy : bool, default False
+        copy if indicated
+
+    Returns
+    -------
+    ndarray or ExtensionArray
+    """
+    if (
+        values.dtype.kind in ["m", "M"]
+        and dtype.kind in ["i", "u"]
+        and isinstance(dtype, np.dtype)
+        and dtype.itemsize != 8
+    ):
+        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
+        msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
+        raise TypeError(msg)
+
+    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
+        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)
+
+    if is_dtype_equal(values.dtype, dtype):
+        if copy:
+            return values.copy()
+        return values
+
+    if isinstance(values, ABCExtensionArray):
+        values = values.astype(dtype, copy=copy)
+
+    else:
+        values = astype_nansafe(values, dtype, copy=copy)
+
+    # in pandas we don't store numpy str dtypes, so convert to object
+    if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
+        values = np.array(values, dtype=object)
+
+    return values
+
+
+def astype_array_safe(
+    values: ArrayLike, dtype, copy: bool = False, errors: str = "raise"
+) -> ArrayLike:
+    """
+    Cast array (ndarray or ExtensionArray) to the new dtype.
+
+    This is basically the implementation for DataFrame/Series.astype and
+    includes all custom logic for pandas (NaN-safety, converting str to object,
+    not allowing ExtensionDtype classes to be passed instead of instances).
+
+    Parameters
+    ----------
+    values : ndarray or ExtensionArray
+    dtype : str, dtype convertible
+    copy : bool, default False
+        copy if indicated
+    errors : str, {'raise', 'ignore'}, default 'raise'
+        - ``raise`` : allow exceptions to be raised
+        - ``ignore`` : suppress exceptions. On error return original object
+
+    Returns
+    -------
+    ndarray or ExtensionArray
+    """
+    errors_legal_values = ("raise", "ignore")
+
+    if errors not in errors_legal_values:
+        invalid_arg = (
+            "Expected value of kwarg 'errors' to be one of "
+            f"{list(errors_legal_values)}. Supplied value is '{errors}'"
+        )
+        raise ValueError(invalid_arg)
+
+    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
+        msg = (
+            f"Expected an instance of {dtype.__name__}, "
+            "but got the class instead. Try instantiating 'dtype'."
+        )
+        raise TypeError(msg)
+
+    dtype = pandas_dtype(dtype)
+
+    try:
+        new_values = astype_array(values, dtype, copy=copy)
+    except (ValueError, TypeError):
+        # e.g.
astype_nansafe can fail on object-dtype of strings + # trying to convert to float + if errors == "ignore": + new_values = values + else: + raise + + return new_values + + def soft_convert_objects( values: np.ndarray, datetime: bool = True, @@ -1388,9 +1486,7 @@ def maybe_castable(dtype: np.dtype) -> bool: return dtype.name not in POSSIBLY_CAST_DTYPES -def maybe_infer_to_datetimelike( - value: Union[ArrayLike, Scalar], convert_dates: bool = False -): +def maybe_infer_to_datetimelike(value: Union[np.ndarray, List]): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -1401,21 +1497,13 @@ def maybe_infer_to_datetimelike( Parameters ---------- - value : np.array / Series / Index / list-like - convert_dates : bool, default False - if True try really hard to convert dates (such as datetime.date), other - leave inferred dtype 'date' alone + value : np.ndarray or list """ - if isinstance(value, (ABCIndex, ABCExtensionArray)): - if not is_object_dtype(value.dtype): - raise ValueError("array-like value must be object-dtype") + if not isinstance(value, (np.ndarray, list)): + raise TypeError(type(value)) # pragma: no cover - v = value - - if not is_list_like(v): - v = [v] - v = np.array(v, copy=False) + v = np.array(value, copy=False) # we only care about object dtypes if not is_object_dtype(v.dtype): @@ -1429,50 +1517,46 @@ def maybe_infer_to_datetimelike( return value def try_datetime(v: np.ndarray) -> ArrayLike: - # safe coerce to datetime64 - try: - # GH19671 - # tznaive only - v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0] - except ValueError: - - # we might have a sequence of the same-datetimes with tz's - # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype, xref GH19671 - from pandas import DatetimeIndex - - try: + # Coerce to datetime64, datetime64tz, or in corner cases + # object[datetimes] + from pandas.core.arrays.datetimes import sequence_to_datetimes - values, tz = conversion.datetime_to_datetime64(v) - except (ValueError, TypeError): - pass - else: - dti = DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz) - return dti._data - except TypeError: + try: + # GH#19671 we pass require_iso8601 to be relatively strict + # when parsing strings. + dta = sequence_to_datetimes(v, require_iso8601=True, allow_object=True) + except (ValueError, TypeError): # e.g. is not convertible to datetime - pass - - return v.reshape(shape) + return v.reshape(shape) + else: + if dta.dtype == object or dta.tz is None: + # GH#19671 if we have mixed timezones we may have object-dtype + # here. + # This is reachable bc allow_object=True, means we cast things + # to mixed-tz datetime objects (mostly). Only 1 test + # relies on this behavior, see GH#40111 + # FIXME: conditional reshape is kludgy + return np.asarray(dta).reshape(shape) + # otherwise we have dt64tz + return dta def try_timedelta(v: np.ndarray) -> np.ndarray: # safe coerce to timedelta64 # will try first with a string & object conversion - from pandas import to_timedelta - try: - td_values = to_timedelta(v) + # bc we know v.dtype == object, this is equivalent to + # `np.asarray(to_timedelta(v))`, but using a lower-level API that + # does not require a circular import. 
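+            # Illustrative equivalence, with hypothetical input (not part
+            # of this change):
+            #   v = np.array(["1 days", "2 days", None], dtype=object)
+            #   array_to_timedelta64(v).view("m8[ns]")
+            # matches np.asarray(to_timedelta(v)), with None coerced to NaT.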
+ td_values = array_to_timedelta64(v).view("m8[ns]") except (ValueError, OverflowError): return v.reshape(shape) else: - return np.asarray(td_values).reshape(shape) + return td_values.reshape(shape) inferred_type = lib.infer_datetimelike_array(ensure_object(v)) - if inferred_type == "date" and convert_dates: - value = try_datetime(v) - elif inferred_type == "datetime": + if inferred_type == "datetime": value = try_datetime(v) elif inferred_type == "timedelta": value = try_timedelta(v) @@ -1502,8 +1586,8 @@ def maybe_cast_to_datetime( try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ - from pandas.core.tools.datetimes import to_datetime - from pandas.core.tools.timedeltas import to_timedelta + from pandas.core.arrays.datetimes import sequence_to_datetimes + from pandas.core.arrays.timedeltas import sequence_to_td64ns if not is_list_like(value): raise TypeError("value must be listlike") @@ -1513,78 +1597,50 @@ def maybe_cast_to_datetime( is_datetime64tz = is_datetime64tz_dtype(dtype) is_timedelta64 = is_timedelta64_dtype(dtype) - if is_datetime64 or is_datetime64tz or is_timedelta64: + vdtype = getattr(value, "dtype", None) - # Force the dtype if needed. - msg = ( - f"The '{dtype.name}' dtype has no unit. " - f"Please pass in '{dtype.name}[ns]' instead." - ) - - if is_datetime64: - # unpack e.g. SparseDtype - dtype = getattr(dtype, "subtype", dtype) - if not is_dtype_equal(dtype, DT64NS_DTYPE): - - # pandas supports dtype whose granularity is less than [ns] - # e.g., [ps], [fs], [as] - if dtype <= np.dtype("M8[ns]"): - if dtype.name == "datetime64": - raise ValueError(msg) - dtype = DT64NS_DTYPE - else: - raise TypeError( - f"cannot convert datetimelike to dtype [{dtype}]" - ) - - elif is_timedelta64 and not is_dtype_equal(dtype, TD64NS_DTYPE): - - # pandas supports dtype whose granularity is less than [ns] - # e.g., [ps], [fs], [as] - if dtype <= np.dtype("m8[ns]"): - if dtype.name == "timedelta64": - raise ValueError(msg) - dtype = TD64NS_DTYPE - else: - raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") + if is_datetime64 or is_datetime64tz or is_timedelta64: + dtype = ensure_nanosecond_dtype(dtype) if not is_sparse(value): value = np.array(value, copy=False) - # have a scalar array-like (e.g. NaT) - if value.ndim == 0: - value = iNaT - # we have an array of datetime or timedeltas & nulls - elif value.size or not is_dtype_equal(value.dtype, dtype): + if value.size or not is_dtype_equal(value.dtype, dtype): _disallow_mismatched_datetimelike(value, dtype) try: if is_datetime64: - dti = to_datetime(value, errors="raise") + dta = sequence_to_datetimes(value, allow_object=False) # GH 25843: Remove tz information since the dtype # didn't specify one - if dti.tz is not None: - dti = dti.tz_localize(None) - value = dti._values + if dta.tz is not None: + # equiv: dta.view(dtype) + # Note: NOT equivalent to dta.astype(dtype) + dta = dta.tz_localize(None) + value = dta elif is_datetime64tz: + dtype = cast(DatetimeTZDtype, dtype) # The string check can be removed once issue #13712 # is solved. String data that is passed with a # datetime64tz is assumed to be naive which should # be localized to the timezone. 
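+                        # Illustrative case (hypothetical input): with
+                        #   value = np.array(["2021-01-01"], dtype=object)
+                        #   dtype = DatetimeTZDtype(tz="US/Eastern")
+                        # the string is read as Eastern wall time, i.e.
+                        # 2021-01-01 00:00:00-05:00, not converted from UTC.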
                        is_dt_string = is_string_dtype(value.dtype)
-                        dta = to_datetime(value, errors="raise").array
+                        dta = sequence_to_datetimes(value, allow_object=False)
                         if dta.tz is not None:
                             value = dta.astype(dtype, copy=False)
                         elif is_dt_string:
                             # Strings here are naive, so directly localize
+                            # equiv: dta.astype(dtype)  # though deprecated
                             value = dta.tz_localize(dtype.tz)
                         else:
                             # Numeric values are UTC at this point,
                             # so localize and convert
+                            # equiv: Series(dta).astype(dtype)  # though deprecated
                             value = dta.tz_localize("UTC").tz_convert(dtype.tz)
                     elif is_timedelta64:
-                        value = to_timedelta(value, errors="raise")._values
+                        # if successful, we get a ndarray[td64ns]
+                        value, _ = sequence_to_td64ns(value)
             except OutOfBoundsDatetime:
                 raise
             except ValueError:
@@ -1593,9 +1649,7 @@ def maybe_cast_to_datetime(
                 pass

     # coerce datetimelike to object
-    elif is_datetime64_dtype(
-        getattr(value, "dtype", None)
-    ) and not is_datetime64_dtype(dtype):
+    elif is_datetime64_dtype(vdtype) and not is_datetime64_dtype(dtype):
         if is_object_dtype(dtype):
             value = cast(np.ndarray, value)

@@ -1616,7 +1670,7 @@ def maybe_cast_to_datetime(
         elif value.dtype == object:
             value = maybe_infer_to_datetimelike(value)

-    else:
+    elif not isinstance(value, ABCExtensionArray):
        # only do this if we have an array and the dtype of the array is not
        # setup already we are not an integer/object, so don't bother with this
        # conversion

@@ -1639,6 +1693,52 @@ def sanitize_to_nanoseconds(values: np.ndarray) -> np.ndarray:
     return values


+def ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
+    """
+    Convert dtypes with granularity less than nanosecond to nanosecond
+
+    >>> ensure_nanosecond_dtype(np.dtype("M8[s]"))
+    dtype('<M8[ns]')
+
+    >>> ensure_nanosecond_dtype(np.dtype("m8[ps]"))
+    Traceback (most recent call last):
+        ...
+    TypeError: cannot convert timedeltalike to dtype [timedelta64[ps]]
+    """
+    msg = (
+        f"The '{dtype.name}' dtype has no unit. "
+        f"Please pass in '{dtype.name}[ns]' instead."
+    )
+
+    # unpack e.g. SparseDtype
+    dtype = getattr(dtype, "subtype", dtype)
+
+    if not isinstance(dtype, np.dtype):
+        # i.e. datetime64tz
+        pass
+
+    elif dtype.kind == "M" and dtype != DT64NS_DTYPE:
+        # pandas supports dtype whose granularity is less than [ns]
+        # e.g., [ps], [fs], [as]
+        if dtype <= np.dtype("M8[ns]"):
+            if dtype.name == "datetime64":
+                raise ValueError(msg)
+            dtype = DT64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]")
+
+    elif dtype.kind == "m" and dtype != TD64NS_DTYPE:
+        # pandas supports dtype whose granularity is less than [ns]
+        # e.g., [ps], [fs], [as]
+        if dtype <= np.dtype("m8[ns]"):
+            if dtype.name == "timedelta64":
+                raise ValueError(msg)
+            dtype = TD64NS_DTYPE
+        else:
+            raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]")
+    return dtype
+
+
 def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     """
     Find a common data type among the given dtypes.
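A minimal usage sketch for the astype_array_safe helper added earlier in this
patch (illustrative only, run against this branch; the failing input is
hypothetical):

    import numpy as np

    from pandas.core.dtypes.cast import astype_array_safe

    values = np.array(["1", "2", "not-a-number"], dtype=object)

    # errors="raise" (the default) propagates the failed float cast ...
    try:
        astype_array_safe(values, np.float64)
    except ValueError:
        pass

    # ... while errors="ignore" hands back the original array unchanged.
    result = astype_array_safe(values, np.float64, errors="ignore")
    assert result is values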
@@ -1829,7 +1929,16 @@ def construct_1d_ndarray_preserving_na( else: if dtype is not None: _disallow_mismatched_datetimelike(values, dtype) - subarr = np.array(values, dtype=dtype, copy=copy) + + if ( + dtype == object + and isinstance(values, np.ndarray) + and values.dtype.kind in ["m", "M"] + ): + # TODO(numpy#12550): special-case can be removed + subarr = construct_1d_object_array_from_listlike(list(values)) + else: + subarr = np.array(values, dtype=dtype, copy=copy) return subarr diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 42ac786ff315e..1545b5b106803 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -25,7 +25,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseArray from pandas.core.construction import ( - array, + array as pd_array, ensure_wrapped_if_datetimelike, ) @@ -66,7 +66,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: if is_extension_array_dtype(dtype) and isinstance(arr, np.ndarray): # numpy's astype cannot handle ExtensionDtypes - return array(arr, dtype=dtype, copy=False) + return pd_array(arr, dtype=dtype, copy=False) return arr.astype(dtype, copy=False) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2c95e65c70899..dcd6ef77238f9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -99,7 +99,6 @@ maybe_box_native, maybe_convert_platform, maybe_downcast_to_dtype, - maybe_infer_to_datetimelike, validate_numeric_casting, ) from pandas.core.dtypes.common import ( @@ -147,6 +146,7 @@ from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.construction import ( extract_array, + sanitize_array, sanitize_masked_array, ) from pandas.core.generic import ( @@ -177,13 +177,12 @@ from pandas.core.internals.construction import ( arrays_to_mgr, dataclasses_to_dicts, - init_dict, - init_ndarray, - masked_rec_array_to_mgr, + dict_to_mgr, mgr_to_mgr, + ndarray_to_mgr, nested_data_to_arrays, + rec_array_to_mgr, reorder_arrays, - sanitize_index, to_arrays, treat_as_nested, ) @@ -564,41 +563,55 @@ def __init__( if isinstance(data, DataFrame): data = data._mgr - if isinstance(data, (BlockManager, ArrayManager)): - if index is None and columns is None and dtype is None and copy is False: - # GH#33357 fastpath - NDFrame.__init__(self, data) - return + # first check if a Manager is passed without any other arguments + # -> use fastpath (without checking Manager type) + if ( + index is None + and columns is None + and dtype is None + and copy is False + and isinstance(data, (BlockManager, ArrayManager)) + ): + # GH#33357 fastpath + NDFrame.__init__(self, data) + return + + manager = get_option("mode.data_manager") + if isinstance(data, (BlockManager, ArrayManager)): mgr = self._init_mgr( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy ) elif isinstance(data, dict): - mgr = init_dict(data, index, columns, dtype=dtype) + mgr = dict_to_mgr(data, index, columns, dtype=dtype, typ=manager) elif isinstance(data, ma.MaskedArray): import numpy.ma.mrecords as mrecords # masked recarray if isinstance(data, mrecords.MaskedRecords): - mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy) + mgr = rec_array_to_mgr(data, index, columns, dtype, copy, typ=manager) # a masked array else: data = sanitize_masked_array(data) - mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + mgr = ndarray_to_mgr( + data, index, columns, dtype=dtype, copy=copy, typ=manager + ) elif isinstance(data, 
(np.ndarray, Series, Index)): if data.dtype.names: - data_columns = list(data.dtype.names) - data = {k: data[k] for k in data_columns} - if columns is None: - columns = data_columns - mgr = init_dict(data, index, columns, dtype=dtype) + # i.e. numpy structured array + mgr = rec_array_to_mgr(data, index, columns, dtype, copy, typ=manager) elif getattr(data, "name", None) is not None: - mgr = init_dict({data.name: data}, index, columns, dtype=dtype) + # i.e. Series/Index with non-None name + mgr = dict_to_mgr( + {data.name: data}, index, columns, dtype=dtype, typ=manager + ) else: - mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + mgr = ndarray_to_mgr( + data, index, columns, dtype=dtype, copy=copy, typ=manager + ) # For data is list-like, or Iterable (will consume into list) elif is_list_like(data): @@ -608,14 +621,20 @@ def __init__( if is_dataclass(data[0]): data = dataclasses_to_dicts(data) if treat_as_nested(data): + if columns is not None: + columns = ensure_index(columns) arrays, columns, index = nested_data_to_arrays( data, columns, index, dtype ) - mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) + mgr = arrays_to_mgr( + arrays, columns, index, columns, dtype=dtype, typ=manager + ) else: - mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + mgr = ndarray_to_mgr( + data, index, columns, dtype=dtype, copy=copy, typ=manager + ) else: - mgr = init_dict({}, index, columns, dtype=dtype) + mgr = dict_to_mgr({}, index, columns, dtype=dtype, typ=manager) # For data is scalar else: if index is None or columns is None: @@ -632,18 +651,19 @@ def __init__( construct_1d_arraylike_from_scalar(data, len(index), dtype) for _ in range(len(columns)) ] - mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) + mgr = arrays_to_mgr( + values, columns, index, columns, dtype=None, typ=manager + ) else: values = construct_2d_arraylike_from_scalar( data, len(index), len(columns), dtype, copy ) - mgr = init_ndarray( - values, index, columns, dtype=values.dtype, copy=False + mgr = ndarray_to_mgr( + values, index, columns, dtype=values.dtype, copy=False, typ=manager ) # ensure correct Manager type according to settings - manager = get_option("mode.data_manager") mgr = mgr_to_mgr(mgr, typ=manager) NDFrame.__init__(self, mgr) @@ -1919,12 +1939,11 @@ def from_records( arr_columns_list.append(k) arrays.append(v) - arrays, arr_columns = reorder_arrays(arrays, arr_columns_list, columns) + arr_columns = Index(arr_columns_list) + arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns) elif isinstance(data, (np.ndarray, DataFrame)): arrays, columns = to_arrays(data, columns) - if columns is not None: - columns = ensure_index(columns) arr_columns = columns else: arrays, arr_columns = to_arrays(data, columns) @@ -1934,9 +1953,7 @@ def from_records( arrays[i] = lib.maybe_convert_objects(arr, try_float=True) arr_columns = ensure_index(arr_columns) - if columns is not None: - columns = ensure_index(columns) - else: + if columns is None: columns = arr_columns if exclude is None: @@ -1971,7 +1988,8 @@ def from_records( arr_columns = arr_columns.drop(arr_exclude) columns = columns.drop(exclude) - mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns) + manager = get_option("mode.data_manager") + mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns, typ=manager) return cls(mgr) @@ -2178,6 +2196,7 @@ def _from_arrays( if dtype is not None: dtype = pandas_dtype(dtype) + manager = get_option("mode.data_manager") mgr = arrays_to_mgr( arrays, 
             columns,
@@ -2185,6 +2204,7 @@
             columns,
             dtype=dtype,
             verify_integrity=verify_integrity,
+            typ=manager,
         )
         return cls(mgr)

@@ -2641,6 +2661,189 @@ def to_html(
             render_links=render_links,
         )

+    @doc(storage_options=generic._shared_docs["storage_options"])
+    def to_xml(
+        self,
+        path_or_buffer: Optional[FilePathOrBuffer] = None,
+        index: bool = True,
+        root_name: Optional[str] = "data",
+        row_name: Optional[str] = "row",
+        na_rep: Optional[str] = None,
+        attr_cols: Optional[Union[str, List[str]]] = None,
+        elem_cols: Optional[Union[str, List[str]]] = None,
+        namespaces: Optional[Dict[Optional[str], str]] = None,
+        prefix: Optional[str] = None,
+        encoding: str = "utf-8",
+        xml_declaration: Optional[bool] = True,
+        pretty_print: Optional[bool] = True,
+        parser: Optional[str] = "lxml",
+        stylesheet: Optional[FilePathOrBuffer] = None,
+        compression: CompressionOptions = "infer",
+        storage_options: StorageOptions = None,
+    ) -> Optional[str]:
+        """
+        Render a DataFrame to an XML document.
+
+        .. versionadded:: 1.3.0
+
+        Parameters
+        ----------
+        path_or_buffer : str, path object or file-like object, optional
+            File to write output to. If None, the output is returned as a
+            string.
+        index : bool, default True
+            Whether to include index in XML document.
+        root_name : str, default 'data'
+            The name of root element in XML document.
+        row_name : str, default 'row'
+            The name of row element in XML document.
+        na_rep : str, optional
+            Missing data representation.
+        attr_cols : list-like, optional
+            List of columns to write as attributes in row element.
+            Hierarchical columns will be flattened with underscore
+            delimiting the different levels.
+        elem_cols : list-like, optional
+            List of columns to write as children in row element. By default,
+            all columns output as children of row element. Hierarchical
+            columns will be flattened with underscore delimiting the
+            different levels.
+        namespaces : dict, optional
+            All namespaces to be defined in root element. Keys of dict
+            should be prefix names and values of dict corresponding URIs.
+            Default namespaces should be given empty string key. For
+            example, ::
+
+                namespaces = {{"": "https://example.com"}}
+
+        prefix : str, optional
+            Namespace prefix to be used for every element and/or attribute
+            in document. This should be one of the keys in ``namespaces``
+            dict.
+        encoding : str, default 'utf-8'
+            Encoding of the resulting document.
+        xml_declaration : bool, default True
+            Whether to include the XML declaration at start of document.
+        pretty_print : bool, default True
+            Whether output should be pretty printed with indentation and
+            line breaks.
+        parser : {{'lxml','etree'}}, default 'lxml'
+            Parser module to use for building of tree. Only 'lxml' and
+            'etree' are supported. With 'lxml', the ability to use XSLT
+            stylesheet is supported.
+        stylesheet : str, path object or file-like object, optional
+            A URL, file-like object, or a raw string containing an XSLT
+            script used to transform the raw XML output. Script should use
+            layout of elements and attributes from original output. This
+            argument requires ``lxml`` to be installed. Only XSLT 1.0
+            scripts and not later versions are currently supported.
+        compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer'
+            For on-the-fly compression of the output data. If 'infer', then use
+            gzip, bz2, zip or xz if path_or_buffer is a string ending in
+            '.gz', '.bz2', '.zip', or 'xz', respectively, and no compression
+            otherwise.
+            If using 'zip', the resulting ZIP file will contain only one data
+            file. Set to None for no compression.
+        {storage_options}
+
+        Returns
+        -------
+        None or str
+            If ``path_or_buffer`` is None, returns the resulting XML format as a
+            string. Otherwise returns None.
+
+        See Also
+        --------
+        to_json : Convert the pandas object to a JSON string.
+        to_html : Convert DataFrame to HTML.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame({{'shape': ['square', 'circle', 'triangle'],
+        ...                    'degrees': [360, 360, 180],
+        ...                    'sides': [4, np.nan, 3]}})
+
+        >>> df.to_xml()  # doctest: +SKIP
+        <?xml version='1.0' encoding='utf-8'?>
+        <data>
+          <row>
+            <index>0</index>
+            <shape>square</shape>
+            <degrees>360</degrees>
+            <sides>4.0</sides>
+          </row>
+          <row>
+            <index>1</index>
+            <shape>circle</shape>
+            <degrees>360</degrees>
+            <sides/>
+          </row>
+          <row>
+            <index>2</index>
+            <shape>triangle</shape>
+            <degrees>180</degrees>
+            <sides>3.0</sides>
+          </row>
+        </data>
+
+        >>> df.to_xml(attr_cols=[
+        ...     'index', 'shape', 'degrees', 'sides'
+        ...     ])  # doctest: +SKIP
+        <?xml version='1.0' encoding='utf-8'?>
+        <data>
+          <row index="0" shape="square" degrees="360" sides="4.0"/>
+          <row index="1" shape="circle" degrees="360"/>
+          <row index="2" shape="triangle" degrees="180" sides="3.0"/>
+        </data>
+
+        >>> df.to_xml(namespaces={{"doc": "https://example.com"}},
+        ...           prefix="doc")  # doctest: +SKIP
+        <?xml version='1.0' encoding='utf-8'?>
+        <doc:data xmlns:doc="https://example.com">
+          <doc:row>
+            <doc:index>0</doc:index>
+            <doc:shape>square</doc:shape>
+            <doc:degrees>360</doc:degrees>
+            <doc:sides>4.0</doc:sides>
+          </doc:row>
+          <doc:row>
+            <doc:index>1</doc:index>
+            <doc:shape>circle</doc:shape>
+            <doc:degrees>360</doc:degrees>
+            <doc:sides/>
+          </doc:row>
+          <doc:row>
+            <doc:index>2</doc:index>
+            <doc:shape>triangle</doc:shape>
+            <doc:degrees>180</doc:degrees>
+            <doc:sides>3.0</doc:sides>
+          </doc:row>
+        </doc:data>
+        """
+
+        formatter = fmt.DataFrameFormatter(
+            self,
+            index=index,
+        )
+
+        return fmt.DataFrameRenderer(formatter).to_xml(
+            path_or_buffer=path_or_buffer,
+            index=index,
+            root_name=root_name,
+            row_name=row_name,
+            na_rep=na_rep,
+            attr_cols=attr_cols,
+            elem_cols=elem_cols,
+            namespaces=namespaces,
+            prefix=prefix,
+            encoding=encoding,
+            xml_declaration=xml_declaration,
+            pretty_print=pretty_print,
+            parser=parser,
+            stylesheet=stylesheet,
+            compression=compression,
+            storage_options=storage_options,
+        )
+
     # ----------------------------------------------------------------------
     @Substitution(
         klass="DataFrame",
@@ -4024,15 +4227,14 @@ def _sanitize_column(self, value) -> ArrayLike:
             value = _reindex_for_setitem(value, self.index)

         elif isinstance(value, ExtensionArray):
-            # Explicitly copy here, instead of in sanitize_index,
-            # as sanitize_index won't copy an EA, even with copy=True
+            # Explicitly copy here
             value = value.copy()
-            value = sanitize_index(value, self.index)
+            com.require_length_match(value, self.index)

         elif is_sequence(value):
+            com.require_length_match(value, self.index)

             # turn me into an ndarray
-            value = sanitize_index(value, self.index)
             if not isinstance(value, (np.ndarray, Index)):
                 if isinstance(value, list) and len(value) > 0:
                     value = maybe_convert_platform(value)
@@ -4045,7 +4247,7 @@ def _sanitize_column(self, value) -> ArrayLike:

             # possibly infer to datetimelike
             if is_object_dtype(value.dtype):
-                value = maybe_infer_to_datetimelike(value)
+                value = sanitize_array(value, None)

         else:
             value = construct_1d_arraylike_from_scalar(value, len(self), dtype=None)

@@ -5625,7 +5827,7 @@ def sort_index(
         self,
         axis: Axis = 0,
         level: Optional[Level] = None,
-        ascending: bool = True,
+        ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True,
         inplace: bool = False,
         kind: str = "quicksort",
         na_position: str = "last",
@@ -5646,7 +5848,7 @@ def sort_index(
             and 1 identifies the columns.
         level : int or level name or list of ints or list of level names
             If not None, sort on values in specified index level(s).
-        ascending : bool or list of bools, default True
+        ascending : bool or list-like of bools, default True
             Sort ascending vs. descending. When the index is a MultiIndex the
             sort direction can be controlled for each level individually.
inplace : bool, default False diff --git a/pandas/core/generic.py b/pandas/core/generic.py index eb4c5c07af2c4..5bba7ab67b2bf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -71,6 +71,7 @@ rewrite_axis_style_signature, ) from pandas.util._validators import ( + validate_ascending, validate_bool_kwarg, validate_fillna_kwargs, ) @@ -138,6 +139,7 @@ ArrayManager, BlockManager, ) +from pandas.core.internals.construction import mgr_to_mgr from pandas.core.missing import find_valid_index from pandas.core.ops import align_method_FRAME from pandas.core.reshape.concat import concat @@ -4548,7 +4550,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool_t = True, + ascending: Union[Union[bool_t, int], Sequence[Union[bool_t, int]]] = True, inplace: bool_t = False, kind: str = "quicksort", na_position: str = "last", @@ -4559,6 +4561,8 @@ def sort_index( inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) + ascending = validate_ascending(ascending) + target = self._get_axis(axis) indexer = get_indexer_indexer( @@ -5752,6 +5756,8 @@ def _to_dict_of_blocks(self, copy: bool_t = True): Internal ONLY - only works for BlockManager """ mgr = self._mgr + # convert to BlockManager if needed -> this way support ArrayManager as well + mgr = mgr_to_mgr(mgr, "block") mgr = cast(BlockManager, mgr) return { k: self._constructor(v).__finalize__(self) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f4c69ea9d89db..aaf67fb1be532 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1815,6 +1815,8 @@ def count(self) -> DataFrame: ids, _, ngroups = self.grouper.group_info mask = ids != -1 + using_array_manager = isinstance(data, ArrayManager) + def hfunc(bvalues: ArrayLike) -> ArrayLike: # TODO(2DEA): reshape would not be necessary with 2D EAs if bvalues.ndim == 1: @@ -1824,6 +1826,10 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike: masked = mask & ~isna(bvalues) counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1) + if using_array_manager: + # count_level_2d return (1, N) array for single column + # -> extract 1D array + counted = counted[0, :] return counted new_mgr = data.grouped_reduce(hfunc) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bf9fdb5d0cff7..d8135dbf3f08d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3098,11 +3098,12 @@ def sample( if random_state is not None: random_state = com.random_state(random_state) + group_iterator = self.grouper.get_iterator(self._selected_obj, self.axis) samples = [ obj.sample( n=n, frac=frac, replace=replace, weights=w, random_state=random_state ) - for (_, obj), w in zip(self, ws) + for (_, obj), w in zip(group_iterator, ws) ] return concat(samples, axis=self.axis) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5004d1fe08a5b..7fb6e98fb176e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -8,6 +8,7 @@ from __future__ import annotations import collections +import functools from typing import ( Dict, Generic, @@ -84,6 +85,7 @@ MultiIndex, ensure_index, ) +from pandas.core.internals import ArrayManager from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -94,6 +96,64 @@ get_indexer_dict, ) +_CYTHON_FUNCTIONS = { + "aggregate": { + "add": "group_add", + "prod": "group_prod", + "min": "group_min", + "max": "group_max", + "mean": "group_mean", + "median": "group_median", 
+ "var": "group_var", + "first": "group_nth", + "last": "group_last", + "ohlc": "group_ohlc", + }, + "transform": { + "cumprod": "group_cumprod", + "cumsum": "group_cumsum", + "cummin": "group_cummin", + "cummax": "group_cummax", + "rank": "group_rank", + }, +} + + +@functools.lru_cache(maxsize=None) +def _get_cython_function(kind: str, how: str, dtype: np.dtype, is_numeric: bool): + + dtype_str = dtype.name + ftype = _CYTHON_FUNCTIONS[kind][how] + + # see if there is a fused-type version of function + # only valid for numeric + f = getattr(libgroupby, ftype, None) + if f is not None and is_numeric: + return f + + # otherwise find dtype-specific version, falling back to object + for dt in [dtype_str, "object"]: + f2 = getattr(libgroupby, f"{ftype}_{dt}", None) + if f2 is not None: + return f2 + + if hasattr(f, "__signatures__"): + # inspect what fused types are implemented + if dtype_str == "object" and "object" not in f.__signatures__: + # disallow this function so we get a NotImplementedError below + # instead of a TypeError at runtime + f = None + + func = f + + if func is None: + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" + ) + + return func + class BaseGrouper: """ @@ -207,21 +267,25 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): group_keys = self._get_group_keys() result_values = None - sdata: FrameOrSeries = splitter._get_sorted_data() - if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)): + if data.ndim == 2 and np.any(data.dtypes.apply(is_extension_array_dtype)): # calling splitter.fast_apply will raise TypeError via apply_frame_axis0 # if we pass EA instead of ndarray # TODO: can we have a workaround for EAs backed by ndarray? pass + elif isinstance(data._mgr, ArrayManager): + # TODO(ArrayManager) don't use fast_apply / libreduction.apply_frame_axis0 + # for now -> relies on BlockManager internals + pass elif ( com.get_callable_name(f) not in base.plotting_methods and isinstance(splitter, FrameSplitter) and axis == 0 # fast_apply/libreduction doesn't allow non-numpy backed indexes - and not sdata.index._has_complex_internals + and not data.index._has_complex_internals ): try: + sdata = splitter.sorted_data result_values, mutated = splitter.fast_apply(f, sdata, group_keys) except IndexError: @@ -380,28 +444,6 @@ def get_group_levels(self) -> List[Index]: # ------------------------------------------------------------ # Aggregation functions - _cython_functions = { - "aggregate": { - "add": "group_add", - "prod": "group_prod", - "min": "group_min", - "max": "group_max", - "mean": "group_mean", - "median": "group_median", - "var": "group_var", - "first": "group_nth", - "last": "group_last", - "ohlc": "group_ohlc", - }, - "transform": { - "cumprod": "group_cumprod", - "cumsum": "group_cumsum", - "cummin": "group_cummin", - "cummax": "group_cummax", - "rank": "group_rank", - }, - } - _cython_arity = {"ohlc": 4} # OHLC @final @@ -412,43 +454,6 @@ def _is_builtin_func(self, arg): """ return SelectionMixin._builtin_table.get(arg, arg) - @final - def _get_cython_function( - self, kind: str, how: str, values: np.ndarray, is_numeric: bool - ): - - dtype_str = values.dtype.name - ftype = self._cython_functions[kind][how] - - # see if there is a fused-type version of function - # only valid for numeric - f = getattr(libgroupby, ftype, None) - if f is not None and is_numeric: - return f - - # otherwise find dtype-specific version, falling back to object - for dt in [dtype_str, 
"object"]: - f2 = getattr(libgroupby, f"{ftype}_{dt}", None) - if f2 is not None: - return f2 - - if hasattr(f, "__signatures__"): - # inspect what fused types are implemented - if dtype_str == "object" and "object" not in f.__signatures__: - # disallow this function so we get a NotImplementedError below - # instead of a TypeError at runtime - f = None - - func = f - - if func is None: - raise NotImplementedError( - f"function is not implemented for this dtype: " - f"[how->{how},dtype->{dtype_str}]" - ) - - return func - @final def _get_cython_func_and_vals( self, kind: str, how: str, values: np.ndarray, is_numeric: bool @@ -469,7 +474,7 @@ def _get_cython_func_and_vals( values : np.ndarray """ try: - func = self._get_cython_function(kind, how, values, is_numeric) + func = _get_cython_function(kind, how, values.dtype, is_numeric) except NotImplementedError: if is_numeric: try: @@ -479,7 +484,7 @@ def _get_cython_func_and_vals( values = values.astype(complex) else: raise - func = self._get_cython_function(kind, how, values, is_numeric) + func = _get_cython_function(kind, how, values.dtype, is_numeric) else: raise return func, values @@ -537,7 +542,7 @@ def _ea_wrap_cython_operation( return res_values res_values = res_values.astype("i8", copy=False) - result = type(orig_values)._simple_new(res_values, dtype=orig_values.dtype) + result = type(orig_values)(res_values, dtype=orig_values.dtype) return result elif is_integer_dtype(values.dtype) or is_bool_dtype(values.dtype): @@ -774,7 +779,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): counts[label] = group.shape[0] result[label] = res - result = lib.maybe_convert_objects(result, try_float=0) + result = lib.maybe_convert_objects(result, try_float=False) result = maybe_cast_result(result, obj, numeric_only=True) return result, counts @@ -983,7 +988,7 @@ def sort_idx(self): return get_group_index_sorter(self.labels, self.ngroups) def __iter__(self): - sdata = self._get_sorted_data() + sdata = self.sorted_data if self.ngroups == 0: # we are inside a generator, rather than raise StopIteration @@ -995,7 +1000,8 @@ def __iter__(self): for i, (start, end) in enumerate(zip(starts, ends)): yield i, self._chop(sdata, slice(start, end)) - def _get_sorted_data(self) -> FrameOrSeries: + @cache_readonly + def sorted_data(self) -> FrameOrSeries: return self.data.take(self.sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj: slice) -> NDFrame: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e633d6b28a8c5..30190ef950af5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -141,13 +141,17 @@ PandasObject, ) import pandas.core.common as com -from pandas.core.construction import extract_array +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, +) from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.frozen import FrozenList from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( ensure_key_mapped, + get_group_index_sorter, nargsort, ) from pandas.core.strings import StringMethods @@ -816,7 +820,7 @@ def view(self, cls=None): arr = self._data.view("i8") idx_cls = self._dtype_to_subclass(dtype) arr_cls = idx_cls._data_cls - arr = arr_cls._simple_new(self._data.view("i8"), dtype=dtype) + arr = arr_cls(self._data.view("i8"), dtype=dtype) return idx_cls._simple_new(arr, name=self.name) result = self._data.view(cls) @@ -1152,7 +1156,7 @@ def 
_format_with_header( values = self._values if is_object_dtype(values.dtype): - values = lib.maybe_convert_objects(values, safe=1) + values = lib.maybe_convert_objects(values, safe=True) result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] @@ -2912,7 +2916,7 @@ def union(self, other, sort=None): return self._wrap_setop_result(other, result) - def _union(self, other, sort): + def _union(self, other: Index, sort): """ Specific union logic should go here. In subclasses, union behavior should be overwritten here rather than in `self.union`. @@ -3041,7 +3045,7 @@ def intersection(self, other, sort=False): result = self._intersection(other, sort=sort) return self._wrap_setop_result(other, result) - def _intersection(self, other, sort=False): + def _intersection(self, other: Index, sort=False): """ intersection specialized to the case with matching dtypes. """ @@ -3055,13 +3059,14 @@ def _intersection(self, other, sort=False): except TypeError: pass else: - return algos.unique1d(result) + # TODO: algos.unique1d should preserve DTA/TDA + res = algos.unique1d(result) + return ensure_wrapped_if_datetimelike(res) try: indexer = other.get_indexer(lvals) - except (InvalidIndexError, IncompatibleFrequency): + except InvalidIndexError: # InvalidIndexError raised by get_indexer if non-unique - # IncompatibleFrequency raised by PeriodIndex.get_indexer indexer, _ = other.get_indexer_non_unique(lvals) mask = indexer != -1 @@ -4098,9 +4103,7 @@ def _get_leaf_sorter(labels): return np.empty(0, dtype="int64") if len(labels) == 1: - lab = ensure_int64(labels[0]) - sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max()) - return sorter + return get_group_index_sorter(labels[0]) # find indexers of beginning of each set of # same-key labels w.r.t all but last level diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 6d5992540ef49..f7e37b10ef74c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -47,7 +47,6 @@ is_scalar, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ( DatetimeArray, @@ -86,25 +85,22 @@ def _join_i8_wrapper(joinf, with_indexers: bool = True): @staticmethod # type: ignore[misc] def wrapper(left, right): # Note: these only get called with left.dtype == right.dtype - if isinstance( - left, (np.ndarray, DatetimeIndexOpsMixin, ABCSeries, DatetimeLikeArrayMixin) - ): - left = left.view("i8") - if isinstance( - right, - (np.ndarray, DatetimeIndexOpsMixin, ABCSeries, DatetimeLikeArrayMixin), - ): - right = right.view("i8") + orig_left = left + + left = left.view("i8") + right = right.view("i8") results = joinf(left, right) if with_indexers: - # dtype should be timedelta64[ns] for TimedeltaIndex - # and datetime64[ns] for DatetimeIndex - dtype = cast(np.dtype, left.dtype).base join_index, left_indexer, right_indexer = results - join_index = join_index.view(dtype) + if not isinstance(orig_left, np.ndarray): + # When called from Index._intersection/_union, we have the EA + join_index = join_index.view(orig_left._ndarray.dtype) + join_index = orig_left._from_backing_data(join_index) + return join_index, left_indexer, right_indexer + return results return wrapper @@ -618,13 +614,10 @@ def delete(self: _T, loc) -> _T: @doc(NDArrayBackedExtensionIndex.insert) def insert(self, loc: int, item): - try: - result = super().insert(loc, item) - except (ValueError, TypeError): - # i.e. 
self._data._validate_scalar raised - return self.astype(object).insert(loc, item) - - result._data._freq = self._get_insert_freq(loc, item) + result = super().insert(loc, item) + if isinstance(result, type(self)): + # i.e. parent class method did not cast + result._data._freq = self._get_insert_freq(loc, item) return result # -------------------------------------------------------------------- @@ -650,7 +643,8 @@ def _get_join_freq(self, other): def _wrap_joined_index(self, joined: np.ndarray, other): assert other.dtype == self.dtype, (other.dtype, self.dtype) - + assert joined.dtype == "i8" or joined.dtype == self.dtype, joined.dtype + joined = joined.view(self._data._ndarray.dtype) result = super()._wrap_joined_index(joined, other) result._data._freq = self._get_join_freq(other) return result @@ -708,6 +702,8 @@ def _intersection(self, other: Index, sort=False) -> Index: # We need to invalidate the freq because Index._intersection # uses _shallow_copy on a view of self._data, which will preserve # self.freq if we're not careful. + # At this point we should have result.dtype == self.dtype + # and type(result) is type(self._data) result = self._wrap_setop_result(other, result) return result._with_freq(None)._with_freq("infer") diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index f1418869713d6..ac70200c0c404 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -16,6 +16,10 @@ doc, ) +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from, +) from pandas.core.dtypes.common import ( is_dtype_equal, is_object_dtype, @@ -370,11 +374,19 @@ def insert(self: _T, loc: int, item) -> _T: ValueError if the item is not valid for this dtype. """ arr = self._data - code = arr._validate_scalar(item) - - new_vals = np.concatenate((arr._ndarray[:loc], [code], arr._ndarray[loc:])) - new_arr = arr._from_backing_data(new_vals) - return type(self)._simple_new(new_arr, name=self.name) + try: + code = arr._validate_scalar(item) + except (ValueError, TypeError): + # e.g. trying to insert an integer into a DatetimeIndex + # We cannot keep the same dtype, so cast to the (often object) + # minimal shared dtype before doing the insert. + dtype, _ = infer_dtype_from(item, pandas_dtype=True) + dtype = find_common_type([self.dtype, dtype]) + return self.astype(dtype).insert(loc, item) + else: + new_vals = np.concatenate((arr._ndarray[:loc], [code], arr._ndarray[loc:])) + new_arr = arr._from_backing_data(new_vals) + return type(self)._simple_new(new_arr, name=self.name) def putmask(self, mask, value): res_values = self._data.copy() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1889821c79756..88b92c7b304ae 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3719,12 +3719,7 @@ def insert(self, loc: int, item) -> MultiIndex: # must insert at end otherwise you have to recompute all the # other codes lev_loc = len(level) - try: - level = level.insert(lev_loc, k) - except TypeError: - # TODO: Should this be done inside insert? - # TODO: smarter casting rules? 
-                level = level.astype(object).insert(lev_loc, k)
+                level = level.insert(lev_loc, k)
             else:
                 lev_loc = level.get_loc(k)

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 3615d85273f99..bd6ec38b292f6 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -539,7 +539,7 @@ def equals(self, other: object) -> bool:
     # --------------------------------------------------------------------
     # Set Operations

-    def _intersection(self, other, sort=False):
+    def _intersection(self, other: Index, sort=False):

         if not isinstance(other, RangeIndex):
             # Int64Index
@@ -614,7 +614,7 @@ def _extended_gcd(self, a, b):
             old_t, t = t, old_t - quotient * t
         return old_r, old_s, old_t

-    def _union(self, other, sort):
+    def _union(self, other: Index, sort):
         """
         Form the union of two Index objects and sorts if possible

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index cfe16627d5c64..bded503a1e6db 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1421,6 +1421,15 @@ def _has_valid_setitem_indexer(self, indexer) -> bool:
         if isinstance(indexer, dict):
             raise IndexError("iloc cannot enlarge its target object")

+        if isinstance(indexer, ABCDataFrame):
+            warnings.warn(
+                "DataFrame indexer for .iloc is deprecated and will be removed "
+                "in a future version.\n"
+                "Consider using .loc with a DataFrame indexer for automatic alignment.",
+                FutureWarning,
+                stacklevel=3,
+            )
+
         if not isinstance(indexer, tuple):
             indexer = _tuplify(self.ndim, indexer)

@@ -1508,6 +1517,12 @@ def _get_list_axis(self, key, axis: int):
             raise IndexError("positional indexers are out-of-bounds") from err

     def _getitem_axis(self, key, axis: int):
+        if isinstance(key, ABCDataFrame):
+            raise IndexError(
+                "DataFrame indexer is not allowed for .iloc\n"
+                "Consider using .loc for automatic alignment."
+            )
+
         if isinstance(key, slice):
             return self._get_slice_axis(key, axis=axis)

@@ -1641,7 +1656,17 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
                     # so the object is the same
                     index = self.obj._get_axis(i)
                     labels = index.insert(len(index), key)
-                    self.obj._mgr = self.obj.reindex(labels, axis=i)._mgr
+
+                    # We are expanding the Series/DataFrame values to match
+                    # the length of the new index `labels`. GH#40096 ensure
+                    # this is valid even if the index has duplicates.
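+                    # Sketch with hypothetical values: for index=[0, 0] and a
+                    # new key "x", labels == Index([0, 0, "x"]) and
+                    # taker == [0, 1, -1]; _reindex_with_indexers then takes
+                    # the existing rows by position and marks the appended row
+                    # as missing (it is written below), instead of reindexing
+                    # by the duplicated labels.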
+ taker = np.arange(len(index) + 1, dtype=np.intp) + taker[-1] = -1 + reindexers = {i: (labels, taker)} + new_obj = self.obj._reindex_with_indexers( + reindexers, allow_dups=True + ) + self.obj._mgr = new_obj._mgr self.obj._maybe_update_cacher(clear=True) self.obj._is_copy = None diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index e0447378c4542..998f1ffcf02ee 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -28,11 +28,13 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( + astype_array_safe, find_common_type, infer_dtype_from_scalar, ) from pandas.core.dtypes.common import ( is_bool_dtype, + is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, is_numeric_dtype, @@ -53,7 +55,11 @@ ) import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, +) from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -113,6 +119,7 @@ def __init__( if verify_integrity: self._axes = [ensure_index(ax) for ax in axes] + self.arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays] self._verify_integrity() def make_empty(self: T, axes=None) -> T: @@ -270,15 +277,30 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: ------- ArrayManager """ - # TODO ignore_failures - result_arrays = [func(arr) for arr in self.arrays] + result_arrays: List[np.ndarray] = [] + result_indices: List[int] = [] + + for i, arr in enumerate(self.arrays): + try: + res = func(arr) + except (TypeError, NotImplementedError): + if not ignore_failures: + raise + continue + result_arrays.append(res) + result_indices.append(i) if len(result_arrays) == 0: index = Index([None]) # placeholder else: index = Index(range(result_arrays[0].shape[0])) - return type(self)(result_arrays, [index, self.items]) + if ignore_failures: + columns = self.items[np.array(result_indices, dtype="int64")] + else: + columns = self.items + + return type(self)(result_arrays, [index, columns]) def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: """ @@ -452,7 +474,13 @@ def putmask(self, mask, new, align: bool = True): ) def diff(self, n: int, axis: int) -> ArrayManager: - return self.apply_with_block("diff", n=n, axis=axis) + axis = self._normalize_axis(axis) + if axis == 1: + # DataFrame only calls this for n=0, in which case performing it + # with axis=0 is equivalent + assert n == 0 + axis = 0 + return self.apply(algos.diff, n=n, axis=axis) def interpolate(self, **kwargs) -> ArrayManager: return self.apply_with_block("interpolate", **kwargs) @@ -478,7 +506,7 @@ def downcast(self) -> ArrayManager: return self.apply_with_block("downcast") def astype(self, dtype, copy: bool = False, errors: str = "raise") -> ArrayManager: - return self.apply("astype", dtype=dtype, copy=copy) # , errors=errors) + return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) def convert( self, @@ -694,20 +722,16 @@ def fast_xs(self, loc: int) -> ArrayLike: """ dtype = _interleaved_dtype(self.arrays) - if isinstance(dtype, SparseDtype): - temp_dtype = dtype.subtype - elif isinstance(dtype, PandasDtype): - temp_dtype = dtype.numpy_dtype - elif is_extension_array_dtype(dtype): - temp_dtype = "object" - elif is_dtype_equal(dtype, str): - temp_dtype = "object" - else: - temp_dtype = dtype 
- - result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype) + values = [arr[loc] for arr in self.arrays] if isinstance(dtype, ExtensionDtype): - result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) + # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT + elif is_datetime64_ns_dtype(dtype): + result = DatetimeArray._from_sequence(values, dtype=dtype)._data + elif is_timedelta64_ns_dtype(dtype): + result = TimedeltaArray._from_sequence(values, dtype=dtype)._data + else: + result = np.array(values, dtype=dtype) return result def iget(self, i: int) -> SingleBlockManager: @@ -816,7 +840,13 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False value = extract_array(value, extract_numpy=True) if value.ndim == 2: - value = value[0, :] + if value.shape[0] == 1: + value = value[0, :] + else: + raise ValueError( + f"Expected a 1D array, got an array with shape {value.shape}" + ) + # TODO self.arrays can be empty # assert len(value) == len(self.arrays[0]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 689a067e1c211..f2b8499a316b7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1,6 +1,5 @@ from __future__ import annotations -import inspect import re from typing import ( TYPE_CHECKING, @@ -36,8 +35,7 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( - astype_dt64_to_dt64tz, - astype_nansafe, + astype_array_safe, can_hold_element, find_common_type, infer_dtype_from, @@ -49,7 +47,6 @@ ) from pandas.core.dtypes.common import ( is_categorical_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, @@ -652,33 +649,11 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): ------- Block """ - errors_legal_values = ("raise", "ignore") - - if errors not in errors_legal_values: - invalid_arg = ( - "Expected value of kwarg 'errors' to be one of " - f"{list(errors_legal_values)}. Supplied value is '{errors}'" - ) - raise ValueError(invalid_arg) - - if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): - msg = ( - f"Expected an instance of {dtype.__name__}, " - "but got the class instead. Try instantiating 'dtype'." - ) - raise TypeError(msg) - - dtype = pandas_dtype(dtype) + values = self.values + if values.dtype.kind in ["m", "M"]: + values = self.array_values() - try: - new_values = self._astype(dtype, copy=copy) - except (ValueError, TypeError): - # e.g. 
astype_nansafe can fail on object-dtype of strings - # trying to convert to float - if errors == "ignore": - new_values = self.values - else: - raise + new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) newb = self.make_block(new_values) if newb.shape != self.shape: @@ -689,37 +664,6 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): ) return newb - def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike: - values = self.values - if values.dtype.kind in ["m", "M"]: - values = self.array_values() - - if ( - values.dtype.kind in ["m", "M"] - and dtype.kind in ["i", "u"] - and isinstance(dtype, np.dtype) - and dtype.itemsize != 8 - ): - # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced - msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]" - raise TypeError(msg) - - if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype): - return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True) - - if is_dtype_equal(values.dtype, dtype): - if copy: - return values.copy() - return values - - if isinstance(values, ExtensionArray): - values = values.astype(dtype, copy=copy) - - else: - values = astype_nansafe(values, dtype, copy=copy) - - return values - def convert( self, copy: bool = True, @@ -796,7 +740,6 @@ def replace( It is used in ObjectBlocks. It is here for API compatibility. """ inplace = validate_bool_kwarg(inplace, "inplace") - original_to_replace = to_replace if not self._can_hold_element(to_replace): # We cannot hold `to_replace`, so we know immediately that @@ -814,9 +757,20 @@ def replace( return [self] if inplace else [self.copy()] if not self._can_hold_element(value): - blk = self.astype(object) + if self.ndim == 2 and self.shape[0] > 1: + # split so that we only upcast where necessary + nbs = self._split() + res_blocks = extend_blocks( + [ + blk.replace(to_replace, value, inplace=inplace, regex=regex) + for blk in nbs + ] + ) + return res_blocks + + blk = self.coerce_to_target_dtype(value) return blk.replace( - to_replace=original_to_replace, + to_replace=to_replace, value=value, inplace=True, regex=regex, @@ -824,7 +778,7 @@ def replace( blk = self if inplace else self.copy() putmask_inplace(blk.values, mask, value) - blocks = blk.convert(numeric=False, copy=not inplace) + blocks = blk.convert(numeric=False, copy=False) return blocks @final @@ -867,11 +821,7 @@ def _replace_regex( replace_regex(new_values, rx, value, mask) block = self.make_block(new_values) - if convert: - nbs = block.convert(numeric=False) - else: - nbs = [block] - return nbs + return [block] @final def _replace_list( diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9903dab9976c4..9a7ae39b9f8eb 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -22,10 +22,9 @@ from pandas._libs import lib from pandas._typing import ( - Axis, + ArrayLike, DtypeObj, Manager, - Scalar, ) from pandas.core.dtypes.cast import ( @@ -36,7 +35,6 @@ maybe_convert_platform, maybe_infer_to_datetimelike, maybe_upcast, - sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( is_datetime64tz_dtype, @@ -71,7 +69,9 @@ get_objs_combined_axis, union_indexes, ) +from pandas.core.internals.array_manager import ArrayManager from pandas.core.internals.managers import ( + BlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks, ) @@ -79,6 +79,7 @@ if TYPE_CHECKING: from numpy.ma.mrecords import MaskedRecords + # 
--------------------------------------------------------------------- # BlockManager Interface @@ -90,7 +91,8 @@ def arrays_to_mgr( columns, dtype: Optional[DtypeObj] = None, verify_integrity: bool = True, -): + typ: Optional[str] = None, +) -> Manager: """ Segregate Series based on type and coerce into matrices. @@ -108,19 +110,29 @@ def arrays_to_mgr( # don't force copy because getting jammed in an ndarray anyway arrays = _homogenize(arrays, index, dtype) - columns = ensure_index(columns) else: - columns = ensure_index(columns) index = ensure_index(index) + columns = ensure_index(columns) + # from BlockManager perspective axes = [columns, index] - return create_block_manager_from_arrays(arrays, arr_names, axes) + if typ == "block": + return create_block_manager_from_arrays(arrays, arr_names, axes) + elif typ == "array": + return ArrayManager(arrays, [index, columns]) + else: + raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") -def masked_rec_array_to_mgr( - data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool +def rec_array_to_mgr( + data: Union[MaskedRecords, np.recarray, np.ndarray], + index, + columns, + dtype: Optional[DtypeObj], + copy: bool, + typ: str, ): """ Extract from a masked rec array and create the manager. @@ -129,48 +141,54 @@ def masked_rec_array_to_mgr( fdata = ma.getdata(data) if index is None: index = _get_names_from_index(fdata) - if index is None: - index = ibase.default_index(len(data)) - index = ensure_index(index) + else: + index = ensure_index(index) if columns is not None: columns = ensure_index(columns) arrays, arr_columns = to_arrays(fdata, columns) # fill if needed - new_arrays = [] - for col in arr_columns: - arr = data[col] - fv = arr.fill_value - - mask = ma.getmaskarray(arr) - if mask.any(): - arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) - arr[mask] = fv - new_arrays.append(arr) + if isinstance(data, np.ma.MaskedArray): + new_arrays = fill_masked_arrays(data, arr_columns) + else: + new_arrays = arrays # create the manager arrays, arr_columns = reorder_arrays(new_arrays, arr_columns, columns) if columns is None: columns = arr_columns - mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype) + mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype, typ=typ) if copy: mgr = mgr.copy() return mgr +def fill_masked_arrays(data: MaskedRecords, arr_columns: Index) -> List[np.ndarray]: + """ + Convert numpy MaskedRecords to ensure mask is softened. + """ + new_arrays = [] + + for col in arr_columns: + arr = data[col] + fv = arr.fill_value + + mask = ma.getmaskarray(arr) + if mask.any(): + arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) + arr[mask] = fv + new_arrays.append(arr) + return new_arrays + + def mgr_to_mgr(mgr, typ: str): """ Convert to specific type of Manager. Does not copy if the type is already correct. Does not guarantee a copy otherwise. 
""" - from pandas.core.internals import ( - ArrayManager, - BlockManager, - ) - new_mgr: Manager if typ == "block": @@ -178,7 +196,7 @@ def mgr_to_mgr(mgr, typ: str): new_mgr = mgr else: new_mgr = arrays_to_mgr( - mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], dtype=None + mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], typ="block" ) elif typ == "array": if isinstance(mgr, ArrayManager): @@ -187,7 +205,7 @@ def mgr_to_mgr(mgr, typ: str): arrays = [mgr.iget_values(i).copy() for i in range(len(mgr.axes[0]))] new_mgr = ArrayManager(arrays, [mgr.axes[1], mgr.axes[0]]) else: - raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{type}'") + raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") return new_mgr @@ -195,13 +213,16 @@ def mgr_to_mgr(mgr, typ: str): # DataFrame Constructor Interface -def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): - # input must be a ndarray, list, Series, index +def ndarray_to_mgr( + values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str +) -> Manager: + # used in DataFrame.__init__ + # input must be a ndarray, list, Series, Index, ExtensionArray if isinstance(values, ABCSeries): if columns is None: if values.name is not None: - columns = [values.name] + columns = Index([values.name]) if index is None: index = values.index else: @@ -224,22 +245,33 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): if columns is None: columns = Index(range(len(values))) - return arrays_to_mgr(values, columns, index, columns, dtype=dtype) + return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ) # by definition an array here # the dtypes will be coerced to a single dtype values = _prep_ndarray(values, copy=copy) if dtype is not None and not is_dtype_equal(values.dtype, dtype): - try: - values = construct_1d_ndarray_preserving_na( - values.ravel(), dtype=dtype, copy=False - ).reshape(values.shape) - except Exception as orig: - # e.g. ValueError when trying to cast object dtype to float64 - raise ValueError( - f"failed to cast to '{dtype}' (Exception was: {orig})" - ) from orig + shape = values.shape + flat = values.ravel() + + if not is_integer_dtype(dtype): + # TODO: skipping integer_dtype is needed to keep the tests passing, + # not clear it is correct + # Note: we really only need _try_cast, but keeping to exposed funcs + values = sanitize_array( + flat, None, dtype=dtype, copy=copy, raise_cast_failure=True + ) + else: + try: + values = construct_1d_ndarray_preserving_na( + flat, dtype=dtype, copy=False + ) + except Exception as err: + # e.g. ValueError when trying to cast object dtype to float64 + msg = f"failed to cast to '{dtype}' (Exception was: {err})" + raise ValueError(msg) from err + values = values.reshape(shape) # _prep_ndarray ensures that values.ndim == 2 at this point index, columns = _get_axes( @@ -277,10 +309,14 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): return create_block_manager_from_blocks(block_values, [columns, index]) -def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): +def dict_to_mgr( + data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str +) -> Manager: """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. 
+ + Used in DataFrame.__init__ """ arrays: Union[Sequence[Any], Series] @@ -321,7 +357,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] - return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) + return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ) def nested_data_to_arrays( @@ -336,7 +372,7 @@ def nested_data_to_arrays( # By the time we get here we have already checked treat_as_nested(data) if is_named_tuple(data[0]) and columns is None: - columns = data[0]._fields + columns = ensure_index(data[0]._fields) arrays, columns = to_arrays(data, columns, dtype=dtype) columns = ensure_index(columns) @@ -415,6 +451,11 @@ def _homogenize(data, index: Index, dtype: Optional[DtypeObj]): # Forces alignment. No need to copy data since we # are putting it into an ndarray later val = val.reindex(index, copy=False) + # TODO extract_array should be preferred, but that gives failures for + # `extension/test_numpy.py` (extract_array will convert numpy arrays + # to PandasArray), see https://github.com/pandas-dev/pandas/issues/40021 + # val = extract_array(val, extract_numpy=True) + val = val._values else: if isinstance(val, dict): if oindex is None: @@ -492,21 +533,18 @@ def extract_index(data) -> Index: return ensure_index(index) -def reorder_arrays(arrays, arr_columns, columns): +def reorder_arrays( + arrays: List[ArrayLike], arr_columns: Index, columns: Optional[Index] +) -> Tuple[List[ArrayLike], Index]: # reorder according to the columns - if ( - columns is not None - and len(columns) - and arr_columns is not None - and len(arr_columns) - ): + if columns is not None and len(columns) and len(arr_columns): indexer = ensure_index(arr_columns).get_indexer(columns) arr_columns = ensure_index([arr_columns[i] for i in indexer]) arrays = [arrays[i] for i in indexer] return arrays, arr_columns -def _get_names_from_index(data): +def _get_names_from_index(data) -> Index: has_some_name = any(getattr(s, "name", None) is not None for s in data) if not has_some_name: return ibase.default_index(len(data)) @@ -521,7 +559,7 @@ def _get_names_from_index(data): index[i] = f"Unnamed {count}" count += 1 - return index + return Index(index) def _get_axes( @@ -574,7 +612,9 @@ def dataclasses_to_dicts(data): # Conversion of Inputs to Arrays -def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): +def to_arrays( + data, columns: Optional[Index], dtype: Optional[DtypeObj] = None +) -> Tuple[List[ArrayLike], Index]: """ Return list of arrays, columns. """ @@ -595,8 +635,10 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): if isinstance(data, np.ndarray): columns = data.dtype.names if columns is not None: - return [[]] * len(columns), columns - return [], [] # columns if columns is not None else [] + # i.e. numpy structured array + arrays = [data[name] for name in columns] + return arrays, ensure_index(columns) + return [], ensure_index([]) elif isinstance(data[0], Categorical): if columns is None: @@ -605,12 +647,12 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): elif isinstance(data, np.ndarray) and data.dtype.names is not None: # e.g. 
recarray - columns = list(data.dtype.names) + columns = Index(list(data.dtype.names)) arrays = [data[k] for k in columns] return arrays, columns if isinstance(data[0], (list, tuple)): - content, columns = _list_to_arrays(data, columns) + content = _list_to_arrays(data) elif isinstance(data[0], abc.Mapping): content, columns = _list_of_dict_to_arrays(data, columns) elif isinstance(data[0], ABCSeries): @@ -618,35 +660,35 @@ def to_arrays(data, columns, dtype: Optional[DtypeObj] = None): else: # last ditch effort data = [tuple(x) for x in data] - content, columns = _list_to_arrays(data, columns) + content = _list_to_arrays(data) content, columns = _finalize_columns_and_data(content, columns, dtype) return content, columns -def _list_to_arrays( - data: List[Scalar], - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: +def _list_to_arrays(data: List[Union[Tuple, List]]) -> np.ndarray: + # Returned np.ndarray has ndim = 2 # Note: we already check len(data) > 0 before getting hre if isinstance(data[0], tuple): content = lib.to_object_array_tuples(data) else: # list of lists content = lib.to_object_array(data) - return content, columns + return content def _list_of_series_to_arrays( data: List, - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + columns: Optional[Index], +) -> Tuple[np.ndarray, Index]: + # returned np.ndarray has ndim == 2 + if columns is None: # We know pass_data is non-empty because data[0] is a Series pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] columns = get_objs_combined_axis(pass_data, sort=False) - indexer_cache: Dict[int, Scalar] = {} + indexer_cache: Dict[int, np.ndarray] = {} aligned_values = [] for s in data: @@ -669,8 +711,8 @@ def _list_of_series_to_arrays( def _list_of_dict_to_arrays( data: List[Dict], - columns: Union[Index, List], -) -> Tuple[List[Scalar], Union[Index, List[Axis]]]: + columns: Optional[Index], +) -> Tuple[np.ndarray, Index]: """ Convert list of dicts to numpy arrays @@ -687,13 +729,14 @@ def _list_of_dict_to_arrays( Returns ------- - tuple - arrays, columns + content : np.ndarray[object, ndim=2] + columns : Index """ if columns is None: gen = (list(x.keys()) for x in data) sort = not any(isinstance(d, dict) for d in data) columns = lib.fast_unique_multiple_list_gen(gen, sort=sort) + columns = ensure_index(columns) # assure that they are of the base dict class and not of derived # classes @@ -704,10 +747,10 @@ def _list_of_dict_to_arrays( def _finalize_columns_and_data( - content: np.ndarray, - columns: Optional[Union[Index, List]], + content: np.ndarray, # ndim == 2 + columns: Optional[Index], dtype: Optional[DtypeObj], -) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]: +) -> Tuple[List[np.ndarray], Index]: """ Ensure we have valid columns, cast object dtypes if possible. """ @@ -725,21 +768,21 @@ def _finalize_columns_and_data( def _validate_or_indexify_columns( - content: List, columns: Optional[Union[Index, List]] -) -> Union[Index, List[Axis]]: + content: List[np.ndarray], columns: Optional[Index] +) -> Index: """ If columns is None, make numbers as column names; Otherwise, validate that columns have valid length. Parameters ---------- - content: list of data - columns: Iterable or None + content : list of np.ndarrays + columns : Index or None Returns ------- - columns: If columns is Iterable, return as is; If columns is None, assign - positional column index value as columns. 
+ Index + If columns is None, assign positional column index value as columns. Raises ------ @@ -783,19 +826,19 @@ def _validate_or_indexify_columns( def _convert_object_array( - content: List[Scalar], dtype: Optional[DtypeObj] = None -) -> List[Scalar]: + content: List[np.ndarray], dtype: Optional[DtypeObj] +) -> List[ArrayLike]: """ Internal function to convert object array. Parameters ---------- - content: list of processed data records - dtype: np.dtype, default is None + content: List[np.ndarray] + dtype: np.dtype or ExtensionDtype Returns ------- - arrays: casted content if not object dtype, otherwise return as is in list. + List[ArrayLike] """ # provide soft conversion of object dtypes def convert(arr): @@ -807,28 +850,3 @@ def convert(arr): arrays = [convert(arr) for arr in content] return arrays - - -# --------------------------------------------------------------------- -# Series-Based - - -def sanitize_index(data, index: Index): - """ - Sanitize an index type to return an ndarray of the underlying, pass - through a non-Index. - """ - if len(data) != len(index): - raise ValueError( - "Length of values " - f"({len(data)}) " - "does not match length of index " - f"({len(index)})" - ) - - if isinstance(data, np.ndarray): - - # coerce datetimelike types to ns - data = sanitize_to_nanoseconds(data) - - return data diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e013a7f680d6f..2ad7471d6f086 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1646,6 +1646,15 @@ def fast_xs(self, loc): """ raise NotImplementedError("Use series._values[loc] instead") + def set_values(self, values: ArrayLike): + """ + Set the values of the single block in place. + + Use at your own risk! This does not check if the passed values are + valid for the current Block/SingleBlockManager (length, dtype, etc). + """ + self.blocks[0].values = values + # -------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 0b77a6d821c6d..d1597b23cf577 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -3,7 +3,10 @@ """ from __future__ import annotations -from functools import partial +from functools import ( + partial, + wraps, +) from typing import ( TYPE_CHECKING, Any, @@ -11,6 +14,7 @@ Optional, Set, Union, + cast, ) import numpy as np @@ -22,15 +26,13 @@ from pandas._typing import ( ArrayLike, Axis, - DtypeObj, + F, ) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from from pandas.core.dtypes.common import ( - ensure_float64, is_array_like, - is_integer_dtype, is_numeric_v_string_like, needs_i8_conversion, ) @@ -674,54 +676,53 @@ def interpolate_2d( return result -def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): - """ - Cast values to a dtype that algos.pad and algos.backfill can handle. - """ - # TODO: for int-dtypes we make a copy, but for everything else this - # alters the values in-place. Is this intentional? 
+def _fillna_prep(values, mask=None): + # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d - if needs_i8_conversion(dtype): - values = values.view(np.int64) + if mask is None: + mask = isna(values) - elif is_integer_dtype(values) and not has_mask: - # NB: this check needs to come after the datetime64 check above - # has_mask check to avoid casting i8 values that have already - # been cast from PeriodDtype - values = ensure_float64(values) + mask = mask.view(np.uint8) + return mask - return values +def _datetimelike_compat(func: F) -> F: + """ + Wrapper to handle datetime64 and timedelta64 dtypes. + """ -def _fillna_prep(values, mask=None): - # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d - dtype = values.dtype + @wraps(func) + def new_func(values, limit=None, mask=None): + if needs_i8_conversion(values.dtype): + if mask is None: + # This needs to occur before casting to int64 + mask = isna(values) - has_mask = mask is not None - if not has_mask: - # This needs to occur before datetime/timedeltas are cast to int64 - mask = isna(values) + result = func(values.view("i8"), limit=limit, mask=mask) + return result.view(values.dtype) - values = _cast_values_for_fillna(values, dtype, has_mask) + return func(values, limit=limit, mask=mask) - mask = mask.view(np.uint8) - return values, mask + return cast(F, new_func) +@_datetimelike_compat def _pad_1d(values, limit=None, mask=None): - values, mask = _fillna_prep(values, mask) + mask = _fillna_prep(values, mask) algos.pad_inplace(values, mask, limit=limit) return values +@_datetimelike_compat def _backfill_1d(values, limit=None, mask=None): - values, mask = _fillna_prep(values, mask) + mask = _fillna_prep(values, mask) algos.backfill_inplace(values, mask, limit=limit) return values +@_datetimelike_compat def _pad_2d(values, limit=None, mask=None): - values, mask = _fillna_prep(values, mask) + mask = _fillna_prep(values, mask) if np.all(values.shape): algos.pad_2d_inplace(values, mask, limit=limit) @@ -731,8 +732,9 @@ def _pad_2d(values, limit=None, mask=None): return values +@_datetimelike_compat def _backfill_2d(values, limit=None, mask=None): - values, mask = _fillna_prep(values, mask) + mask = _fillna_prep(values, mask) if np.all(values.shape): algos.backfill_2d_inplace(values, mask, limit=limit) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 24e75a2bbeff2..a0dfb1c83a70b 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1743,8 +1743,9 @@ def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: result = result.view(orig_dtype) else: # DatetimeArray + # TODO: have this case go through a DTA method? 
result = type(values)._simple_new( # type: ignore[attr-defined] - result, dtype=orig_dtype + result.view("M8[ns]"), dtype=orig_dtype ) elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 543bf44e61216..271bb2ca8dd75 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -9,7 +9,6 @@ import numpy as np -import pandas._libs.algos as libalgos import pandas._libs.reshape as libreshape from pandas._libs.sparse import IntIndex from pandas._typing import Dtype @@ -42,6 +41,7 @@ decons_obs_group_ids, get_compressed_ids, get_group_index, + get_group_index_sorter, ) @@ -139,8 +139,7 @@ def _indexer_and_to_sort(self): comp_index, obs_ids = get_compressed_ids(to_sort, sizes) ngroups = len(obs_ids) - indexer = libalgos.groupsort_indexer(comp_index, ngroups)[0] - indexer = ensure_platform_int(indexer) + indexer = get_group_index_sorter(comp_index, ngroups) return indexer, to_sort diff --git a/pandas/core/series.py b/pandas/core/series.py index 3f43b27cd88ce..6ee6ea801d872 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -15,9 +15,11 @@ Iterable, List, Optional, + Sequence, Tuple, Type, Union, + cast, ) import warnings @@ -94,7 +96,7 @@ ops, ) from pandas.core.accessor import CachedAccessor -from pandas.core.apply import series_apply +from pandas.core.apply import SeriesApply from pandas.core.arrays import ExtensionArray from pandas.core.arrays.categorical import CategoricalAccessor from pandas.core.arrays.sparse import SparseAccessor @@ -124,7 +126,6 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager -from pandas.core.internals.construction import sanitize_index from pandas.core.shared_docs import _shared_docs from pandas.core.sorting import ( ensure_key_mapped, @@ -374,10 +375,8 @@ def __init__( "`index` argument. `copy` must be False." 
) - elif is_extension_array_dtype(data): + elif isinstance(data, ExtensionArray): pass - elif isinstance(data, (set, frozenset)): - raise TypeError(f"'{type(data).__name__}' type is unordered") else: data = com.maybe_iterable_to_list(data) @@ -386,7 +385,7 @@ def __init__( data = [data] index = ibase.default_index(len(data)) elif is_list_like(data): - sanitize_index(data, index) + com.require_length_match(data, index) # create/copy the manager if isinstance(data, SingleBlockManager): @@ -803,7 +802,7 @@ def __array__(self, dtype: Optional[NpDtype] = None) -> np.ndarray: array(['1999-12-31T23:00:00.000000000', ...], dtype='datetime64[ns]') """ - return np.asarray(self.array, dtype) + return np.asarray(self._values, dtype) # ---------------------------------------------------------------------- # Unary Methods @@ -1797,7 +1796,7 @@ def count(self, level=None): 2 """ if level is None: - return notna(self.array).sum() + return notna(self._values).sum() elif not isinstance(self.index, MultiIndex): raise ValueError("Series.count level is only valid with a MultiIndex") @@ -2497,7 +2496,7 @@ def diff(self, periods: int = 1) -> Series: -------- {examples} """ - result = algorithms.diff(self.array, periods) + result = algorithms.diff(self._values, periods) return self._constructor(result, index=self.index).__finalize__( self, method="diff" ) @@ -3099,7 +3098,7 @@ def update(self, other) -> None: def sort_values( self, axis=0, - ascending=True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -3117,7 +3116,7 @@ def sort_values( axis : {0 or 'index'}, default 0 Axis to direct sorting. The value 'index' is accepted for compatibility with DataFrame.sort_values. - ascending : bool, default True + ascending : bool or list of bools, default True If True, sort values in ascending order, otherwise descending. inplace : bool, default False If True, perform operation in-place. @@ -3277,6 +3276,7 @@ def sort_values( ) if is_list_like(ascending): + ascending = cast(Sequence[Union[bool, int]], ascending) if len(ascending) != 1: raise ValueError( f"Length of ascending ({len(ascending)}) must be 1 for Series" @@ -3291,7 +3291,7 @@ def sort_values( # GH 35922. Make sorting stable by leveraging nargsort values_to_sort = ensure_key_mapped(self, key)._values if key else self._values - sorted_index = nargsort(values_to_sort, kind, ascending, na_position) + sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) result = self._constructor( self._values[sorted_index], index=self.index[sorted_index] @@ -3309,7 +3309,7 @@ def sort_index( self, axis=0, level=None, - ascending: bool = True, + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, inplace: bool = False, kind: str = "quicksort", na_position: str = "last", @@ -3329,7 +3329,7 @@ def sort_index( Axis to direct sorting. This can only be 0 for Series. level : int, optional If not None, sort on values in specified index level(s). - ascending : bool or list of bools, default True + ascending : bool or list-like of bools, default True Sort ascending vs. descending. When the index is a MultiIndex the sort direction can be controlled for each level individually. 
inplace : bool, default False @@ -3806,7 +3806,7 @@ def explode(self, ignore_index: bool = False) -> Series: if not len(self) or not is_object_dtype(self): return self.copy() - values, counts = reshape.explode(np.asarray(self.array)) + values, counts = reshape.explode(np.asarray(self._values)) if ignore_index: index = ibase.default_index(len(values)) @@ -4000,7 +4000,7 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): if func is None: func = dict(kwargs.items()) - op = series_apply(self, func, args=args, kwargs=kwargs) + op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs) result = op.agg() return result @@ -4016,7 +4016,9 @@ def transform( ) -> FrameOrSeriesUnion: # Validate axis argument self._get_axis_number(axis) - result = series_apply(self, func=func, args=args, kwargs=kwargs).transform() + result = SeriesApply( + self, func=func, convert_dtype=True, args=args, kwargs=kwargs + ).transform() return result def apply( @@ -4128,7 +4130,7 @@ def apply( Helsinki 2.484907 dtype: float64 """ - return series_apply(self, func, convert_dtype, args, kwargs).apply() + return SeriesApply(self, func, convert_dtype, args, kwargs).apply() def _reduce( self, @@ -5009,7 +5011,7 @@ def _cmp_method(self, other, op): if isinstance(other, Series) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled Series objects") - lvalues = extract_array(self, extract_numpy=True) + lvalues = self._values rvalues = extract_array(other, extract_numpy=True, range_compat=True) res_values = ops.comparison_op(lvalues, rvalues, op) @@ -5020,7 +5022,7 @@ def _logical_method(self, other, op): res_name = ops.get_op_result_name(self, other) self, other = ops.align_method_SERIES(self, other, align_asobject=True) - lvalues = extract_array(self, extract_numpy=True) + lvalues = self._values rvalues = extract_array(other, extract_numpy=True, range_compat=True) res_values = ops.logical_op(lvalues, rvalues, op) @@ -5030,7 +5032,7 @@ def _arith_method(self, other, op): res_name = ops.get_op_result_name(self, other) self, other = ops.align_method_SERIES(self, other) - lvalues = extract_array(self, extract_numpy=True) + lvalues = self._values rvalues = extract_array(other, extract_numpy=True, range_compat=True) result = ops.arithmetic_op(lvalues, rvalues, op) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index c3356386ef346..55e97f738072b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -10,6 +10,7 @@ Iterable, List, Optional, + Sequence, Tuple, Union, ) @@ -48,7 +49,7 @@ def get_indexer_indexer( target: Index, level: Union[str, int, List[str], List[int]], - ascending: bool, + ascending: Union[Sequence[Union[bool, int]], Union[bool, int]], kind: str, na_position: str, sort_remaining: bool, @@ -572,7 +573,9 @@ def get_indexer_dict( # sorting levels...cleverly? -def get_group_index_sorter(group_index, ngroups: int): +def get_group_index_sorter( + group_index: np.ndarray, ngroups: int | None = None +) -> np.ndarray: """ algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where @@ -586,6 +589,8 @@ def get_group_index_sorter(group_index, ngroups: int): groupby operations. e.g. 
consider: df.groupby(key)[col].transform('first') """ + if ngroups is None: + ngroups = 1 + group_index.max() count = len(group_index) alpha = 0.0 # taking complexities literally; there may be beta = 1.0 # some room for fine-tuning these parameters diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 18f9ece3e3812..d58b5e5ffa83d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -248,13 +248,13 @@ def _convert_and_box_cache( return _box_as_indexlike(result, utc=None, name=name) -def _return_parsed_timezone_results(result, timezones, tz, name): +def _return_parsed_timezone_results(result: np.ndarray, timezones, tz, name) -> Index: """ Return results from array_strptime if a %z or %Z directive was passed. Parameters ---------- - result : ndarray + result : ndarray[int64] int64 date representations of the dates timezones : ndarray pytz timezone objects @@ -284,10 +284,10 @@ def _convert_listlike_datetimes( tz: Optional[Timezone] = None, unit: Optional[str] = None, errors: Optional[str] = None, - infer_datetime_format: Optional[bool] = None, + infer_datetime_format: bool = False, dayfirst: Optional[bool] = None, yearfirst: Optional[bool] = None, - exact: Optional[bool] = None, + exact: bool = True, ): """ Helper function for to_datetime. Performs the conversions of 1D listlike @@ -305,13 +305,13 @@ def _convert_listlike_datetimes( None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' - infer_datetime_format : boolean + infer_datetime_format : bool, default False inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime - exact : boolean + exact : bool, default True exact format matching behavior from to_datetime Returns @@ -346,38 +346,7 @@ def _convert_listlike_datetimes( elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") - arg = getattr(arg, "_values", arg) - - # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime - # because it expects an ndarray argument - if isinstance(arg, IntegerArray): - result = arg.astype(f"datetime64[{unit}]") - tz_parsed = None - else: - - result, tz_parsed = tslib.array_with_unit_to_datetime( - arg, unit, errors=errors - ) - - if errors == "ignore": - - result = Index(result, name=name) - else: - result = DatetimeIndex(result, name=name) - # GH 23758: We may still need to localize the result with tz - # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) - # result will be naive but in UTC - try: - result = result.tz_localize("UTC").tz_convert(tz_parsed) - except AttributeError: - # Regular Index from 'ignore' path - return result - if tz is not None: - if result.tz is None: - result = result.tz_localize(tz) - else: - result = result.tz_convert(tz) - return result + return _to_datetime_with_unit(arg, unit, name, tz, errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" @@ -413,64 +382,14 @@ def _convert_listlike_datetimes( require_iso8601 = not infer_datetime_format format = None - tz_parsed = None result = None if format is not None: - try: - # shortcut formatting here - if format == "%Y%m%d": - try: - # pass orig_arg as float-dtype may have been converted to - # datetime64[ns] - orig_arg = ensure_object(orig_arg) - result = _attempt_YYYYMMDD(orig_arg, 
errors=errors) - except (ValueError, TypeError, OutOfBoundsDatetime) as err: - raise ValueError( - "cannot convert the input to '%Y%m%d' date format" - ) from err - - # fallback - if result is None: - try: - result, timezones = array_strptime( - arg, format, exact=exact, errors=errors - ) - if "%Z" in format or "%z" in format: - return _return_parsed_timezone_results( - result, timezones, tz, name - ) - except OutOfBoundsDatetime: - if errors == "raise": - raise - elif errors == "coerce": - result = np.empty(arg.shape, dtype="M8[ns]") - iresult = result.view("i8") - iresult.fill(iNaT) - else: - result = arg - except ValueError: - # if format was inferred, try falling back - # to array_to_datetime - terminate here - # for specified formats - if not infer_datetime_format: - if errors == "raise": - raise - elif errors == "coerce": - result = np.empty(arg.shape, dtype="M8[ns]") - iresult = result.view("i8") - iresult.fill(iNaT) - else: - result = arg - except ValueError as e: - # Fallback to try to convert datetime objects if timezone-aware - # datetime objects are found without passing `utc=True` - try: - values, tz = conversion.datetime_to_datetime64(arg) - dta = DatetimeArray(values, dtype=tz_to_dtype(tz)) - return DatetimeIndex._simple_new(dta, name=name) - except (ValueError, TypeError): - raise e + result = _to_datetime_with_format( + arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format + ) + if result is not None: + return result if result is None: assert format is None or infer_datetime_format @@ -485,16 +404,151 @@ def _convert_listlike_datetimes( allow_object=True, ) - if tz_parsed is not None: - # We can take a shortcut since the datetime64 numpy array - # is in UTC - dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) - return DatetimeIndex._simple_new(dta, name=name) + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) + return DatetimeIndex._simple_new(dta, name=name) + + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + +def _array_strptime_with_fallback( + arg, + name, + tz, + fmt: str, + exact: bool, + errors: Optional[str], + infer_datetime_format: bool, +) -> Optional[Index]: + """ + Call array_strptime, with fallback behavior depending on 'errors'. + """ utc = tz == "utc" + + try: + result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors) + if "%Z" in fmt or "%z" in fmt: + return _return_parsed_timezone_results(result, timezones, tz, name) + except OutOfBoundsDatetime: + if errors == "raise": + raise + elif errors == "coerce": + result = np.empty(arg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult.fill(iNaT) + else: + result = arg + except ValueError: + # if fmt was inferred, try falling back + # to array_to_datetime - terminate here + # for specified formats + if not infer_datetime_format: + if errors == "raise": + raise + elif errors == "coerce": + result = np.empty(arg.shape, dtype="M8[ns]") + iresult = result.view("i8") + iresult.fill(iNaT) + else: + result = arg + else: + # Indicates to the caller to fallback to objects_to_datetime64ns + return None + return _box_as_indexlike(result, utc=utc, name=name) +def _to_datetime_with_format( + arg, + orig_arg, + name, + tz, + fmt: str, + exact: bool, + errors: Optional[str], + infer_datetime_format: bool, +) -> Optional[Index]: + """ + Try parsing with the given format, returning None on failure. 
+ """ + result = None + try: + # shortcut formatting here + if fmt == "%Y%m%d": + # pass orig_arg as float-dtype may have been converted to + # datetime64[ns] + orig_arg = ensure_object(orig_arg) + try: + # may return None without raising + result = _attempt_YYYYMMDD(orig_arg, errors=errors) + except (ValueError, TypeError, OutOfBoundsDatetime) as err: + raise ValueError( + "cannot convert the input to '%Y%m%d' date format" + ) from err + if result is not None: + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + + # fallback + if result is None: + result = _array_strptime_with_fallback( + arg, name, tz, fmt, exact, errors, infer_datetime_format + ) + if result is not None: + return result + + except ValueError as e: + # Fallback to try to convert datetime objects if timezone-aware + # datetime objects are found without passing `utc=True` + try: + values, tz = conversion.datetime_to_datetime64(arg) + dta = DatetimeArray(values, dtype=tz_to_dtype(tz)) + return DatetimeIndex._simple_new(dta, name=name) + except (ValueError, TypeError): + raise e + + return result + + +def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index: + """ + to_datetime specalized to the case where a 'unit' is passed. + """ + arg = getattr(arg, "_values", arg) + + # GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime + # because it expects an ndarray argument + if isinstance(arg, IntegerArray): + result = arg.astype(f"datetime64[{unit}]") + tz_parsed = None + else: + result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + + if errors == "ignore": + # Index constructor _may_ infer to DatetimeIndex + result = Index(result, name=name) + else: + result = DatetimeIndex(result, name=name) + + if not isinstance(result, DatetimeIndex): + return result + + # GH#23758: We may still need to localize the result with tz + # GH#25546: Apply tz_parsed first (from arg), then tz (from caller) + # result will be naive but in UTC + result = result.tz_localize("UTC").tz_convert(tz_parsed) + + if tz is not None: + if result.tz is None: + result = result.tz_localize(tz) + else: + result = result.tz_convert(tz) + return result + + def _adjust_to_origin(arg, origin, unit): """ Helper function for to_datetime. 
@@ -965,7 +1019,7 @@ def coerce(values): return values -def _attempt_YYYYMMDD(arg, errors): +def _attempt_YYYYMMDD(arg: np.ndarray, errors: Optional[str]) -> Optional[np.ndarray]: """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings @@ -973,8 +1027,8 @@ def _attempt_YYYYMMDD(arg, errors): Parameters ---------- - arg : passed value - errors : 'raise','ignore','coerce' + arg : np.ndarray[object] + errors : {'raise','ignore','coerce'} """ def calc(carg): diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 208b5ab0023eb..5a71db82f26e4 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -215,11 +215,13 @@ class ExponentialMovingWindow(BaseWindow): _attributes = [ "com", + "span", + "halflife", + "alpha", "min_periods", "adjust", "ignore_na", "axis", - "halflife", "times", ] @@ -245,38 +247,48 @@ def __init__( method="single", axis=axis, ) + self.com = com + self.span = span + self.halflife = halflife + self.alpha = alpha self.adjust = adjust self.ignore_na = ignore_na - if times is not None: + self.times = times + if self.times is not None: if isinstance(times, str): - times = self._selected_obj[times] - if not is_datetime64_ns_dtype(times): + self.times = self._selected_obj[times] + if not is_datetime64_ns_dtype(self.times): raise ValueError("times must be datetime64[ns] dtype.") - if len(times) != len(obj): + if len(self.times) != len(obj): raise ValueError("times must be the same length as the object.") if not isinstance(halflife, (str, datetime.timedelta)): raise ValueError( "halflife must be a string or datetime.timedelta object" ) - if isna(times).any(): + if isna(self.times).any(): raise ValueError("Cannot convert NaT values to integer") - self.times = np.asarray(times.view(np.int64)) - self.halflife = Timedelta(halflife).value + _times = np.asarray(self.times.view(np.int64), dtype=np.float64) + _halflife = float(Timedelta(self.halflife).value) + self._deltas = np.diff(_times) / _halflife # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args - if common.count_not_none(com, span, alpha) > 0: - self.com = get_center_of_mass(com, span, None, alpha) + if common.count_not_none(self.com, self.span, self.alpha) > 0: + self._com = get_center_of_mass(self.com, self.span, None, self.alpha) else: - self.com = 0.0 + self._com = 1.0 else: - if halflife is not None and isinstance(halflife, (str, datetime.timedelta)): + if self.halflife is not None and isinstance( + self.halflife, (str, datetime.timedelta) + ): raise ValueError( "halflife can only be a timedelta convertible argument if " "times is not None." 
) - self.times = None - self.halflife = None - self.com = get_center_of_mass(com, span, halflife, alpha) + # Without times, points are equally spaced + self._deltas = np.ones(max(len(self.obj) - 1, 0), dtype=np.float64) + self._com = get_center_of_mass( + self.com, self.span, self.halflife, self.alpha + ) def _get_window_indexer(self) -> BaseIndexer: """ @@ -334,21 +346,14 @@ def aggregate(self, func, *args, **kwargs): ) def mean(self, *args, **kwargs): nv.validate_window_func("mean", args, kwargs) - if self.times is not None: - window_func = window_aggregations.ewma_time - window_func = partial( - window_func, - times=self.times, - halflife=self.halflife, - ) - else: - window_func = window_aggregations.ewma - window_func = partial( - window_func, - com=self.com, - adjust=self.adjust, - ignore_na=self.ignore_na, - ) + window_func = window_aggregations.ewma + window_func = partial( + window_func, + com=self._com, + adjust=self.adjust, + ignore_na=self.ignore_na, + deltas=self._deltas, + ) return self._apply(window_func) @doc( @@ -409,7 +414,7 @@ def var(self, bias: bool = False, *args, **kwargs): window_func = window_aggregations.ewmcov window_func = partial( window_func, - com=self.com, + com=self._com, adjust=self.adjust, ignore_na=self.ignore_na, bias=bias, @@ -478,7 +483,7 @@ def cov_func(x, y): end, self.min_periods, y_array, - self.com, + self._com, self.adjust, self.ignore_na, bias, @@ -544,7 +549,7 @@ def _cov(X, Y): end, self.min_periods, Y, - self.com, + self._com, self.adjust, self.ignore_na, 1, @@ -611,7 +616,7 @@ def mean(self, engine=None, engine_kwargs=None): if maybe_use_numba(engine): groupby_ewma_func = generate_numba_groupby_ewma_func( engine_kwargs, - self.com, + self._com, self.adjust, self.ignore_na, ) diff --git a/pandas/io/api.py b/pandas/io/api.py index 2241f491b5d48..5926f2166ee9d 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -37,3 +37,4 @@ read_sql_table, ) from pandas.io.stata import read_stata +from pandas.io.xml import read_xml diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a1b6986079723..44428abdcd8a5 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1,5 +1,5 @@ """ -Internal module for formatting output data in csv, html, +Internal module for formatting output data in csv, html, xml, and latex files. This module also applies to display formatting. 
""" from __future__ import annotations @@ -61,6 +61,8 @@ IndexLabel, StorageOptions, ) +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import doc from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -96,6 +98,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.reshape.concat import concat +from pandas.core.shared_docs import _shared_docs from pandas.io.common import stringify_path from pandas.io.formats.printing import ( @@ -941,6 +944,7 @@ class DataFrameRenderer: Called in pandas.core.frame.DataFrame: - to_html + - to_xml - to_string Parameters @@ -1033,6 +1037,135 @@ def to_html( string = html_formatter.to_string() return save_to_buffer(string, buf=buf, encoding=encoding) + @doc(storage_options=_shared_docs["storage_options"]) + def to_xml( + self, + path_or_buffer: Optional[FilePathOrBuffer] = None, + index: Optional[bool] = True, + root_name: Optional[str] = "data", + row_name: Optional[str] = "row", + na_rep: Optional[str] = None, + attr_cols: Optional[Union[str, List[str]]] = None, + elem_cols: Optional[Union[str, List[str]]] = None, + namespaces: Optional[Dict[Optional[str], str]] = None, + prefix: Optional[str] = None, + encoding: str = "utf-8", + xml_declaration: Optional[bool] = True, + pretty_print: Optional[bool] = True, + parser: Optional[str] = "lxml", + stylesheet: Optional[FilePathOrBuffer] = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + ) -> Optional[str]: + """ + Render a DataFrame to an XML document. + + .. versionadded:: 1.3.0 + + Parameters + ---------- + path_or_buffer : str, path object or file-like object, optional + File to write output to. If None, the output is returned as a + string. + index : bool, default True + Whether to include index in XML document. + root_name : str, default 'data' + The name of root element in XML document. + row_name : str, default 'row' + The name of row element in XML document. + na_rep : str, optional + Missing data representation. + attr_cols : list-like, optional + List of columns to write as attributes in row element. + Hierarchical columns will be flattened with underscore + delimiting the different levels. + elem_cols : list-like, optional + List of columns to write as children in row element. By default, + all columns output as children of row element. Hierarchical + columns will be flattened with underscore delimiting the + different levels. + namespaces : dict, optional + All namespaces to be defined in root element. Keys of dict + should be prefix names and values of dict corresponding URIs. + Default namespaces should be given empty string key. For + example, :: + + namespaces = {{'': 'https://example.com'}} + + prefix : str, optional + Namespace prefix to be used for every element and/or attribute + in document. This should be one of the keys in ``namespaces`` + dict. + encoding : str, default 'utf-8' + Encoding of the resulting document. + xml_declaration : str, optional + Whether to include the XML declaration at start of document. + pretty_print : bool, default True + Whether output should be pretty printed with indentation and + line breaks. + parser : {{'lxml','etree'}}, default "lxml" + Parser module to use for building of tree. Only 'lxml' and + 'etree' are supported. With 'lxml', the ability to use XSLT + stylesheet is supported. 
+ stylesheet : str, path object or file-like object, optional + A URL, file-like object, or a raw string containing an XSLT + script used to transform the raw XML output. Script should use + layout of elements and attributes from original output. This + argument requires ``lxml`` to be installed. Only XSLT 1.0 + scripts and not later versions is currently supported. + compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buffer is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. + {storage_options} + """ + + from pandas.io.formats.xml import ( + EtreeXMLFormatter, + LxmlXMLFormatter, + ) + + lxml = import_optional_dependency("lxml.etree", errors="ignore") + + TreeBuilder: Union[Type[EtreeXMLFormatter], Type[LxmlXMLFormatter]] + + if parser == "lxml": + if lxml is not None: + TreeBuilder = LxmlXMLFormatter + else: + raise ImportError( + "lxml not found, please install or use the etree parser." + ) + + elif parser == "etree": + TreeBuilder = EtreeXMLFormatter + + else: + raise ValueError("Values for parser can only be lxml or etree.") + + xml_formatter = TreeBuilder( + self.fmt, + path_or_buffer=path_or_buffer, + index=index, + root_name=root_name, + row_name=row_name, + na_rep=na_rep, + attr_cols=attr_cols, + elem_cols=elem_cols, + namespaces=namespaces, + prefix=prefix, + encoding=encoding, + xml_declaration=xml_declaration, + pretty_print=pretty_print, + stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, + ) + + return xml_formatter.write_output() + def to_string( self, buf: Optional[FilePathOrBuffer[str]] = None, diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 854f41d6b4dc3..e50f5986098d3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -19,6 +19,7 @@ Sequence, Tuple, Union, + cast, ) from uuid import uuid4 @@ -55,7 +56,10 @@ CSSPair = Tuple[str, Union[str, int, float]] CSSList = List[CSSPair] CSSProperties = Union[str, CSSList] -CSSStyles = List[Dict[str, CSSProperties]] +CSSStyles = List[Dict[str, CSSProperties]] # = List[CSSDict] +# class CSSDict(TypedDict): # available when TypedDict is valid in pandas +# selector: str +# props: CSSProperties try: from matplotlib import colors @@ -566,7 +570,7 @@ def _translate(self): "body": body, "uuid": uuid, "precision": precision, - "table_styles": table_styles, + "table_styles": _format_table_styles(table_styles), "caption": caption, "table_attributes": table_attr, } @@ -1904,25 +1908,14 @@ def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str): ------- pseudo_css : List """ + selector_id = "#T_" + uuid + "row" + str(row) + "_col" + str(col) return [ { - "selector": "#T_" - + uuid - + "row" - + str(row) - + "_col" - + str(col) - + f":hover .{name}", + "selector": selector_id + f":hover .{name}", "props": [("visibility", "visible")], }, { - "selector": "#T_" - + uuid - + "row" - + str(row) - + "_col" - + str(col) - + f" .{name}::after", + "selector": selector_id + f" .{name}::after", "props": [("content", f'"{text}"')], }, ] @@ -2077,6 +2070,26 @@ def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: return style +def _format_table_styles(styles: CSSStyles) -> CSSStyles: + """ + looks for multiple CSS selectors and separates them: + 
[{'selector': 'td, th', 'props': 'a:v;'}] + ---> [{'selector': 'td', 'props': 'a:v;'}, + {'selector': 'th', 'props': 'a:v;'}] + """ + return [ + item + for sublist in [ + [ # this is a CSSDict when TypedDict is available to avoid cast. + {"selector": x, "props": style["props"]} + for x in cast(str, style["selector"]).split(",") + ] + for style in styles + ] + for item in sublist + ] + + def _non_reducing_slice(slice_): """ Ensure that a slice doesn't reduce to a Series or Scalar. diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py new file mode 100644 index 0000000000000..dd68f0f78261e --- /dev/null +++ b/pandas/io/formats/xml.py @@ -0,0 +1,618 @@ +""" +:mod:`pandas.io.formats.xml` is a module for formatting data in XML. +""" + +import codecs +import io +from typing import ( + Any, + Dict, + List, + Optional, + Union, +) + +from pandas._typing import ( + CompressionOptions, + FilePathOrBuffer, + StorageOptions, +) +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.common import is_list_like + +from pandas.io.common import get_handle +from pandas.io.formats.format import DataFrameFormatter +from pandas.io.xml import ( + get_data_from_filepath, + preprocess_data, +) + + +class BaseXMLFormatter: + """ + Subclass for formatting data in XML. + + Parameters + ---------- + path_or_buffer : str or file-like + This can be either a string of raw XML, a valid URL, + file or file-like object. + + index : bool + Whether to include index in xml document. + + row_name : str + Name for root of xml document. Default is 'data'. + + root_name : str + Name for row elements of xml document. Default is 'row'. + + na_rep : str + Missing data representation. + + attrs_cols : list + List of columns to write as attributes in row element. + + elem_cols : list + List of columns to write as children in row element. + + namespacess : dict + The namespaces to define in XML document as dicts with key + being namespace and value the URI. + + prefix : str + The prefix for each element in XML document including root. + + encoding : str + Encoding of xml object or document. + + xml_declaration : bool + Whether to include xml declaration at top line item in xml. + + pretty_print : bool + Whether to write xml document with line breaks and indentation. + + stylesheet : str or file-like + A URL, file, file-like object, or a raw string containing XSLT. + + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + Compression type for on-the-fly decompression of on-disk data. + If 'infer', then use extension for gzip, bz2, zip or xz. + + storage_options : dict, optional + Extra options that make sense for a particular storage connection, + e.g. 
host, port, username, password, etc., + + See also + -------- + pandas.io.formats.xml.EtreeXMLFormatter + pandas.io.formats.xml.LxmlXMLFormatter + + """ + + def __init__( + self, + formatter: DataFrameFormatter, + path_or_buffer: Optional[FilePathOrBuffer] = None, + index: Optional[bool] = True, + root_name: Optional[str] = "data", + row_name: Optional[str] = "row", + na_rep: Optional[str] = None, + attr_cols: Optional[List[str]] = None, + elem_cols: Optional[List[str]] = None, + namespaces: Optional[Dict[Optional[str], str]] = None, + prefix: Optional[str] = None, + encoding: str = "utf-8", + xml_declaration: Optional[bool] = True, + pretty_print: Optional[bool] = True, + stylesheet: Optional[FilePathOrBuffer] = None, + compression: CompressionOptions = "infer", + storage_options: StorageOptions = None, + ) -> None: + self.fmt = formatter + self.path_or_buffer = path_or_buffer + self.index = index + self.root_name = root_name + self.row_name = row_name + self.na_rep = na_rep + self.attr_cols = attr_cols + self.elem_cols = elem_cols + self.namespaces = namespaces + self.prefix = prefix + self.encoding = encoding + self.xml_declaration = xml_declaration + self.pretty_print = pretty_print + self.stylesheet = stylesheet + self.compression = compression + self.storage_options = storage_options + + self.frame = self.fmt.frame + self.orig_cols = self.fmt.frame.columns.tolist() + self.frame_dicts = self.process_dataframe() + + def build_tree(self) -> bytes: + """ + Build tree from data. + + This method initializes the root and builds attributes and elements + with optional namespaces. + """ + raise AbstractMethodError(self) + + def validate_columns(self) -> None: + """ + Validate elems_cols and attrs_cols. + + This method will check if columns is list-like. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.attr_cols and not is_list_like(self.attr_cols): + raise TypeError( + f"{type(self.attr_cols).__name__} is not a valid type for attr_cols" + ) + + if self.elem_cols and not is_list_like(self.elem_cols): + raise TypeError( + f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" + ) + + def validate_encoding(self) -> None: + """ + Validate encoding. + + This method will check if encoding is among listed under codecs. + + Raises + ------ + LookupError + * If encoding is not available in codecs. + """ + + codecs.lookup(self.encoding) + + def process_dataframe(self) -> Dict[Union[int, str], Dict[str, Any]]: + """ + Adjust Data Frame to fit xml output. + + This method will adjust underlying data frame for xml output, + including optionally replacing missing values and including indexes. + """ + + df = self.fmt.frame + + if self.index: + df = df.reset_index() + + if self.na_rep: + df = df.replace({None: self.na_rep, float("nan"): self.na_rep}) + + return df.to_dict(orient="index") + + def handle_indexes(self) -> None: + """ + Handle indexes. + + This method will add indexes into attr_cols or elem_cols. + """ + + indexes: List[str] = [ + x for x in self.frame_dicts[0].keys() if x not in self.orig_cols + ] + + if self.attr_cols and self.index: + self.attr_cols = indexes + self.attr_cols + + if self.elem_cols and self.index: + self.elem_cols = indexes + self.elem_cols + + def get_prefix_uri(self) -> str: + """ + Get uri of namespace prefix. + + This method retrieves corresponding URI to prefix in namespaces. + + Raises + ------ + KeyError + *If prefix is not included in namespace dict. 
+        """
+
+        raise AbstractMethodError(self)
+
+    def other_namespaces(self) -> dict:
+        """
+        Define other namespaces.
+
+        This method will build a dictionary of namespace attributes for
+        the root element, conditional on the optional namespaces and
+        prefix.
+        """
+
+        nmsp_dict: Dict[str, str] = {}
+        if self.namespaces and self.prefix is None:
+            nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p != ""}
+
+        if self.namespaces and self.prefix:
+            nmsp_dict = {"xmlns": n for p, n in self.namespaces.items() if p == ""}
+
+        return nmsp_dict
+
+    def build_attribs(self) -> None:
+        """
+        Create attributes of row.
+
+        This method adds attributes using attr_cols to the row element and
+        works with tuples for MultiIndex or hierarchical columns.
+        """
+
+        raise AbstractMethodError(self)
+
+    def build_elems(self) -> None:
+        """
+        Create child elements of row.
+
+        This method adds child elements using elem_cols to the row element
+        and works with tuples for MultiIndex or hierarchical columns.
+        """
+
+        raise AbstractMethodError(self)
+
+    def write_output(self) -> Optional[str]:
+        xml_doc = self.build_tree()
+
+        out_str: Optional[str]
+
+        if self.path_or_buffer is not None:
+            with get_handle(
+                self.path_or_buffer,
+                "wb",
+                compression=self.compression,
+                storage_options=self.storage_options,
+                is_text=False,
+            ) as handles:
+                handles.handle.write(xml_doc)  # type: ignore[arg-type]
+            return None
+
+        else:
+            return xml_doc.decode(self.encoding).rstrip()
+
+
+class EtreeXMLFormatter(BaseXMLFormatter):
+    """
+    Class for formatting data in xml using Python standard library
+    modules: `xml.etree.ElementTree` and `xml.dom.minidom`.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.validate_columns()
+        self.validate_encoding()
+        self.handle_indexes()
+        self.prefix_uri = self.get_prefix_uri()
+
+    def build_tree(self) -> bytes:
+        from xml.etree.ElementTree import (
+            Element,
+            SubElement,
+            tostring,
+        )
+
+        self.root = Element(
+            f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces()
+        )
+
+        for k, d in self.frame_dicts.items():
+            self.d = d
+            self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
+
+            if not self.attr_cols and not self.elem_cols:
+                self.elem_cols = list(self.frame_dicts[0].keys())
+                self.build_elems()
+
+            else:
+                self.build_attribs()
+                self.build_elems()
+
+        self.out_xml = tostring(self.root, method="xml", encoding=self.encoding)
+
+        if self.pretty_print:
+            self.out_xml = self.prettify_tree()
+
+        if self.xml_declaration:
+            self.out_xml = self.add_declaration()
+        else:
+            self.out_xml = self.remove_declaration()
+
+        if self.stylesheet is not None:
+            raise ValueError(
+                "To use stylesheet, you need lxml installed and selected as parser."
+            )
+
+        return self.out_xml
+
+    def get_prefix_uri(self) -> str:
+        from xml.etree.ElementTree import register_namespace
+
+        uri = ""
+        if self.namespaces:
+            for p, n in self.namespaces.items():
+                if isinstance(p, str) and isinstance(n, str):
+                    register_namespace(p, n)
+            if self.prefix:
+                try:
+                    uri = f"{{{self.namespaces[self.prefix]}}}"
+                except KeyError:
+                    raise KeyError(f"{self.prefix} is not included in namespaces")
+            else:
+                uri = f'{{{self.namespaces[""]}}}'
+
+        return uri
+
+    def build_attribs(self) -> None:
+        if not self.attr_cols:
+            return
+
+        for col in self.attr_cols:
+            flat_col = col
+            if isinstance(col, tuple):
+                flat_col = (
+                    "".join(str(c) for c in col).strip()
+                    if "" in col
+                    else "_".join(str(c) for c in col).strip()
+                )
+
+            attr_name = f"{self.prefix_uri}{flat_col}"
+            try:
+                val = (
+                    None
+                    if self.d[col] is None or self.d[col] != self.d[col]
+                    else str(self.d[col])
+                )
+                if val is not None:
+                    self.elem_row.attrib[attr_name] = val
+            except KeyError:
+                raise KeyError(f"no valid column, {col}")
+
+    def build_elems(self) -> None:
+        from xml.etree.ElementTree import SubElement
+
+        if not self.elem_cols:
+            return
+
+        for col in self.elem_cols:
+            flat_col = col
+            if isinstance(col, tuple):
+                flat_col = (
+                    "".join(str(c) for c in col).strip()
+                    if "" in col
+                    else "_".join(str(c) for c in col).strip()
+                )
+
+            elem_name = f"{self.prefix_uri}{flat_col}"
+            try:
+                val = (
+                    None
+                    if self.d[col] in [None, ""] or self.d[col] != self.d[col]
+                    else str(self.d[col])
+                )
+                SubElement(self.elem_row, elem_name).text = val
+            except KeyError:
+                raise KeyError(f"no valid column, {col}")
+
+    def prettify_tree(self) -> bytes:
+        """
+        Output tree for pretty print format.
+
+        This method will pretty print xml with line breaks and indentation.
+        """
+
+        from xml.dom.minidom import parseString
+
+        dom = parseString(self.out_xml)
+
+        return dom.toprettyxml(indent="  ", encoding=self.encoding)
+
+    def add_declaration(self) -> bytes:
+        """
+        Add xml declaration.
+
+        This method will add the xml declaration to the working tree.
+        Currently, xml_declaration is supported in etree starting in
+        Python 3.8.
+        """
+        decl = f'<?xml version="1.0" encoding="{self.encoding}"?>\n'
+
+        doc = (
+            self.out_xml
+            if self.out_xml.startswith(b"<?xml")
+            else decl.encode(self.encoding) + self.out_xml
+        )
+
+        return doc
+
+    def remove_declaration(self) -> bytes:
+        """
+        Remove xml declaration.
+
+        This method will remove the xml declaration from the working tree.
+        Currently, pretty_print is not supported in etree.
+        """
+
+        return self.out_xml.split(b"?>")[-1].strip()
+
+
+class LxmlXMLFormatter(BaseXMLFormatter):
+    """
+    Class for formatting data in xml using the third-party full-featured
+    XML library `lxml`, which supports XPath 1.0 and XSLT 1.0.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.validate_columns()
+        self.validate_encoding()
+        self.prefix_uri = self.get_prefix_uri()
+
+        self.convert_empty_str_key()
+        self.handle_indexes()
+
+    def build_tree(self) -> bytes:
+        """
+        Build tree from data.
+
+        This method initializes the root and builds attributes and elements
+        with optional namespaces.
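+
+        As a rough sketch (assuming default ``root_name``/``row_name``, a
+        single column ``a``, and the index included), the resulting
+        document has the shape::
+
+            <data>
+              <row>
+                <index>0</index>
+                <a>1</a>
+              </row>
+            </data>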
+        """
+        from lxml.etree import (
+            Element,
+            SubElement,
+            tostring,
+        )
+
+        self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces)
+
+        for k, d in self.frame_dicts.items():
+            self.d = d
+            self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}")
+
+            if not self.attr_cols and not self.elem_cols:
+                self.elem_cols = list(self.frame_dicts[0].keys())
+                self.build_elems()
+
+            else:
+                self.build_attribs()
+                self.build_elems()
+
+        self.out_xml = tostring(
+            self.root,
+            pretty_print=self.pretty_print,
+            method="xml",
+            encoding=self.encoding,
+            xml_declaration=self.xml_declaration,
+        )
+
+        if self.stylesheet is not None:
+            self.out_xml = self.transform_doc()
+
+        return self.out_xml
+
+    def convert_empty_str_key(self) -> None:
+        """
+        Replace zero-length string in `namespaces`.
+
+        This method will replace '' with None to comply with the `lxml`
+        requirement that empty string prefixes are not allowed.
+        """
+
+        if self.namespaces and "" in self.namespaces.keys():
+            self.namespaces[None] = self.namespaces.pop("", "default")
+
+    def get_prefix_uri(self) -> str:
+        uri = ""
+        if self.namespaces:
+            if self.prefix:
+                try:
+                    uri = f"{{{self.namespaces[self.prefix]}}}"
+                except KeyError:
+                    raise KeyError(f"{self.prefix} is not included in namespaces")
+            else:
+                uri = f'{{{self.namespaces[""]}}}'
+
+        return uri
+
+    def build_attribs(self) -> None:
+        if not self.attr_cols:
+            return
+
+        for col in self.attr_cols:
+            flat_col = col
+            if isinstance(col, tuple):
+                flat_col = (
+                    "".join(str(c) for c in col).strip()
+                    if "" in col
+                    else "_".join(str(c) for c in col).strip()
+                )
+
+            attr_name = f"{self.prefix_uri}{flat_col}"
+            try:
+                val = (
+                    None
+                    if self.d[col] is None or self.d[col] != self.d[col]
+                    else str(self.d[col])
+                )
+                if val is not None:
+                    self.elem_row.attrib[attr_name] = val
+            except KeyError:
+                raise KeyError(f"no valid column, {col}")
+
+    def build_elems(self) -> None:
+        from lxml.etree import SubElement
+
+        if not self.elem_cols:
+            return
+
+        for col in self.elem_cols:
+            flat_col = col
+            if isinstance(col, tuple):
+                flat_col = (
+                    "".join(str(c) for c in col).strip()
+                    if "" in col
+                    else "_".join(str(c) for c in col).strip()
+                )
+
+            elem_name = f"{self.prefix_uri}{flat_col}"
+            try:
+                val = (
+                    None
+                    if self.d[col] in [None, ""] or self.d[col] != self.d[col]
+                    else str(self.d[col])
+                )
+                SubElement(self.elem_row, elem_name).text = val
+            except KeyError:
+                raise KeyError(f"no valid column, {col}")
+
+    def transform_doc(self) -> bytes:
+        """
+        Parse stylesheet from file or buffer and run it.
+
+        This method will parse the stylesheet object into a tree, handling
+        each supported object type, and then transform the original tree
+        with the XSLT script.
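+
+        For example, the standard XSLT 1.0 identity transform (shown here
+        only as an illustration of a valid stylesheet, not as part of this
+        API) copies the tree unchanged::
+
+            <xsl:stylesheet version="1.0"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+              <xsl:template match="@*|node()">
+                <xsl:copy>
+                  <xsl:apply-templates select="@*|node()"/>
+                </xsl:copy>
+              </xsl:template>
+            </xsl:stylesheet>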
+        """
+
+        from lxml.etree import (
+            XSLT,
+            XMLParser,
+            fromstring,
+            parse,
+        )
+
+        style_doc = self.stylesheet
+
+        handle_data = get_data_from_filepath(
+            filepath_or_buffer=style_doc,
+            encoding=self.encoding,
+            compression=self.compression,
+            storage_options=self.storage_options,
+        )
+
+        with preprocess_data(handle_data) as xml_data:
+            curr_parser = XMLParser(encoding=self.encoding)
+
+            if isinstance(xml_data, io.StringIO):
+                xsl_doc = fromstring(
+                    xml_data.getvalue().encode(self.encoding), parser=curr_parser
+                )
+            else:
+                xsl_doc = parse(xml_data, parser=curr_parser)
+
+        transformer = XSLT(xsl_doc)
+        new_doc = transformer(self.root)
+
+        return bytes(new_doc)
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
new file mode 100644
index 0000000000000..83eba5f17c7b3
--- /dev/null
+++ b/pandas/io/xml.py
@@ -0,0 +1,944 @@
+"""
+:mod:`pandas.io.xml` is a module for reading XML.
+"""
+
+import io
+from typing import (
+    Dict,
+    List,
+    Optional,
+    Union,
+)
+
+from pandas._typing import (
+    Buffer,
+    CompressionOptions,
+    FilePathOrBuffer,
+    StorageOptions,
+)
+from pandas.compat._optional import import_optional_dependency
+from pandas.errors import (
+    AbstractMethodError,
+    ParserError,
+)
+from pandas.util._decorators import doc
+
+from pandas.core.dtypes.common import is_list_like
+
+from pandas.core.frame import DataFrame
+from pandas.core.shared_docs import _shared_docs
+
+from pandas.io.common import (
+    file_exists,
+    get_handle,
+    is_fsspec_url,
+    is_url,
+    stringify_path,
+)
+from pandas.io.parsers import TextParser
+
+
+class _XMLFrameParser:
+    """
+    Internal base class to parse XML into DataFrames.
+
+    Parameters
+    ----------
+    path_or_buffer : a valid XML str, path object or file-like object
+        Any valid string path is acceptable. The string could be a URL. Valid
+        URL schemes include http, ftp, s3, and file.
+
+    xpath : str
+        The XPath expression to parse the required set of nodes for
+        migration to a DataFrame. `etree` supports limited XPath.
+
+    namespaces : dict
+        The namespaces defined in the XML document (`xmlns:namespace='URI'`)
+        as dicts with key being namespace prefix and value the URI.
+
+    elems_only : bool
+        Parse only the child elements at the specified `xpath`.
+
+    attrs_only : bool
+        Parse only the attributes at the specified `xpath`.
+
+    names : list
+        Column names for the DataFrame of parsed XML data.
+
+    encoding : str
+        Encoding of xml object or document.
+
+    stylesheet : str or file-like
+        URL, file, file-like object, or a raw string containing XSLT.
+        `etree` does not support XSLT but the argument is retained for
+        consistency.
+
+    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
+        Compression type for on-the-fly decompression of on-disk data.
+        If 'infer', then use extension for gzip, bz2, zip or xz.
+
+    storage_options : dict, optional
+        Extra options that make sense for a particular storage connection,
+        e.g. host, port, username, password, etc.
+
+    See also
+    --------
+    pandas.io.xml._EtreeFrameParser
+    pandas.io.xml._LxmlFrameParser
+
+    Notes
+    -----
+    To subclass this class effectively you must override the following
+    methods:
+        * :func:`parse_data`
+        * :func:`_parse_nodes`
+        * :func:`_parse_doc`
+        * :func:`_validate_names`
+        * :func:`_validate_path`
+
+    See each method's respective documentation for details on their
+    functionality.
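+
+    As a rough sketch of the intended control flow (internal API, shown
+    only for illustration; see ``_parse`` below for the real call site)::
+
+        p = _LxmlFrameParser(
+            path_or_buffer, xpath, namespaces, elems_only, attrs_only,
+            names, encoding, stylesheet, compression, storage_options,
+        )
+        data_dicts = p.parse_data()  # list of {column: value} dicts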
+    """
+
+    def __init__(
+        self,
+        path_or_buffer,
+        xpath,
+        namespaces,
+        elems_only,
+        attrs_only,
+        names,
+        encoding,
+        stylesheet,
+        compression,
+        storage_options,
+    ):
+        self.path_or_buffer = path_or_buffer
+        self.xpath = xpath
+        self.namespaces = namespaces
+        self.elems_only = elems_only
+        self.attrs_only = attrs_only
+        self.names = names
+        self.encoding = encoding
+        self.stylesheet = stylesheet
+        self.is_style = None
+        self.compression = compression
+        self.storage_options = storage_options
+
+    def parse_data(self) -> List[Dict[str, Optional[str]]]:
+        """
+        Parse xml data.
+
+        This method will call the other internal methods to
+        validate xpath, names, parse and return specific nodes.
+        """
+
+        raise AbstractMethodError(self)
+
+    def _parse_nodes(self) -> List[Dict[str, Optional[str]]]:
+        """
+        Parse xml nodes.
+
+        This method will parse the children and attributes of elements
+        in xpath, conditionally for only elements, only attributes
+        or both while optionally renaming node names.
+
+        Raises
+        ------
+        ValueError
+            * If both only elements and only attributes are specified.
+
+        Notes
+        -----
+        Namespace URIs will be removed from node values on return. Also,
+        elements with missing children or attributes, compared to their
+        siblings, will have those optional keys filled with None values.
+        """
+
+        raise AbstractMethodError(self)
+
+    def _validate_path(self) -> None:
+        """
+        Validate xpath.
+
+        This method checks the xpath for valid syntax, that it evaluates,
+        and that it returns nodes.
+
+        Raises
+        ------
+        SyntaxError
+            * If xpath is not supported or there are issues with namespaces.
+
+        ValueError
+            * If xpath does not return any nodes.
+        """
+
+        raise AbstractMethodError(self)
+
+    def _validate_names(self) -> None:
+        """
+        Validate names.
+
+        This method will check if names is list-like and aligns
+        with the length of parsed nodes.
+
+        Raises
+        ------
+        ValueError
+            * If names is list-like but shorter than the number of nodes.
+        TypeError
+            * If names is not list-like.
+        """
+        raise AbstractMethodError(self)
+
+    def _parse_doc(self):
+        """
+        Build tree from io.
+
+        This method will parse the io object into a tree for parsing,
+        handling each supported object type.
+        """
+
+        raise AbstractMethodError(self)
+
+
+class _EtreeFrameParser(_XMLFrameParser):
+    """
+    Internal class to parse XML into DataFrames with the Python
+    standard library XML module: `xml.etree.ElementTree`.
+    """
+
+    from xml.etree.ElementTree import (
+        Element,
+        ElementTree,
+    )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def parse_data(self) -> List[Dict[str, Optional[str]]]:
+
+        if self.stylesheet is not None:
+            raise ValueError(
+                "To use stylesheet, you need lxml installed and selected as parser."
+ ) + + self.xml_doc = self._parse_doc() + + self._validate_path() + self._validate_names() + + return self._parse_nodes() + + def _parse_nodes(self) -> List[Dict[str, Optional[str]]]: + elems = self.xml_doc.findall(self.xpath, namespaces=self.namespaces) + dicts: List[Dict[str, Optional[str]]] + + if self.elems_only and self.attrs_only: + raise ValueError("Either element or attributes can be parsed not both.") + elif self.elems_only: + if self.names: + dicts = [ + { + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + } + for el in elems + ] + + elif self.attrs_only: + dicts = [ + {k: v.strip() if v else None for k, v in el.attrib.items()} + for el in elems + ] + + else: + if self.names: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.findall("*")) + }, + } + for el in elems + ] + + else: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + ch.tag: ch.text.strip() if ch.text else None + for ch in el.findall("*") + }, + } + for el in elems + ] + + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} for d in dicts + ] + + keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) + dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] + + if self.names: + dicts = [ + {nm: v for nm, (k, v) in zip(self.names, d.items())} for d in dicts + ] + + return dicts + + def _validate_path(self) -> None: + """ + Notes + ----- + `etree` supports limited XPath. If user attempts a more complex + expression syntax error will raise. + """ + + msg = ( + "xpath does not return any nodes. " + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + try: + elems = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + if elems is None: + raise ValueError(msg) + + if elems is not None and elems.find("*") is None and elems.attrib is None: + raise ValueError(msg) + + except (KeyError, SyntaxError): + raise SyntaxError( + "You have used an incorrect or unsupported XPath " + "expression for etree library or you used an " + "undeclared namespace prefix." + ) + + def _validate_names(self) -> None: + if self.names: + parent = self.xml_doc.find(self.xpath, namespaces=self.namespaces) + children = parent.findall("*") if parent else [] + + if is_list_like(self.names): + if len(self.names) < len(children): + raise ValueError( + "names does not match length of child elements in xpath." 
+ ) + else: + raise TypeError( + f"{type(self.names).__name__} is not a valid type for names" + ) + + def _parse_doc(self) -> Union[Element, ElementTree]: + from xml.etree.ElementTree import ( + XMLParser, + parse, + ) + + handle_data = get_data_from_filepath( + filepath_or_buffer=self.path_or_buffer, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + + with preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + r = parse(xml_data, parser=curr_parser) + + return r + + +class _LxmlFrameParser(_XMLFrameParser): + """ + Internal class to parse XML into DataFrames with third-party + full-featured XML library, `lxml`, that supports + XPath 1.0 and XSLT 1.0. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def parse_data(self) -> List[Dict[str, Optional[str]]]: + """ + Parse xml data. + + This method will call the other internal methods to + validate xpath, names, optionally parse and run XSLT, + and parse original or transformed XML and return specific nodes. + """ + + self.xml_doc = self._parse_doc(self.path_or_buffer) + + if self.stylesheet is not None: + self.xsl_doc = self._parse_doc(self.stylesheet) + self.xml_doc = self._transform_doc() + + self._validate_path() + self._validate_names() + + return self._parse_nodes() + + def _parse_nodes(self) -> List[Dict[str, Optional[str]]]: + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + dicts: List[Dict[str, Optional[str]]] + + if self.elems_only and self.attrs_only: + raise ValueError("Either element or attributes can be parsed not both.") + + elif self.elems_only: + if self.names: + dicts = [ + { + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.xpath("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + ch.tag: ch.text.strip() if ch.text else None + for ch in el.xpath("*") + } + for el in elems + ] + + elif self.attrs_only: + dicts = [el.attrib for el in elems] + + else: + if self.names: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + nm: ch.text.strip() if ch.text else None + for nm, ch in zip(self.names, el.xpath("*")) + }, + } + for el in elems + ] + else: + dicts = [ + { + **el.attrib, + **( + {el.tag: el.text.strip()} + if el.text and not el.text.isspace() + else {} + ), + **{ + ch.tag: ch.text.strip() if ch.text else None + for ch in el.xpath("*") + }, + } + for el in elems + ] + + if self.namespaces or "}" in list(dicts[0].keys())[0]: + dicts = [ + {k.split("}")[1] if "}" in k else k: v for k, v in d.items()} + for d in dicts + ] + + keys = list(dict.fromkeys([k for d in dicts for k in d.keys()])) + dicts = [{k: d[k] if k in d.keys() else None for k in keys} for d in dicts] + + if self.names: + dicts = [ + {nm: v for nm, (k, v) in zip(self.names, d.items())} for d in dicts + ] + + return dicts + + def _transform_doc(self): + """ + Transform original tree using stylesheet. + + This method will transform original xml using XSLT script into + am ideally flatter xml document for easier parsing and migration + to Data Frame. + """ + from lxml.etree import XSLT + + transformer = XSLT(self.xsl_doc) + new_doc = transformer(self.xml_doc) + + return new_doc + + def _validate_path(self) -> None: + + msg = ( + "xpath does not return any nodes. " + "Be sure row level nodes are in xpath. 
" + "If document uses namespaces denoted with " + "xmlns, be sure to define namespaces and " + "use them in xpath." + ) + + elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces) + children = self.xml_doc.xpath(self.xpath + "/*", namespaces=self.namespaces) + attrs = self.xml_doc.xpath(self.xpath + "/@*", namespaces=self.namespaces) + + if elems == []: + raise ValueError(msg) + + if elems != [] and attrs == [] and children == []: + raise ValueError(msg) + + def _validate_names(self) -> None: + """ + Validate names. + + This method will check if names is a list and aligns with + length of parse nodes. + + Raises + ------ + ValueError + * If value is not a list and less then length of nodes. + """ + if self.names: + children = self.xml_doc.xpath( + self.xpath + "[1]/*", namespaces=self.namespaces + ) + + if is_list_like(self.names): + if len(self.names) < len(children): + raise ValueError( + "names does not match length of child elements in xpath." + ) + else: + raise TypeError( + f"{type(self.names).__name__} is not a valid type for names" + ) + + def _parse_doc(self, raw_doc): + from lxml.etree import ( + XMLParser, + fromstring, + parse, + ) + + handle_data = get_data_from_filepath( + filepath_or_buffer=raw_doc, + encoding=self.encoding, + compression=self.compression, + storage_options=self.storage_options, + ) + + with preprocess_data(handle_data) as xml_data: + curr_parser = XMLParser(encoding=self.encoding) + + if isinstance(xml_data, io.StringIO): + doc = fromstring( + xml_data.getvalue().encode(self.encoding), parser=curr_parser + ) + else: + doc = parse(xml_data, parser=curr_parser) + + return doc + + +def get_data_from_filepath( + filepath_or_buffer, + encoding, + compression, + storage_options, +) -> Union[str, bytes, Buffer]: + """ + Extract raw XML data. + + The method accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. XML string or bytes + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + if ( + isinstance(filepath_or_buffer, str) + and not filepath_or_buffer.startswith((" Union[io.StringIO, io.BytesIO]: + """ + Convert extracted raw data. + + This method will return underlying data of extracted XML content. + The data either has a `read` attribute (e.g. a file object or a + StringIO/BytesIO) or is a string or bytes that is an XML document. + """ + + if isinstance(data, str): + data = io.StringIO(data) + + elif isinstance(data, bytes): + data = io.BytesIO(data) + + return data + + +def _data_to_frame(data, **kwargs) -> DataFrame: + """ + Convert parsed data to Data Frame. + + This method will bind xml dictionary data of keys and values + into named columns of Data Frame using the built-in TextParser + class that build Data Frame and infers specific dtypes. + """ + + tags = next(iter(data)) + nodes = [list(d.values()) for d in data] + + try: + with TextParser(nodes, names=tags, **kwargs) as tp: + return tp.read() + except ParserError: + raise ParserError( + "XML document may be too complex for import. " + "Try to flatten document and use distinct " + "element and attribute names." + ) + + +def _parse( + path_or_buffer, + xpath, + namespaces, + elems_only, + attrs_only, + names, + encoding, + parser, + stylesheet, + compression, + storage_options, + **kwargs, +) -> DataFrame: + """ + Call internal parsers. 
+
+    This method will conditionally call the internal parsers:
+    _LxmlFrameParser and/or _EtreeFrameParser.
+
+    Raises
+    ------
+    ImportError
+        * If lxml is not installed and is selected as the parser.
+
+    ValueError
+        * If parser is not lxml or etree.
+    """
+
+    lxml = import_optional_dependency("lxml.etree", errors="ignore")
+    p: Union[_EtreeFrameParser, _LxmlFrameParser]
+
+    if parser == "lxml":
+        if lxml is not None:
+            p = _LxmlFrameParser(
+                path_or_buffer,
+                xpath,
+                namespaces,
+                elems_only,
+                attrs_only,
+                names,
+                encoding,
+                stylesheet,
+                compression,
+                storage_options,
+            )
+        else:
+            raise ImportError("lxml not found, please install or use the etree parser.")
+
+    elif parser == "etree":
+        p = _EtreeFrameParser(
+            path_or_buffer,
+            xpath,
+            namespaces,
+            elems_only,
+            attrs_only,
+            names,
+            encoding,
+            stylesheet,
+            compression,
+            storage_options,
+        )
+    else:
+        raise ValueError("Values for parser can only be lxml or etree.")
+
+    data_dicts = p.parse_data()
+
+    return _data_to_frame(data=data_dicts, **kwargs)
+
+
+@doc(storage_options=_shared_docs["storage_options"])
+def read_xml(
+    path_or_buffer: FilePathOrBuffer,
+    xpath: Optional[str] = "./*",
+    namespaces: Optional[Union[dict, List[dict]]] = None,
+    elems_only: Optional[bool] = False,
+    attrs_only: Optional[bool] = False,
+    names: Optional[List[str]] = None,
+    encoding: Optional[str] = "utf-8",
+    parser: Optional[str] = "lxml",
+    stylesheet: Optional[FilePathOrBuffer] = None,
+    compression: CompressionOptions = "infer",
+    storage_options: StorageOptions = None,
+) -> DataFrame:
+    r"""
+    Read XML document into a ``DataFrame`` object.
+
+    .. versionadded:: 1.3.0
+
+    Parameters
+    ----------
+    path_or_buffer : str, path object, or file-like object
+        Any valid XML string or path is acceptable. The string could be a URL.
+        Valid URL schemes include http, ftp, s3, and file.
+
+    xpath : str, optional, default './\*'
+        The XPath to parse the required set of nodes for migration to a
+        DataFrame. XPath should return a collection of elements and not a
+        single element. Note: The ``etree`` parser supports limited XPath
+        expressions. For more complex XPath, use ``lxml`` which requires
+        installation.
+
+    namespaces : dict, optional
+        The namespaces defined in the XML document as dicts with key being
+        the namespace prefix and value the URI. There is no need to include
+        all namespaces in XML, only the ones used in the ``xpath``
+        expression. Note: if the XML document uses a default namespace
+        denoted as `xmlns='<URI>'` without a prefix, you must assign any
+        temporary namespace prefix such as 'doc' to the URI in order to
+        parse underlying nodes and/or attributes. For example, ::
+
+            namespaces = {{"doc": "https://example.com"}}
+
+    elems_only : bool, optional, default False
+        Parse only the child elements at the specified ``xpath``. By default,
+        all child elements and non-empty text nodes are returned.
+
+    attrs_only : bool, optional, default False
+        Parse only the attributes at the specified ``xpath``.
+        By default, all attributes are returned.
+
+    names : list-like, optional
+        Column names for the DataFrame of parsed XML data. Use this
+        parameter to rename original element names and distinguish
+        identically named elements.
+
+    encoding : str, optional, default 'utf-8'
+        Encoding of XML document.
+
+    parser : {{'lxml','etree'}}, default 'lxml'
+        Parser module to use for retrieval of data. Only 'lxml' and
+        'etree' are supported. With 'lxml', more complex XPath searches
+        and the ability to use XSLT stylesheets are available.
+
+    stylesheet : str, path object or file-like object
+        A URL, file-like object, or a raw string containing an XSLT script.
+        This stylesheet should flatten complex, deeply nested XML documents
+        for easier parsing. To use this feature you must have the ``lxml``
+        module installed and specify 'lxml' as ``parser``. The ``xpath`` must
+        reference nodes of the transformed XML document generated after XSLT
+        transformation and not the original XML document. Only XSLT 1.0
+        scripts, and not later versions, are currently supported.
+
+    compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer'
+        For on-the-fly decompression of on-disk data. If 'infer', then use
+        gzip, bz2, zip or xz if path_or_buffer is a string ending in
+        '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression
+        otherwise. If using 'zip', the ZIP file must contain only one data
+        file to be read in. Set to None for no decompression.
+
+    {storage_options}
+
+    Returns
+    -------
+    df
+        A DataFrame.
+
+    See Also
+    --------
+    read_json : Convert a JSON string to pandas object.
+    read_html : Read HTML tables into a list of DataFrame objects.
+
+    Notes
+    -----
+    This method is best designed to import shallow XML documents in the
+    following format, which is the ideal fit for the two dimensions of a
+    ``DataFrame`` (row by column). ::
+
+        <root>
+            <row>
+              <column1>data</column1>
+              <column2>data</column2>
+              <column3>data</column3>
+              ...
+           </row>
+           <row>
+              ...
+           </row>
+           ...
+        </root>
+
+    As a file format, XML documents can be designed any way including
+    layout of elements and attributes as long as it conforms to W3C
+    specifications. Therefore, this method is a convenience handler for
+    a specific flatter design and not all possible XML structures.
+
+    However, for more complex XML documents, ``stylesheet`` allows you to
+    temporarily redesign the original document with XSLT (a special purpose
+    language) into a flatter version for migration to a DataFrame.
+
+    This function will *always* return a single :class:`DataFrame` or raise
+    exceptions due to issues with the XML document, ``xpath``, or other
+    parameters.
+
+    Examples
+    --------
+    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
+    ... <data xmlns="http://example.com">
+    ...  <row>
+    ...    <shape>square</shape>
+    ...    <degrees>360</degrees>
+    ...    <sides>4.0</sides>
+    ...  </row>
+    ...  <row>
+    ...    <shape>circle</shape>
+    ...    <degrees>360</degrees>
+    ...    <sides/>
+    ...  </row>
+    ...  <row>
+    ...    <shape>triangle</shape>
+    ...    <degrees>180</degrees>
+    ...    <sides>3.0</sides>
+    ...  </row>
+    ... </data>'''
+
+    >>> df = pd.read_xml(xml)
+    >>> df
+          shape  degrees  sides
+    0    square      360    4.0
+    1    circle      360    NaN
+    2  triangle      180    3.0
+
+    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
+    ... <data>
+    ...   <row shape="square" degrees="360" sides="4.0"/>
+    ...   <row shape="circle" degrees="360"/>
+    ...   <row shape="triangle" degrees="180" sides="3.0"/>
+    ... </data>'''
+
+    >>> df = pd.read_xml(xml, xpath=".//row")
+    >>> df
+          shape  degrees  sides
+    0    square      360    4.0
+    1    circle      360    NaN
+    2  triangle      180    3.0
+
+    >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
+    ... <doc:data xmlns:doc="https://example.com">
+    ...   <doc:row>
+    ...     <doc:shape>square</doc:shape>
+    ...     <doc:degrees>360</doc:degrees>
+    ...     <doc:sides>4.0</doc:sides>
+    ...   </doc:row>
+    ...   <doc:row>
+    ...     <doc:shape>circle</doc:shape>
+    ...     <doc:degrees>360</doc:degrees>
+    ...     <doc:sides/>
+    ...   </doc:row>
+    ...   <doc:row>
+    ...     <doc:shape>triangle</doc:shape>
+    ...     <doc:degrees>180</doc:degrees>
+    ...     <doc:sides>3.0</doc:sides>
+    ...   </doc:row>
+    ... </doc:data>'''
+
+    >>> df = pd.read_xml(xml,
+    ...                  xpath="//doc:row",
+    ...                  
namespaces={{"doc": "https://example.com"}}) + >>> df + shape degrees sides + 0 square 360 4.0 + 1 circle 360 NaN + 2 triangle 180 3.0 + """ + + return _parse( + path_or_buffer=path_or_buffer, + xpath=xpath, + namespaces=namespaces, + elems_only=elems_only, + attrs_only=attrs_only, + names=names, + encoding=encoding, + parser=parser, + stylesheet=stylesheet, + compression=compression, + storage_options=storage_options, + ) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 541c2988a0636..fd1c19219c4bf 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -159,6 +159,7 @@ class TestPDApi(Base): "read_gbq", "read_hdf", "read_html", + "read_xml", "read_json", "read_pickle", "read_sas", diff --git a/pandas/tests/apply/conftest.py b/pandas/tests/apply/conftest.py new file mode 100644 index 0000000000000..b68c6235cb0b8 --- /dev/null +++ b/pandas/tests/apply/conftest.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest + +from pandas import DataFrame + + +@pytest.fixture +def int_frame_const_col(): + """ + Fixture for DataFrame of ints which are constant per column + + Columns are ['A', 'B', 'C'], with values (per column): [1, 2, 3] + """ + df = DataFrame( + np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, + columns=["A", "B", "C"], + ) + return df diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 3532040a2fd7b..12c803cbebaf3 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -14,41 +14,28 @@ Series, Timestamp, date_range, - notna, ) import pandas._testing as tm -from pandas.core.base import SpecificationError from pandas.tests.frame.common import zip_frames -@pytest.fixture -def int_frame_const_col(): - """ - Fixture for DataFrame of ints which are constant per column - - Columns are ['A', 'B', 'C'], with values (per column): [1, 2, 3] - """ - df = DataFrame( - np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, - columns=["A", "B", "C"], - ) - return df - - def test_apply(float_frame): with np.errstate(all="ignore"): # ufunc - applied = float_frame.apply(np.sqrt) - tm.assert_series_equal(np.sqrt(float_frame["A"]), applied["A"]) + result = np.sqrt(float_frame["A"]) + expected = float_frame.apply(np.sqrt)["A"] + tm.assert_series_equal(result, expected) # aggregator - applied = float_frame.apply(np.mean) - assert applied["A"] == np.mean(float_frame["A"]) + result = float_frame.apply(np.mean)["A"] + expected = np.mean(float_frame["A"]) + assert result == expected d = float_frame.index[0] - applied = float_frame.apply(np.mean, axis=1) - assert applied[d] == np.mean(float_frame.xs(d)) - assert applied.index is float_frame.index # want this + result = float_frame.apply(np.mean, axis=1) + expected = np.mean(float_frame.xs(d)) + assert result[d] == expected + assert result.index is float_frame.index # invalid axis df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) @@ -58,42 +45,42 @@ def test_apply(float_frame): # GH 9573 df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) - df = df.apply(lambda ts: ts.astype("category")) + result = df.apply(lambda ts: ts.astype("category")) - assert df.shape == (4, 2) - assert isinstance(df["c0"].dtype, CategoricalDtype) - assert isinstance(df["c1"].dtype, CategoricalDtype) + assert result.shape == (4, 2) + assert isinstance(result["c0"].dtype, CategoricalDtype) + assert isinstance(result["c1"].dtype, CategoricalDtype) def test_apply_axis1_with_ea(): # 
GH#36785 - df = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) - result = df.apply(lambda x: x, axis=1) - tm.assert_frame_equal(result, df) + expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]}) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) def test_apply_mixed_datetimelike(): # mixed datetimelike # GH 7778 - df = DataFrame( + expected = DataFrame( { "A": date_range("20130101", periods=3), "B": pd.to_timedelta(np.arange(3), unit="s"), } ) - result = df.apply(lambda x: x, axis=1) - tm.assert_frame_equal(result, df) + result = expected.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) def test_apply_empty(float_frame): # empty empty_frame = DataFrame() - applied = empty_frame.apply(np.sqrt) - assert applied.empty + result = empty_frame.apply(np.sqrt) + assert result.empty - applied = empty_frame.apply(np.mean) - assert applied.empty + result = empty_frame.apply(np.mean) + assert result.empty no_rows = float_frame[:0] result = no_rows.apply(lambda x: x.mean()) @@ -108,7 +95,7 @@ def test_apply_empty(float_frame): # GH 2476 expected = DataFrame(index=["a"]) result = expected.apply(lambda x: x["a"], axis=1) - tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(result, expected) def test_apply_with_reduce_empty(): @@ -192,17 +179,6 @@ def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", 1)] -) -def test_apply_str_axis_1_raises(how, args): - # GH 39211 - some ops don't support axis=1 - df = DataFrame({"a": [1, 2], "b": [3, 4]}) - msg = f"Operation {how} does not support axis=1" - with pytest.raises(ValueError, match=msg): - df.apply(how, axis=1, args=args) - - def test_apply_broadcast(float_frame, int_frame_const_col): # scalars @@ -256,27 +232,6 @@ def test_apply_broadcast(float_frame, int_frame_const_col): tm.assert_frame_equal(result, expected) -def test_apply_broadcast_error(int_frame_const_col): - df = int_frame_const_col - - # > 1 ndim - msg = "too many dims to broadcast" - with pytest.raises(ValueError, match=msg): - df.apply( - lambda x: np.array([1, 2]).reshape(-1, 2), - axis=1, - result_type="broadcast", - ) - - # cannot broadcast - msg = "cannot broadcast result" - with pytest.raises(ValueError, match=msg): - df.apply(lambda x: [1, 2], axis=1, result_type="broadcast") - - with pytest.raises(ValueError, match=msg): - df.apply(lambda x: Series([1, 2]), axis=1, result_type="broadcast") - - def test_apply_raw(float_frame, mixed_type_frame): def _assert_raw(x): assert isinstance(x, np.ndarray) @@ -285,14 +240,13 @@ def _assert_raw(x): float_frame.apply(_assert_raw, raw=True) float_frame.apply(_assert_raw, axis=1, raw=True) - result0 = float_frame.apply(np.mean, raw=True) - result1 = float_frame.apply(np.mean, axis=1, raw=True) - - expected0 = float_frame.apply(lambda x: x.values.mean()) - expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1) + result = float_frame.apply(np.mean, raw=True) + expected = float_frame.apply(lambda x: x.values.mean()) + tm.assert_series_equal(result, expected) - tm.assert_series_equal(result0, expected0) - tm.assert_series_equal(result1, expected1) + result = float_frame.apply(np.mean, axis=1, raw=True) + expected = float_frame.apply(lambda x: x.values.mean(), axis=1) + tm.assert_series_equal(result, expected) # no reduction result = float_frame.apply(lambda x: x * 2, raw=True) @@ -306,8 
+260,9 @@ def _assert_raw(x): def test_apply_axis1(float_frame): d = float_frame.index[0] - tapplied = float_frame.apply(np.mean, axis=1) - assert tapplied[d] == np.mean(float_frame.xs(d)) + result = float_frame.apply(np.mean, axis=1)[d] + expected = np.mean(float_frame.xs(d)) + assert result == expected def test_apply_mixed_dtype_corner(): @@ -401,92 +356,25 @@ def test_apply_reduce_to_dict(): # GH 25196 37544 data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"]) - result0 = data.apply(dict, axis=0) - expected0 = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) - tm.assert_series_equal(result0, expected0) + result = data.apply(dict, axis=0) + expected = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) + tm.assert_series_equal(result, expected) - result1 = data.apply(dict, axis=1) - expected1 = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) - tm.assert_series_equal(result1, expected1) + result = data.apply(dict, axis=1) + expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) + tm.assert_series_equal(result, expected) def test_apply_differently_indexed(): df = DataFrame(np.random.randn(20, 10)) - result0 = df.apply(Series.describe, axis=0) - expected0 = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) - tm.assert_frame_equal(result0, expected0) - - result1 = df.apply(Series.describe, axis=1) - expected1 = DataFrame( - {i: v.describe() for i, v in df.T.items()}, columns=df.index - ).T - tm.assert_frame_equal(result1, expected1) - - -def test_apply_modify_traceback(): - data = DataFrame( - { - "A": [ - "foo", - "foo", - "foo", - "foo", - "bar", - "bar", - "bar", - "bar", - "foo", - "foo", - "foo", - ], - "B": [ - "one", - "one", - "one", - "two", - "one", - "one", - "one", - "two", - "two", - "two", - "one", - ], - "C": [ - "dull", - "dull", - "shiny", - "dull", - "dull", - "shiny", - "shiny", - "dull", - "shiny", - "shiny", - "shiny", - ], - "D": np.random.randn(11), - "E": np.random.randn(11), - "F": np.random.randn(11), - } - ) - - data.loc[4, "C"] = np.nan - - def transform(row): - if row["C"].startswith("shin") and row["A"] == "foo": - row["D"] = 7 - return row - - def transform2(row): - if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo": - row["D"] = 7 - return row + result = df.apply(Series.describe, axis=0) + expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns) + tm.assert_frame_equal(result, expected) - msg = "'float' object has no attribute 'startswith'" - with pytest.raises(AttributeError, match=msg): - data.apply(transform, axis=1) + result = df.apply(Series.describe, axis=1) + expected = DataFrame({i: v.describe() for i, v in df.T.items()}, columns=df.index).T + tm.assert_frame_equal(result, expected) def test_apply_bug(): @@ -525,7 +413,7 @@ def f(r): def test_apply_convert_objects(): - data = DataFrame( + expected = DataFrame( { "A": [ "foo", @@ -572,8 +460,8 @@ def test_apply_convert_objects(): } ) - result = data.apply(lambda x: x, axis=1) - tm.assert_frame_equal(result._convert(datetime=True), data) + result = expected.apply(lambda x: x, axis=1)._convert(datetime=True) + tm.assert_frame_equal(result, expected) def test_apply_attach_name(float_frame): @@ -635,17 +523,17 @@ def test_applymap(float_frame): float_frame.applymap(type) # GH 465: function returning tuples - result = float_frame.applymap(lambda x: (x, x)) - assert isinstance(result["A"][0], tuple) + result = float_frame.applymap(lambda 
x: (x, x))["A"][0] + assert isinstance(result, tuple) # GH 2909: object conversion to float in constructor? df = DataFrame(data=[1, "a"]) - result = df.applymap(lambda x: x) - assert result.dtypes[0] == object + result = df.applymap(lambda x: x).dtypes[0] + assert result == object df = DataFrame(data=[1.0, "a"]) - result = df.applymap(lambda x: x) - assert result.dtypes[0] == object + result = df.applymap(lambda x: x).dtypes[0] + assert result == object # GH 2786 df = DataFrame(np.random.random((3, 4))) @@ -672,10 +560,10 @@ def test_applymap(float_frame): DataFrame(index=list("ABC")), DataFrame({"A": [], "B": [], "C": []}), ] - for frame in empty_frames: + for expected in empty_frames: for func in [round, lambda x: x]: - result = frame.applymap(func) - tm.assert_frame_equal(result, frame) + result = expected.applymap(func) + tm.assert_frame_equal(result, expected) def test_applymap_na_ignore(float_frame): @@ -743,7 +631,8 @@ def test_frame_apply_dont_convert_datetime64(): df = df.applymap(lambda x: x + BDay()) df = df.applymap(lambda x: x + BDay()) - assert df.x1.dtype == "M8[ns]" + result = df.x1.dtype + assert result == "M8[ns]" def test_apply_non_numpy_dtype(): @@ -787,11 +676,13 @@ def apply_list(row): def test_apply_noreduction_tzaware_object(): # https://github.com/pandas-dev/pandas/issues/31505 - df = DataFrame({"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]") - result = df.apply(lambda x: x) - tm.assert_frame_equal(result, df) - result = df.apply(lambda x: x.copy()) - tm.assert_frame_equal(result, df) + expected = DataFrame( + {"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]" + ) + result = expected.apply(lambda x: x) + tm.assert_frame_equal(result, expected) + result = expected.apply(lambda x: x.copy()) + tm.assert_frame_equal(result, expected) def test_apply_function_runs_once(): @@ -885,11 +776,11 @@ def test_infer_row_shape(): # GH 17437 # if row shape is changing, infer it df = DataFrame(np.random.rand(10, 2)) - result = df.apply(np.fft.fft, axis=0) - assert result.shape == (10, 2) + result = df.apply(np.fft.fft, axis=0).shape + assert result == (10, 2) - result = df.apply(np.fft.rfft, axis=0) - assert result.shape == (6, 2) + result = df.apply(np.fft.rfft, axis=0).shape + assert result == (6, 2) def test_with_dictlike_columns(): @@ -1101,19 +992,6 @@ def test_result_type(int_frame_const_col): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("result_type", ["foo", 1]) -def test_result_type_error(result_type, int_frame_const_col): - # allowed result_type - df = int_frame_const_col - - msg = ( - "invalid value for result_type, must be one of " - "{None, 'reduce', 'broadcast', 'expand'}" - ) - with pytest.raises(ValueError, match=msg): - df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type) - - @pytest.mark.parametrize( "box", [lambda x: list(x), lambda x: tuple(x), lambda x: np.array(x, dtype="int64")], @@ -1170,20 +1048,6 @@ def test_agg_transform(axis, float_frame): tm.assert_frame_equal(result, expected) -def test_transform_and_agg_err(axis, float_frame): - # cannot both transform and agg - msg = "cannot combine transform and aggregation operations" - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - float_frame.agg(["max", "sqrt"], axis=axis) - - df = DataFrame({"A": range(5), "B": 5}) - - def f(): - with np.errstate(all="ignore"): - df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis) - - def test_demo(): # demonstration tests df = DataFrame({"A": range(5), "B": 
5}) @@ -1254,16 +1118,6 @@ def test_agg_multiple_mixed_no_warning(): tm.assert_frame_equal(result, expected) -def test_agg_dict_nested_renaming_depr(): - - df = DataFrame({"A": range(5), "B": 5}) - - # nested renaming - msg = r"nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - df.agg({"A": {"foo": "min"}, "B": {"bar": "max"}}) - - def test_agg_reduce(axis, float_frame): other_axis = 1 if axis in {0, "index"} else 0 name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() @@ -1516,19 +1370,6 @@ def test_agg_cython_table_transform(df, func, expected, axis): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "df, func, expected", - tm.get_cython_table_params( - DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]] - ), -) -def test_agg_cython_table_raises(df, func, expected, axis): - # GH 21224 - msg = "can't multiply sequence by non-int of type 'str'" - with pytest.raises(expected, match=msg): - df.agg(func, axis=axis) - - @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "args, kwargs", diff --git a/pandas/tests/apply/test_frame_apply_relabeling.py b/pandas/tests/apply/test_frame_apply_relabeling.py index 732aff24428ac..2da4a78991f5a 100644 --- a/pandas/tests/apply/test_frame_apply_relabeling.py +++ b/pandas/tests/apply/test_frame_apply_relabeling.py @@ -1,5 +1,4 @@ import numpy as np -import pytest import pandas as pd import pandas._testing as tm @@ -96,12 +95,3 @@ def test_agg_namedtuple(): index=pd.Index(["foo", "bar", "cat"]), ) tm.assert_frame_equal(result, expected) - - -def test_agg_raises(): - # GH 26513 - df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) - msg = "Must provide" - - with pytest.raises(TypeError, match=msg): - df.agg() diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 1888ddd8ec4aa..5dc828dea9e35 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -1,5 +1,4 @@ import operator -import re import numpy as np import pytest @@ -10,7 +9,6 @@ Series, ) import pandas._testing as tm -from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels from pandas.tests.frame.common import zip_frames @@ -103,6 +101,17 @@ def test_transform_dictlike(axis, float_frame, box): tm.assert_frame_equal(result, expected) +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and non-lists in values of a dictionary + df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]}) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( "ops", [ @@ -148,47 +157,6 @@ def test_transform_method_name(method): tm.assert_frame_equal(result, expected) -def test_transform_and_agg_err(axis, float_frame): - # GH 35964 - # cannot both transform and agg - msg = "Function did not transform" - with pytest.raises(ValueError, match=msg): - float_frame.transform(["max", "min"], axis=axis) - - msg = "Function did not transform" - with pytest.raises(ValueError, match=msg): - float_frame.transform(["max", "sqrt"], axis=axis) - - -def test_agg_dict_nested_renaming_depr(): - df = DataFrame({"A": range(5), "B": 5}) - - # nested renaming - msg = r"nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - # mypy identifies the 
argument as an invalid type - df.transform({"A": {"foo": "min"}, "B": {"bar": "max"}}) - - -def test_transform_reducer_raises(all_reductions, frame_or_series): - # GH 35964 - op = all_reductions - - obj = DataFrame({"A": [1, 2, 3]}) - if frame_or_series is not DataFrame: - obj = obj["A"] - - msg = "Function did not transform" - with pytest.raises(ValueError, match=msg): - obj.transform(op) - with pytest.raises(ValueError, match=msg): - obj.transform([op]) - with pytest.raises(ValueError, match=msg): - obj.transform({"A": op}) - with pytest.raises(ValueError, match=msg): - obj.transform({"A": [op]}) - - wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"] frame_kernels_raise = [x for x in frame_kernels if x not in wont_fail] @@ -256,30 +224,6 @@ def f(x, a, b, c): frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs) -def test_transform_missing_columns(axis): - # GH#35964 - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - match = re.escape("Column(s) ['C'] do not exist") - with pytest.raises(KeyError, match=match): - df.transform({"C": "cumsum"}) - - -def test_transform_none_to_type(): - # GH#34377 - df = DataFrame({"a": [None]}) - msg = "Transform function failed" - with pytest.raises(ValueError, match=msg): - df.transform({"a": int}) - - -def test_transform_mixed_column_name_dtypes(): - # GH39025 - df = DataFrame({"a": ["1"]}) - msg = r"Column\(s\) \[1, 'b'\] do not exist" - with pytest.raises(KeyError, match=msg): - df.transform({"a": int, 1: str, "b": int}) - - def test_transform_empty_dataframe(): # https://github.com/pandas-dev/pandas/issues/39636 df = DataFrame([], columns=["col1", "col2"]) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index c67259d3c8194..5ad5390ab3e16 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -1,15 +1,48 @@ # Tests specifically aimed at detecting bad arguments. +# This file is organized by reason for exception. +# 1. always invalid argument values +# 2. missing column(s) +# 3. incompatible ops/dtype/args/kwargs +# 4. invalid result shape/type +# If your test does not fit into one of these categories, add to this list. 
+ import re +import numpy as np import pytest from pandas import ( DataFrame, Series, + date_range, + notna, ) +import pandas._testing as tm from pandas.core.base import SpecificationError +@pytest.mark.parametrize("result_type", ["foo", 1]) +def test_result_type_error(result_type, int_frame_const_col): + # allowed result_type + df = int_frame_const_col + + msg = ( + "invalid value for result_type, must be one of " + "{None, 'reduce', 'broadcast', 'expand'}" + ) + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type) + + +def test_agg_raises(): + # GH 26513 + df = DataFrame({"A": [0, 1], "B": [1, 2]}) + msg = "Must provide" + + with pytest.raises(TypeError, match=msg): + df.agg() + + @pytest.mark.parametrize("box", [DataFrame, Series]) @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) @pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}]) @@ -21,6 +54,45 @@ def test_nested_renamer(box, method, func): getattr(obj, method)(func) +def test_transform_nested_renamer(): + # GH 35964 + match = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=match): + Series([1]).transform({"A": {"B": ["sum"]}}) + + +def test_agg_dict_nested_renaming_depr_agg(): + + df = DataFrame({"A": range(5), "B": 5}) + + # nested renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.agg({"A": {"foo": "min"}, "B": {"bar": "max"}}) + + +def test_agg_dict_nested_renaming_depr_transform(): + df = DataFrame({"A": range(5), "B": 5}) + + # nested renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + # mypy identifies the argument as an invalid type + df.transform({"A": {"foo": "min"}, "B": {"bar": "max"}}) + + +def test_apply_dict_depr(): + + tsdf = DataFrame( + np.random.randn(10, 3), + columns=["A", "B", "C"], + index=date_range("1/1/2000", periods=10), + ) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + tsdf.A.agg({"foo": ["sum", "mean"]}) + + @pytest.mark.parametrize("method", ["apply", "agg", "transform"]) @pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}]) def test_missing_column(method, func): @@ -29,3 +101,215 @@ def test_missing_column(method, func): match = re.escape("Column(s) ['B'] do not exist") with pytest.raises(KeyError, match=match): getattr(obj, method)(func) + + +def test_transform_missing_columns(axis): + # GH#35964 + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + match = re.escape("Column(s) ['C'] do not exist") + with pytest.raises(KeyError, match=match): + df.transform({"C": "cumsum"}) + + +def test_transform_mixed_column_name_dtypes(): + # GH39025 + df = DataFrame({"a": ["1"]}) + msg = r"Column\(s\) \[1, 'b'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.transform({"a": int, 1: str, "b": int}) + + +@pytest.mark.parametrize( + "how, args", [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", 1)] +) +def test_apply_str_axis_1_raises(how, args): + # GH 39211 - some ops don't support axis=1 + df = DataFrame({"a": [1, 2], "b": [3, 4]}) + msg = f"Operation {how} does not support axis=1" + with pytest.raises(ValueError, match=msg): + df.apply(how, axis=1, args=args) + + +def test_transform_axis_1_raises(): + # GH 35964 + msg = "No axis named 1 for object type Series" + with pytest.raises(ValueError, match=msg): + Series([1]).transform("sum", axis=1) + + +def test_apply_modify_traceback(): + data = 
DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + data.loc[4, "C"] = np.nan + + def transform(row): + if row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + def transform2(row): + if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + msg = "'float' object has no attribute 'startswith'" + with pytest.raises(AttributeError, match=msg): + data.apply(transform, axis=1) + + +@pytest.mark.parametrize( + "df, func, expected", + tm.get_cython_table_params( + DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]] + ), +) +def test_agg_cython_table_raises(df, func, expected, axis): + # GH 21224 + msg = "can't multiply sequence by non-int of type 'str'" + with pytest.raises(expected, match=msg): + df.agg(func, axis=axis) + + +def test_transform_none_to_type(): + # GH#34377 + df = DataFrame({"a": [None]}) + msg = "Transform function failed" + with pytest.raises(ValueError, match=msg): + df.transform({"a": int}) + + +def test_apply_broadcast_error(int_frame_const_col): + df = int_frame_const_col + + # > 1 ndim + msg = "too many dims to broadcast" + with pytest.raises(ValueError, match=msg): + df.apply( + lambda x: np.array([1, 2]).reshape(-1, 2), + axis=1, + result_type="broadcast", + ) + + # cannot broadcast + msg = "cannot broadcast result" + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: [1, 2], axis=1, result_type="broadcast") + + with pytest.raises(ValueError, match=msg): + df.apply(lambda x: Series([1, 2]), axis=1, result_type="broadcast") + + +def test_transform_and_agg_err_agg(axis, float_frame): + # cannot both transform and agg + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + float_frame.agg(["max", "sqrt"], axis=axis) + + df = DataFrame({"A": range(5), "B": 5}) + + def f(): + with np.errstate(all="ignore"): + df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis) + + +def test_transform_and_agg_error_agg(string_series): + # we are trying to transform with an aggregator + msg = "cannot combine transform and aggregation" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.agg(["sqrt", "max"]) + + msg = "cannot perform both aggregation and transformation" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.agg({"foo": np.sqrt, "bar": "sum"}) + + +def test_transform_and_agg_err_transform(axis, float_frame): + # GH 35964 + # cannot both transform and agg + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + float_frame.transform(["max", "min"], axis=axis) + + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + float_frame.transform(["max", "sqrt"], axis=axis) + + +def test_transform_reducer_raises(all_reductions, frame_or_series): + # GH 35964 + op = all_reductions + + obj = DataFrame({"A": [1, 2, 3]}) + if frame_or_series is not DataFrame: + obj = obj["A"] + + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): 
+ obj.transform(op) + with pytest.raises(ValueError, match=msg): + obj.transform([op]) + with pytest.raises(ValueError, match=msg): + obj.transform({"A": op}) + with pytest.raises(ValueError, match=msg): + obj.transform({"A": [op]}) + + +def test_transform_wont_agg(string_series): + # GH 35964 + # we are trying to transform with an aggregator + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + string_series.transform(["min", "max"]) + + msg = "Function did not transform" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.transform(["sqrt", "max"]) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 19e6cda4ebd22..5d4a2e489e172 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -182,18 +182,6 @@ def f(x): tm.assert_series_equal(result, exp) -def test_apply_dict_depr(): - - tsdf = DataFrame( - np.random.randn(10, 3), - columns=["A", "B", "C"], - index=pd.date_range("1/1/2000", periods=10), - ) - msg = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=msg): - tsdf.A.agg({"foo": ["sum", "mean"]}) - - def test_apply_categorical(): values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) ser = Series(values, name="XX", index=list("abcdefg")) @@ -269,19 +257,6 @@ def test_transform(string_series): tm.assert_series_equal(result.reindex_like(expected), expected) -def test_transform_and_agg_error(string_series): - # we are trying to transform with an aggregator - msg = "cannot combine transform and aggregation" - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - string_series.agg(["sqrt", "max"]) - - msg = "cannot perform both aggregation and transformation" - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - string_series.agg({"foo": np.sqrt, "bar": "sum"}) - - def test_demo(): # demonstration tests s = Series(range(6), dtype="int64", name="series") diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py index e67ea4f14e4ac..90065d20e1a59 100644 --- a/pandas/tests/apply/test_series_transform.py +++ b/pandas/tests/apply/test_series_transform.py @@ -2,11 +2,12 @@ import pytest from pandas import ( + DataFrame, + MultiIndex, Series, concat, ) import pandas._testing as tm -from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels # tshift only works on time index and is deprecated @@ -55,28 +56,12 @@ def test_transform_dictlike(string_series, box): tm.assert_frame_equal(result, expected) -def test_transform_wont_agg(string_series): - # GH 35964 - # we are trying to transform with an aggregator - msg = "Function did not transform" - with pytest.raises(ValueError, match=msg): - string_series.transform(["min", "max"]) - - msg = "Function did not transform" - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - string_series.transform(["sqrt", "max"]) - - -def test_transform_axis_1_raises(): - # GH 35964 - msg = "No axis named 1 for object type Series" - with pytest.raises(ValueError, match=msg): - Series([1]).transform("sum", axis=1) - - -def test_transform_nested_renamer(): - # GH 35964 - match = "nested renamer is not supported" - with pytest.raises(SpecificationError, match=match): - Series([1]).transform({"A": {"B": ["sum"]}}) +def test_transform_dictlike_mixed(): + # GH 40018 - mix of lists and 
non-lists in values of a dictionary + df = Series([1, 4]) + result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"}) + expected = DataFrame( + [[1.0, 1, 1.0], [2.0, 4, 2.0]], + columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 7c144c390a128..93ba16c5fda22 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -42,6 +42,12 @@ def test_categorical_scalar_deprecated(self): with tm.assert_produces_warning(FutureWarning): Categorical("A", categories=["A", "B"]) + def test_categorical_1d_only(self): + # ndim > 1 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + Categorical(np.array([list("abcd")])) + def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py index 7ba4da8a5ede9..e674b49a99bd4 100644 --- a/pandas/tests/arrays/floating/test_arithmetic.py +++ b/pandas/tests/arrays/floating/test_arithmetic.py @@ -180,3 +180,24 @@ def test_cross_type_arithmetic(): result = df.A + df.B expected = pd.Series([2, np.nan, np.nan], dtype="Float64") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "source, neg_target, abs_target", + [ + ([1.1, 2.2, 3.3], [-1.1, -2.2, -3.3], [1.1, 2.2, 3.3]), + ([1.1, 2.2, None], [-1.1, -2.2, None], [1.1, 2.2, None]), + ([-1.1, 0.0, 1.1], [1.1, 0.0, -1.1], [1.1, 0.0, 1.1]), + ], +) +def test_unary_float_operators(float_ea_dtype, source, neg_target, abs_target): + # GH38794 + dtype = float_ea_dtype + arr = pd.array(source, dtype=dtype) + neg_result, pos_result, abs_result = -arr, +arr, abs(arr) + neg_target = pd.array(neg_target, dtype=dtype) + abs_target = pd.array(abs_target, dtype=dtype) + + tm.assert_extension_array_equal(neg_result, neg_target) + tm.assert_extension_array_equal(pos_result, arr) + tm.assert_extension_array_equal(abs_result, abs_target) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 0c1b10f66a73b..2eb88b669bcb1 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -284,36 +284,22 @@ def test_reduce_to_float(op): @pytest.mark.parametrize( - "source, target", + "source, neg_target, abs_target", [ - ([1, 2, 3], [-1, -2, -3]), - ([1, 2, None], [-1, -2, None]), - ([-1, 0, 1], [1, 0, -1]), + ([1, 2, 3], [-1, -2, -3], [1, 2, 3]), + ([1, 2, None], [-1, -2, None], [1, 2, None]), + ([-1, 0, 1], [1, 0, -1], [1, 0, 1]), ], ) -def test_unary_minus_nullable_int(any_signed_nullable_int_dtype, source, target): +def test_unary_int_operators( + any_signed_nullable_int_dtype, source, neg_target, abs_target +): dtype = any_signed_nullable_int_dtype arr = pd.array(source, dtype=dtype) - result = -arr - expected = pd.array(target, dtype=dtype) - tm.assert_extension_array_equal(result, expected) - - -@pytest.mark.parametrize("source", [[1, 2, 3], [1, 2, None], [-1, 0, 1]]) -def test_unary_plus_nullable_int(any_signed_nullable_int_dtype, source): - dtype = any_signed_nullable_int_dtype - expected = pd.array(source, dtype=dtype) - result = +expected - tm.assert_extension_array_equal(result, expected) + neg_result, pos_result, abs_result = 
-arr, +arr, abs(arr) + neg_target = pd.array(neg_target, dtype=dtype) + abs_target = pd.array(abs_target, dtype=dtype) - -@pytest.mark.parametrize( - "source, target", - [([1, 2, 3], [1, 2, 3]), ([1, -2, None], [1, 2, None]), ([-1, 0, 1], [1, 0, 1])], -) -def test_abs_nullable_int(any_signed_nullable_int_dtype, source, target): - dtype = any_signed_nullable_int_dtype - s = pd.array(source, dtype=dtype) - result = abs(s) - expected = pd.array(target, dtype=dtype) - tm.assert_extension_array_equal(result, expected) + tm.assert_extension_array_equal(neg_result, neg_target) + tm.assert_extension_array_equal(pos_result, arr) + tm.assert_extension_array_equal(abs_result, abs_target) diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 1fc7f824c6daa..adb52fce17f8b 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -165,12 +165,14 @@ def test_error_len_mismatch(data, all_arithmetic_operators): @pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"]) -@pytest.mark.parametrize( - "values, dtype", [([1, 2, 3], "Int64"), ([True, False, True], "boolean")] -) -def test_unary_op_does_not_propagate_mask(op, values, dtype): +def test_unary_op_does_not_propagate_mask(data, op, request): # https://github.com/pandas-dev/pandas/issues/39943 - s = pd.Series(values, dtype=dtype) + data, _ = data + if data.dtype in ["Float32", "Float64"] and op == "__invert__": + request.node.add_marker( + pytest.mark.xfail(reason="invert is not implemented for float ea dtypes") + ) + s = pd.Series(data) result = getattr(s, op)() expected = result.copy(deep=True) s[0] = None diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 070dec307f527..87a095e1003c4 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -85,12 +85,10 @@ def arr1d(self): arr = self.array_cls(data, freq="D") return arr - def test_compare_len1_raises(self): + def test_compare_len1_raises(self, arr1d): # make sure we raise when comparing with different lengths, specific # to the case where one has length-1, which numpy would broadcast - data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - - arr = self.array_cls._simple_new(data, freq="D") + arr = arr1d idx = self.index_cls(arr) with pytest.raises(ValueError, match="Lengths must match"): @@ -153,7 +151,9 @@ def test_take(self): data = np.arange(100, dtype="i8") * 24 * 3600 * 10 ** 9 np.random.shuffle(data) - arr = self.array_cls._simple_new(data, freq="D") + freq = None if self.array_cls is not PeriodArray else "D" + + arr = self.array_cls(data, freq=freq) idx = self.index_cls._simple_new(arr) takers = [1, 4, 94] @@ -172,7 +172,7 @@ def test_take(self): def test_take_fill_raises(self, fill_value): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - arr = self.array_cls._simple_new(data, freq="D") + arr = self.array_cls(data, freq="D") msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. 
Got" with pytest.raises(TypeError, match=msg): @@ -181,7 +181,7 @@ def test_take_fill_raises(self, fill_value): def test_take_fill(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - arr = self.array_cls._simple_new(data, freq="D") + arr = self.array_cls(data, freq="D") result = arr.take([-1, 1], allow_fill=True, fill_value=None) assert result[0] is pd.NaT @@ -202,10 +202,8 @@ def test_take_fill_str(self, arr1d): with pytest.raises(TypeError, match=msg): arr1d.take([-1, 1], allow_fill=True, fill_value="foo") - def test_concat_same_type(self): - data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 - - arr = self.array_cls._simple_new(data, freq="D") + def test_concat_same_type(self, arr1d): + arr = arr1d idx = self.index_cls(arr) idx = idx.insert(0, pd.NaT) arr = self.array_cls(idx) diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py index b042e29986c80..ceb882ff9c963 100644 --- a/pandas/tests/base/test_constructors.py +++ b/pandas/tests/base/test_constructors.py @@ -124,9 +124,7 @@ class TestConstruction: [ Series, lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], - pytest.param( - lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail - ), + lambda x, **kwargs: DataFrame(x, **kwargs)[0], Index, ], ) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index fe271392122a2..72da93a5c4de3 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -19,3 +19,13 @@ def test_construct_1d_ndarray_preserving_na(values, dtype, expected): result = construct_1d_ndarray_preserving_na(values, dtype=dtype) tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) +def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): + arr = np.arange(5, dtype=np.int64).view(dtype) + expected = np.array(list(arr), dtype=object) + assert all(isinstance(x, type(arr[0])) for x in expected) + + result = construct_1d_ndarray_preserving_na(arr, np.dtype(object)) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index bf83085058cfc..ca311768dc2d9 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -821,7 +821,7 @@ class TestCategoricalDtypeParametrized: np.arange(1000), ["a", "b", 10, 2, 1.3, True], [True, False], - pd.date_range("2017", periods=4), + date_range("2017", periods=4), ], ) def test_basic(self, categories, ordered): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 046256535df57..78a62c832833f 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -564,19 +564,35 @@ def test_maybe_convert_objects_datetime(self): [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object ) exp = arr.copy() - out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) tm.assert_numpy_array_equal(out, exp) arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") - out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) tm.assert_numpy_array_equal(out, 
exp) arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) exp = arr.copy() - out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) tm.assert_numpy_array_equal(out, exp) + def test_maybe_convert_objects_timedelta64_nat(self): + obj = np.timedelta64("NaT", "ns") + arr = np.array([obj], dtype=object) + assert arr[0] is obj + + result = lib.maybe_convert_objects(arr, convert_timedelta=True) + + expected = np.array([obj], dtype="m8[ns]") + tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize( "exp", [ @@ -587,7 +603,7 @@ def test_maybe_convert_objects_datetime(self): def test_maybe_convert_objects_nullable_integer(self, exp): # GH27335 arr = np.array([2, np.NaN], dtype=object) - result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) + result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True) tm.assert_extension_array_equal(result, exp) @@ -601,7 +617,7 @@ def test_maybe_convert_objects_bool_nan(self): def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) - result = lib.maybe_convert_objects(array, convert_datetime=1) + result = lib.maybe_convert_objects(array, convert_datetime=True) tm.assert_numpy_array_equal(result, array) @@ -792,7 +808,7 @@ def test_unicode(self): (object, None, True, "empty"), ], ) - @pytest.mark.parametrize("box", [pd.Series, np.array]) + @pytest.mark.parametrize("box", [Series, np.array]) def test_object_empty(self, box, missing, dtype, skipna, expected): # GH 23421 arr = box([missing, missing], dtype=dtype) @@ -899,7 +915,7 @@ def test_infer_dtype_period(self): arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="M")]) assert lib.infer_dtype(arr, skipna=True) == "period" - @pytest.mark.parametrize("klass", [pd.array, pd.Series, pd.Index]) + @pytest.mark.parametrize("klass", [pd.array, Series, Index]) @pytest.mark.parametrize("skipna", [True, False]) def test_infer_dtype_period_array(self, klass, skipna): # https://github.com/pandas-dev/pandas/issues/23553 @@ -1248,7 +1264,7 @@ def test_interval(self): inferred = lib.infer_dtype(Series(idx), skipna=False) assert inferred == "interval" - @pytest.mark.parametrize("klass", [pd.array, pd.Series]) + @pytest.mark.parametrize("klass", [pd.array, Series]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]]) def test_string_dtype(self, data, skipna, klass): @@ -1257,7 +1273,7 @@ def test_string_dtype(self, data, skipna, klass): inferred = lib.infer_dtype(val, skipna=skipna) assert inferred == "string" - @pytest.mark.parametrize("klass", [pd.array, pd.Series]) + @pytest.mark.parametrize("klass", [pd.array, Series]) @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]]) def test_boolean_dtype(self, data, skipna, klass): diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index ecd56b5b61244..02bae02436d8c 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -205,16 +205,16 @@ def test_isna_datetime(self): def test_isna_old_datetimelike(self): # isna_old should work for dt64tz, td64, and period, not just tznaive - dti = pd.date_range("2016-01-01", periods=3) + dti = date_range("2016-01-01", periods=3) dta = dti._data - dta[-1] = pd.NaT + 
dta[-1] = NaT expected = np.array([False, False, True], dtype=bool) objs = [dta, dta.tz_localize("US/Eastern"), dta - dta, dta.to_period("D")] for obj in objs: with cf.option_context("mode.use_inf_as_na", True): - result = pd.isna(obj) + result = isna(obj) tm.assert_numpy_array_equal(result, expected) @@ -320,38 +320,38 @@ def test_period(self): def test_decimal(self): # scalars GH#23530 a = Decimal(1.0) - assert pd.isna(a) is False - assert pd.notna(a) is True + assert isna(a) is False + assert notna(a) is True b = Decimal("NaN") - assert pd.isna(b) is True - assert pd.notna(b) is False + assert isna(b) is True + assert notna(b) is False # array arr = np.array([a, b]) expected = np.array([False, True]) - result = pd.isna(arr) + result = isna(arr) tm.assert_numpy_array_equal(result, expected) - result = pd.notna(arr) + result = notna(arr) tm.assert_numpy_array_equal(result, ~expected) # series ser = Series(arr) expected = Series(expected) - result = pd.isna(ser) + result = isna(ser) tm.assert_series_equal(result, expected) - result = pd.notna(ser) + result = notna(ser) tm.assert_series_equal(result, ~expected) # index idx = pd.Index(arr) expected = np.array([False, True]) - result = pd.isna(idx) + result = isna(idx) tm.assert_numpy_array_equal(result, expected) - result = pd.notna(idx) + result = notna(idx) tm.assert_numpy_array_equal(result, ~expected) @@ -578,7 +578,7 @@ def _check_behavior(self, arr, expected): tm.assert_numpy_array_equal(result, expected) def test_basic(self): - arr = np.array([1, None, "foo", -5.1, pd.NaT, np.nan]) + arr = np.array([1, None, "foo", -5.1, NaT, np.nan]) expected = np.array([False, True, False, False, True, True]) self._check_behavior(arr, expected) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 30b115b9dba6f..d93afef60561a 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -1,5 +1,7 @@ import pytest +from pandas.compat.numpy import is_numpy_dev + import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -73,6 +75,10 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df.groupby("A").apply(groupby_apply_op) df.groupby("A").B.apply(groupby_apply_op) + # Non-strict bc these xpass on dt64tz, Period, Interval, JSON, PandasArray + @pytest.mark.xfail( + is_numpy_dev, reason="2021-03-02 #40144 expecting fix in numpy", strict=False + ) def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) result = df.groupby("A").B.apply(lambda x: x.array) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 3ef3beaa9c1b1..89991a459795e 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -16,6 +16,8 @@ import numpy as np import pytest +from pandas.compat.numpy import is_numpy_dev + import pandas as pd import pandas._testing as tm from pandas.core.arrays.boolean import BooleanDtype @@ -320,6 +322,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df.groupby("A").apply(groupby_apply_op) df.groupby("A").B.apply(groupby_apply_op) + @pytest.mark.xfail(is_numpy_dev, reason="2021-03-02 #40144 expecting fix in numpy") def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) result = df.groupby("A").B.apply(lambda x: x.array) diff 
--git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index d0a3ef17afdbc..49aee76e10f6a 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -26,6 +26,29 @@ from pandas.tests.extension import base +def split_array(arr): + if not isinstance(arr.dtype, ArrowStringDtype): + pytest.skip("chunked array n/a") + + def _split_array(arr): + import pyarrow as pa + + arrow_array = arr._data + split = len(arrow_array) // 2 + arrow_array = pa.chunked_array( + [*arrow_array[:split].chunks, *arrow_array[split:].chunks] + ) + assert arrow_array.num_chunks == 2 + return type(arr)(arrow_array) + + return _split_array(arr) + + +@pytest.fixture(params=[True, False]) +def chunked(request): + return request.param + + @pytest.fixture( params=[ StringDtype, @@ -39,28 +62,32 @@ def dtype(request): @pytest.fixture -def data(dtype): +def data(dtype, chunked): strings = np.random.choice(list(string.ascii_letters), size=100) while strings[0] == strings[1]: strings = np.random.choice(list(string.ascii_letters), size=100) - return dtype.construct_array_type()._from_sequence(strings) + arr = dtype.construct_array_type()._from_sequence(strings) + return split_array(arr) if chunked else arr @pytest.fixture -def data_missing(dtype): +def data_missing(dtype, chunked): """Length 2 array with [NA, Valid]""" - return dtype.construct_array_type()._from_sequence([pd.NA, "A"]) + arr = dtype.construct_array_type()._from_sequence([pd.NA, "A"]) + return split_array(arr) if chunked else arr @pytest.fixture -def data_for_sorting(dtype): - return dtype.construct_array_type()._from_sequence(["B", "C", "A"]) +def data_for_sorting(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence(["B", "C", "A"]) + return split_array(arr) if chunked else arr @pytest.fixture -def data_missing_for_sorting(dtype): - return dtype.construct_array_type()._from_sequence(["B", pd.NA, "A"]) +def data_missing_for_sorting(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence(["B", pd.NA, "A"]) + return split_array(arr) if chunked else arr @pytest.fixture @@ -69,10 +96,11 @@ def na_value(): @pytest.fixture -def data_for_grouping(dtype): - return dtype.construct_array_type()._from_sequence( +def data_for_grouping(dtype, chunked): + arr = dtype.construct_array_type()._from_sequence( ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"] ) + return split_array(arr) if chunked else arr class TestDtype(base.BaseDtypeTests): diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 0d36f3bd80e26..bc1007162884a 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -6,11 +6,13 @@ import pytz from pandas.compat import is_platform_little_endian +import pandas.util._test_decorators as td from pandas import ( CategoricalIndex, DataFrame, Index, + Int64Index, Interval, RangeIndex, Series, @@ -118,6 +120,8 @@ def test_from_records_sequencelike(self): tm.assert_series_equal(result["C"], df["C"]) tm.assert_series_equal(result["E1"], df["E1"].astype("float64")) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) empty from_records + def test_from_records_sequencelike_empty(self): # empty case result = DataFrame.from_records([], columns=["foo", "bar", "baz"]) assert len(result) == 0 @@ -184,7 +188,12 @@ def test_from_records_bad_index_column(self): tm.assert_index_equal(df1.index, Index(df.C)) # should fail - msg = r"Shape of 
passed values is \(10, 3\), indices imply \(1, 3\)" + msg = "|".join( + [ + r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)", + "Passed arrays should have the same length as the rows Index: 10 vs 1", + ] + ) with pytest.raises(ValueError, match=msg): DataFrame.from_records(df, index=[2]) with pytest.raises(KeyError, match=r"^2$"): @@ -208,6 +217,7 @@ def __iter__(self): expected = DataFrame.from_records(tups) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) empty from_records def test_from_records_len0_with_columns(self): # GH#2633 result = DataFrame.from_records([], index="foo", columns=["foo", "bar"]) @@ -259,7 +269,12 @@ def test_from_records_to_records(self): tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) # wrong length - msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + msg = "|".join( + [ + r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)", + "Passed arrays should have the same length as the rows Index: 2 vs 1", + ] + ) with pytest.raises(ValueError, match=msg): DataFrame.from_records(arr, index=index[:-1]) @@ -386,6 +401,7 @@ def create_dict(order_id): result = DataFrame.from_records(documents, index=["order_id", "quantity"]) assert result.index.names == ("order_id", "quantity") + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) empty from_records def test_from_records_misc_brokenness(self): # GH#2179 @@ -424,6 +440,7 @@ def test_from_records_misc_brokenness(self): ) tm.assert_series_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) empty from_records def test_from_records_empty(self): # GH#3562 result = DataFrame.from_records([], columns=["a", "b", "c"]) @@ -437,11 +454,11 @@ def test_from_records_empty(self): def test_from_records_empty_with_nonempty_fields_gh3682(self): a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)]) df = DataFrame.from_records(a, index="id") - tm.assert_index_equal(df.index, Index([1], name="id")) - assert df.index.name == "id" - tm.assert_index_equal(df.columns, Index(["value"])) - - b = np.array([], dtype=[("id", np.int64), ("value", np.int64)]) - df = DataFrame.from_records(b, index="id") - tm.assert_index_equal(df.index, Index([], name="id")) - assert df.index.name == "id" + + ex_index = Int64Index([1], name="id") + expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"]) + tm.assert_frame_equal(df, expected) + + b = a[:0] + df2 = DataFrame.from_records(b, index="id") + tm.assert_frame_equal(df2, df.iloc[:0]) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index f2edfed019bdb..9d61be5887b7e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -421,19 +421,26 @@ def test_setitem_intervals(self): # B & D end up as Categoricals - # the remainer are converted to in-line objects - # contining an IntervalIndex.values + # the remainder are converted to in-line objects + # containing an IntervalIndex.values df["B"] = ser df["C"] = np.array(ser) df["D"] = ser.values df["E"] = np.array(ser.values) + df["F"] = ser.astype(object) assert is_categorical_dtype(df["B"].dtype) assert is_interval_dtype(df["B"].cat.categories) assert is_categorical_dtype(df["D"].dtype) assert is_interval_dtype(df["D"].cat.categories) - assert is_object_dtype(df["C"]) - assert is_object_dtype(df["E"]) + # These go through the Series constructor and so get inferred back + # to IntervalDtype + assert 
is_interval_dtype(df["C"]) + assert is_interval_dtype(df["E"]) + + # But the Series constructor doesn't do inference on Series objects, + # so setting df["F"] doesn't get cast back to IntervalDtype + assert is_object_dtype(df["F"]) # they compare equal as Index # when converted to numpy objects diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 8c11f659e8454..161fe7990a327 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( Categorical, @@ -92,7 +90,6 @@ def test_astype_mixed_type(self, mixed_type_frame): casted = mn.astype("O") _check_cast(casted, "object") - @td.skip_array_manager_not_yet_implemented def test_astype_with_exclude_string(self, float_frame): df = float_frame.copy() expected = float_frame.astype(int) @@ -127,7 +124,6 @@ def test_astype_with_view_mixed_float(self, mixed_float_frame): casted = tf.astype(np.int64) casted = tf.astype(np.float32) # noqa - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("dtype", [np.int32, np.int64]) @pytest.mark.parametrize("val", [np.nan, np.inf]) def test_astype_cast_nan_inf_int(self, val, dtype): @@ -386,7 +382,6 @@ def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_to_datetime_unit(self, unit): # tests all units from datetime origination @@ -411,7 +406,6 @@ def test_astype_to_timedelta_unit_ns(self, unit): tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"]) def test_astype_to_timedelta_unit(self, unit): # coerce to float @@ -441,7 +435,6 @@ def test_astype_to_incorrect_datetimelike(self, unit): with pytest.raises(TypeError, match=msg): df.astype(dtype) - @td.skip_array_manager_not_yet_implemented def test_astype_arg_for_errors(self): # GH#14878 @@ -570,7 +563,6 @@ def test_astype_empty_dtype_dict(self): tm.assert_frame_equal(result, df) assert result is not df - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) ignore keyword @pytest.mark.parametrize( "df", [ diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 58016be82c405..564481d01abc8 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -265,12 +265,13 @@ def test_fillna_dtype_conversion(self): expected = DataFrame("nan", index=range(3), columns=["A", "B"]) tm.assert_frame_equal(result, expected) - # equiv of replace + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) object upcasting + @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0]) + def test_fillna_dtype_conversion_equiv_replace(self, val): df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]}) - for v in ["", 1, np.nan, 1.0]: - expected = df.replace(np.nan, v) - result = df.fillna(v) - tm.assert_frame_equal(result, expected) + expected = df.replace(np.nan, val) + result = df.fillna(val) + tm.assert_frame_equal(result, expected) @td.skip_array_manager_invalid_test def test_fillna_datetime_columns(self): diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 677d862dfe077..462d588aff58f 100644 --- 
a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -170,6 +170,7 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem copy/view def test_rename_nocopy(self, float_frame): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) renamed["foo"] = 1.0 diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9ae5bb151b685..6d1e90e2f9646 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -783,6 +783,8 @@ def test_replace_mixed(self, float_string_frame): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + def test_replace_mixed_int_block_upcasting(self): + # int block upcasting df = DataFrame( { @@ -803,6 +805,8 @@ def test_replace_mixed(self, float_string_frame): assert return_value is None tm.assert_frame_equal(df, expected) + def test_replace_mixed_int_block_splitting(self): + # int block splitting df = DataFrame( { @@ -821,6 +825,8 @@ def test_replace_mixed(self, float_string_frame): result = df.replace(0, 0.5) tm.assert_frame_equal(result, expected) + def test_replace_mixed2(self): + # to object block upcasting df = DataFrame( { @@ -846,6 +852,7 @@ def test_replace_mixed(self, float_string_frame): result = df.replace([1, 2], ["foo", "bar"]) tm.assert_frame_equal(result, expected) + def test_replace_mixed3(self): # test case from df = DataFrame( {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")} diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py index 221296bfd6d76..5fa60b55f4e21 100644 --- a/pandas/tests/frame/methods/test_sort_index.py +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -761,6 +761,23 @@ def test_sort_index_with_categories(self, categories): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "ascending", + [ + None, + [True, None], + [False, "True"], + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + # GH 39434 + df = DataFrame(np.arange(64)) + length = len(df.index) + df.index = [(i - length / 2) % length for i in range(length)] + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + df.sort_index(axis=0, ascending=ascending, na_position="first") + class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index afc7ccb516c7f..4342f1960f178 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -18,6 +18,7 @@ import pytz from pandas.compat import np_version_under1p19 +import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer_dtype from pandas.core.dtypes.dtypes import ( @@ -163,7 +164,12 @@ def test_constructor_cast_failure(self): df["foo"] = np.ones((4, 2)).tolist() # this is not ok - msg = "Wrong number of items passed 2, placement implies 1" + msg = "|".join( + [ + "Wrong number of items passed 2, placement implies 1", + "Expected a 1D array, got an array with shape \\(4, 2\\)", + ] + ) with pytest.raises(ValueError, match=msg): df["test"] = np.ones((4, 2)) @@ -178,12 +184,15 @@ def test_constructor_dtype_copy(self): 
new_df["col1"] = 200.0 assert orig_df["col1"][0] == 1.0 - def test_constructor_dtype_nocast_view(self): + def test_constructor_dtype_nocast_view_dataframe(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) should_be_view[0][0] = 99 assert df.values[0, 0] == 99 + @td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array? + def test_constructor_dtype_nocast_view_2d_array(self): + df = DataFrame([[1, 2]]) should_be_view = DataFrame(df.values, dtype=df[0].dtype) should_be_view[0][0] = 97 assert df.values[0, 0] == 97 @@ -279,6 +288,7 @@ def test_constructor_rec(self, float_frame): tm.assert_index_equal(df2.columns, Index(rec.dtype.names)) tm.assert_index_equal(df2.index, index) + # case with columns != the ones we would infer from the data rng = np.arange(len(rec))[::-1] df3 = DataFrame(rec, index=rng, columns=["C", "B"]) expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"]) @@ -376,15 +386,18 @@ def test_constructor_dict(self): with pytest.raises(ValueError, match=msg): DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + def test_constructor_dict_length1(self): # Length-one dict micro-optimization frame = DataFrame({"A": {"1": 1, "2": 2}}) tm.assert_index_equal(frame.index, Index(["1", "2"])) + def test_constructor_dict_with_index(self): # empty dict plus index idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx) assert frame.index is idx + def test_constructor_dict_with_index_and_columns(self): # empty dict with index and columns idx = Index([0, 1, 2]) frame = DataFrame({}, index=idx, columns=idx) @@ -392,10 +405,12 @@ def test_constructor_dict(self): assert frame.columns is idx assert len(frame._series) == 3 + def test_constructor_dict_of_empty_lists(self): # with dict of empty list and Series frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) tm.assert_index_equal(frame.index, RangeIndex(0), exact=True) + def test_constructor_dict_with_none(self): # GH 14381 # Dict with None value frame_none = DataFrame({"a": None}, index=[0]) @@ -404,6 +419,7 @@ def test_constructor_dict(self): assert frame_none_list._get_value(0, "a") is None tm.assert_frame_equal(frame_none, frame_none_list) + def test_constructor_dict_errors(self): # GH10856 # dict with scalar values should raise error, even if columns passed msg = "If using all scalar values, you must pass an index" @@ -559,7 +575,7 @@ def test_constructor_error_msgs(self): with pytest.raises(ValueError, match=msg): DataFrame({"a": False, "b": True}) - def test_constructor_subclass_dict(self, float_frame, dict_subclass): + def test_constructor_subclass_dict(self, dict_subclass): # Test for passing dict subclass to constructor data = { "col1": dict_subclass((x, 10.0 * x) for x in range(10)), @@ -573,6 +589,7 @@ def test_constructor_subclass_dict(self, float_frame, dict_subclass): df = DataFrame(data) tm.assert_frame_equal(refdf, df) + def test_constructor_defaultdict(self, float_frame): # try with defaultdict from collections import defaultdict @@ -607,6 +624,7 @@ def test_constructor_dict_cast(self): assert frame["B"].dtype == np.object_ assert frame["A"].dtype == np.float64 + def test_constructor_dict_cast2(self): # can't cast to float test_data = { "A": dict(zip(range(20), tm.makeStringIndex(20))), @@ -622,6 +640,7 @@ def test_constructor_dict_dont_upcast(self): df = DataFrame(d) assert isinstance(df["Col1"]["Row2"], float) + def test_constructor_dict_dont_upcast2(self): dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) assert isinstance(dm[1][1], 
int) @@ -1100,7 +1119,8 @@ def test_constructor_more(self, float_frame): # can't cast mat = np.array(["foo", "bar"], dtype=object).reshape(2, 1) - with pytest.raises(ValueError, match="cast"): + msg = "could not convert string to float: 'foo'" + with pytest.raises(ValueError, match=msg): DataFrame(mat, index=[0, 1], columns=[0], dtype=float) dm = DataFrame(DataFrame(float_frame._series)) @@ -1168,7 +1188,8 @@ def test_constructor_unequal_length_nested_list_column(self): # GH 32173 arrays = [list("abcd"), list("cde")] - msg = "Length of columns passed for MultiIndex columns is different" + # exception raised inside MultiIndex constructor + msg = "all arrays must be same length" with pytest.raises(ValueError, match=msg): DataFrame([[1, 2, 3, 4], [4, 5, 6, 7]], columns=arrays) @@ -1192,6 +1213,7 @@ def __len__(self, n): expected = DataFrame([[1, "a"], [2, "b"]], columns=columns) tm.assert_frame_equal(result, expected, check_dtype=False) + def test_constructor_stdlib_array(self): # GH 4297 # support Array import array @@ -1721,12 +1743,15 @@ def test_constructor_with_datetimes(self): ) tm.assert_series_equal(result, expected) + def test_constructor_with_datetimes1(self): + # GH 2809 ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] datetime_s = Series(datetimes) assert datetime_s.dtype == "M8[ns]" + def test_constructor_with_datetimes2(self): # GH 2810 ind = date_range(start="2000-01-01", freq="D", periods=10) datetimes = [ts.to_pydatetime() for ts in ind] @@ -1740,6 +1765,7 @@ def test_constructor_with_datetimes(self): ) tm.assert_series_equal(result, expected) + def test_constructor_with_datetimes3(self): # GH 7594 # don't coerce tz-aware tz = pytz.timezone("US/Eastern") @@ -1757,6 +1783,7 @@ def test_constructor_with_datetimes(self): df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) ) + def test_constructor_with_datetimes4(self): # tz-aware (UTC and other tz's) # GH 8411 dr = date_range("20130101", periods=3) @@ -1769,6 +1796,7 @@ def test_constructor_with_datetimes(self): df = DataFrame({"value": dr}) assert str(df.iat[0, 0].tz) == "US/Eastern" + def test_constructor_with_datetimes5(self): # GH 7822 # preserve an index with a tz on dict construction i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") @@ -1781,7 +1809,9 @@ def test_constructor_with_datetimes(self): df = DataFrame({"a": i}) tm.assert_frame_equal(df, expected) + def test_constructor_with_datetimes6(self): # multiples + i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") i_no_tz = date_range("1/1/2011", periods=5, freq="10s") df = DataFrame({"a": i, "b": i_no_tz}) expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz}) @@ -1935,6 +1965,8 @@ def test_constructor_frame_copy(self, float_frame): assert (cop["A"] == 5).all() assert not (float_frame["A"] == 5).all() + # TODO(ArrayManager) keep view on 2D array? + @td.skip_array_manager_not_yet_implemented def test_constructor_ndarray_copy(self, float_frame): df = DataFrame(float_frame.values) @@ -1945,6 +1977,8 @@ def test_constructor_ndarray_copy(self, float_frame): float_frame.values[6] = 6 assert not (df.values[6] == 6).all() + # TODO(ArrayManager) keep view on Series? 
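+ # as with the 2D-array case above, skipped until it is decided whether ArrayManager should keep a view on Series input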
+ @td.skip_array_manager_not_yet_implemented def test_constructor_series_copy(self, float_frame): series = float_frame._series @@ -2058,17 +2092,15 @@ def test_from_nested_listlike_mixed_types(self): def test_construct_from_listlikes_mismatched_lengths(self): # invalid (shape) - msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)" + msg = "|".join( + [ + r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)", + "Passed arrays should have the same length as the rows Index", + ] + ) with pytest.raises(ValueError, match=msg): DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) - def test_categorical_1d_only(self): - # TODO: belongs in Categorical tests - # ndim > 1 - msg = "> 1 ndim Categorical are not supported at this time" - with pytest.raises(NotImplementedError, match=msg): - Categorical(np.array([list("abcd")])) - def test_constructor_categorical_series(self): items = [1, 2, 3, 1] @@ -2110,6 +2142,8 @@ def test_check_dtype_empty_numeric_column(self, dtype): assert data.b.dtype == dtype + # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype + @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize( "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES ) @@ -2213,7 +2247,8 @@ class DatetimeSubclass(datetime): def test_with_mismatched_index_length_raises(self): # GH#33437 dti = date_range("2016-01-01", periods=3, tz="US/Pacific") - with pytest.raises(ValueError, match="Shape of passed values"): + msg = "Shape of passed values|Passed arrays should have the same length" + with pytest.raises(ValueError, match=msg): DataFrame(dti, index=range(4)) def test_frame_ctor_datetime64_column(self): @@ -2423,11 +2458,16 @@ def test_from_2d_ndarray_with_dtype(self): expected = DataFrame(array_dim2).astype("datetime64[ns, UTC]") tm.assert_frame_equal(df, expected) - def test_construction_from_set_raises(self): + @pytest.mark.parametrize("typ", [set, frozenset]) + def test_construction_from_set_raises(self, typ): # https://github.com/pandas-dev/pandas/issues/32582 - msg = "Set type is unordered" + values = typ({1, 2, 3}) + msg = f"'{typ.__name__}' type is unordered" + with pytest.raises(TypeError, match=msg): + DataFrame({"a": values}) + with pytest.raises(TypeError, match=msg): - DataFrame({"a": {1, 2, 3}}) + Series(values) def get1(obj): diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index de8335738791d..cc036bb484ff9 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -8,6 +8,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( DataFrame, Index, @@ -355,7 +357,8 @@ def test_groupby_function_rename(mframe): "cummax", "cummin", "cumprod", - "describe", + # TODO(ArrayManager) quantile + pytest.param("describe", marks=td.skip_array_manager_not_yet_implemented), "rank", "quantile", "diff", diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 639fe308529dc..79ec0af267234 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -7,6 +7,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -84,6 +86,7 @@ def test_apply_trivial_fail(): tm.assert_frame_equal(result, expected) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fast_apply not used def test_fast_apply(): # make sure that fast apply is correctly 
called # rather than raising any kind of error @@ -110,7 +113,7 @@ def f(g): splitter = grouper._get_splitter(g._selected_obj, axis=g.axis) group_keys = grouper._get_group_keys() - sdata = splitter._get_sorted_data() + sdata = splitter.sorted_data values, mutated = splitter.fast_apply(f, sdata, group_keys) @@ -213,6 +216,7 @@ def test_group_apply_once_per_group2(capsys): assert result == expected +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fast_apply not used @pytest.mark.xfail(reason="GH-34998") def test_apply_fast_slow_identical(): # GH 31613 @@ -233,6 +237,7 @@ def fast(group): tm.assert_frame_equal(fast_df, slow_df) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) fast_apply not used @pytest.mark.parametrize( "func", [ @@ -313,6 +318,7 @@ def test_groupby_as_index_apply(df): tm.assert_index_equal(res, ind) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_apply_concat_preserve_names(three_group): grouped = three_group.groupby(["A", "B"]) @@ -1003,9 +1009,10 @@ def test_apply_function_with_indexing_return_column(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(reason="GH-34998") -def test_apply_with_timezones_aware(): +def test_apply_with_timezones_aware(using_array_manager, request): # GH: 27212 + if not using_array_manager: + request.node.add_marker(pytest.mark.xfail(reason="GH-34998")) dates = ["2001-01-01"] * 2 + ["2001-01-02"] * 2 + ["2001-01-03"] * 2 index_no_tz = pd.DatetimeIndex(dates) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index f0356ad90a3ff..a7247c2c04761 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Categorical, @@ -81,6 +83,7 @@ def get_stats(group): assert result.index.names[0] == "C" +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_basic(): cats = Categorical( @@ -276,7 +279,9 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) -def test_observed(observed): +# TODO(ArrayManager) incorrect dtype for mean() +@td.skip_array_manager_not_yet_implemented +def test_observed(observed, using_array_manager): # multiple groupers, don't re-expand the output space # of the grouper # gh-14942 (implement) @@ -535,6 +540,7 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): assert False, msg +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_datetime(): # GH9049: ensure backward compatibility levels = pd.date_range("2014-01-01", periods=4) @@ -600,6 +606,7 @@ def test_categorical_index(): tm.assert_frame_equal(result, expected) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_describe_categorical_columns(): # GH 11558 cats = CategoricalIndex( @@ -614,6 +621,7 @@ def test_describe_categorical_columns(): tm.assert_categorical_equal(result.stack().columns.values, cats.values) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_unstack_categorical(): # GH11558 (example is taken from the original issue) df = DataFrame( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index cab5417e81445..598465a951e0f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -367,6 +367,7 @@ def test_mad(self, gb, gni): result = 
gni.mad() tm.assert_frame_equal(result, expected) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_describe(self, df, gb, gni): # describe expected_index = Index([1, 3], name="A") @@ -923,11 +924,13 @@ def test_is_monotonic_decreasing(in_vals, out_vals): # -------------------------------- +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_apply_describe_bug(mframe): grouped = mframe.groupby(level="first") grouped.describe() # it works! +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_series_describe_multikey(): ts = tm.makeTimeSeries() grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) @@ -937,6 +940,7 @@ def test_series_describe_multikey(): tm.assert_series_equal(result["min"], grouped.min(), check_names=False) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_series_describe_single(): ts = tm.makeTimeSeries() grouped = ts.groupby(lambda x: x.month) @@ -951,6 +955,7 @@ def test_series_index_name(df): assert result.index.name == "A" +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_frame_describe_multikey(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() @@ -973,6 +978,7 @@ def test_frame_describe_multikey(tsframe): tm.assert_frame_equal(result, expected) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_frame_describe_tupleindex(): # GH 14848 - regression from 0.19.0 to 0.19.1 @@ -992,6 +998,7 @@ def test_frame_describe_tupleindex(): df2.groupby("key").describe() +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_frame_describe_unstacked_format(): # GH 4792 prices = { @@ -1018,6 +1025,7 @@ def test_frame_describe_unstacked_format(): tm.assert_frame_equal(result, expected) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile @pytest.mark.filterwarnings( "ignore:" "indexing past lexsort depth may impact performance:" diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index afde1daca74c1..8cbb9d2443cb2 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -7,6 +7,7 @@ from pandas.compat import IS64 from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -210,6 +211,7 @@ def f(grp): tm.assert_series_equal(result, e) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_pass_args_kwargs(ts, tsframe): def f(x, q=None, axis=0): return np.percentile(x, q, axis=axis) @@ -364,6 +366,7 @@ def f3(x): df2.groupby("a").apply(f3) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_attr_wrapper(ts): grouped = ts.groupby(lambda x: x.weekday()) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 9c9d1aa881890..2924348e98b56 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( DataFrame, @@ -8,6 +10,9 @@ ) import pandas._testing as tm +# TODO(ArrayManager) quantile +pytestmark = td.skip_array_manager_not_yet_implemented + @pytest.mark.parametrize( "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] diff --git a/pandas/tests/groupby/test_sample.py 
b/pandas/tests/groupby/test_sample.py index 4b8b0173789ae..652a5fc1a3c34 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -132,3 +132,13 @@ def test_groupby_sample_with_weights(index, expected_index): result = df.groupby("a")["b"].sample(n=2, replace=True, weights=[1, 0, 1, 0]) expected = Series(values, name="b", index=Index(expected_index)) tm.assert_series_equal(result, expected) + + +def test_groupby_sample_with_selections(): + # GH 39928 + values = [1] * 10 + [2] * 10 + df = DataFrame({"a": values, "b": values, "c": values}) + + result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None) + expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 4956454ef2d4f..c4621d5fc0f8c 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -4,6 +4,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import ( ensure_platform_int, is_timedelta64_dtype, @@ -161,8 +163,13 @@ def test_transform_broadcast(tsframe, ts): assert_fp_equal(res.xs(idx), agged[idx]) -def test_transform_axis_1(request, transformation_func): +def test_transform_axis_1(request, transformation_func, using_array_manager): # GH 36308 + if using_array_manager and transformation_func == "pct_change": + # TODO(ArrayManager) column-wise shift + request.node.add_marker( + pytest.mark.xfail(reason="ArrayManager: shift axis=1 not yet implemented") + ) warn = None if transformation_func == "tshift": warn = FutureWarning @@ -183,6 +190,8 @@ def test_transform_axis_1(request, transformation_func): tm.assert_equal(result, expected) +# TODO(ArrayManager) groupby().transform returns DataFrame backed by BlockManager +@td.skip_array_manager_not_yet_implemented def test_transform_axis_ts(tsframe): # make sure that we are setting the axes diff --git a/pandas/tests/indexes/categorical/test_append.py b/pandas/tests/indexes/categorical/test_append.py new file mode 100644 index 0000000000000..b48c3219f5111 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_append.py @@ -0,0 +1,62 @@ +import pytest + +from pandas import ( + CategoricalIndex, + Index, +) +import pandas._testing as tm + + +class TestAppend: + @pytest.fixture + def ci(self): + categories = list("cab") + return CategoricalIndex(list("aabbca"), categories=categories, ordered=False) + + def test_append(self, ci): + # append cats with the same categories + result = ci[:3].append(ci[3:]) + tm.assert_index_equal(result, ci, exact=True) + + foos = [ci[:1], ci[1:3], ci[3:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, ci, exact=True) + + def test_append_empty(self, ci): + # empty + result = ci.append([]) + tm.assert_index_equal(result, ci, exact=True) + + def test_append_mismatched_categories(self, ci): + # appending with different categories or reordered is not ok + msg = "all inputs must be Index" + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.set_categories(list("abcd"))) + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.reorder_categories(list("abc"))) + + def test_append_category_objects(self, ci): + # with objects + result = ci.append(Index(["c", "a"])) + expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories) + tm.assert_index_equal(result, expected, exact=True) + + def 
test_append_non_categories(self, ci): + # invalid objects -> cast to object via concat_compat + result = ci.append(Index(["a", "d"])) + expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"]) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_object(self, ci): + # GH#14298 - if base object is not categorical -> coerce to object + result = Index(["c", "a"]).append(ci) + expected = Index(list("caaabbca")) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_to_another(self): + # hits Index._concat + fst = Index(["a", "b"]) + snd = CategoricalIndex(["d", "e"]) + result = fst.append(snd) + expected = Index(["a", "b", "d", "e"]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 8c9caf2e59011..d3c9b02b3ba23 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -30,53 +30,6 @@ def test_can_hold_identifiers(self): key = idx[0] assert idx._can_hold_identifiers_and_holds_name(key) is True - def test_append(self): - - ci = self.create_index() - categories = ci.categories - - # append cats with the same categories - result = ci[:3].append(ci[3:]) - tm.assert_index_equal(result, ci, exact=True) - - foos = [ci[:1], ci[1:3], ci[3:]] - result = foos[0].append(foos[1:]) - tm.assert_index_equal(result, ci, exact=True) - - # empty - result = ci.append([]) - tm.assert_index_equal(result, ci, exact=True) - - # appending with different categories or reordered is not ok - msg = "all inputs must be Index" - with pytest.raises(TypeError, match=msg): - ci.append(ci.values.set_categories(list("abcd"))) - with pytest.raises(TypeError, match=msg): - ci.append(ci.values.reorder_categories(list("abc"))) - - # with objects - result = ci.append(Index(["c", "a"])) - expected = CategoricalIndex(list("aabbcaca"), categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - # invalid objects -> cast to object via concat_compat - result = ci.append(Index(["a", "d"])) - expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"]) - tm.assert_index_equal(result, expected, exact=True) - - # GH14298 - if base object is not categorical -> coerce to object - result = Index(["c", "a"]).append(ci) - expected = Index(list("caaabbca")) - tm.assert_index_equal(result, expected, exact=True) - - def test_append_to_another(self): - # hits Index._concat - fst = Index(["a", "b"]) - snd = CategoricalIndex(["d", "e"]) - result = fst.append(snd) - expected = Index(["a", "b", "d", "e"]) - tm.assert_index_equal(result, expected) - def test_insert(self): ci = self.create_index() @@ -97,10 +50,10 @@ def test_insert(self): expected = CategoricalIndex(["a"], categories=categories) tm.assert_index_equal(result, expected, exact=True) - # invalid - msg = "'fill_value=d' is not present in this Categorical's categories" - with pytest.raises(TypeError, match=msg): - ci.insert(0, "d") + # invalid -> cast to object + expected = ci.astype(object).insert(0, "d") + result = ci.insert(0, "d") + tm.assert_index_equal(result, expected, exact=True) # GH 18295 (test missing) expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"]) @@ -110,9 +63,9 @@ def test_insert(self): def test_insert_na_mismatched_dtype(self): ci = CategoricalIndex([0, 1, 1]) - msg = "'fill_value=NaT' is not present in this Categorical's categories" - with pytest.raises(TypeError, match=msg): - ci.insert(0, pd.NaT) + result = ci.insert(0, pd.NaT) + 
diff --git a/pandas/tests/indexes/categorical/test_formats.py b/pandas/tests/indexes/categorical/test_formats.py
index 0f1cb55b9811c..2009d78e47c1c 100644
--- a/pandas/tests/indexes/categorical/test_formats.py
+++ b/pandas/tests/indexes/categorical/test_formats.py
@@ -7,6 +7,12 @@
 
 
 class TestCategoricalIndexRepr:
+    def test_format_different_scalar_lengths(self):
+        # GH#35439
+        idx = CategoricalIndex(["aaaaaaaaa", "b"])
+        expected = ["aaaaaaaaa", "b"]
+        assert idx.format() == expected
+
     def test_string_categorical_index_repr(self):
         # short
         idx = CategoricalIndex(["a", "bb", "ccc"])
diff --git a/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py
new file mode 100644
index 0000000000000..c56fc84b540c0
--- /dev/null
+++ b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py
@@ -0,0 +1,80 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    PeriodIndex,
+    Series,
+    date_range,
+    period_range,
+    timedelta_range,
+)
+import pandas._testing as tm
+
+
+class DropDuplicates:
+    def test_drop_duplicates_metadata(self, idx):
+        # GH#10115
+        result = idx.drop_duplicates()
+        tm.assert_index_equal(idx, result)
+        assert idx.freq == result.freq
+
+        idx_dup = idx.append(idx)
+        result = idx_dup.drop_duplicates()
+
+        expected = idx
+        if not isinstance(idx, PeriodIndex):
+            # freq is reset except for PeriodIndex
+            assert idx_dup.freq is None
+            assert result.freq is None
+            expected = idx._with_freq(None)
+        else:
+            assert result.freq == expected.freq
+
+        tm.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "keep, expected, index",
+        [
+            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
+            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
+            (
+                False,
+                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
+                np.arange(5, 10),
+            ),
+        ],
+    )
+    def test_drop_duplicates(self, keep, expected, index, idx):
+        # to check Index/Series compat
+        idx = idx.append(idx[:5])
+
+        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
+        expected = idx[~expected]
+
+        result = idx.drop_duplicates(keep=keep)
+        tm.assert_index_equal(result, expected)
+
+        result = Series(idx).drop_duplicates(keep=keep)
+        tm.assert_series_equal(result, Series(expected, index=index))
+
+
+class TestDropDuplicatesPeriodIndex(DropDuplicates):
+    @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
+    def freq(self, request):
+        return request.param
+
+    @pytest.fixture
+    def idx(self, freq):
+        return period_range("2011-01-01", periods=10, freq=freq, name="idx")
+
+
+class TestDropDuplicatesDatetimeIndex(DropDuplicates):
+    @pytest.fixture
+    def idx(self, freq_sample):
+        return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
+
+
+class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
+    @pytest.fixture
+    def idx(self, freq_sample):
+        return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py
new file mode 100644
index 0000000000000..b4a72ec65bd91
--- /dev/null
+++ b/pandas/tests/indexes/datetimelike_/test_nat.py
@@ -0,0 +1,54 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    DatetimeIndex,
+    NaT,
+    PeriodIndex,
+    TimedeltaIndex,
+)
+import pandas._testing as tm
+
+
+class NATests:
+    def test_nat(self, index_without_na):
+        empty_index = index_without_na[:0]
+
+        index_with_na = index_without_na.copy(deep=True)
+        index_with_na._data[1] = NaT
+
+        assert type(index_without_na)._na_value is NaT
+        assert empty_index._na_value is NaT
+        assert index_with_na._na_value is NaT
+        assert index_without_na._na_value is NaT
+
+        idx = index_without_na
+        assert idx._can_hold_na
+
+        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
+        assert idx.hasnans is False
+
+        idx = index_with_na
+        assert idx._can_hold_na
+
+        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
+        assert idx.hasnans is True
+
+
+class TestDatetimeIndexNA(NATests):
+    @pytest.fixture
+    def index_without_na(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
+
+
+class TestTimedeltaIndexNA(NATests):
+    @pytest.fixture
+    def index_without_na(self):
+        return TimedeltaIndex(["1 days", "2 days"])
+
+
+class TestPeriodIndexNA(NATests):
+    @pytest.fixture
+    def index_without_na(self):
+        return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
diff --git a/pandas/tests/indexes/datetimelike_/test_sort_values.py b/pandas/tests/indexes/datetimelike_/test_sort_values.py
new file mode 100644
index 0000000000000..ad9c5ca848615
--- /dev/null
+++ b/pandas/tests/indexes/datetimelike_/test_sort_values.py
@@ -0,0 +1,317 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    DatetimeIndex,
+    Index,
+    NaT,
+    PeriodIndex,
+    TimedeltaIndex,
+    timedelta_range,
+)
+import pandas._testing as tm
+
+
+def check_freq_ascending(ordered, orig, ascending):
+    """
+    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
+    when the original index is generated (or generate-able) with
+    period_range/date_range/timedelta_range.
+    """
+    if isinstance(ordered, PeriodIndex):
+        assert ordered.freq == orig.freq
+    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
+        if ascending:
+            assert ordered.freq.n == orig.freq.n
+        else:
+            assert ordered.freq.n == -1 * orig.freq.n
+
+
+def check_freq_nonmonotonic(ordered, orig):
+    """
+    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
+    when the original index is _not_ generated (or generate-able) with
+    period_range/date_range/timedelta_range.
+ """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + assert ordered.freq is None + + +class TestSortValues: + @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex]) + def non_monotonic_idx(self, request): + if request.param is DatetimeIndex: + return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + elif request.param is PeriodIndex: + dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + return dti.to_period("D") + else: + return TimedeltaIndex( + ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"] + ) + + def test_argmin_argmax(self, non_monotonic_idx): + assert non_monotonic_idx.argmin() == 1 + assert non_monotonic_idx.argmax() == 0 + + def test_sort_values(self, non_monotonic_idx): + idx = non_monotonic_idx + ordered = idx.sort_values() + assert ordered.is_monotonic + + ordered = idx.sort_values(ascending=False) + assert ordered[::-1].is_monotonic + + ordered, dexer = idx.sort_values(return_indexer=True) + assert ordered.is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + assert ordered[::-1].is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) + + def check_sort_values_with_freq(self, idx): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + check_freq_ascending(ordered, idx, True) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + check_freq_ascending(ordered, idx, False) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp)) + check_freq_ascending(ordered, idx, True) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp)) + check_freq_ascending(ordered, idx, False) + + @pytest.mark.parametrize("freq", ["D", "H"]) + def test_sort_values_with_freq_timedeltaindex(self, freq): + # GH#10295 + idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx") + + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" + ), + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + name="tzidx", + tz="Asia/Tokyo", + ), + ], + ) + def test_sort_values_with_freq_datetimeindex(self, idx): + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize("freq", ["D", "2D", "4D"]) + def test_sort_values_with_freq_periodindex(self, freq): + # here with_freq refers to being period_range-like + idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" + ) + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A"), + Index([2011, 2012, 2013], name="idx"), # for compatibility check + ], + ) + def test_sort_values_with_freq_periodindex2(self, idx): + # here with_freq indicates this is period_range-like + self.check_sort_values_with_freq(idx) + + def check_sort_values_without_freq(self, idx, expected): + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + 
check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 0, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + def test_sort_values_without_freq_timedeltaindex(self): + # GH#10295 + + idx = TimedeltaIndex( + ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" + ) + expected = TimedeltaIndex( + ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" + ) + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "index_dates,expected_dates", + [ + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ], + ) + def test_sort_values_without_freq_datetimeindex( + self, index_dates, expected_dates, tz_naive_fixture + ): + tz = tz_naive_fixture + + # without freq + idx = DatetimeIndex(index_dates, tz=tz, name="idx") + expected = DatetimeIndex(expected_dates, tz=tz, name="idx") + + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "idx,expected", + [ + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx1", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx1", + ), + ), + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx2", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx2", + ), + ), + ( + PeriodIndex( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + freq="D", + name="idx3", + ), + PeriodIndex( + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx3", + ), + ), + ( + PeriodIndex( + ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" + ), + PeriodIndex( + ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" + ), + ), + ( + # For compatibility check + Index([2011, 2013, 2015, 2012, 2011], name="idx"), + Index([2011, 2011, 2012, 2013, 2015], name="idx"), + ), + ], + ) + def test_sort_values_without_freq_periodindex(self, idx, expected): + # here without_freq means not generateable by period_range + 
self.check_sort_values_without_freq(idx, expected)
+
+    def test_sort_values_without_freq_periodindex_nat(self):
+        # doesn't quite fit into check_sort_values_without_freq
+        idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
+        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
+
+        ordered = idx.sort_values(na_position="first")
+        tm.assert_index_equal(ordered, expected)
+        check_freq_nonmonotonic(ordered, idx)
+
+        ordered = idx.sort_values(ascending=False)
+        tm.assert_index_equal(ordered, expected[::-1])
+        check_freq_nonmonotonic(ordered, idx)
+
+
+def test_order_stability_compat():
+    # GH#35922. sort_values is stable both for normal and datetime-like Index
+    pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A")
+    iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
+    ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False)
+    ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False)
+    tm.assert_numpy_array_equal(indexer1, indexer2)
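The freq expectations that check_freq_ascending/check_freq_nonmonotonic encode, shown doctest-style (illustrative sketch, not part of the patch):

    >>> import pandas as pd
    >>> dti = pd.date_range("2011-01-01", periods=3, freq="D")
    >>> dti.sort_values(ascending=False).freq  # range-generated: freq flips sign
    <-1 * Day>
    >>> pd.DatetimeIndex(["2011-01-03", "2011-01-01", "2011-01-02"]).sort_values().freq is None
    True
    >>> pd.PeriodIndex(["2011-02", "2011-01"], freq="M").sort_values().freq  # PeriodIndex keeps freq
    <MonthEnd>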
diff --git a/pandas/tests/indexes/datetimelike_/test_value_counts.py b/pandas/tests/indexes/datetimelike_/test_value_counts.py
new file mode 100644
index 0000000000000..f0df6dd678ef5
--- /dev/null
+++ b/pandas/tests/indexes/datetimelike_/test_value_counts.py
@@ -0,0 +1,103 @@
+import numpy as np
+
+from pandas import (
+    DatetimeIndex,
+    NaT,
+    PeriodIndex,
+    Series,
+    TimedeltaIndex,
+    date_range,
+    period_range,
+    timedelta_range,
+)
+import pandas._testing as tm
+
+
+class TestValueCounts:
+    # GH#7735
+
+    def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        orig = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
+        self._check_value_counts_with_repeats(orig)
+
+    def test_value_counts_unique_timedeltaindex(self):
+        orig = timedelta_range("1 days 09:00:00", freq="H", periods=10)
+        self._check_value_counts_with_repeats(orig)
+
+    def test_value_counts_unique_periodindex(self):
+        orig = period_range("2011-01-01 09:00", freq="H", periods=10)
+        self._check_value_counts_with_repeats(orig)
+
+    def _check_value_counts_with_repeats(self, orig):
+        # create repeated values: the 'n'th element is repeated n+1 times
+        idx = type(orig)(
+            np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
+        )
+
+        exp_idx = orig[::-1]
+        if not isinstance(exp_idx, PeriodIndex):
+            exp_idx = exp_idx._with_freq(None)
+        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
+
+        for obj in [idx, Series(idx)]:
+            tm.assert_series_equal(obj.value_counts(), expected)
+
+        tm.assert_index_equal(idx.unique(), orig)
+
+    def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        idx = DatetimeIndex(
+            [
+                "2013-01-01 09:00",
+                "2013-01-01 09:00",
+                "2013-01-01 09:00",
+                "2013-01-01 08:00",
+                "2013-01-01 08:00",
+                NaT,
+            ],
+            tz=tz,
+        )
+        self._check_value_counts_dropna(idx)
+
+    def test_value_counts_unique_timedeltaindex2(self):
+        idx = TimedeltaIndex(
+            [
+                "1 days 09:00:00",
+                "1 days 09:00:00",
+                "1 days 09:00:00",
+                "1 days 08:00:00",
+                "1 days 08:00:00",
+                NaT,
+            ]
+        )
+        self._check_value_counts_dropna(idx)
+
+    def test_value_counts_unique_periodindex2(self):
+        idx = PeriodIndex(
+            [
+                "2013-01-01 09:00",
+                "2013-01-01 09:00",
+                "2013-01-01 09:00",
+                "2013-01-01 08:00",
+                "2013-01-01 08:00",
+                NaT,
+            ],
+            freq="H",
+        )
+        self._check_value_counts_dropna(idx)
+
+    def _check_value_counts_dropna(self, idx):
+        exp_idx = idx[[2, 3]]
+        expected = Series([3, 2], index=exp_idx)
+
+        for obj in [idx, Series(idx)]:
+            tm.assert_series_equal(obj.value_counts(), expected)
+
+        exp_idx = idx[[2, 3, -1]]
+        expected = Series([3, 2, 1], index=exp_idx)
+
+        for obj in [idx, Series(idx)]:
+            tm.assert_series_equal(obj.value_counts(dropna=False), expected)
+
+        tm.assert_index_equal(idx.unique(), exp_idx)
diff --git a/pandas/tests/indexes/datetimes/methods/test_repeat.py b/pandas/tests/indexes/datetimes/methods/test_repeat.py
new file mode 100644
index 0000000000000..81768622fd3d5
--- /dev/null
+++ b/pandas/tests/indexes/datetimes/methods/test_repeat.py
@@ -0,0 +1,78 @@
+import numpy as np
+import pytest
+
+from pandas import (
+    DatetimeIndex,
+    Timestamp,
+    date_range,
+)
+import pandas._testing as tm
+
+
+class TestRepeat:
+    def test_repeat_range(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        rng = date_range("1/1/2000", "1/1/2001")
+
+        result = rng.repeat(5)
+        assert result.freq is None
+        assert len(result) == 5 * len(rng)
+
+        index = date_range("2001-01-01", periods=2, freq="D", tz=tz)
+        exp = DatetimeIndex(
+            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
+        )
+        for res in [index.repeat(2), np.repeat(index, 2)]:
+            tm.assert_index_equal(res, exp)
+            assert res.freq is None
+
+        index = date_range("2001-01-01", periods=2, freq="2D", tz=tz)
+        exp = DatetimeIndex(
+            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
+        )
+        for res in [index.repeat(2), np.repeat(index, 2)]:
+            tm.assert_index_equal(res, exp)
+            assert res.freq is None
+
+        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
+        exp = DatetimeIndex(
+            [
+                "2001-01-01",
+                "2001-01-01",
+                "2001-01-01",
+                "NaT",
+                "NaT",
+                "NaT",
+                "2003-01-01",
+                "2003-01-01",
+                "2003-01-01",
+            ],
+            tz=tz,
+        )
+        for res in [index.repeat(3), np.repeat(index, 3)]:
+            tm.assert_index_equal(res, exp)
+            assert res.freq is None
+
+    def test_repeat(self, tz_naive_fixture):
+        tz = tz_naive_fixture
+        reps = 2
+        msg = "the 'axis' parameter is not supported"
+
+        rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)
+
+        expected_rng = DatetimeIndex(
+            [
+                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
+                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
+                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
+                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
+            ]
+        )
+
+        res = rng.repeat(reps)
+        tm.assert_index_equal(res, expected_rng)
+        assert res.freq is None
+
+        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
+        with pytest.raises(ValueError, match=msg):
+            np.repeat(rng, reps, axis=1)
diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
index e03de3c75704a..17b80fbc0afc2 100644
--- a/pandas/tests/indexes/datetimes/test_datetime.py
+++ b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -147,28 +147,6 @@ def test_string_index_series_name_converted(self):
         result = df.T["1/3/2000"]
         assert result.name == df.index[2]
 
-    def test_argmin_argmax(self):
-        idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
-        assert idx.argmin() == 1
-        assert idx.argmax() == 0
-
-    def test_sort_values(self):
-        idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
-
-        ordered = idx.sort_values()
-        assert ordered.is_monotonic
-
-        ordered = idx.sort_values(ascending=False)
-        assert ordered[::-1].is_monotonic
-
-        ordered, dexer = idx.sort_values(return_indexer=True)
-        assert ordered.is_monotonic
-        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))
-
-        ordered, dexer = idx.sort_values(return_indexer=True, 
ascending=False) - assert ordered[::-1].is_monotonic - tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) - def test_groupby_function_tuple_1677(self): df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) monthly_group = df.groupby(lambda x: (x.year, x.month)) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index c65d9098a86a4..d29d4647f4753 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -551,6 +551,13 @@ def test_get_loc_reasonable_key_error(self): with pytest.raises(KeyError, match="2000"): index.get_loc("1/1/2000") + def test_get_loc_year_str(self): + rng = date_range("1/1/2000", "1/1/2010") + + result = rng.get_loc("2009") + expected = slice(3288, 3653) + assert result == expected + class TestContains: def test_dti_contains_with_duplicates(self): diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index d230aa43e43d1..eff87a2b3f275 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -37,6 +37,8 @@ def test_range_edges(self): ) tm.assert_index_equal(idx, exp) + def test_range_edges2(self): + idx = date_range( start=Timestamp("1970-01-01 00:00:00.000000004"), end=Timestamp("1970-01-01 00:00:00.000000001"), @@ -45,6 +47,8 @@ def test_range_edges(self): exp = DatetimeIndex([], freq="N") tm.assert_index_equal(idx, exp) + def test_range_edges3(self): + idx = date_range( start=Timestamp("1970-01-01 00:00:00.000000001"), end=Timestamp("1970-01-01 00:00:00.000000001"), @@ -53,6 +57,8 @@ def test_range_edges(self): exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"], freq="N") tm.assert_index_equal(idx, exp) + def test_range_edges4(self): + idx = date_range( start=Timestamp("1970-01-01 00:00:00.000001"), end=Timestamp("1970-01-01 00:00:00.000004"), @@ -69,6 +75,8 @@ def test_range_edges(self): ) tm.assert_index_equal(idx, exp) + def test_range_edges5(self): + idx = date_range( start=Timestamp("1970-01-01 00:00:00.001"), end=Timestamp("1970-01-01 00:00:00.004"), @@ -85,6 +93,7 @@ def test_range_edges(self): ) tm.assert_index_equal(idx, exp) + def test_range_edges6(self): idx = date_range( start=Timestamp("1970-01-01 00:00:01"), end=Timestamp("1970-01-01 00:00:04"), @@ -101,6 +110,7 @@ def test_range_edges(self): ) tm.assert_index_equal(idx, exp) + def test_range_edges7(self): idx = date_range( start=Timestamp("1970-01-01 00:01"), end=Timestamp("1970-01-01 00:04"), @@ -117,6 +127,7 @@ def test_range_edges(self): ) tm.assert_index_equal(idx, exp) + def test_range_edges8(self): idx = date_range( start=Timestamp("1970-01-01 01:00"), end=Timestamp("1970-01-01 04:00"), @@ -133,6 +144,7 @@ def test_range_edges(self): ) tm.assert_index_equal(idx, exp) + def test_range_edges9(self): idx = date_range( start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D" ) @@ -234,6 +246,7 @@ def test_datetimeindex_accessors(self): exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name") tm.assert_index_equal(res, exp) + def test_datetimeindex_accessors2(self): dti = date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4) assert sum(dti.is_quarter_start) == 0 @@ -241,6 +254,7 @@ def test_datetimeindex_accessors(self): assert sum(dti.is_year_start) == 0 assert sum(dti.is_year_end) == 1 + def test_datetimeindex_accessors3(self): # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, bday_egypt = 
offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu") dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) @@ -248,10 +262,12 @@ def test_datetimeindex_accessors(self): with pytest.raises(ValueError, match=msg): dti.is_month_start + def test_datetimeindex_accessors4(self): dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) assert dti.is_month_start[0] == 1 + def test_datetimeindex_accessors5(self): tests = [ (Timestamp("2013-06-01", freq="M").is_month_start, 1), (Timestamp("2013-06-01", freq="BM").is_month_start, 0), @@ -290,6 +306,7 @@ def test_datetimeindex_accessors(self): for ts, value in tests: assert ts == value + def test_datetimeindex_accessors6(self): # GH 6538: Check that DatetimeIndex and its TimeStamp elements # return the same weekofyear accessor close to new year w/ tz dates = ["2013/12/29", "2013/12/30", "2013/12/31"] diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 676c0ee99ef7c..7df94b5820e5d 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,18 +1,15 @@ from datetime import datetime from dateutil.tz import tzlocal -import numpy as np import pytest from pandas.compat import IS64 -import pandas as pd from pandas import ( DateOffset, DatetimeIndex, Index, Series, - Timestamp, bdate_range, date_range, ) @@ -46,73 +43,6 @@ def test_ops_properties_basic(self, datetime_series): with pytest.raises(AttributeError, match=msg): s.weekday - def test_repeat_range(self, tz_naive_fixture): - tz = tz_naive_fixture - rng = date_range("1/1/2000", "1/1/2001") - - result = rng.repeat(5) - assert result.freq is None - assert len(result) == 5 * len(rng) - - index = date_range("2001-01-01", periods=2, freq="D", tz=tz) - exp = DatetimeIndex( - ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz - ) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) - exp = DatetimeIndex( - ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz - ) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) - exp = DatetimeIndex( - [ - "2001-01-01", - "2001-01-01", - "2001-01-01", - "NaT", - "NaT", - "NaT", - "2003-01-01", - "2003-01-01", - "2003-01-01", - ], - tz=tz, - ) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - def test_repeat(self, tz_naive_fixture): - tz = tz_naive_fixture - reps = 2 - msg = "the 'axis' parameter is not supported" - - rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) - - expected_rng = DatetimeIndex( - [ - Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), - Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), - Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), - Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), - ] - ) - - res = rng.repeat(reps) - tm.assert_index_equal(res, expected_rng) - assert res.freq is None - - tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - with pytest.raises(ValueError, match=msg): - np.repeat(rng, reps, axis=1) - @pytest.mark.parametrize( "freq,expected", [ @@ -137,174 +67,6 @@ def test_resolution(self, request, tz_naive_fixture, freq, expected): idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) assert idx.resolution == 
expected - def test_value_counts_unique(self, tz_naive_fixture): - tz = tz_naive_fixture - # GH 7735 - idx = date_range("2011-01-01 09:00", freq="H", periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) - - exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) - expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - expected.index = expected.index._with_freq(None) - - for obj in [idx, Series(idx)]: - - tm.assert_series_equal(obj.value_counts(), expected) - - expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) - expected = expected._with_freq(None) - tm.assert_index_equal(idx.unique(), expected) - - idx = DatetimeIndex( - [ - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 08:00", - "2013-01-01 08:00", - pd.NaT, - ], - tz=tz, - ) - - exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - @pytest.mark.parametrize( - "idx", - [ - DatetimeIndex( - ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" - ), - DatetimeIndex( - ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], - freq="H", - name="tzidx", - tz="Asia/Tokyo", - ), - ], - ) - def test_order_with_freq(self, idx): - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - assert ordered.freq == idx.freq - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - assert ordered.freq == idx.freq - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - @pytest.mark.parametrize( - "index_dates,expected_dates", - [ - ( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - ), - ( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - ), - ( - [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], - [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], - ), - ], - ) - def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): - tz = tz_naive_fixture - - # without freq - index = DatetimeIndex(index_dates, tz=tz, name="idx") - expected = DatetimeIndex(expected_dates, tz=tz, name="idx") - - ordered = index.sort_values(na_position="first") - tm.assert_index_equal(ordered, expected) - assert ordered.freq is None - - ordered = index.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - assert ordered.freq is None - - ordered, indexer 
= index.sort_values(return_indexer=True, na_position="first") - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - - ordered, indexer = index.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 0, 4]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - - def test_drop_duplicates_metadata(self, freq_sample): - # GH 10115 - idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") - result = idx.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - idx_dup = idx.append(idx) - assert idx_dup.freq is None # freq is reset - result = idx_dup.drop_duplicates() - expected = idx._with_freq(None) - tm.assert_index_equal(result, expected) - assert result.freq is None - - @pytest.mark.parametrize( - "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - ( - False, - np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), - ), - ], - ) - def test_drop_duplicates(self, freq_sample, keep, expected, index): - # to check Index/Series compat - idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") - idx = idx.append(idx[:5]) - - tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) - expected = idx[~expected] - - result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, expected) - - result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) - def test_infer_freq(self, freq_sample): # GH 11018 idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) @@ -312,22 +74,6 @@ def test_infer_freq(self, freq_sample): tm.assert_index_equal(idx, result) assert result.freq == freq_sample - def test_nat(self, tz_naive_fixture): - tz = tz_naive_fixture - assert DatetimeIndex._na_value is pd.NaT - assert DatetimeIndex([])._na_value is pd.NaT - - idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) - assert idx._can_hold_na - - assert idx.hasnans is False - - idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz) - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans is True - @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 05ee67eee0da5..882515799f943 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -55,12 +55,6 @@ def test_slice_year(self): expected = df[df.index.year == 2005] tm.assert_frame_equal(result, expected) - rng = date_range("1/1/2000", "1/1/2010") - - result = rng.get_loc("2009") - expected = slice(3288, 3653) - assert result == expected - @pytest.mark.parametrize( "partial_dtime", [ diff --git a/pandas/tests/indexes/period/methods/test_is_full.py b/pandas/tests/indexes/period/methods/test_is_full.py new file mode 100644 index 0000000000000..490f199a59ed7 --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_is_full.py @@ -0,0 +1,23 @@ 
+import pytest + +from pandas import PeriodIndex + + +def test_is_full(): + index = PeriodIndex([2005, 2007, 2009], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2006, 2007], freq="A") + assert index.is_full + + index = PeriodIndex([2005, 2005, 2007], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2005, 2006], freq="A") + assert index.is_full + + index = PeriodIndex([2006, 2005, 2005], freq="A") + with pytest.raises(ValueError, match="Index is not monotonic"): + index.is_full + + assert index[:0].is_full diff --git a/pandas/tests/indexes/period/methods/test_repeat.py b/pandas/tests/indexes/period/methods/test_repeat.py new file mode 100644 index 0000000000000..fc344b06420d1 --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_repeat.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestRepeat: + @pytest.mark.parametrize("use_numpy", [True, False]) + @pytest.mark.parametrize( + "index", + [ + period_range("2000-01-01", periods=3, freq="D"), + period_range("2001-01-01", periods=3, freq="2D"), + PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), + ], + ) + def test_repeat_freqstr(self, index, use_numpy): + # GH#10183 + expected = PeriodIndex([per for per in index for _ in range(3)]) + result = np.repeat(index, 3) if use_numpy else index.repeat(3) + tm.assert_index_equal(result, expected) + assert result.freqstr == index.freqstr diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py index 2f16daa36d1fd..aa2393aceee52 100644 --- a/pandas/tests/indexes/period/test_join.py +++ b/pandas/tests/indexes/period/test_join.py @@ -16,7 +16,7 @@ def test_join_outer_indexer(self): pi = period_range("1/1/2000", "1/20/2000", freq="D") result = pi._outer_indexer(pi._values, pi._values) - tm.assert_numpy_array_equal(result[0], pi.asi8) + tm.assert_extension_array_equal(result[0], pi._values) tm.assert_numpy_array_equal(result[1], np.arange(len(pi), dtype=np.int64)) tm.assert_numpy_array_equal(result[2], np.arange(len(pi), dtype=np.int64)) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 52f8de27cb6c6..9ebe44fb16c8d 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -1,13 +1,6 @@ -import numpy as np import pytest import pandas as pd -from pandas import ( - Index, - NaT, - PeriodIndex, - Series, -) import pandas._testing as tm @@ -30,266 +23,6 @@ def test_resolution(self, freq, expected): idx = pd.period_range(start="2013-04-01", periods=30, freq=freq) assert idx.resolution == expected - def test_value_counts_unique(self): - # GH 7735 - idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H") - - exp_idx = PeriodIndex( - [ - "2011-01-01 18:00", - "2011-01-01 17:00", - "2011-01-01 16:00", - "2011-01-01 15:00", - "2011-01-01 14:00", - "2011-01-01 13:00", - "2011-01-01 12:00", - "2011-01-01 11:00", - "2011-01-01 10:00", - "2011-01-01 09:00", - ], - freq="H", - ) - expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = PeriodIndex( - [ - 
"2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 09:00", - "2013-01-01 08:00", - "2013-01-01 08:00", - NaT, - ], - freq="H", - ) - - exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H") - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H") - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - - @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) - def test_drop_duplicates_metadata(self, freq): - # GH 10115 - idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") - result = idx.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - idx_dup = idx.append(idx) # freq will not be reset - result = idx_dup.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) - @pytest.mark.parametrize( - "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - ( - False, - np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), - ), - ], - ) - def test_drop_duplicates(self, freq, keep, expected, index): - # to check Index/Series compat - idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") - idx = idx.append(idx[:5]) - - tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) - expected = idx[~expected] - - result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, expected) - - result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) - - def test_order_compat(self): - def _check_freq(index, expected_index): - if isinstance(index, PeriodIndex): - assert index.freq == expected_index.freq - - pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A") - # for compatibility check - iidx = Index([2011, 2012, 2013], name="idx") - for idx in [pidx, iidx]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - _check_freq(ordered, idx) - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, idx[::-1]) - _check_freq(ordered, idx[::-1]) - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - _check_freq(ordered, idx) - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, idx[::-1]) - tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - _check_freq(ordered, idx[::-1]) - - pidx = PeriodIndex( - ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" - ) - pexpected = PeriodIndex( - ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" - ) - # for compatibility check - iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") - iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx") - for idx, expected in [(pidx, pexpected), (iidx, iexpected)]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, expected) - _check_freq(ordered, idx) - - ordered = idx.sort_values(ascending=False) - 
tm.assert_index_equal(ordered, expected[::-1]) - _check_freq(ordered, idx) - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - _check_freq(ordered, idx) - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - _check_freq(ordered, idx) - - pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") - - result = pidx.sort_values(na_position="first") - expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") - tm.assert_index_equal(result, expected) - assert result.freq == "D" - - result = pidx.sort_values(ascending=False) - expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D") - tm.assert_index_equal(result, expected) - assert result.freq == "D" - - def test_order(self): - for freq in ["D", "2D", "4D"]: - idx = PeriodIndex( - ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" - ) - - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - assert ordered.freq == idx.freq - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - assert ordered.freq == expected.freq - assert ordered.freq == freq - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - assert ordered.freq == idx.freq - assert ordered.freq == freq - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - assert ordered.freq == expected.freq - assert ordered.freq == freq - - idx1 = PeriodIndex( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - freq="D", - name="idx1", - ) - exp1 = PeriodIndex( - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - freq="D", - name="idx1", - ) - - idx2 = PeriodIndex( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - freq="D", - name="idx2", - ) - exp2 = PeriodIndex( - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - freq="D", - name="idx2", - ) - - idx3 = PeriodIndex( - [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3" - ) - exp3 = PeriodIndex( - [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3" - ) - - for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: - ordered = idx.sort_values(na_position="first") - tm.assert_index_equal(ordered, expected) - assert ordered.freq == "D" - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - assert ordered.freq == "D" - - ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq == "D" - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 0, 4]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq == "D" - - def test_nat(self): - assert PeriodIndex._na_value is NaT - assert PeriodIndex([], 
freq="M")._na_value is NaT - - idx = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - assert idx.hasnans is False - - idx = PeriodIndex(["2011-01-01", "NaT"], freq="D") - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans is True - def test_freq_setter_deprecated(self): # GH 20678 idx = pd.period_range("2018Q1", periods=4, freq="Q") @@ -301,12 +34,3 @@ def test_freq_setter_deprecated(self): # warning for setter with pytest.raises(AttributeError, match="can't set attribute"): idx.freq = pd.offsets.Day() - - -def test_order_stability_compat(): - # GH 35922. sort_values is stable both for normal and datetime-like Index - pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") - iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") - ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) - ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) - tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index aabc837e25b4b..032b376f6d6a9 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -3,9 +3,7 @@ from pandas._libs.tslibs.period import IncompatibleFrequency -import pandas as pd from pandas import ( - DataFrame, DatetimeIndex, Index, NaT, @@ -49,22 +47,6 @@ def test_where(self): # This is handled in test_indexing pass - @pytest.mark.parametrize("use_numpy", [True, False]) - @pytest.mark.parametrize( - "index", - [ - period_range("2000-01-01", periods=3, freq="D"), - period_range("2001-01-01", periods=3, freq="2D"), - PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), - ], - ) - def test_repeat_freqstr(self, index, use_numpy): - # GH10183 - expected = PeriodIndex([p for p in index for _ in range(3)]) - result = np.repeat(index, 3) if use_numpy else index.repeat(3) - tm.assert_index_equal(result, expected) - assert result.freqstr == index.freqstr - def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): @@ -271,14 +253,6 @@ def test_is_(self): assert not index.is_(index - 2) assert not index.is_(index - 0) - def test_periods_number_check(self): - msg = ( - "Of the three parameters: start, end, and periods, exactly two " - "must be specified" - ) - with pytest.raises(ValueError, match=msg): - period_range("2011-1-1", "2012-1-1", "B") - def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") @@ -355,25 +329,6 @@ def test_iteration(self): assert isinstance(result[0], Period) assert result[0].freq == index.freq - def test_is_full(self): - index = PeriodIndex([2005, 2007, 2009], freq="A") - assert not index.is_full - - index = PeriodIndex([2005, 2006, 2007], freq="A") - assert index.is_full - - index = PeriodIndex([2005, 2005, 2007], freq="A") - assert not index.is_full - - index = PeriodIndex([2005, 2005, 2006], freq="A") - assert index.is_full - - index = PeriodIndex([2006, 2005, 2005], freq="A") - with pytest.raises(ValueError, match="Index is not monotonic"): - index.is_full - - assert index[:0].is_full - def test_with_multi_index(self): # #1705 index = date_range("1/1/2012", periods=4, freq="12H") @@ -385,29 +340,6 @@ def test_with_multi_index(self): assert 
isinstance(s.index.values[0][0], Period) - def test_convert_array_of_periods(self): - rng = period_range("1/1/2000", periods=20, freq="D") - periods = list(rng) - - result = Index(periods) - assert isinstance(result, PeriodIndex) - - def test_append_concat(self): # TODO: pd.concat test - # #1815 - d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") - d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") - - s1 = Series(np.random.randn(10), d1) - s2 = Series(np.random.randn(10), d2) - - s1 = s1.to_period() - s2 = s2.to_period() - - # drops index - result = pd.concat([s1, s2]) - assert isinstance(result.index, PeriodIndex) - assert result.index[0] == s1.index[0] - def test_pickle_freq(self): # GH2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") @@ -423,44 +355,6 @@ def test_map(self): exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) - @pytest.mark.parametrize( - "msg, key", - [ - (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), - (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), - (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), - ( - r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", - (Period(2018), Period(2016), "bar"), - ), - (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), - ( - r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", - (Period(2017), "foo", Period(2015)), - ), - (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), - ], - ) - def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): - # issue 20684 - """ - parse_time_string return parameter if type not matched. - PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. - If first argument is Period and a tuple has 3 items, - process go on not raise exception - """ - df = DataFrame( - { - "A": [Period(2019), "x1", "x2"], - "B": [Period(2018), Period(2016), "y1"], - "C": [Period(2017), "z1", Period(2015)], - "V1": [1, 2, 3], - "V2": [10, 20, 30], - } - ).set_index(["A", "B", "C"]) - with pytest.raises(KeyError, match=msg): - df.loc[key] - def test_format_empty(self): # GH35712 empty_idx = self._holder([], freq="A") diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index a5be19731b54a..c94ddf57c0ee1 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -12,6 +12,14 @@ class TestPeriodRange: + def test_required_arguments(self): + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range("2011-1-1", "2012-1-1", "B") + @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) def test_construction_from_string(self, freq): # non-empty diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 97fe35bb7f2c9..5cf0134795b74 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -175,7 +175,7 @@ def test_get_unique_index(self, index_flat): vals = index[[0] * 5]._data vals[0] = pd.NaT elif needs_i8_conversion(index.dtype): - vals = index.asi8[[0] * 5] + vals = index._data._ndarray[[0] * 5] vals[0] = iNaT else: vals = index.values[[0] * 5] @@ -184,7 +184,7 @@ def test_get_unique_index(self, index_flat): vals_unique = vals[:2] if index.dtype.kind in ["m", "M"]: # i.e. 
needs_i8_conversion but not period_dtype, as above - vals = type(index._data)._simple_new(vals, dtype=index.dtype) + vals = type(index._data)(vals, dtype=index.dtype) vals_unique = type(index._data)._simple_new(vals_unique, dtype=index.dtype) idx_nan = index._shallow_copy(vals) idx_unique_nan = index._shallow_copy(vals_unique) diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 4fba4b13835b3..5937f43102190 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -80,6 +80,13 @@ def test_constructor_infer_periodindex(self): tm.assert_index_equal(rs, xp) assert isinstance(rs, PeriodIndex) + def test_from_list_of_periods(self): + rng = period_range("1/1/2000", periods=20, freq="D") + periods = list(rng) + + result = Index(periods) + assert isinstance(result, PeriodIndex) + @pytest.mark.parametrize("pos", [0, 1]) @pytest.mark.parametrize( "klass,dtype,ctor", diff --git a/pandas/tests/indexes/timedeltas/methods/test_repeat.py b/pandas/tests/indexes/timedeltas/methods/test_repeat.py new file mode 100644 index 0000000000000..2a9b58d1bf322 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_repeat.py @@ -0,0 +1,34 @@ +import numpy as np + +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat(self): + index = timedelta_range("1 days", periods=2, freq="D") + exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = TimedeltaIndex(["1 days", "NaT", "3 days"]) + exp = TimedeltaIndex( + [ + "1 days", + "1 days", + "1 days", + "NaT", + "NaT", + "NaT", + "3 days", + "3 days", + "3 days", + ] + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 7acfb50fe944b..5f0101eb4478c 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -7,13 +7,15 @@ import numpy as np import pytest -import pandas as pd from pandas import ( Index, + NaT, Timedelta, TimedeltaIndex, + Timestamp, notna, timedelta_range, + to_timedelta, ) import pandas._testing as tm @@ -64,10 +66,10 @@ def test_getitem(self): @pytest.mark.parametrize( "key", [ - pd.Timestamp("1970-01-01"), - pd.Timestamp("1970-01-02"), + Timestamp("1970-01-01"), + Timestamp("1970-01-02"), datetime(1970, 1, 1), - pd.Timestamp("1970-01-03").to_datetime64(), + Timestamp("1970-01-03").to_datetime64(), # non-matching NA values np.datetime64("NaT"), ], @@ -81,7 +83,7 @@ def test_timestamp_invalid_key(self, key): class TestGetLoc: def test_get_loc(self): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + idx = to_timedelta(["0 days", "1 days", "2 days"]) for method in [None, "pad", "backfill", "nearest"]: assert idx.get_loc(idx[1], method) == 1 @@ -117,7 +119,7 @@ def test_get_loc(self): def test_get_loc_nat(self): tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) - assert tidx.get_loc(pd.NaT) == 1 + assert tidx.get_loc(NaT) == 1 assert tidx.get_loc(None) == 1 assert tidx.get_loc(float("nan")) == 1 assert tidx.get_loc(np.nan) == 1 @@ -125,12 +127,12 @@ def test_get_loc_nat(self): class TestGetIndexer: def test_get_indexer(self): - idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + idx = 
to_timedelta(["0 days", "1 days", "2 days"]) tm.assert_numpy_array_equal( idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) ) - target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + target = to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) tm.assert_numpy_array_equal( idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) ) @@ -158,25 +160,25 @@ def test_where_invalid_dtypes(self): tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") tail = tdi[2:].tolist() - i2 = Index([pd.NaT, pd.NaT] + tail) + i2 = Index([NaT, NaT] + tail) mask = notna(i2) - expected = Index([pd.NaT.value, pd.NaT.value] + tail, dtype=object, name="idx") + expected = Index([NaT.value, NaT.value] + tail, dtype=object, name="idx") assert isinstance(expected[0], int) result = tdi.where(mask, i2.asi8) tm.assert_index_equal(result, expected) - ts = i2 + pd.Timestamp.now() + ts = i2 + Timestamp.now() expected = Index([ts[0], ts[1]] + tail, dtype=object, name="idx") result = tdi.where(mask, ts) tm.assert_index_equal(result, expected) - per = (i2 + pd.Timestamp.now()).to_period("D") + per = (i2 + Timestamp.now()).to_period("D") expected = Index([per[0], per[1]] + tail, dtype=object, name="idx") result = tdi.where(mask, per) tm.assert_index_equal(result, expected) - ts = pd.Timestamp.now() + ts = Timestamp.now() expected = Index([ts, ts] + tail, dtype=object, name="idx") result = tdi.where(mask, ts) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 4e6d69913900d..2a5051b2982bb 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -1,7 +1,6 @@ import numpy as np import pytest -import pandas as pd from pandas import ( Series, TimedeltaIndex, @@ -17,50 +16,6 @@ class TestTimedeltaIndexOps: - def test_value_counts_unique(self): - # GH 7735 - idx = timedelta_range("1 days 09:00:00", freq="H", periods=10) - # create repeated values, 'n'th element is repeated by n+1 times - idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) - - exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10) - exp_idx = exp_idx._with_freq(None) - expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") - - obj = idx - tm.assert_series_equal(obj.value_counts(), expected) - - obj = Series(idx) - tm.assert_series_equal(obj.value_counts(), expected) - - expected = timedelta_range("1 days 09:00:00", freq="H", periods=10) - tm.assert_index_equal(idx.unique(), expected) - - idx = TimedeltaIndex( - [ - "1 days 09:00:00", - "1 days 09:00:00", - "1 days 09:00:00", - "1 days 08:00:00", - "1 days 08:00:00", - pd.NaT, - ] - ) - - exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"]) - expected = Series([3, 2], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(), expected) - - exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT]) - expected = Series([3, 2, 1], index=exp_idx) - - for obj in [idx, Series(idx)]: - tm.assert_series_equal(obj.value_counts(dropna=False), expected) - - tm.assert_index_equal(idx.unique(), exp_idx) - def test_nonunique_contains(self): # GH 9512 for idx in map( @@ -84,106 +39,6 @@ def test_unknown_attribute(self): with pytest.raises(AttributeError, match=msg): ts.foo - def test_order(self): - # GH 10295 - idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx") - idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", 
name="idx") - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - assert ordered.freq == idx.freq - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - assert ordered.freq == idx.freq - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, idx[::-1]) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - idx1 = TimedeltaIndex( - ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" - ) - exp1 = TimedeltaIndex( - ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" - ) - - idx2 = TimedeltaIndex( - ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2" - ) - - for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, expected) - assert ordered.freq is None - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - assert ordered.freq is None - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 0, 4]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - - def test_drop_duplicates_metadata(self, freq_sample): - # GH 10115 - idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") - result = idx.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - idx_dup = idx.append(idx) - assert idx_dup.freq is None # freq is reset - result = idx_dup.drop_duplicates() - expected = idx._with_freq(None) - tm.assert_index_equal(expected, result) - assert result.freq is None - - @pytest.mark.parametrize( - "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - ( - False, - np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), - ), - ], - ) - def test_drop_duplicates(self, freq_sample, keep, expected, index): - # to check Index/Series compat - idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") - idx = idx.append(idx[:5]) - - tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) - expected = idx[~expected] - - result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, expected) - - result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) - def test_infer_freq(self, freq_sample): # GH#11018 idx = timedelta_range("1", freq=freq_sample, periods=10) @@ -191,47 +46,6 @@ def test_infer_freq(self, freq_sample): tm.assert_index_equal(idx, result) assert result.freq == freq_sample - def test_repeat(self): - index = timedelta_range("1 days", periods=2, freq="D") - exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) 
- assert res.freq is None - - index = TimedeltaIndex(["1 days", "NaT", "3 days"]) - exp = TimedeltaIndex( - [ - "1 days", - "1 days", - "1 days", - "NaT", - "NaT", - "NaT", - "3 days", - "3 days", - "3 days", - ] - ) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - def test_nat(self): - assert TimedeltaIndex._na_value is pd.NaT - assert TimedeltaIndex([])._na_value is pd.NaT - - idx = TimedeltaIndex(["1 days", "2 days"]) - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - assert idx.hasnans is False - - idx = TimedeltaIndex(["1 days", "NaT"]) - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans is True - @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)]) def test_freq_setter(self, values, freq): diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py deleted file mode 100644 index cca211c1eb155..0000000000000 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - -from pandas import ( - Series, - timedelta_range, -) -import pandas._testing as tm - - -class TestSlicing: - def test_partial_slice(self): - rng = timedelta_range("1 day 10:11:12", freq="h", periods=500) - s = Series(np.arange(len(rng)), index=rng) - - result = s["5 day":"6 day"] - expected = s.iloc[86:134] - tm.assert_series_equal(result, expected) - - result = s["5 day":] - expected = s.iloc[86:] - tm.assert_series_equal(result, expected) - - result = s[:"6 day"] - expected = s.iloc[:134] - tm.assert_series_equal(result, expected) - - def test_partial_slice_high_reso(self): - - # higher reso - rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000) - s = Series(np.arange(len(rng)), index=rng) - - result = s["1 day 10:11:12":] - expected = s.iloc[0:] - tm.assert_series_equal(result, expected) - - result = s["1 day 10:11:12.001":] - expected = s.iloc[1000:] - tm.assert_series_equal(result, expected) - - result = s["1 days, 10:11:12.001001"] - assert result == s.iloc[1001] diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index d16a32247b917..d0f4828e8c7bd 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -64,31 +64,6 @@ def test_isin(self): index.isin([index[2], 5]), np.array([False, False, True, False]) ) - def test_sort_values(self): - - idx = TimedeltaIndex(["4d", "1d", "2d"]) - - ordered = idx.sort_values() - assert ordered.is_monotonic - - ordered = idx.sort_values(ascending=False) - assert ordered[::-1].is_monotonic - - ordered, dexer = idx.sort_values(return_indexer=True) - assert ordered.is_monotonic - - tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0]), check_dtype=False) - - ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - assert ordered[::-1].is_monotonic - - tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False) - - def test_argmin_argmax(self): - idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]) - assert idx.argmin() == 1 - assert idx.argmax() == 0 - def test_misc_coverage(self): rng = timedelta_range("1 day", periods=5) diff --git a/pandas/tests/indexing/test_categorical.py 
b/pandas/tests/indexing/test_categorical.py
index 68ae1a0dd6f3d..f104587ebbded 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -37,20 +37,24 @@ def setup_method(self, method):
         )
 
     def test_loc_scalar(self):
+        dtype = CDT(list("cab"))
         result = self.df.loc["a"]
-        expected = DataFrame(
-            {"A": [0, 1, 5], "B": (Series(list("aaa")).astype(CDT(list("cab"))))}
-        ).set_index("B")
+        bidx = Series(list("aaa"), name="B").astype(dtype)
+        assert bidx.dtype == dtype
+
+        expected = DataFrame({"A": [0, 1, 5]}, index=Index(bidx))
         tm.assert_frame_equal(result, expected)
 
         df = self.df.copy()
         df.loc["a"] = 20
+        bidx2 = Series(list("aabbca"), name="B").astype(dtype)
+        assert bidx2.dtype == dtype
         expected = DataFrame(
             {
                 "A": [20, 20, 2, 3, 4, 20],
-                "B": (Series(list("aabbca")).astype(CDT(list("cab")))),
-            }
-        ).set_index("B")
+            },
+            index=Index(bidx2),
+        )
         tm.assert_frame_equal(df, expected)
 
         # value not in the categories
@@ -64,14 +68,38 @@ def test_loc_scalar(self):
         df2.loc["d"] = 10
         tm.assert_frame_equal(df2, expected)
 
-        msg = "'fill_value=d' is not present in this Categorical's categories"
-        with pytest.raises(TypeError, match=msg):
-            df.loc["d", "A"] = 10
-        with pytest.raises(TypeError, match=msg):
-            df.loc["d", "C"] = 10
+    def test_loc_setitem_with_expansion_non_category(self):
+        # Setting-with-expansion with a new key "d" that is not among the categories
+        df = self.df
+        df.loc["a"] = 20
+
+        # Setting a new row on an existing column
+        df3 = df.copy()
+        df3.loc["d", "A"] = 10
+        bidx3 = Index(list("aabbcad"), name="B")
+        expected3 = DataFrame(
+            {
+                "A": [20, 20, 2, 3, 4, 20, 10.0],
+            },
+            index=Index(bidx3),
+        )
+        tm.assert_frame_equal(df3, expected3)
+
+        # Setting a new row _and_ a new column
+        df4 = df.copy()
+        df4.loc["d", "C"] = 10
+        expected4 = DataFrame(
+            {
+                "A": [20, 20, 2, 3, 4, 20, np.nan],
+                "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
+            },
+            index=Index(bidx3),
+        )
+        tm.assert_frame_equal(df4, expected4)
+
+    def test_loc_getitem_scalar_non_category(self):
         with pytest.raises(KeyError, match="^1$"):
-            df.loc[1]
+            self.df.loc[1]
 
     def test_slicing(self):
         cat = Series(Categorical([1, 2, 3, 4]))
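The rewritten categorical tests above pin down what happens when a setting-with-expansion key is not among the index's categories: rather than raising TypeError, the CategoricalIndex falls back to a plain object-dtype Index. A minimal sketch of that behavior, assuming the post-patch semantics these tests assert; the frame and labels below are made up for illustration and are not part of the patch:

    import pandas as pd

    cats = pd.CategoricalDtype(list("cab"))
    idx = pd.Index(pd.Series(list("aabbca"), name="B").astype(cats))
    df = pd.DataFrame({"A": [0, 1, 2, 3, 4, 5]}, index=idx)

    # scalar .loc lookups resolve through the categories
    print(df.loc["a"])  # the rows at positions 0, 1 and 5

    # expanding with a key outside the categories no longer raises;
    # the index falls back to a plain (object-dtype) Index
    df2 = df.copy()
    df2.loc["d", "A"] = 10
    print(type(df2.index).__name__)  # Index, not CategoricalIndex
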
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 43ffc9e8eaedd..d0fdf81121c71 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -1090,6 +1090,20 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
             # GH#32257 we let numpy do validation, get their exception
             float_frame.iloc[:, :, :] = 1
 
+    def test_iloc_frame_indexer(self):
+        # GH#39004
+        df = DataFrame({"a": [1, 2, 3]})
+        indexer = DataFrame({"a": [True, False, True]})
+        with tm.assert_produces_warning(FutureWarning):
+            df.iloc[indexer] = 1
+
+        msg = (
+            "DataFrame indexer is not allowed for .iloc\n"
+            "Consider using .loc for automatic alignment."
+        )
+        with pytest.raises(IndexError, match=msg):
+            df.iloc[indexer]
+
 
 class TestILocSetItemDuplicateColumns:
     def test_iloc_setitem_scalar_duplicate_columns(self):
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 466e60e84b318..5b6c042a11332 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -23,7 +23,9 @@
     DatetimeIndex,
     Index,
     IndexSlice,
+    IntervalIndex,
     MultiIndex,
+    Period,
     Series,
     SparseDtype,
     Timedelta,
@@ -145,6 +147,43 @@ def test_setitem_from_duplicate_axis(self):
 
 class TestLoc2:
     # TODO: better name, just separating out things that rely on base class
+    @pytest.mark.parametrize(
+        "msg, key",
+        [
+            (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")),
+            (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")),
+            (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")),
+            (
+                r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'",
+                (Period(2018), Period(2016), "bar"),
+            ),
+            (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")),
+            (
+                r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)",
+                (Period(2017), "foo", Period(2015)),
+            ),
+            (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")),
+        ],
+    )
+    def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
+        # GH#20684
+        """
+        parse_time_string returns its argument unchanged if the type does not match.
+        PeriodIndex.get_loc treats the value returned by parse_time_string as a tuple.
+        If the first element is a Period and the tuple has 3 items,
+        processing continues instead of raising an exception.
+        """
+        df = DataFrame(
+            {
+                "A": [Period(2019), "x1", "x2"],
+                "B": [Period(2018), Period(2016), "y1"],
+                "C": [Period(2017), "z1", Period(2015)],
+                "V1": [1, 2, 3],
+                "V2": [10, 20, 30],
+            }
+        ).set_index(["A", "B", "C"])
+        with pytest.raises(KeyError, match=msg):
+            df.loc[key]
 
     def test_loc_getitem_missing_unicode_key(self):
         df = DataFrame({"a": [1]})
@@ -1619,6 +1658,55 @@ def test_loc_setitem_with_expansion_inf_upcast_empty(self):
         expected = pd.Float64Index([0, 1, np.inf])
         tm.assert_index_equal(result, expected)
 
+    @pytest.mark.filterwarnings("ignore:indexing past lexsort depth")
+    def test_loc_setitem_with_expansion_nonunique_index(self, index, request):
+        # GH#40096
+        if not len(index):
+            return
+        if isinstance(index, IntervalIndex):
+            mark = pytest.mark.xfail(reason="IntervalIndex raises")
+            request.node.add_marker(mark)
+
+        index = index.repeat(2)  # ensure non-unique
+        N = len(index)
+        arr = np.arange(N).astype(np.int64)
+
+        orig = DataFrame(arr, index=index, columns=[0])
+
+        # key that will require object-dtype casting in the index
+        key = "kapow"
+        assert key not in index  # otherwise test is invalid
+        # TODO: using a tuple key breaks here in many cases
+
+        exp_index = index.insert(len(index), key)
+        if isinstance(index, MultiIndex):
+            assert exp_index[-1][0] == key
+        else:
+            assert exp_index[-1] == key
+        exp_data = np.arange(N + 1).astype(np.float64)
+        expected = DataFrame(exp_data, index=exp_index, columns=[0])
+
+        # Add new row, but no new columns
+        df = orig.copy()
+        df.loc[key, 0] = N
+        tm.assert_frame_equal(df, expected)
+
+        # add new row on a Series
+        ser = orig.copy()[0]
+        ser.loc[key] = N
+        # the series machinery lets us preserve int dtype instead of float
+        expected = expected[0].astype(np.int64)
+        tm.assert_series_equal(ser, expected)
+
+        # add new row and new column
+        df = orig.copy()
+        df.loc[key, 1] = N
+        expected = DataFrame(
+            {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]},
+            index=exp_index,
+        )
+        tm.assert_frame_equal(df, expected)
+
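test_loc_setitem_with_expansion_nonunique_index encodes the dtype consequences of enlargement on a non-unique index. A short sketch of the behavior it asserts; the values and the "kapow" key mirror the test, but the snippet itself is illustrative only:

    import numpy as np
    import pandas as pd

    idx = pd.Index(["a", "a", "b", "b"])  # deliberately non-unique
    df = pd.DataFrame(np.arange(4, dtype=np.int64), index=idx, columns=[0])

    # enlarging a DataFrame through .loc appends the row; the int64
    # column is upcast to float64 because the new cell starts out missing
    df.loc["kapow", 0] = 4
    print(df[0].dtype)  # float64

    # Series enlargement can keep the integer dtype, as the test notes
    ser = pd.Series(np.arange(4, dtype=np.int64), index=idx)
    ser.loc["kapow"] = 4
    print(ser.dtype)  # int64
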
 
 class TestLocCallable:
     def test_frame_loc_getitem_callable(self):
diff --git a/pandas/tests/io/data/xml/baby_names.xml b/pandas/tests/io/data/xml/baby_names.xml
new file mode 100644
index 0000000000000..b4797b79d7112
--- /dev/null
+++ b/pandas/tests/io/data/xml/baby_names.xml
@@ -0,0 +1,53 @@
+
+
+
+ 1
+ José
+ Sofía
+
+
+ 2
+ Luis
+ Valentina
+
+
+ 3
+ Carlos
+ Isabella
+
+
+ 4
+ Juan
+ Camila
+
+
+ 5
+ Jorge
+ Valeria
+
+
+ 6
+ Pedro
+ Mariana
+
+
+ 7
+ Jesús
+ Gabriela
+
+
+ 8
+ Manuel
+ Sara
+
+
+ 9
+ Santiago
+ Daniella
+
+
+ 10
+ Sebastián
+ María José
+
+
diff --git a/pandas/tests/io/data/xml/books.xml b/pandas/tests/io/data/xml/books.xml
new file mode 100644
index 0000000000000..666ce60e9a2be
--- /dev/null
+++ b/pandas/tests/io/data/xml/books.xml
@@ -0,0 +1,21 @@
+
+
+
+ Everyday Italian
+ Giada De Laurentiis
+ 2005
+ 30.00
+
+
+ Harry Potter
+ J K. Rowling
+ 2005
+ 29.99
+
+
+ Learning XML
+ Erik T. Ray
+ 2003
+ 39.95
+
+
diff --git a/pandas/tests/io/data/xml/cta_rail_lines.kml b/pandas/tests/io/data/xml/cta_rail_lines.kml
new file mode 100644
index 0000000000000..c031137ee7b20
--- /dev/null
+++ b/pandas/tests/io/data/xml/cta_rail_lines.kml
@@ -0,0 +1,92 @@
+
+ CTA_RailLines
+
+
+ CTA_RailLines
+
+
+ Blue Line (Forest Park)
+
Blue Line (Forest Park)
OBJECTID_1 1
ASSET_ID 21100001
LINES Blue Line (Forest Park)
DESCRIPTIO Oak Park to Austin
TYPE Elevated or at Grade
LEGEND BL
ALT_LEGEND BL
BRANCH Blue Line Forest Park
SHAPE.LEN 4060.368778
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.77678526964958,41.8708863930319,0 -87.77826234150609,41.87097820122218,0 -87.78251583439344,41.87130129991005,0 -87.78418294588424,41.87145055520308,0 -87.7872369165933,41.8717239119163,0 -87.79160214925886,41.87210797280065,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 2
ASSET_ID 21100002
LINES Red, Purple Line
DESCRIPTIO Lawrence to Wilson
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 1800.132896
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65758750947528,41.96427269188822,0 -87.65802133507393,41.96581929055245,0 -87.65819033925305,41.96621846093642,0 -87.6583189819129,41.96650362897086,0 -87.65835858701473,41.96669002089185,0 -87.65838428411853,41.96688150295095,0 -87.65842208882658,41.96745896091846,0 -87.65846556843937,41.9683761425439,0 -87.65849296214573,41.96913893870342,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 3
ASSET_ID 21100003
LINES Red, Purple Line
DESCRIPTIO Wilson to Sheridan
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 4256.243677
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65492939166126,41.95377494531437,0 -87.65557043199591,41.95376544118533,0 -87.65606302030132,41.95376391658746,0 -87.65623502146268,41.95377379126367,0 -87.65634748981634,41.95380103566435,0 -87.65646537904269,41.95387703994676,0 -87.65656532461145,41.95396622645799,0 -87.65664760856414,41.95404201996044,0 -87.65671750555913,41.95416647054043,0 -87.65673983607117,41.95429949810849,0 -87.65673866475777,41.95441024240925,0 -87.6567690255541,41.95490657227902,0 -87.65683672482363,41.95692259283837,0 -87.6568900886376,41.95861070983142,0 -87.65699865558875,41.96181418669004,0 -87.65756347177603,41.96397045777844,0 -87.65758750947528,41.96427269188822,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 4
ASSET_ID 21100004
LINES Red, Purple Line
DESCRIPTIO Sheridan to Addison
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 2581.713736
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65362593118043,41.94742799535678,0 -87.65363554415794,41.94819886386848,0 -87.6536456393239,41.95059994675451,0 -87.65365831235026,41.95108288489359,0 -87.6536604873874,41.9519954657554,0 -87.65362592053201,41.95245597302328,0 -87.65367158496069,41.95311153649393,0 -87.65368468595476,41.9533202828916,0 -87.65369271253692,41.95343095587119,0 -87.65373335834569,41.95351536301472,0 -87.65378605844126,41.95358212680591,0 -87.65385067928185,41.95364452823767,0 -87.6539390793817,41.95370263886964,0 -87.6540786298351,41.95373403675265,0 -87.65430648647626,41.9537535411832,0 -87.65492939166126,41.95377494531437,0 + + +
+ + Red, Purple Line + +
Red, Purple Line
OBJECTID_1 5
ASSET_ID 21100005
LINES Red, Purple Line
DESCRIPTIO Addison to Clark Junction
TYPE Elevated or at Grade
LEGEND RD
ALT_LEGEND RDPR
BRANCH Red Line North Side
SHAPE.LEN 1918.716686
]]>
+ #LineStyle01 + + + 0 + clampedToGround + -87.65345391792157,41.94217681262115,0 -87.65342448305786,41.94237224420864,0 -87.65339745703922,41.94268217746244,0 -87.65337753982941,41.94288140770284,0 -87.65336256753105,41.94317369618263,0 -87.65338799707138,41.94357253961736,0 -87.65340240886648,41.94389158188269,0 -87.65341837392448,41.94406444407721,0 -87.65342275247338,41.94421065714904,0 -87.65347469646018,41.94434829382345,0 -87.65351486483024,41.94447699917548,0 -87.65353483605053,41.9453896864472,0 -87.65361975532807,41.94689193720703,0 -87.65362593118043,41.94742799535678,0 + + +
+
+ +
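The XML, KML and XSL fixtures added above support tests for the XML reader. As a hedged sketch of how such fixtures are typically consumed — pd.read_xml and its XSLT stylesheet support ship with pandas 1.3 and require lxml, and the paths and KML namespace below are assumptions inferred from the file names rather than something this patch defines:

    import pandas as pd

    # flat, row-oriented XML parses directly
    books = pd.read_xml("pandas/tests/io/data/xml/books.xml")

    # nested KML is flattened through an XSLT stylesheet first
    rail = pd.read_xml(
        "pandas/tests/io/data/xml/cta_rail_lines.kml",
        xpath=".//k:Placemark",
        namespaces={"k": "http://www.opengis.net/kml/2.2"},
        stylesheet="pandas/tests/io/data/xml/flatten_doc.xsl",
    )
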
+
diff --git a/pandas/tests/io/data/xml/flatten_doc.xsl b/pandas/tests/io/data/xml/flatten_doc.xsl new file mode 100644 index 0000000000000..a9d62d180beaf --- /dev/null +++ b/pandas/tests/io/data/xml/flatten_doc.xsl @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/data/xml/row_field_output.xsl b/pandas/tests/io/data/xml/row_field_output.xsl new file mode 100644 index 0000000000000..5a0f0e655a78e --- /dev/null +++ b/pandas/tests/io/data/xml/row_field_output.xsl @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index c650f59a7da95..d8448736c7cc8 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -275,7 +275,7 @@ def test_read_excel_parse_dates(self, ext): def test_multiindex_interval_datetimes(self, ext): # GH 30986 - midx = pd.MultiIndex.from_arrays( + midx = MultiIndex.from_arrays( [ range(4), pd.interval_range( @@ -289,7 +289,7 @@ def test_multiindex_interval_datetimes(self, ext): result = pd.read_excel(pth, index_col=[0, 1]) expected = DataFrame( range(4), - pd.MultiIndex.from_arrays( + MultiIndex.from_arrays( [ range(4), [ diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 01ed234f6e248..f0d1090899043 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -626,6 +626,19 @@ def test_table_styles(self): result = " ".join(styler.render().split()) assert "th { foo: bar; }" in result + def test_table_styles_multiple(self): + ctx = self.df.style.set_table_styles( + [ + {"selector": "th,td", "props": "color:red;"}, + {"selector": "tr", "props": "color:green;"}, + ] + )._translate()["table_styles"] + assert ctx == [ + {"selector": "th", "props": [("color", "red")]}, + {"selector": "td", "props": [("color", "red")]}, + {"selector": "tr", "props": [("color", "green")]}, + ] + def test_maybe_convert_css_to_tuples(self): expected = [("a", "b"), ("c", "d e")] assert _maybe_convert_css_to_tuples("a:b;c:d e;") == expected diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 41efb594fd8e4..06e0eadb84c59 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -249,7 +249,7 @@ def test_repr_deprecation_negative_int(self): def test_repr_chop_threshold(self): df = DataFrame([[0.1, 0.5], [0.5, -0.1]]) - pd.reset_option("display.chop_threshold") # default None + reset_option("display.chop_threshold") # default None assert repr(df) == " 0 1\n0 0.1 0.5\n1 0.5 -0.1" with option_context("display.chop_threshold", 0.2): @@ -382,7 +382,7 @@ def test_repr_truncates_terminal_size(self, monkeypatch): ) index = range(5) - columns = pd.MultiIndex.from_tuples( + columns = MultiIndex.from_tuples( [ ("This is a long title with > 37 chars.", "cat"), ("This is a loooooonger title with > 43 chars.", "dog"), @@ -689,7 +689,7 @@ def test_east_asian_unicode_false(self): assert repr(df) == expected # MultiIndex - idx = pd.MultiIndex.from_tuples( + idx = MultiIndex.from_tuples( [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) df = DataFrame( @@ -833,7 +833,7 @@ def test_east_asian_unicode_true(self): assert repr(df) == expected # MultiIndex - idx = pd.MultiIndex.from_tuples( + idx = MultiIndex.from_tuples( [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) df = DataFrame( @@ -1002,14 +1002,14 @@ def test_truncate_with_different_dtypes(self): + 
[datetime.datetime(2012, 1, 3)] * 10 ) - with pd.option_context("display.max_rows", 8): + with option_context("display.max_rows", 8): result = str(s) assert "object" in result # 12045 df = DataFrame({"text": ["some words"] + [None] * 9}) - with pd.option_context("display.max_rows", 8, "display.max_columns", 3): + with option_context("display.max_rows", 8, "display.max_columns", 3): result = str(df) assert "None" in result assert "NaN" not in result @@ -1026,9 +1026,7 @@ def test_truncate_with_different_dtypes_multiindex(self): def test_datetimelike_frame(self): # GH 12211 - df = DataFrame( - {"date": [Timestamp("20130101").tz_localize("UTC")] + [pd.NaT] * 5} - ) + df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC")] + [NaT] * 5}) with option_context("display.max_rows", 5): result = str(df) @@ -1037,7 +1035,7 @@ def test_datetimelike_frame(self): assert "..." in result assert "[6 rows x 1 columns]" in result - dts = [Timestamp("2011-01-01", tz="US/Eastern")] * 5 + [pd.NaT] * 5 + dts = [Timestamp("2011-01-01", tz="US/Eastern")] * 5 + [NaT] * 5 df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context("display.max_rows", 5): expected = ( @@ -1051,7 +1049,7 @@ def test_datetimelike_frame(self): ) assert repr(df) == expected - dts = [pd.NaT] * 5 + [Timestamp("2011-01-01", tz="US/Eastern")] * 5 + dts = [NaT] * 5 + [Timestamp("2011-01-01", tz="US/Eastern")] * 5 df = DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) with option_context("display.max_rows", 5): expected = ( @@ -1117,7 +1115,7 @@ def test_unicode_problem_decoding_as_ascii(self): def test_string_repr_encoding(self, datapath): filepath = datapath("io", "parser", "data", "unicode_series.csv") - df = pd.read_csv(filepath, header=None, encoding="latin1") + df = read_csv(filepath, header=None, encoding="latin1") repr(df) repr(df[1]) @@ -1548,7 +1546,7 @@ def test_to_string_float_index(self): def test_to_string_complex_float_formatting(self): # GH #25514, 25745 - with pd.option_context("display.precision", 5): + with option_context("display.precision", 5): df = DataFrame( { "x": [ @@ -1785,7 +1783,7 @@ def test_repr_html_mathjax(self): df = DataFrame([[1, 2], [3, 4]]) assert "tex2jax_ignore" not in df._repr_html_() - with pd.option_context("display.html.use_mathjax", False): + with option_context("display.html.use_mathjax", False): assert "tex2jax_ignore" in df._repr_html_() def test_repr_html_wide(self): @@ -2229,7 +2227,7 @@ def test_east_asian_unicode_series(self): assert repr(s) == expected # MultiIndex - idx = pd.MultiIndex.from_tuples( + idx = MultiIndex.from_tuples( [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) s = Series([1, 22, 3333, 44444], index=idx) @@ -2324,7 +2322,7 @@ def test_east_asian_unicode_series(self): assert repr(s) == expected # MultiIndex - idx = pd.MultiIndex.from_tuples( + idx = MultiIndex.from_tuples( [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] ) s = Series([1, 22, 3333, 44444], index=idx) @@ -2853,7 +2851,7 @@ def test_output_display_precision_trailing_zeroes(self): # Issue #20359: trimming zeros while there is no decimal point # Happens when display precision is set to zero - with pd.option_context("display.precision", 0): + with option_context("display.precision", 0): s = Series([840.0, 4200.0]) expected_output = "0 840\n1 4200\ndtype: float64" assert str(s) == expected_output @@ -2862,7 +2860,7 @@ def test_output_significant_digits(self): # Issue #9764 # In case default display precision changes: - with 
pd.option_context("display.precision", 6): + with option_context("display.precision", 6): # DataFrame example from issue #9764 d = DataFrame( { @@ -2933,7 +2931,7 @@ def test_output_significant_digits(self): def test_too_long(self): # GH 10451 - with pd.option_context("display.precision", 4): + with option_context("display.precision", 4): # need both a number > 1e6 and something that normally formats to # having length > display.precision + 6 df = DataFrame({"x": [12345.6789]}) @@ -3011,7 +3009,7 @@ def test_all(self): class TestTimedelta64Formatter: def test_days(self): - x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="D") + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") result = fmt.Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'1 days'" @@ -3027,25 +3025,25 @@ def test_days(self): assert result[0].strip() == "1 days" def test_days_neg(self): - x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="D") + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") result = fmt.Timedelta64Formatter(-x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'-1 days'" def test_subdays(self): - y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="s") + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") result = fmt.Timedelta64Formatter(y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'0 days 00:00:01'" def test_subdays_neg(self): - y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="s") + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") result = fmt.Timedelta64Formatter(-y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'-1 days +23:59:59'" def test_zero(self): - x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit="D") + x = pd.to_timedelta(list(range(1)) + [NaT], unit="D") result = fmt.Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" @@ -3056,13 +3054,13 @@ def test_zero(self): class TestDatetime64Formatter: def test_mixed(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT]) + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT]) result = fmt.Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 00:00:00" assert result[1].strip() == "2013-01-01 12:00:00" def test_dates(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT]) + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT]) result = fmt.Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01" assert result[1].strip() == "2013-01-02" @@ -3137,20 +3135,20 @@ def format_func(x): class TestNaTFormatting: def test_repr(self): - assert repr(pd.NaT) == "NaT" + assert repr(NaT) == "NaT" def test_str(self): - assert str(pd.NaT) == "NaT" + assert str(NaT) == "NaT" class TestDatetimeIndexFormat: def test_datetime(self): - formatted = pd.to_datetime([datetime(2003, 1, 1, 12), pd.NaT]).format() + formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() assert formatted[0] == "2003-01-01 12:00:00" assert formatted[1] == "NaT" def test_date(self): - formatted = pd.to_datetime([datetime(2003, 1, 1), pd.NaT]).format() + formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format() assert formatted[0] == "2003-01-01" assert formatted[1] == "NaT" @@ -3158,11 +3156,11 @@ def test_date_tz(self): formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format() assert 
formatted[0] == "2013-01-01 00:00:00+00:00" - formatted = pd.to_datetime([datetime(2013, 1, 1), pd.NaT], utc=True).format() + formatted = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True).format() assert formatted[0] == "2013-01-01 00:00:00+00:00" def test_date_explicit_date_format(self): - formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format( + formatted = pd.to_datetime([datetime(2003, 2, 1), NaT]).format( date_format="%m-%d-%Y", na_rep="UT" ) assert formatted[0] == "02-01-2003" @@ -3226,7 +3224,7 @@ def test_tz_dateutil(self): def test_nat_representations(self): for f in (str, repr, methodcaller("isoformat")): - assert f(pd.NaT) == "NaT" + assert f(NaT) == "NaT" def test_format_percentiles(): diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 8c634509bdc84..5e599818308b8 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -326,7 +326,7 @@ def test_to_csv_multi_index(self): ), ], ) - @pytest.mark.parametrize("klass", [pd.DataFrame, pd.Series]) + @pytest.mark.parametrize("klass", [DataFrame, pd.Series]) def test_to_csv_single_level_multi_index(self, ind, expected, klass): # see gh-19589 result = klass(pd.Series([1], ind, name="data")).to_csv( diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 347e1fda3c79d..1c89c4e392a7f 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -763,7 +763,7 @@ def test_to_html_render_links(render_links, expected, datapath): def test_ignore_display_max_colwidth(method, expected, max_colwidth): # see gh-17004 df = DataFrame([lorem_ipsum]) - with pd.option_context("display.max_colwidth", max_colwidth): + with option_context("display.max_colwidth", max_colwidth): result = getattr(df, method)() expected = expected(max_colwidth) assert expected in result @@ -782,7 +782,7 @@ def test_to_html_invalid_classes_type(classes): def test_to_html_round_column_headers(): # GH 17280 df = DataFrame([1], columns=[0.55555]) - with pd.option_context("display.precision", 3): + with option_context("display.precision", 3): html = df.to_html(notebook=False) notebook = df.to_html(notebook=True) assert "0.55555" in html diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 89248447c98d3..9a793e274ce48 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -149,7 +149,7 @@ def test_frame_default_orient(self, float_frame): @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype, float_frame): data = float_frame.to_json(orient=orient) - result = pd.read_json( + result = read_json( data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) @@ -162,7 +162,7 @@ def test_roundtrip_simple(self, orient, convert_axes, numpy, dtype, float_frame) @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_intframe(self, orient, convert_axes, numpy, dtype, int_frame): data = int_frame.to_json(orient=orient) - result = pd.read_json( + result = read_json( data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) expected = int_frame @@ -195,7 +195,7 @@ def test_roundtrip_str_axes(self, request, orient, convert_axes, numpy, dtype): ) data = df.to_json(orient=orient) - result = pd.read_json( + result = read_json( data, orient=orient, convert_axes=convert_axes, numpy=numpy, dtype=dtype ) @@ -235,9 +235,7 @@ def 
test_roundtrip_categorical(self, request, orient, convert_axes, numpy): pytest.mark.xfail(reason=f"Orient {orient} is broken with numpy=True") ) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy - ) + result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy) expected = self.categorical.copy() expected.index = expected.index.astype(str) # Categorical not preserved @@ -252,9 +250,7 @@ def test_roundtrip_categorical(self, request, orient, convert_axes, numpy): @pytest.mark.parametrize("numpy", [True, False]) def test_roundtrip_empty(self, orient, convert_axes, numpy, empty_frame): data = empty_frame.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy - ) + result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy) expected = empty_frame.copy() # TODO: both conditions below are probably bugs @@ -271,9 +267,7 @@ def test_roundtrip_empty(self, orient, convert_axes, numpy, empty_frame): def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame): # TODO: improve coverage with date_format parameter data = datetime_frame.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy - ) + result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy) expected = datetime_frame.copy() if not convert_axes: # one off for ts handling @@ -305,9 +299,7 @@ def test_roundtrip_mixed(self, request, orient, convert_axes, numpy): df = DataFrame(data=values, index=index) data = df.to_json(orient=orient) - result = pd.read_json( - data, orient=orient, convert_axes=convert_axes, numpy=numpy - ) + result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy) expected = df.copy() expected = expected.assign(**expected.select_dtypes("number").astype(np.int64)) @@ -487,12 +479,12 @@ def test_v12_compat(self, datapath): dirpath = datapath("io", "json", "data") v12_json = os.path.join(dirpath, "tsframe_v012.json") - df_unser = pd.read_json(v12_json) + df_unser = read_json(v12_json) tm.assert_frame_equal(df, df_unser) df_iso = df.drop(["modified"], axis=1) v12_iso_json = os.path.join(dirpath, "tsframe_iso_v012.json") - df_unser_iso = pd.read_json(v12_iso_json) + df_unser_iso = read_json(v12_iso_json) tm.assert_frame_equal(df_iso, df_unser_iso) def test_blocks_compat_GH9037(self): @@ -581,7 +573,7 @@ def test_blocks_compat_GH9037(self): # JSON deserialisation always creates unicode strings df_mixed.columns = df_mixed.columns.astype("unicode") - df_roundtrip = pd.read_json(df_mixed.to_json(orient="split"), orient="split") + df_roundtrip = read_json(df_mixed.to_json(orient="split"), orient="split") tm.assert_frame_equal( df_mixed, df_roundtrip, @@ -654,7 +646,7 @@ def test_series_default_orient(self, string_series): @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_simple(self, orient, numpy, string_series): data = string_series.to_json(orient=orient) - result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + result = read_json(data, typ="series", orient=orient, numpy=numpy) expected = string_series if orient in ("values", "records"): @@ -668,9 +660,7 @@ def test_series_roundtrip_simple(self, orient, numpy, string_series): @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_object(self, orient, numpy, dtype, object_series): data = object_series.to_json(orient=orient) - result = pd.read_json( - data, typ="series", orient=orient, 
numpy=numpy, dtype=dtype - ) + result = read_json(data, typ="series", orient=orient, numpy=numpy, dtype=dtype) expected = object_series if orient in ("values", "records"): @@ -683,7 +673,7 @@ def test_series_roundtrip_object(self, orient, numpy, dtype, object_series): @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_empty(self, orient, numpy, empty_series): data = empty_series.to_json(orient=orient) - result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + result = read_json(data, typ="series", orient=orient, numpy=numpy) expected = empty_series if orient in ("values", "records"): @@ -696,7 +686,7 @@ def test_series_roundtrip_empty(self, orient, numpy, empty_series): @pytest.mark.parametrize("numpy", [True, False]) def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series): data = datetime_series.to_json(orient=orient) - result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + result = read_json(data, typ="series", orient=orient, numpy=numpy) expected = datetime_series if orient in ("values", "records"): @@ -711,7 +701,7 @@ def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series): def test_series_roundtrip_numeric(self, orient, numpy, dtype): s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) data = s.to_json(orient=orient) - result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + result = read_json(data, typ="series", orient=orient, numpy=numpy) expected = s.copy() if orient in ("values", "records"): @@ -747,7 +737,7 @@ def test_series_with_dtype(self): def test_series_with_dtype_datetime(self, dtype, expected): s = Series(["2000-01-01"], dtype="datetime64[ns]") data = s.to_json() - result = pd.read_json(data, typ="series", dtype=dtype) + result = read_json(data, typ="series", dtype=dtype) tm.assert_series_equal(result, expected) def test_frame_from_json_precise_float(self): @@ -1001,7 +991,7 @@ def test_round_trip_exception_(self): csv = "https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv" df = pd.read_csv(csv) s = df.to_json() - result = pd.read_json(s) + result = read_json(s) tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df) @tm.network @@ -1025,17 +1015,17 @@ def test_timedelta(self): s = Series([timedelta(23), timedelta(seconds=5)]) assert s.dtype == "timedelta64[ns]" - result = pd.read_json(s.to_json(), typ="series").apply(converter) + result = read_json(s.to_json(), typ="series").apply(converter) tm.assert_series_equal(result, s) s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) assert s.dtype == "timedelta64[ns]" - result = pd.read_json(s.to_json(), typ="series").apply(converter) + result = read_json(s.to_json(), typ="series").apply(converter) tm.assert_series_equal(result, s) frame = DataFrame([timedelta(23), timedelta(seconds=5)]) assert frame[0].dtype == "timedelta64[ns]" - tm.assert_frame_equal(frame, pd.read_json(frame.to_json()).apply(converter)) + tm.assert_frame_equal(frame, read_json(frame.to_json()).apply(converter)) frame = DataFrame( { @@ -1045,7 +1035,7 @@ def test_timedelta(self): } ) - result = pd.read_json(frame.to_json(date_unit="ns")) + result = read_json(frame.to_json(date_unit="ns")) result["a"] = pd.to_timedelta(result.a, unit="ns") result["c"] = pd.to_datetime(result.c) tm.assert_frame_equal(frame, result) @@ -1056,7 +1046,7 @@ def test_mixed_timedelta_datetime(self): expected = DataFrame( {"a": [pd.Timedelta(frame.a[0]).value, Timestamp(frame.a[1]).value]} ) - result = 
pd.read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) + result = read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) tm.assert_frame_equal(result, expected, check_index_type=False) @pytest.mark.parametrize("as_object", [True, False]) @@ -1086,7 +1076,7 @@ def test_default_handler(self): value = object() frame = DataFrame({"a": [7, value]}) expected = DataFrame({"a": [7, str(value)]}) - result = pd.read_json(frame.to_json(default_handler=str)) + result = read_json(frame.to_json(default_handler=str)) tm.assert_frame_equal(expected, result, check_index_type=False) def test_default_handler_indirect(self): @@ -1319,14 +1309,14 @@ def test_to_jsonl(self): result = df.to_json(orient="records", lines=True) expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}\n' assert result == expected - tm.assert_frame_equal(pd.read_json(result, lines=True), df) + tm.assert_frame_equal(read_json(result, lines=True), df) # GH15096: escaped characters in columns and data df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) result = df.to_json(orient="records", lines=True) expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}\n' assert result == expected - tm.assert_frame_equal(pd.read_json(result, lines=True), df) + tm.assert_frame_equal(read_json(result, lines=True), df) # TODO: there is a near-identical test for pytables; can we share? def test_latin_encoding(self): @@ -1382,14 +1372,14 @@ def test_from_json_to_json_table_index_and_columns(self, index, columns): # GH25433 GH25435 expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) dfjson = expected.to_json(orient="table") - result = pd.read_json(dfjson, orient="table") + result = read_json(dfjson, orient="table") tm.assert_frame_equal(result, expected) def test_from_json_to_json_table_dtypes(self): # GH21345 expected = DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) dfjson = expected.to_json(orient="table") - result = pd.read_json(dfjson, orient="table") + result = read_json(dfjson, orient="table") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) @@ -1399,7 +1389,7 @@ def test_read_json_table_dtype_raises(self, dtype): dfjson = df.to_json(orient="table") msg = "cannot pass both dtype and orient='table'" with pytest.raises(ValueError, match=msg): - pd.read_json(dfjson, orient="table", dtype=dtype) + read_json(dfjson, orient="table", dtype=dtype) def test_read_json_table_convert_axes_raises(self): # GH25433 GH25435 @@ -1407,7 +1397,7 @@ def test_read_json_table_convert_axes_raises(self): dfjson = df.to_json(orient="table") msg = "cannot pass both convert_axes and orient='table'" with pytest.raises(ValueError, match=msg): - pd.read_json(dfjson, orient="table", convert_axes=True) + read_json(dfjson, orient="table", convert_axes=True) @pytest.mark.parametrize( "data, expected", @@ -1681,7 +1671,7 @@ def test_json_negative_indent_raises(self): def test_emca_262_nan_inf_support(self): # GH 12213 data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' - result = pd.read_json(data) + result = read_json(data) expected = DataFrame( ["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] ) diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index a8cf94421dbde..711addb1ac237 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -93,7 +93,7 @@ def test_readjson_chunks(lines_json_df, chunksize): def 
test_readjson_chunksize_requires_lines(lines_json_df): msg = "chunksize can only be passed if lines=True" with pytest.raises(ValueError, match=msg): - with pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2) as _: + with read_json(StringIO(lines_json_df), lines=False, chunksize=2) as _: pass @@ -102,10 +102,10 @@ def test_readjson_chunks_series(): s = pd.Series({"A": 1, "B": 2}) strio = StringIO(s.to_json(lines=True, orient="records")) - unchunked = pd.read_json(strio, lines=True, typ="Series") + unchunked = read_json(strio, lines=True, typ="Series") strio = StringIO(s.to_json(lines=True, orient="records")) - with pd.read_json(strio, lines=True, typ="Series", chunksize=1) as reader: + with read_json(strio, lines=True, typ="Series", chunksize=1) as reader: chunked = pd.concat(reader) tm.assert_series_equal(chunked, unchunked) @@ -114,7 +114,7 @@ def test_readjson_chunks_series(): def test_readjson_each_chunk(lines_json_df): # Other tests check that the final result of read_json(chunksize=True) # is correct. This checks the intermediate chunks. - with pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2) as reader: + with read_json(StringIO(lines_json_df), lines=True, chunksize=2) as reader: chunks = list(reader) assert chunks[0].shape == (2, 2) assert chunks[1].shape == (1, 2) @@ -124,9 +124,9 @@ def test_readjson_chunks_from_file(): with tm.ensure_clean("test.json") as path: df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) df.to_json(path, lines=True, orient="records") - with pd.read_json(path, lines=True, chunksize=1) as reader: + with read_json(path, lines=True, chunksize=1) as reader: chunked = pd.concat(reader) - unchunked = pd.read_json(path, lines=True) + unchunked = read_json(path, lines=True) tm.assert_frame_equal(unchunked, chunked) @@ -164,9 +164,7 @@ def test_readjson_invalid_chunksize(lines_json_df, chunksize): msg = r"'chunksize' must be an integer >=1" with pytest.raises(ValueError, match=msg): - with pd.read_json( - StringIO(lines_json_df), lines=True, chunksize=chunksize - ) as _: + with read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) as _: pass @@ -189,7 +187,7 @@ def test_readjson_chunks_multiple_empty_lines(chunksize): {"A":3,"B":6} """ orig = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - test = pd.read_json(j, lines=True, chunksize=chunksize) + test = read_json(j, lines=True, chunksize=chunksize) if chunksize is not None: with test: test = pd.concat(test) @@ -215,7 +213,7 @@ def test_readjson_nrows(nrows): {"a": 3, "b": 4} {"a": 5, "b": 6} {"a": 7, "b": 8}""" - result = pd.read_json(jsonl, lines=True, nrows=nrows) + result = read_json(jsonl, lines=True, nrows=nrows) expected = DataFrame({"a": [1, 3, 5, 7], "b": [2, 4, 6, 8]}).iloc[:nrows] tm.assert_frame_equal(result, expected) @@ -243,7 +241,7 @@ def test_readjson_nrows_requires_lines(): {"a": 7, "b": 8}""" msg = "nrows can only be passed if lines=True" with pytest.raises(ValueError, match=msg): - pd.read_json(jsonl, lines=False, nrows=2) + read_json(jsonl, lines=False, nrows=2) def test_readjson_lines_chunks_fileurl(datapath): @@ -256,7 +254,7 @@ def test_readjson_lines_chunks_fileurl(datapath): ] os_path = datapath("io", "json", "data", "line_delimited.json") file_url = Path(os_path).as_uri() - with pd.read_json(file_url, lines=True, chunksize=1) as url_reader: + with read_json(file_url, lines=True, chunksize=1) as url_reader: for index, chuck in enumerate(url_reader): tm.assert_frame_equal(chuck, df_list_expected[index]) @@ -285,5 +283,5 @@ def __iter__(self): return 
iter(self.stringio)
 
     reader = MyReader(jsonl)
-    assert len(list(pd.read_json(reader, lines=True, chunksize=100))) > 1
+    assert len(list(read_json(reader, lines=True, chunksize=100))) > 1
     assert reader.read_count > 10
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index 8e1e9fb6e458f..4bc3f3c38f506 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -193,7 +193,7 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers, request):
         # 2021-02-21 this occasionally fails on the CI with an unexpected
         # ResourceWarning that we have been unable to track down,
         # see GH#38630
-        if "ResourceError" not in str(err) or parser.engine != "python":
+        if "ResourceWarning" not in str(err) or parser.engine != "python":
             raise
 
     # Check the main assertion of the test before re-raising
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 9f94f3f8f8a8b..72644693f652b 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -1526,7 +1526,7 @@ def test_parse_timezone(all_parsers):
 
     dti = DatetimeIndex(
         list(
-            pd.date_range(
+            date_range(
                 start="2018-01-04 09:01:00",
                 end="2018-01-04 09:05:00",
                 freq="1min",
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
index 5586b4915b6ea..9739a2a75886a 100644
--- a/pandas/tests/io/parser/test_read_fwf.py
+++ b/pandas/tests/io/parser/test_read_fwf.py
@@ -16,7 +16,6 @@
 
 from pandas.errors import EmptyDataError
 
-import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -687,7 +686,7 @@ def test_binary_mode():
     with tm.ensure_clean() as path:
         Path(path).write_text(data)
         with open(path, "rb") as file:
-            df = pd.read_fwf(file)
+            df = read_fwf(file)
             file.seek(0)
             tm.assert_frame_equal(df, df_reference)
 
@@ -701,7 +700,7 @@ def test_encoding_mmap(memory_map):
     """
     encoding = "iso8859_1"
     data = BytesIO(" 1 A Ä 2\n".encode(encoding))
-    df = pd.read_fwf(
+    df = read_fwf(
         data,
         header=None,
         widths=[2, 2, 2, 2],
diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py
index 3eebeee9788c6..8c324d73a7e54 100644
--- a/pandas/tests/io/pytables/test_append.py
+++ b/pandas/tests/io/pytables/test_append.py
@@ -415,12 +415,12 @@ def check_col(key, name, size):
         # just make sure there is a longer string:
         df2 = df.copy().reset_index().assign(C="longer").set_index("C")
         store.append("ss3", df2)
-        tm.assert_frame_equal(store.select("ss3"), pd.concat([df, df2]))
+        tm.assert_frame_equal(store.select("ss3"), concat([df, df2]))
 
         # same as above, with a Series
         store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})
         store.append("ss4", df2["B"])
-        tm.assert_series_equal(store.select("ss4"), pd.concat([df["B"], df2["B"]]))
+        tm.assert_series_equal(store.select("ss4"), concat([df["B"], df2["B"]]))
 
         # with nans
         _maybe_remove(store, "df")
diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py
index 11ee5e3564634..2ae330e5139be 100644
--- a/pandas/tests/io/pytables/test_errors.py
+++ b/pandas/tests/io/pytables/test_errors.py
@@ -6,7 +6,6 @@
 import numpy as np
 import pytest
 
-import pandas as pd
 from pandas import (
     CategoricalIndex,
     DataFrame,
@@ -207,7 +206,7 @@ def test_unsuppored_hdf_file_error(datapath):
     )
 
     with pytest.raises(ValueError, match=message):
-        pd.read_hdf(data_path)
+        read_hdf(data_path)
 
 
 def test_read_hdf_errors(setup_path):
diff
--git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index 6340311b234f1..88e2b5f080282 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -5,7 +5,6 @@ from pandas.compat import is_platform_little_endian -import pandas as pd from pandas import ( DataFrame, HDFStore, @@ -188,7 +187,7 @@ def test_complibs_default_settings(setup_path): # default value with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df", complevel=9) - result = pd.read_hdf(tmpfile, "df") + result = read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) with tables.open_file(tmpfile, mode="r") as h5file: @@ -199,7 +198,7 @@ def test_complibs_default_settings(setup_path): # Set complib and check to see if compression is disabled with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df", complib="zlib") - result = pd.read_hdf(tmpfile, "df") + result = read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) with tables.open_file(tmpfile, mode="r") as h5file: @@ -210,7 +209,7 @@ def test_complibs_default_settings(setup_path): # Check if not setting complib or complevel results in no compression with ensure_clean_path(setup_path) as tmpfile: df.to_hdf(tmpfile, "df") - result = pd.read_hdf(tmpfile, "df") + result = read_hdf(tmpfile, "df") tm.assert_frame_equal(result, df) with tables.open_file(tmpfile, mode="r") as h5file: @@ -256,7 +255,7 @@ def test_complibs(setup_path): # Write and read file to see if data is consistent df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) - result = pd.read_hdf(tmpfile, gname) + result = read_hdf(tmpfile, gname) tm.assert_frame_equal(result, df) # Open file and check metadata diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index f8d302a0190f8..1c9e63c66aadb 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -35,7 +35,7 @@ def test_read_missing_key_close_store(setup_path): df.to_hdf(path, "k1") with pytest.raises(KeyError, match="'No object named k2 in the file'"): - pd.read_hdf(path, "k2") + read_hdf(path, "k2") # smoke test to test that file is properly closed after # read with KeyError before another write @@ -51,11 +51,11 @@ def test_read_missing_key_opened_store(setup_path): with HDFStore(path, "r") as store: with pytest.raises(KeyError, match="'No object named k2 in the file'"): - pd.read_hdf(store, "k2") + read_hdf(store, "k2") # Test that the file is still open after a KeyError and that we can # still read from it. 
- pd.read_hdf(store, "k1") + read_hdf(store, "k1") def test_read_column(setup_path): @@ -315,7 +315,7 @@ def test_read_hdf_series_mode_r(format, setup_path): series = tm.makeFloatSeries() with ensure_clean_path(setup_path) as path: series.to_hdf(path, key="data", format=format) - result = pd.read_hdf(path, key="data", mode="r") + result = read_hdf(path, key="data", mode="r") tm.assert_series_equal(result, series) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index a8f63bdc5fb2f..8ad5dbc049380 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -978,5 +978,5 @@ def test_select_empty_where(where): with ensure_clean_path("empty_where.h5") as path: with HDFStore(path) as store: store.put("df", df, "t") - result = pd.read_hdf(store, "df", where=where) + result = read_hdf(store, "df", where=where) tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index ef75c86190a25..b0a11b5e7690e 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -335,12 +335,12 @@ def test_to_hdf_with_min_itemsize(setup_path): # just make sure there is a longer string: df2 = df.copy().reset_index().assign(C="longer").set_index("C") df2.to_hdf(path, "ss3", append=True, format="table") - tm.assert_frame_equal(pd.read_hdf(path, "ss3"), pd.concat([df, df2])) + tm.assert_frame_equal(read_hdf(path, "ss3"), concat([df, df2])) # same as above, with a Series df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6}) df2["B"].to_hdf(path, "ss4", append=True, format="table") - tm.assert_series_equal(pd.read_hdf(path, "ss4"), pd.concat([df["B"], df2["B"]])) + tm.assert_series_equal(read_hdf(path, "ss4"), concat([df["B"], df2["B"]])) @pytest.mark.parametrize("format", ["fixed", "table"]) @@ -352,7 +352,7 @@ def test_to_hdf_errors(format, setup_path): # GH 20835 ser.to_hdf(path, "table", format=format, errors="surrogatepass") - result = pd.read_hdf(path, "table", errors="surrogatepass") + result = read_hdf(path, "table", errors="surrogatepass") tm.assert_series_equal(result, ser) @@ -532,11 +532,7 @@ def test_same_name_scoping(setup_path): with ensure_clean_store(setup_path) as store: - import pandas as pd - - df = DataFrame( - np.random.randn(20, 2), index=pd.date_range("20130101", periods=20) - ) + df = DataFrame(np.random.randn(20, 2), index=date_range("20130101", periods=20)) store.put("df", df, format="table") expected = df[df.index > Timestamp("20130105")] @@ -762,7 +758,7 @@ def test_start_stop_fixed(setup_path): # fixed, GH 8287 df = DataFrame( {"A": np.random.rand(20), "B": np.random.rand(20)}, - index=pd.date_range("20130101", periods=20), + index=date_range("20130101", periods=20), ) store.put("df", df) @@ -818,7 +814,7 @@ def test_path_pathlib(setup_path): df = tm.makeDataFrame() result = tm.round_trip_pathlib( - lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df") + lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df") ) tm.assert_frame_equal(df, result) @@ -849,7 +845,7 @@ def writer(path): def reader(path): with HDFStore(path) as store: - return pd.read_hdf(store, "df") + return read_hdf(store, "df") result = tm.round_trip_pathlib(writer, reader) tm.assert_frame_equal(df, result) @@ -858,7 +854,7 @@ def reader(path): def test_pickle_path_localpath(setup_path): df = tm.makeDataFrame() result = tm.round_trip_pathlib( - lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df") + 
lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df") ) tm.assert_frame_equal(df, result) @@ -872,7 +868,7 @@ def writer(path): def reader(path): with HDFStore(path) as store: - return pd.read_hdf(store, "df") + return read_hdf(store, "df") result = tm.round_trip_localpath(writer, reader) tm.assert_frame_equal(df, result) @@ -1013,5 +1009,5 @@ def test_to_hdf_with_object_column_names(setup_path): with ensure_clean_path(setup_path) as path: with catch_warnings(record=True): df.to_hdf(path, "df", format="table", data_columns=True) - result = pd.read_hdf(path, "df", where=f"index = [{df.index[0]}]") + result = read_hdf(path, "df", where=f"index = [{df.index[0]}]") assert len(result) diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index f67efb4cc60be..0532ddd17cd19 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -327,7 +327,7 @@ def test_legacy_datetimetz_object(datapath, setup_path): def test_dst_transitions(setup_path): # make sure we are not failing on transitions with ensure_clean_store(setup_path) as store: - times = pd.date_range( + times = date_range( "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", @@ -347,7 +347,7 @@ def test_dst_transitions(setup_path): def test_read_with_where_tz_aware_index(setup_path): # GH 11926 periods = 10 - dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC") + dts = date_range("20151201", periods=periods, freq="D", tz="UTC") mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"]) expected = DataFrame({"MYCOL": 0}, index=mi) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index e60807db55f97..45d9ad430aa43 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -3,7 +3,6 @@ import numpy as np import pytest -import pandas as pd from pandas import ( DataFrame, get_option, @@ -216,7 +215,7 @@ def test_read_clipboard_infer_excel(self, request, mock_clipboard): """.strip() ) mock_clipboard[request.node.name] = text - df = pd.read_clipboard(**clip_kwargs) + df = read_clipboard(**clip_kwargs) # excel data is parsed correctly assert df.iloc[1][1] == "Harry Carney" @@ -230,7 +229,7 @@ def test_read_clipboard_infer_excel(self, request, mock_clipboard): """.strip() ) mock_clipboard[request.node.name] = text - res = pd.read_clipboard(**clip_kwargs) + res = read_clipboard(**clip_kwargs) text = dedent( """ @@ -240,7 +239,7 @@ def test_read_clipboard_infer_excel(self, request, mock_clipboard): """.strip() ) mock_clipboard[request.node.name] = text - exp = pd.read_clipboard(**clip_kwargs) + exp = read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) @@ -250,7 +249,7 @@ def test_invalid_encoding(self, df): with pytest.raises(ValueError, match=msg): df.to_clipboard(encoding="ascii") with pytest.raises(NotImplementedError, match=msg): - pd.read_clipboard(encoding="ascii") + read_clipboard(encoding="ascii") @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"]) def test_round_trip_valid_encodings(self, enc, df): diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index db742fb69dd10..e1dcec56913f9 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -9,6 +9,7 @@ import mmap import os from pathlib import Path +import tempfile import pytest @@ -119,10 +120,11 @@ def test_infer_compression_from_path(self, extension, expected, path_type): @pytest.mark.parametrize("path_type", [str, 
CustomFSPath, Path]) def test_get_handle_with_path(self, path_type): # ignore LocalPath: it creates strange paths: /absolute/~/sometest - filename = path_type("~/sometest") - with icom.get_handle(filename, "w") as handles: - assert os.path.isabs(handles.handle.name) - assert os.path.expanduser(filename) == handles.handle.name + with tempfile.TemporaryDirectory(dir=Path.home()) as tmp: + filename = path_type("~/" + Path(tmp).name + "/sometest") + with icom.get_handle(filename, "w") as handles: + assert Path(handles.handle.name).is_absolute() + assert os.path.expanduser(filename) == handles.handle.name def test_get_handle_with_buffer(self): input_buffer = StringIO() diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index fc83026f67930..ab0b3b08a11e8 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -177,12 +177,12 @@ def test_write_with_index(self): def test_path_pathlib(self): df = tm.makeDataFrame().reset_index() - result = tm.round_trip_pathlib(df.to_feather, pd.read_feather) + result = tm.round_trip_pathlib(df.to_feather, read_feather) tm.assert_frame_equal(df, result) def test_path_localpath(self): df = tm.makeDataFrame().reset_index() - result = tm.round_trip_localpath(df.to_feather, pd.read_feather) + result = tm.round_trip_localpath(df.to_feather, read_feather) tm.assert_frame_equal(df, result) @td.skip_if_no("pyarrow", min_version="0.16.1.dev") @@ -198,6 +198,6 @@ def test_http_path(self, feather_file): "https://raw.githubusercontent.com/pandas-dev/pandas/master/" "pandas/tests/io/data/feather/feather-0_3_1.feather" ) - expected = pd.read_feather(feather_file) - res = pd.read_feather(url) + expected = read_feather(feather_file) + res = read_feather(url) tm.assert_frame_equal(expected, res) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index d5567f1208c8c..edb20c7aa9254 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -359,7 +359,7 @@ def test_parquet_read_from_url(self, df_compat, engine): "https://raw.githubusercontent.com/pandas-dev/pandas/" "master/pandas/tests/io/data/parquet/simple.parquet" ) - df = pd.read_parquet(url) + df = read_parquet(url) tm.assert_frame_equal(df, df_compat) @@ -605,7 +605,7 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): assert isinstance(buf_bytes, bytes) buf_stream = BytesIO(buf_bytes) - res = pd.read_parquet(buf_stream) + res = read_parquet(buf_stream) tm.assert_frame_equal(df_full, res) @@ -740,7 +740,7 @@ def test_s3_roundtrip_for_dir( def test_read_file_like_obj_support(self, df_compat): buffer = BytesIO() df_compat.to_parquet(buffer) - df_from_buf = pd.read_parquet(buffer) + df_from_buf = read_parquet(buffer) tm.assert_frame_equal(df_compat, df_from_buf) @td.skip_if_no("pyarrow") @@ -748,7 +748,7 @@ def test_expand_user(self, df_compat, monkeypatch): monkeypatch.setenv("HOME", "TestingUser") monkeypatch.setenv("USERPROFILE", "TestingUser") with pytest.raises(OSError, match=r".*TestingUser.*"): - pd.read_parquet("~/file.parquet") + read_parquet("~/file.parquet") with pytest.raises(OSError, match=r".*TestingUser.*"): df_compat.to_parquet("~/file.parquet") diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 63dfbd59acd94..8f5a7673fa45f 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -428,7 +428,7 @@ def test_read(self, protocol, get_random_path): @pytest.mark.parametrize( ["pickle_file", "excols"], [ - ("test_py27.pkl", 
pd.Index(["a", "b", "c"])), + ("test_py27.pkl", Index(["a", "b", "c"])), ( "test_mi_py27.pkl", pd.MultiIndex.from_arrays([["a", "b", "c"], ["A", "B", "C"]]), diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0be26ab285079..e57030a4bf125 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -675,7 +675,7 @@ def test_read_sql_with_chunksize_no_result(self): query = "SELECT * FROM iris_view WHERE SepalLength < 0.0" with_batch = sql.read_sql_query(query, self.conn, chunksize=5) without_batch = sql.read_sql_query(query, self.conn) - tm.assert_frame_equal(pd.concat(with_batch), without_batch) + tm.assert_frame_equal(concat(with_batch), without_batch) def test_to_sql(self): sql.to_sql(self.test_frame1, "test_frame1", self.conn) @@ -1592,7 +1592,7 @@ def check(col): ) # GH11216 - df = pd.read_sql_query("select * from types_test_data", self.conn) + df = read_sql_query("select * from types_test_data", self.conn) if not hasattr(df, "DateColWithTz"): pytest.skip("no column with datetime with time zone") @@ -1602,7 +1602,7 @@ def check(col): col = df.DateColWithTz assert is_datetime64tz_dtype(col.dtype) - df = pd.read_sql_query( + df = read_sql_query( "select * from types_test_data", self.conn, parse_dates=["DateColWithTz"] ) if not hasattr(df, "DateColWithTz"): @@ -1612,11 +1612,9 @@ def check(col): assert str(col.dt.tz) == "UTC" check(df.DateColWithTz) - df = pd.concat( + df = concat( list( - pd.read_sql_query( - "select * from types_test_data", self.conn, chunksize=1 - ) + read_sql_query("select * from types_test_data", self.conn, chunksize=1) ), ignore_index=True, ) @@ -2851,7 +2849,7 @@ def test_chunksize_read_type(self): sql.to_sql(frame, name="test", con=self.conn) query = "select * from test" chunksize = 5 - chunk_gen = pd.read_sql_query( + chunk_gen = read_sql_query( sql=query, con=self.conn, chunksize=chunksize, index_col="index" ) chunk_df = next(chunk_gen) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index de1f3cf1e6338..05a6b3c360c61 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -31,6 +31,7 @@ StataMissingValue, StataReader, StataWriterUTF8, + ValueLabelTypeMismatch, read_stata, ) @@ -435,7 +436,7 @@ def test_read_write_dta11(self): formatted = formatted.astype(np.int32) with tm.ensure_clean() as path: - with tm.assert_produces_warning(pd.io.stata.InvalidColumnName): + with tm.assert_produces_warning(InvalidColumnName): original.to_stata(path, None) written_and_read_again = self.read_dta(path) @@ -643,7 +644,7 @@ def test_105(self): # Data obtained from: # http://go.worldbank.org/ZXY29PVJ21 dpath = os.path.join(self.dirpath, "S4_EDUC1.dta") - df = pd.read_stata(dpath) + df = read_stata(dpath) df0 = [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]] df0 = DataFrame(df0) df0.columns = ["clustnum", "pri_schl", "psch_num", "psch_dis"] @@ -1022,7 +1023,7 @@ def test_categorical_warnings_and_errors(self): [original[col].astype("category") for col in original], axis=1 ) - with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch): + with tm.assert_produces_warning(ValueLabelTypeMismatch): original.to_stata(path) # should get a warning for mixed content @@ -1541,7 +1542,7 @@ def test_value_labels_iterator(self, write_index): with tm.ensure_clean() as path: df.to_stata(path, write_index=write_index) - with pd.read_stata(path, iterator=True) as dta_iter: + with read_stata(path, iterator=True) as dta_iter: value_labels = dta_iter.value_labels() assert value_labels == {"A": {0: "A", 
1: "B", 2: "C", 3: "E"}} @@ -1551,7 +1552,7 @@ def test_set_index(self): df.index.name = "index" with tm.ensure_clean() as path: df.to_stata(path) - reread = pd.read_stata(path, index_col="index") + reread = read_stata(path, index_col="index") tm.assert_frame_equal(df, reread) @pytest.mark.parametrize( @@ -1652,7 +1653,7 @@ def test_convert_strl_name_swap(self): ) original.index.name = "index" - with tm.assert_produces_warning(pd.io.stata.InvalidColumnName): + with tm.assert_produces_warning(InvalidColumnName): with tm.ensure_clean() as path: original.to_stata(path, convert_strl=["long", 1], version=117) reread = self.read_dta(path) @@ -1691,7 +1692,7 @@ def test_nonfile_writing(self, version): bio.seek(0) with open(path, "wb") as dta: dta.write(bio.read()) - reread = pd.read_stata(path, index_col="index") + reread = read_stata(path, index_col="index") tm.assert_frame_equal(df, reread) def test_gzip_writing(self): @@ -1702,7 +1703,7 @@ def test_gzip_writing(self): with gzip.GzipFile(path, "wb") as gz: df.to_stata(gz, version=114) with gzip.GzipFile(path, "rb") as gz: - reread = pd.read_stata(gz, index_col="index") + reread = read_stata(gz, index_col="index") tm.assert_frame_equal(df, reread) def test_unicode_dta_118(self): @@ -1873,8 +1874,8 @@ def test_backward_compat(version, datapath): data_base = datapath("io", "data", "stata") ref = os.path.join(data_base, "stata-compat-118.dta") old = os.path.join(data_base, f"stata-compat-{version}.dta") - expected = pd.read_stata(ref) - old_dta = pd.read_stata(old) + expected = read_stata(ref) + old_dta = read_stata(old) tm.assert_frame_equal(old_dta, expected, check_dtype=False) @@ -1984,7 +1985,7 @@ def test_iterator_value_labels(): with tm.ensure_clean() as path: df.to_stata(path, write_index=False) expected = pd.Index(["a_label", "b_label", "c_label"], dtype="object") - with pd.read_stata(path, chunksize=100) as reader: + with read_stata(path, chunksize=100) as reader: for j, chunk in enumerate(reader): for i in range(2): tm.assert_index_equal(chunk.dtypes[i].categories, expected) @@ -2025,7 +2026,7 @@ def test_compression_roundtrip(compression): # explicitly ensure file was compressed. with tm.decompress_file(path, compression) as fh: contents = io.BytesIO(fh.read()) - reread = pd.read_stata(contents, index_col="index") + reread = read_stata(contents, index_col="index") tm.assert_frame_equal(df, reread) @@ -2049,5 +2050,5 @@ def test_stata_compression(compression_only, read_infer, to_infer): with tm.ensure_clean(filename) as path: df.to_stata(path, compression=to_compression) - result = pd.read_stata(path, compression=read_compression, index_col="index") + result = read_stata(path, compression=read_compression, index_col="index") tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py new file mode 100644 index 0000000000000..97793ce8f65b8 --- /dev/null +++ b/pandas/tests/io/xml/test_to_xml.py @@ -0,0 +1,1301 @@ +from io import ( + BytesIO, + StringIO, +) +import os +import sys +from typing import Union + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.common import get_handle +from pandas.io.xml import read_xml + +""" +CHECKLIST + +[x] - ValueError: "Values for parser can only be lxml or etree." + +etree +[x] - ImportError: "lxml not found, please install or use the etree parser." 
+[X] - TypeError: "...is not a valid type for attr_cols" +[X] - TypeError: "...is not a valid type for elem_cols" +[X] - LookupError: "unknown encoding" +[X] - KeyError: "...is not included in namespaces" +[X] - KeyError: "no valid column" +[X] - ValueError: "To use stylesheet, you need lxml installed..." +[] - OSError: (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) +[X] - FileNotFoundError: "No such file or directory" +[X] - PermissionError: "Forbidden" + +lxml +[X] - TypeError: "...is not a valid type for attr_cols" +[X] - TypeError: "...is not a valid type for elem_cols" +[X] - LookupError: "unknown encoding" +[] - OSError: (NEED PERMISSOIN ISSUE, DISK FULL, ETC.) +[X] - FileNotFoundError: "No such file or directory" +[X] - KeyError: "...is not included in namespaces" +[X] - KeyError: "no valid column" +[X] - ValueError: "stylesheet is not a url, file, or xml string." +[] - LookupError: (NEED WRONG ENCODING FOR FILE OUTPUT) +[] - URLError: (USUALLY DUE TO NETWORKING) +[] - HTTPError: (NEED AN ONLINE STYLESHEET) +[X] - OSError: "failed to load external entity" +[X] - XMLSyntaxError: "Opening and ending tag mismatch" +[X] - XSLTApplyError: "Cannot resolve URI" +[X] - XSLTParseError: "failed to compile" +[X] - PermissionError: "Forbidden" +""" + +geom_df = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } +) + +planet_df = DataFrame( + { + "planet": [ + "Mercury", + "Venus", + "Earth", + "Mars", + "Jupiter", + "Saturn", + "Uranus", + "Neptune", + ], + "type": [ + "terrestrial", + "terrestrial", + "terrestrial", + "terrestrial", + "gas giant", + "gas giant", + "ice giant", + "ice giant", + ], + "location": [ + "inner", + "inner", + "inner", + "inner", + "outer", + "outer", + "outer", + "outer", + ], + "mass": [ + 0.330114, + 4.86747, + 5.97237, + 0.641712, + 1898.187, + 568.3174, + 86.8127, + 102.4126, + ], + } +) + +from_file_expected = """\ + + + + 0 + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + 1 + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + 2 + web + Learning XML + Erik T. Ray + 2003 + 39.95 + +""" + + +def equalize_decl(doc): + # etree and lxml differ on quotes and case in xml declaration + if doc is not None: + doc = doc.replace( + ' + + + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + web + Learning XML + Erik T. Ray + 2003 + 39.95 + +""" + + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml(path, index=False, parser=parser) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + output = equalize_decl(output) + + assert output == expected + + +def test_index_false_rename_row_root(datapath, parser): + expected = """\ + + + + cooking + Everyday Italian + Giada De Laurentiis + 2005 + 30.0 + + + children + Harry Potter + J K. Rowling + 2005 + 29.99 + + + web + Learning XML + Erik T. 
Ray + 2003 + 39.95 + +""" + + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + + with tm.ensure_clean("test.xml") as path: + df_file.to_xml( + path, index=False, root_name="books", row_name="book", parser=parser + ) + with open(path, "rb") as f: + output = f.read().decode("utf-8").strip() + + output = equalize_decl(output) + + assert output == expected + + +# NA_REP + +na_expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +def test_na_elem_output(datapath, parser): + output = geom_df.to_xml(parser=parser) + output = equalize_decl(output) + + assert output == na_expected + + +def test_na_empty_str_elem_option(datapath, parser): + output = geom_df.to_xml(na_rep="", parser=parser) + output = equalize_decl(output) + + assert output == na_expected + + +def test_na_empty_elem_option(datapath, parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + 0.0 + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(na_rep="0.0", parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# ATTR_COLS + + +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_attrs_cols_nan_output(datapath, parser): + expected = """\ + + + + + +""" + + output = geom_df.to_xml(attr_cols=["shape", "degrees", "sides"], parser=parser) + output = equalize_decl(output) + + assert output == expected + + +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_attrs_cols_prefix(datapath, parser): + expected = """\ + + + + + +""" + + output = geom_df.to_xml( + attr_cols=["index", "shape", "degrees", "sides"], + namespaces={"doc": "http://example.xom"}, + prefix="doc", + parser=parser, + ) + output = equalize_decl(output) + + assert output == expected + + +def test_attrs_unknown_column(parser): + with pytest.raises(KeyError, match=("no valid column")): + geom_df.to_xml(attr_cols=["shape", "degreees", "sides"], parser=parser) + + +def test_attrs_wrong_type(parser): + with pytest.raises(TypeError, match=("is not a valid type for attr_cols")): + geom_df.to_xml(attr_cols='"shape", "degreees", "sides"', parser=parser) + + +# ELEM_COLS + + +def test_elems_cols_nan_output(datapath, parser): + elems_cols_expected = """\ + + + + 360 + 4.0 + square + + + 360 + + circle + + + 180 + 3.0 + triangle + +""" + + output = geom_df.to_xml( + index=False, elem_cols=["degrees", "sides", "shape"], parser=parser + ) + output = equalize_decl(output) + + assert output == elems_cols_expected + + +def test_elems_unknown_column(parser): + with pytest.raises(KeyError, match=("no valid column")): + geom_df.to_xml(elem_cols=["shape", "degreees", "sides"], parser=parser) + + +def test_elems_wrong_type(parser): + with pytest.raises(TypeError, match=("is not a valid type for elem_cols")): + geom_df.to_xml(elem_cols='"shape", "degreees", "sides"', parser=parser) + + +def test_elems_and_attrs_cols(datapath, parser): + elems_cols_expected = """\ + + + + 360 + 4.0 + + + 360 + + + + 180 + 3.0 + +""" + + output = geom_df.to_xml( + index=False, + elem_cols=["degrees", "sides"], + attr_cols=["shape"], + parser=parser, + ) + output = equalize_decl(output) + + assert output == elems_cols_expected + + +# HIERARCHICAL COLUMNS + + +def test_hierarchical_columns(datapath, parser): + expected = """\ + + + + inner + terrestrial + 4 + 11.81 + 2.95 + + + outer + gas giant + 2 + 2466.5 + 
1233.25 + + + outer + ice giant + 2 + 189.23 + 94.61 + + + All + + 8 + 2667.54 + 333.44 + +""" + + pvt = planet_df.pivot_table( + index=["location", "type"], + values="mass", + aggfunc=["count", "sum", "mean"], + margins=True, + ).round(2) + + output = pvt.to_xml(parser=parser) + output = equalize_decl(output) + + assert output == expected + + +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_hierarchical_attrs_columns(datapath, parser): + expected = """\ + + + + + + +""" + + pvt = planet_df.pivot_table( + index=["location", "type"], + values="mass", + aggfunc=["count", "sum", "mean"], + margins=True, + ).round(2) + + output = pvt.to_xml(attr_cols=list(pvt.reset_index().columns.values), parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# MULTIINDEX + + +def test_multi_index(datapath, parser): + expected = """\ + + + + inner + terrestrial + 4 + 11.81 + 2.95 + + + outer + gas giant + 2 + 2466.5 + 1233.25 + + + outer + ice giant + 2 + 189.23 + 94.61 + +""" + + agg = ( + planet_df.groupby(["location", "type"])["mass"] + .agg(["count", "sum", "mean"]) + .round(2) + ) + + output = agg.to_xml(parser=parser) + output = equalize_decl(output) + + assert output == expected + + +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason=("etree alpha ordered attributes <= py3.7"), +) +def test_multi_index_attrs_cols(datapath, parser): + expected = """\ + + + + + +""" + + agg = ( + planet_df.groupby(["location", "type"])["mass"] + .agg(["count", "sum", "mean"]) + .round(2) + ) + output = agg.to_xml(attr_cols=list(agg.reset_index().columns.values), parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# NAMESPACE + + +def test_default_namespace(parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(namespaces={"": "http://example.com"}, parser=parser) + output = equalize_decl(output) + + assert output == expected + + +# PREFIX + + +def test_namespace_prefix(parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml( + namespaces={"doc": "http://example.com"}, prefix="doc", parser=parser + ) + output = equalize_decl(output) + + assert output == expected + + +def test_missing_prefix_in_nmsp(parser): + with pytest.raises(KeyError, match=("doc is not included in namespaces")): + + geom_df.to_xml( + namespaces={"": "http://example.com"}, prefix="doc", parser=parser + ) + + +def test_namespace_prefix_and_default(parser): + expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml( + namespaces={"": "http://example.com", "doc": "http://other.org"}, + prefix="doc", + parser=parser, + ) + output = equalize_decl(output) + + if output is not None: + # etree and lxml differs on order of namespace prefixes + output = output.replace( + 'xmlns:doc="http://other.org" xmlns="http://example.com"', + 'xmlns="http://example.com" xmlns:doc="http://other.org"', + ) + + assert output == expected + + +# ENCODING + +encoding_expected = """\ + + + + 0 + 1 + José + Sofía + + + 1 + 2 + Luis + Valentina + + + 2 + 3 + Carlos + Isabella + + + 3 + 4 + Juan + Camila + + + 4 + 5 + Jorge + Valeria + +""" + + +def test_encoding_option_str(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + df_file = read_xml(filename, 
parser=parser, encoding="ISO-8859-1").head(5) + + output = df_file.to_xml(encoding="ISO-8859-1", parser=parser) + + if output is not None: + # etree and lxml differ on quotes and case in xml declaration + output = output.replace( + ' + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + output = geom_df.to_xml(xml_declaration=False) + + assert output == expected + + +def test_no_pretty_print_with_decl(parser): + expected = ( + "\n" + "0square" + "3604.0" + "1circle360" + "2" + "triangle1803.0" + "" + ) + + output = geom_df.to_xml(pretty_print=False, parser=parser) + output = equalize_decl(output) + + # etree adds space for closed tags + if output is not None: + output = output.replace(" />", "/>") + + assert output == expected + + +def test_no_pretty_print_no_decl(parser): + expected = ( + "0square" + "3604.0" + "1circle360" + "2" + "triangle1803.0" + "" + ) + + output = geom_df.to_xml(xml_declaration=False, pretty_print=False, parser=parser) + + # etree adds space for closed tags + if output is not None: + output = output.replace(" />", "/>") + + assert output == expected + + +# PARSER + + +@td.skip_if_installed("lxml") +def test_default_parser_no_lxml(): + with pytest.raises( + ImportError, match=("lxml not found, please install or use the etree parser.") + ): + geom_df.to_xml() + + +def test_unknown_parser(): + with pytest.raises( + ValueError, match=("Values for parser can only be lxml or etree.") + ): + geom_df.to_xml(parser="bs4") + + +# STYLESHEET + +xsl_expected = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +@td.skip_if_no("lxml") +def test_stylesheet_file_like(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with open(xsl, mode) as f: + assert geom_df.to_xml(stylesheet=f) == xsl_expected + + +@td.skip_if_no("lxml") +def test_stylesheet_io(datapath, mode): + xsl_path = datapath("io", "data", "xml", "row_field_output.xsl") + + xsl_obj: Union[BytesIO, StringIO] + + with open(xsl_path, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) + + output = geom_df.to_xml(stylesheet=xsl_obj) + + assert output == xsl_expected + + +@td.skip_if_no("lxml") +def test_stylesheet_buffered_reader(datapath, mode): + xsl = datapath("io", "data", "xml", "row_field_output.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + output = geom_df.to_xml(stylesheet=xsl_obj) + + assert output == xsl_expected + + +@td.skip_if_no("lxml") +def test_stylesheet_wrong_path(datapath): + from lxml.etree import XMLSyntaxError + + xsl = os.path.join("data", "xml", "row_field_output.xslt") + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_stylesheet(val): + from lxml.etree import XMLSyntaxError + + with pytest.raises( + XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") + ): + geom_df.to_xml(stylesheet=val) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_syntax(): + from lxml.etree import XMLSyntaxError + + xsl = """\ + + + + + + + + + + + + + + + + + + +""" + + with pytest.raises(XMLSyntaxError, match=("Opening and ending tag mismatch")): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_eval(): + from lxml.etree import XSLTParseError + + xsl = """\ + + + + + + + + + + + + + + + + + + +""" + + with 
pytest.raises(XSLTParseError, match=("failed to compile")): + geom_df.to_xml(stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_apply(parser): + from lxml.etree import XSLTApplyError + + xsl = """\ + + + + + + + + + +""" + + with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")): + with tm.ensure_clean("test.xml") as path: + geom_df.to_xml(path, stylesheet=xsl) + + +def test_stylesheet_with_etree(datapath): + xsl = """\ + + + + + + + + + """ + + with pytest.raises( + ValueError, match=("To use stylesheet, you need lxml installed") + ): + geom_df.to_xml(parser="etree", stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_style_to_csv(): + xsl = """\ + + + + + , + + ,shape,degrees,sides + + + + + + + +""" + + out_csv = geom_df.to_csv(line_terminator="\n") + + if out_csv is not None: + out_csv = out_csv.strip() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_csv == out_xml + + +@td.skip_if_no("lxml") +def test_style_to_string(): + xsl = """\ + + + + + + + shape degrees sides + + + + + + + +""" + + out_str = geom_df.to_string() + out_xml = geom_df.to_xml(na_rep="NaN", stylesheet=xsl) + + assert out_xml == out_str + + +@td.skip_if_no("lxml") +def test_style_to_json(): + xsl = """\ + + + + + " + + + {"shape":{ + + },"degrees":{ + + },"sides":{ + + }} + + + + + + + + + + + + + + + + + , + + +""" + + out_json = geom_df.to_json() + out_xml = geom_df.to_xml(stylesheet=xsl) + + assert out_json == out_xml + + +# COMPRESSION + + +geom_xml = """\ + + + + 0 + square + 360 + 4.0 + + + 1 + circle + 360 + + + + 2 + triangle + 180 + 3.0 + +""" + + +@pytest.mark.parametrize("comp", ["bz2", "gzip", "xz", "zip"]) +def test_compression_output(parser, comp): + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=comp) + + with get_handle( + path, + "r", + compression=comp, + ) as handle_obj: + output = handle_obj.handle.read() + + output = equalize_decl(output) + + assert geom_xml == output.strip() + + +@pytest.mark.parametrize("comp", ["bz2", "gzip", "xz", "zip"]) +@pytest.mark.parametrize("compfile", ["xml.bz2", "xml.gz", "xml.xz", "xml.zip"]) +def test_filename_and_suffix_comp(parser, comp, compfile): + with tm.ensure_clean(filename=compfile) as path: + geom_df.to_xml(path, parser=parser, compression=comp) + + with get_handle( + path, + "r", + compression=comp, + ) as handle_obj: + output = handle_obj.handle.read() + + output = equalize_decl(output) + + assert geom_xml == output.strip() + + +def test_unsuported_compression(datapath, parser): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression="7z") + + +# STORAGE OPTIONS + + +@tm.network +@td.skip_if_no("s3fs") +@td.skip_if_no("lxml") +def test_s3_permission_output(parser): + import s3fs + + with pytest.raises(PermissionError, match="Access Denied"): + fs = s3fs.S3FileSystem(anon=True) + fs.ls("pandas-test") + + geom_df.to_xml("s3://pandas-test/geom.xml", compression="zip", parser=parser) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py new file mode 100644 index 0000000000000..6902b4e93443f --- /dev/null +++ b/pandas/tests/io/xml/test_xml.py @@ -0,0 +1,1097 @@ +from io import ( + BytesIO, + StringIO, +) +import os +from typing import Union +from urllib.error import HTTPError + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.xml import read_xml + +""" 
+CHECKLIST
+
+[X] - ValueError: "Values for parser can only be lxml or etree."
+
+etree
+[X] - ImportError: "lxml not found, please install or use the etree parser."
+[X] - TypeError: "expected str, bytes or os.PathLike object, not NoneType"
+[X] - ValueError: "Either element or attributes can be parsed not both."
+[X] - ValueError: "xpath does not return any nodes..."
+[X] - SyntaxError: "You have used an incorrect or unsupported XPath"
+[X] - ValueError: "names does not match length of child elements in xpath."
+[X] - TypeError: "...is not a valid type for names"
+[X] - ValueError: "To use stylesheet, you need lxml installed..."
+[] - URLError: (GENERAL ERROR WITH HTTPError AS SUBCLASS)
+[X] - HTTPError: "HTTP Error 404: Not Found"
+[] - OSError: (GENERAL ERROR WITH FileNotFoundError AS SUBCLASS)
+[X] - FileNotFoundError: "No such file or directory"
+[] - ParseError: (FAILSAFE CATCH ALL FOR VERY COMPLEX XML)
+[X] - UnicodeDecodeError: "'utf-8' codec can't decode byte 0xe9..."
+[X] - UnicodeError: "UTF-16 stream does not start with BOM"
+[X] - BadZipFile: "File is not a zip file"
+[X] - OSError: "Invalid data stream"
+[X] - LZMAError: "Input format not supported by decoder"
+[X] - ValueError: "Unrecognized compression type"
+[X] - PermissionError: "Forbidden"
+
+lxml
+[X] - ValueError: "Either element or attributes can be parsed not both."
+[X] - AttributeError: "__enter__"
+[X] - XSLTApplyError: "Cannot resolve URI"
+[X] - XSLTParseError: "document is not a stylesheet"
+[X] - ValueError: "xpath does not return any nodes."
+[X] - XPathEvalError: "Invalid expression"
+[] - XPathSyntaxError: (OLD VERSION IN lxml FOR XPATH ERRORS)
+[X] - TypeError: "empty namespace prefix is not supported in XPath"
+[X] - ValueError: "names does not match length of child elements in xpath."
+[X] - TypeError: "...is not a valid type for names"
+[X] - LookupError: "unknown encoding"
+[] - URLError: (USUALLY DUE TO NETWORKING)
+[X] - HTTPError: "HTTP Error 404: Not Found"
+[X] - OSError: "failed to load external entity"
+[X] - XMLSyntaxError: "Start tag expected, '<' not found"
+[] - ParserError: (FAILSAFE CATCH ALL FOR VERY COMPLEX XML)
+[X] - ValueError: "Values for parser can only be lxml or etree."
+[X] - UnicodeDecodeError: "'utf-8' codec can't decode byte 0xe9..."
+[X] - UnicodeError: "UTF-16 stream does not start with BOM" +[X] - BadZipFile: "File is not a zip file" +[X] - OSError: "Invalid data stream" +[X] - LZMAError: "Input format not supported by decoder" +[X] - ValueError: "Unrecognized compression type" +[X] - PermissionError: "Forbidden" +""" + +geom_df = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4, np.nan, 3], + } +) + +xml_default_nmsp = """\ + + + + square + 360 + 4 + + + circle + 360 + + + + triangle + 180 + 3 + +""" + +xml_prefix_nmsp = """\ + + + + square + 360 + 4.0 + + + circle + 360 + + + + triangle + 180 + 3.0 + +""" + + +df_kml = DataFrame( + { + "id": { + 0: "ID_00001", + 1: "ID_00002", + 2: "ID_00003", + 3: "ID_00004", + 4: "ID_00005", + }, + "name": { + 0: "Blue Line (Forest Park)", + 1: "Red, Purple Line", + 2: "Red, Purple Line", + 3: "Red, Purple Line", + 4: "Red, Purple Line", + }, + "styleUrl": { + 0: "#LineStyle01", + 1: "#LineStyle01", + 2: "#LineStyle01", + 3: "#LineStyle01", + 4: "#LineStyle01", + }, + "extrude": {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}, + "altitudeMode": { + 0: "clampedToGround", + 1: "clampedToGround", + 2: "clampedToGround", + 3: "clampedToGround", + 4: "clampedToGround", + }, + "coordinates": { + 0: ( + "-87.77678526964958,41.8708863930319,0 " + "-87.77826234150609,41.87097820122218,0 " + "-87.78251583439344,41.87130129991005,0 " + "-87.78418294588424,41.87145055520308,0 " + "-87.7872369165933,41.8717239119163,0 " + "-87.79160214925886,41.87210797280065,0" + ), + 1: ( + "-87.65758750947528,41.96427269188822,0 " + "-87.65802133507393,41.96581929055245,0 " + "-87.65819033925305,41.96621846093642,0 " + "-87.6583189819129,41.96650362897086,0 " + "-87.65835858701473,41.96669002089185,0 " + "-87.65838428411853,41.96688150295095,0 " + "-87.65842208882658,41.96745896091846,0 " + "-87.65846556843937,41.9683761425439,0 " + "-87.65849296214573,41.96913893870342,0" + ), + 2: ( + "-87.65492939166126,41.95377494531437,0 " + "-87.65557043199591,41.95376544118533,0 " + "-87.65606302030132,41.95376391658746,0 " + "-87.65623502146268,41.95377379126367,0 " + "-87.65634748981634,41.95380103566435,0 " + "-87.65646537904269,41.95387703994676,0 " + "-87.65656532461145,41.95396622645799,0 " + "-87.65664760856414,41.95404201996044,0 " + "-87.65671750555913,41.95416647054043,0 " + "-87.65673983607117,41.95429949810849,0 " + "-87.65673866475777,41.95441024240925,0 " + "-87.6567690255541,41.95490657227902,0 " + "-87.65683672482363,41.95692259283837,0 " + "-87.6568900886376,41.95861070983142,0 " + "-87.65699865558875,41.96181418669004,0 " + "-87.65756347177603,41.96397045777844,0 " + "-87.65758750947528,41.96427269188822,0" + ), + 3: ( + "-87.65362593118043,41.94742799535678,0 " + "-87.65363554415794,41.94819886386848,0 " + "-87.6536456393239,41.95059994675451,0 " + "-87.65365831235026,41.95108288489359,0 " + "-87.6536604873874,41.9519954657554,0 " + "-87.65362592053201,41.95245597302328,0 " + "-87.65367158496069,41.95311153649393,0 " + "-87.65368468595476,41.9533202828916,0 " + "-87.65369271253692,41.95343095587119,0 " + "-87.65373335834569,41.95351536301472,0 " + "-87.65378605844126,41.95358212680591,0 " + "-87.65385067928185,41.95364452823767,0 " + "-87.6539390793817,41.95370263886964,0 " + "-87.6540786298351,41.95373403675265,0 " + "-87.65430648647626,41.9537535411832,0 " + "-87.65492939166126,41.95377494531437,0" + ), + 4: ( + "-87.65345391792157,41.94217681262115,0 " + "-87.65342448305786,41.94237224420864,0 " + "-87.65339745703922,41.94268217746244,0 " + 
"-87.65337753982941,41.94288140770284,0 " + "-87.65336256753105,41.94317369618263,0 " + "-87.65338799707138,41.94357253961736,0 " + "-87.65340240886648,41.94389158188269,0 " + "-87.65341837392448,41.94406444407721,0 " + "-87.65342275247338,41.94421065714904,0 " + "-87.65347469646018,41.94434829382345,0 " + "-87.65351486483024,41.94447699917548,0 " + "-87.65353483605053,41.9453896864472,0 " + "-87.65361975532807,41.94689193720703,0 " + "-87.65362593118043,41.94742799535678,0" + ), + }, + } +) + + +@pytest.fixture(params=["rb", "r"]) +def mode(request): + return request.param + + +@pytest.fixture(params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"]) +def parser(request): + return request.param + + +# FILE / URL + + +@td.skip_if_no("lxml") +def test_parser_consistency_file(datapath): + filename = datapath("io", "data", "xml", "books.xml") + df_file_lxml = read_xml(filename, parser="lxml") + df_file_etree = read_xml(filename, parser="etree") + + tm.assert_frame_equal(df_file_lxml, df_file_etree) + + +@tm.network +@pytest.mark.slow +@td.skip_if_no("lxml") +def test_parser_consistency_url(datapath): + url = ( + "https://data.cityofchicago.org/api/views/" + "8pix-ypme/rows.xml?accessType=DOWNLOAD" + ) + df_url_lxml = read_xml(url, xpath=".//row/row", parser="lxml") + df_url_etree = read_xml(url, xpath=".//row/row", parser="etree") + + tm.assert_frame_equal(df_url_lxml, df_url_etree) + + +def test_file_like(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + df_file = read_xml(f, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_file_io(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + xml_obj = f.read() + + df_io = read_xml( + (BytesIO(xml_obj) if isinstance(xml_obj, bytes) else StringIO(xml_obj)), + parser=parser, + ) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_io, df_expected) + + +def test_file_buffered_reader_string(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + xml_obj = f.read() + + df_str = read_xml(xml_obj, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_str, df_expected) + + +def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): + filename = datapath("io", "data", "xml", "books.xml") + with open(filename, mode) as f: + next(f) + xml_obj = f.read() + + df_str = read_xml(xml_obj, parser=parser) + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_str, df_expected) + + +def test_file_handle_close(datapath, parser): + xml_file = datapath("io", "data", "xml", "books.xml") + + with open(xml_file, "rb") as f: + read_xml(BytesIO(f.read()), parser=parser) + + assert not f.closed + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_lxml(val): + from lxml.etree import XMLSyntaxError + + with pytest.raises(XMLSyntaxError, match="Document is empty"): + read_xml(val, parser="lxml") + + +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_string_etree(val): + from xml.etree.ElementTree import ParseError + + with pytest.raises(ParseError, match="no element found"): + read_xml(val, parser="etree") + + +@td.skip_if_no("lxml") +def test_wrong_file_path_lxml(): + from lxml.etree import XMLSyntaxError + + filename = os.path.join("data", "html", "books.xml") + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + read_xml(filename, parser="lxml") + + +def test_wrong_file_path_etree(): + from xml.etree.ElementTree import ParseError + + filename = os.path.join("data", "html", "books.xml") + + with pytest.raises( + ParseError, + match=("not well-formed"), + ): + read_xml(filename, parser="etree") + + +@tm.network +@td.skip_if_no("lxml") +def test_url(): + url = "https://www.w3schools.com/xml/books.xml" + df_url = read_xml(url, xpath=".//book[count(*)=4]") + + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + "cover": [None, None, "paperback"], + } + ) + + tm.assert_frame_equal(df_url, df_expected) + + +def test_wrong_url(parser): + with pytest.raises(HTTPError, match=("HTTP Error 404: Not Found")): + url = "https://www.w3schools.com/xml/python.xml" + read_xml(url, xpath=".//book[count(*)=4]", parser=parser) + + +# XPATH + + +@td.skip_if_no("lxml") +def test_empty_xpath_lxml(datapath): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(ValueError, match=("xpath does not return any nodes")): + read_xml(filename, xpath=".//python", parser="lxml") + + +def test_bad_xpath_etree(datapath): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises( + SyntaxError, match=("You have used an incorrect or unsupported XPath") + ): + read_xml(filename, xpath=".//[book]", parser="etree") + + +@td.skip_if_no("lxml") +def test_bad_xpath_lxml(datapath): + from lxml.etree import XPathEvalError + + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(XPathEvalError, match=("Invalid expression")): + read_xml(filename, xpath=".//[book]", parser="lxml") + + +# NAMESPACE + + +def test_default_namespace(parser): + df_nmsp = read_xml( + xml_default_nmsp, + xpath=".//ns:row", + namespaces={"ns": "http://example.com"}, + parser=parser, + ) + + df_expected = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 180], + "sides": [4.0, float("nan"), 3.0], + } + ) + + tm.assert_frame_equal(df_nmsp, df_expected) + + +def test_prefix_namespace(parser): + df_nmsp = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser=parser, + ) + + df_expected = DataFrame( + { + "shape": ["square", "circle", "triangle"], + "degrees": [360, 360, 
180], + "sides": [4.0, float("nan"), 3.0], + } + ) + + tm.assert_frame_equal(df_nmsp, df_expected) + + +@td.skip_if_no("lxml") +def test_consistency_default_namespace(): + df_lxml = read_xml( + xml_default_nmsp, + xpath=".//ns:row", + namespaces={"ns": "http://example.com"}, + parser="lxml", + ) + + df_etree = read_xml( + xml_default_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="etree", + ) + + tm.assert_frame_equal(df_lxml, df_etree) + + +@td.skip_if_no("lxml") +def test_consistency_prefix_namespace(): + df_lxml = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="lxml", + ) + + df_etree = read_xml( + xml_prefix_nmsp, + xpath=".//doc:row", + namespaces={"doc": "http://example.com"}, + parser="etree", + ) + + tm.assert_frame_equal(df_lxml, df_etree) + + +# PREFIX + + +def test_missing_prefix_with_default_namespace(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + with pytest.raises(ValueError, match=("xpath does not return any nodes")): + read_xml(filename, xpath=".//Placemark", parser=parser) + + +def test_missing_prefix_definition_etree(datapath): + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises(SyntaxError, match=("you used an undeclared namespace prefix")): + read_xml(filename, xpath=".//kml:Placemark", parser="etree") + + +@td.skip_if_no("lxml") +def test_missing_prefix_definition_lxml(datapath): + from lxml.etree import XPathEvalError + + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises(XPathEvalError, match=("Undefined namespace prefix")): + read_xml(filename, xpath=".//kml:Placemark", parser="lxml") + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("key", ["", None]) +def test_none_namespace_prefix(key): + with pytest.raises( + TypeError, match=("empty namespace prefix is not supported in XPath") + ): + read_xml( + xml_default_nmsp, + xpath=".//kml:Placemark", + namespaces={key: "http://www.opengis.net/kml/2.2"}, + parser="lxml", + ) + + +# ELEMS AND ATTRS + + +def test_file_elems_and_attrs(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, parser=parser) + df_expected = DataFrame( + { + "category": ["cooking", "children", "web"], + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_file_only_attrs(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, attrs_only=True, parser=parser) + df_expected = DataFrame({"category": ["cooking", "children", "web"]}) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_file_only_elems(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml(filename, elems_only=True, parser=parser) + df_expected = DataFrame( + { + "title": ["Everyday Italian", "Harry Potter", "Learning XML"], + "author": ["Giada De Laurentiis", "J K. Rowling", "Erik T. 
Ray"], + "year": [2005, 2005, 2003], + "price": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_elem_and_attrs_only(datapath, parser): + filename = datapath("io", "data", "xml", "cta_rail_lines.kml") + with pytest.raises( + ValueError, + match=("Either element or attributes can be parsed not both"), + ): + read_xml(filename, elems_only=True, attrs_only=True, parser=parser) + + +@td.skip_if_no("lxml") +def test_attribute_centric_xml(): + xml = """\ + + + + + + + + + + + + + + + + + +""" + + df_lxml = read_xml(xml, xpath=".//station") + df_etree = read_xml(xml, xpath=".//station", parser="etree") + + tm.assert_frame_equal(df_lxml, df_etree) + + +# NAMES + + +def test_names_option_output(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + df_file = read_xml( + filename, names=["Col1", "Col2", "Col3", "Col4", "Col5"], parser=parser + ) + + df_expected = DataFrame( + { + "Col1": ["cooking", "children", "web"], + "Col2": ["Everyday Italian", "Harry Potter", "Learning XML"], + "Col3": ["Giada De Laurentiis", "J K. Rowling", "Erik T. Ray"], + "Col4": [2005, 2005, 2003], + "Col5": [30.00, 29.99, 39.95], + } + ) + + tm.assert_frame_equal(df_file, df_expected) + + +def test_names_option_wrong_length(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(ValueError, match=("names does not match length")): + read_xml(filename, names=["Col1", "Col2", "Col3"], parser=parser) + + +def test_names_option_wrong_type(datapath, parser): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(TypeError, match=("is not a valid type for names")): + read_xml( + filename, names="Col1, Col2, Col3", parser=parser # type: ignore[arg-type] + ) + + +# ENCODING + + +def test_wrong_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(UnicodeDecodeError, match=("'utf-8' codec can't decode")): + read_xml(filename, parser=parser) + + +def test_utf16_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises( + UnicodeError, + match=( + "UTF-16 stream does not start with BOM|" + "'utf-16-le' codec can't decode byte" + ), + ): + read_xml(filename, encoding="UTF-16", parser=parser) + + +def test_unknown_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(LookupError, match=("unknown encoding: uft-8")): + read_xml(filename, encoding="UFT-8", parser=parser) + + +def test_ascii_encoding(datapath, parser): + filename = datapath("io", "data", "xml", "baby_names.xml") + with pytest.raises(UnicodeDecodeError, match=("'ascii' codec can't decode byte")): + read_xml(filename, encoding="ascii", parser=parser) + + +@td.skip_if_no("lxml") +def test_parser_consistency_with_encoding(datapath): + filename = datapath("io", "data", "xml", "baby_names.xml") + df_lxml = read_xml(filename, parser="lxml", encoding="ISO-8859-1") + df_etree = read_xml(filename, parser="etree", encoding="iso-8859-1") + + tm.assert_frame_equal(df_lxml, df_etree) + + +# PARSER + + +@td.skip_if_installed("lxml") +def test_default_parser_no_lxml(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises( + ImportError, match=("lxml not found, please install or use the etree parser.") + ): + read_xml(filename) + + +def test_wrong_parser(datapath): + filename = datapath("io", "data", "xml", "books.xml") + + with pytest.raises( + ValueError, 
match=("Values for parser can only be lxml or etree.") + ): + read_xml(filename, parser="bs4") + + +# STYLESHEET + + +@td.skip_if_no("lxml") +def test_stylesheet_file(datapath): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_stylesheet_file_like(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=f, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_stylesheet_io(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + xsl_obj: Union[BytesIO, StringIO] + + with open(xsl, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) + + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl_obj, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_stylesheet_buffered_reader(datapath, mode): + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + with open(xsl, mode) as f: + xsl_obj = f.read() + + df_style = read_xml( + kml, + xpath=".//k:Placemark", + namespaces={"k": "http://www.opengis.net/kml/2.2"}, + stylesheet=xsl_obj, + ) + + tm.assert_frame_equal(df_kml, df_style) + + +@td.skip_if_no("lxml") +def test_not_stylesheet(datapath): + from lxml.etree import XSLTParseError + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "books.xml") + + with pytest.raises(XSLTParseError, match=("document is not a stylesheet")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_syntax(datapath): + from lxml.etree import XMLSyntaxError + + xsl = """\ + + + + + + + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises( + XMLSyntaxError, match=("Extra content at the end of the document") + ): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_eval(datapath): + from lxml.etree import XSLTParseError + + xsl = """\ + + + + + + + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises(XSLTParseError, match=("failed to compile")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_incorrect_xsl_apply(datapath): + from lxml.etree import XSLTApplyError + + xsl = """\ + + + + + + + + + +""" + + kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + + with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_wrong_stylesheet(): + from lxml.etree import XMLSyntaxError + + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + xsl = os.path.join("data", "xml", "flatten.xsl") + + with pytest.raises( + XMLSyntaxError, + match=("Start tag expected, '<' not found"), + ): + read_xml(kml, stylesheet=xsl) + + +@td.skip_if_no("lxml") +def test_stylesheet_file_close(datapath, mode): 
+ kml = datapath("io", "data", "xml", "cta_rail_lines.kml") + xsl = datapath("io", "data", "xml", "flatten_doc.xsl") + + xsl_obj: Union[BytesIO, StringIO] + + with open(xsl, mode) as f: + if mode == "rb": + xsl_obj = BytesIO(f.read()) + else: + xsl_obj = StringIO(f.read()) + + read_xml(kml, stylesheet=xsl_obj) + + assert not f.closed + + +@td.skip_if_no("lxml") +def test_stylesheet_with_etree(datapath): + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + xsl = os.path.join("data", "xml", "flatten_doc.xsl") + + with pytest.raises( + ValueError, match=("To use stylesheet, you need lxml installed") + ): + read_xml(kml, parser="etree", stylesheet=xsl) + + +@td.skip_if_no("lxml") +@pytest.mark.parametrize("val", ["", b""]) +def test_empty_stylesheet(val): + from lxml.etree import XMLSyntaxError + + kml = os.path.join("data", "xml", "cta_rail_lines.kml") + + with pytest.raises( + XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") + ): + read_xml(kml, stylesheet=val) + + +@tm.network +@td.skip_if_no("lxml") +def test_online_stylesheet(): + xml = "https://www.w3schools.com/xml/cdcatalog_with_xsl.xml" + xsl = "https://www.w3schools.com/xml/cdcatalog.xsl" + + df_xsl = read_xml( + xml, + xpath=".//tr[td and position() <= 6]", + names=["title", "artist"], + stylesheet=xsl, + ) + + df_expected = DataFrame( + { + "title": { + 0: "Empire Burlesque", + 1: "Hide your heart", + 2: "Greatest Hits", + 3: "Still got the blues", + 4: "Eros", + }, + "artist": { + 0: "Bob Dylan", + 1: "Bonnie Tyler", + 2: "Dolly Parton", + 3: "Gary Moore", + 4: "Eros Ramazzotti", + }, + } + ) + + tm.assert_frame_equal(df_expected, df_xsl) + + +# COMPRESSION + + +@pytest.mark.parametrize("comp", ["bz2", "gzip", "xz", "zip"]) +def test_compression_read(parser, comp): + with tm.ensure_clean() as path: + geom_df.to_xml(path, index=False, parser=parser, compression=comp) + + xml_df = read_xml(path, parser=parser, compression=comp) + + tm.assert_frame_equal(xml_df, geom_df) + + +@pytest.mark.parametrize("comp", ["gzip", "xz", "zip"]) +def test_wrong_compression_bz2(parser, comp): + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=comp) + + with pytest.raises(OSError, match="Invalid data stream"): + read_xml(path, parser=parser, compression="bz2") + + +@pytest.mark.parametrize("comp", ["bz2", "xz", "zip"]) +def test_wrong_compression_gz(parser, comp): + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=comp) + + with pytest.raises(OSError, match="Not a gzipped file"): + read_xml(path, parser=parser, compression="gzip") + + +@pytest.mark.parametrize("comp", ["bz2", "gzip", "zip"]) +def test_wrong_compression_xz(parser, comp): + from lzma import LZMAError + + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=comp) + + with pytest.raises(LZMAError, match="Input format not supported by decoder"): + read_xml(path, parser=parser, compression="xz") + + +@pytest.mark.parametrize("comp", ["bz2", "gzip", "xz"]) +def test_wrong_compression_zip(parser, comp): + from zipfile import BadZipFile + + with tm.ensure_clean() as path: + geom_df.to_xml(path, parser=parser, compression=comp) + + with pytest.raises(BadZipFile, match="File is not a zip file"): + read_xml(path, parser=parser, compression="zip") + + +def test_unsuported_compression(datapath, parser): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean() as path: + read_xml(path, parser=parser, compression="7z") + + +# 
STORAGE OPTIONS
+
+
+@tm.network
+@td.skip_if_no("s3fs")
+@td.skip_if_no("lxml")
+def test_s3_parser_consistency():
+    # Python Software Foundation (2019 IRS-990 RETURN)
+    s3 = "s3://irs-form-990/201923199349319487_public.xml"
+
+    df_lxml = read_xml(
+        s3,
+        xpath=".//irs:Form990PartVIISectionAGrp",
+        namespaces={"irs": "http://www.irs.gov/efile"},
+        parser="lxml",
+        storage_options={"anon": True},
+    )
+
+    df_etree = read_xml(
+        s3,
+        xpath=".//irs:Form990PartVIISectionAGrp",
+        namespaces={"irs": "http://www.irs.gov/efile"},
+        parser="etree",
+        storage_options={"anon": True},
+    )
+
+    tm.assert_frame_equal(df_lxml, df_etree)
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
index bf3e6d822ab19..733a8c0aa58ec 100644
--- a/pandas/tests/resample/test_base.py
+++ b/pandas/tests/resample/test_base.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import (
     DataFrame,
     NaT,
@@ -245,6 +247,7 @@ def test_resampler_is_iterable(series):
         tm.assert_series_equal(rv, gv)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
 @all_ts
 def test_resample_quantile(series):
     # GH 15023
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index a125f85efc8d3..7b520171379c3 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -13,6 +13,7 @@
     DataFrame,
     Index,
     MultiIndex,
+    PeriodIndex,
     Series,
     concat,
     date_range,
@@ -24,6 +25,22 @@
 
 
 class TestConcatenate:
+    def test_append_concat(self):
+        # GH#1815
+        d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC")
+        d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC")
+
+        s1 = Series(np.random.randn(10), d1)
+        s2 = Series(np.random.randn(10), d2)
+
+        s1 = s1.to_period()
+        s2 = s2.to_period()
+
+        # drops index
+        result = concat([s1, s2])
+        assert isinstance(result.index, PeriodIndex)
+        assert result.index[0] == s1.index[0]
+
     def test_concat_copy(self):
         df = DataFrame(np.random.randn(4, 3))
         df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1))
@@ -420,21 +437,21 @@ def __getitem__(self, index):
                 except KeyError as err:
                     raise IndexError from err
 
-        tm.assert_frame_equal(pd.concat(CustomIterator1(), ignore_index=True), expected)
+        tm.assert_frame_equal(concat(CustomIterator1(), ignore_index=True), expected)
 
         class CustomIterator2(abc.Iterable):
             def __iter__(self):
                 yield df1
                 yield df2
 
-        tm.assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected)
+        tm.assert_frame_equal(concat(CustomIterator2(), ignore_index=True), expected)
 
     def test_concat_order(self):
         # GH 17344
         dfs = [DataFrame(index=range(3), columns=["a", 1, None])]
         dfs += [DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100)]
-        result = pd.concat(dfs, sort=True).columns
+        result = concat(dfs, sort=True).columns
         expected = dfs[0].columns
         tm.assert_index_equal(result, expected)
 
@@ -442,20 +459,20 @@ def test_concat_different_extension_dtypes_upcasts(self):
         a = Series(pd.array([1, 2], dtype="Int64"))
         b = Series(to_decimal([1, 2]))
 
-        result = pd.concat([a, b], ignore_index=True)
+        result = concat([a, b], ignore_index=True)
         expected = Series([1, 2, Decimal(1), Decimal(2)], dtype=object)
         tm.assert_series_equal(result, expected)
 
     def test_concat_ordered_dict(self):
         # GH 21510
-        expected = pd.concat(
+        expected = concat(
             [Series(range(3)), Series(range(4))], keys=["First", "Another"]
         )
-        result = pd.concat({"First": Series(range(3)), "Another": Series(range(4))})
+        result = concat({"First": Series(range(3)), "Another": Series(range(4))})
         tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("pdt", [Series, pd.DataFrame])
+@pytest.mark.parametrize("pdt", [Series, DataFrame])
 @pytest.mark.parametrize("dt", np.sctypes["float"])
 def test_concat_no_unnecessary_upcast(dt, pdt):
     # GH 13247
@@ -466,11 +483,11 @@ def test_concat_no_unnecessary_upcast(dt, pdt):
         pdt(np.array([np.nan], dtype=dt, ndmin=dims)),
         pdt(np.array([5], dtype=dt, ndmin=dims)),
     ]
-    x = pd.concat(dfs)
+    x = concat(dfs)
     assert x.values.dtype == dt
 
 
-@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, pd.DataFrame])
+@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, DataFrame])
 @pytest.mark.parametrize("dt", np.sctypes["int"])
 def test_concat_will_upcast(dt, pdt):
     with catch_warnings(record=True):
@@ -480,7 +497,7 @@ def test_concat_will_upcast(dt, pdt):
             pdt(np.array([np.nan], ndmin=dims)),
             pdt(np.array([5], dtype=dt, ndmin=dims)),
         ]
-        x = pd.concat(dfs)
+        x = concat(dfs)
         assert x.values.dtype == "float64"
 
 
@@ -489,7 +506,7 @@ def test_concat_empty_and_non_empty_frame_regression():
     df1 = DataFrame({"foo": [1]})
     df2 = DataFrame({"foo": []})
     expected = DataFrame({"foo": [1.0]})
-    result = pd.concat([df1, df2])
+    result = concat([df1, df2])
     tm.assert_frame_equal(result, expected)
 
 
@@ -499,7 +516,7 @@ def test_concat_sparse():
     expected = DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype(
         pd.SparseDtype(np.int64, 0)
     )
-    result = pd.concat([a, a], axis=1)
+    result = concat([a, a], axis=1)
     tm.assert_frame_equal(result, expected)
 
 
@@ -510,7 +527,7 @@ def test_concat_dense_sparse():
    expected = Series(data=[1, None, 1], index=[0, 1, 0]).astype(
         pd.SparseDtype(np.float64, None)
     )
-    result = pd.concat([a, b], axis=0)
+    result = concat([a, b], axis=0)
     tm.assert_series_equal(result, expected)
 
 
@@ -548,11 +565,11 @@ def test_concat_frame_axis0_extension_dtypes():
     df1 = DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})
     df2 = DataFrame({"a": np.array([4, 5, 6])})
 
-    result = pd.concat([df1, df2], ignore_index=True)
+    result = concat([df1, df2], ignore_index=True)
     expected = DataFrame({"a": [1, 2, 3, 4, 5, 6]}, dtype="Int64")
     tm.assert_frame_equal(result, expected)
 
-    result = pd.concat([df2, df1], ignore_index=True)
+    result = concat([df2, df1], ignore_index=True)
     expected = DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64")
     tm.assert_frame_equal(result, expected)
 
@@ -561,7 +578,7 @@ def test_concat_preserves_extension_int64_dtype():
     # GH 24768
     df_a = DataFrame({"a": [-1]}, dtype="Int64")
     df_b = DataFrame({"b": [1]}, dtype="Int64")
-    result = pd.concat([df_a, df_b], ignore_index=True)
+    result = concat([df_a, df_b], ignore_index=True)
     expected = DataFrame({"a": [-1, None], "b": [None, 1]}, dtype="Int64")
     tm.assert_frame_equal(result, expected)
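For reviewers, a minimal standalone sketch (not part of the patch; assumes pandas >= 1.0) of the dict-vs-keys behavior that test_concat_ordered_dict above asserts:

    import pandas as pd
    from pandas import Series, concat

    # Dict keys become the outer index level, in insertion order (Python >= 3.7),
    # which is why concat(dict) matches concat(list, keys=...).
    by_keys = concat([Series(range(3)), Series(range(4))], keys=["First", "Another"])
    by_dict = concat({"First": Series(range(3)), "Another": Series(range(4))})
    pd.testing.assert_series_equal(by_keys, by_dict)
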
diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py
index f5eb0ab8c9a17..3636139c19eef 100644
--- a/pandas/tests/reshape/concat/test_dataframe.py
+++ b/pandas/tests/reshape/concat/test_dataframe.py
@@ -17,7 +17,7 @@ def test_concat_multiple_frames_dtypes(self):
         # GH#2759
         A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)
         B = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
-        results = pd.concat((A, B), axis=1).dtypes
+        results = concat((A, B), axis=1).dtypes
         expected = Series(
             [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2,
             index=["foo", "bar", 0, 1],
@@ -28,7 +28,7 @@ def test_concat_tuple_keys(self):
         # GH#14438
         df1 = DataFrame(np.ones((2, 2)), columns=list("AB"))
         df2 = DataFrame(np.ones((3, 2)) * 2, columns=list("AB"))
-        results = pd.concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")])
+        results = concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")])
         expected = DataFrame(
             {
                 "A": {
@@ -53,7 +53,7 @@ def test_concat_named_keys(self):
         # GH#14252
         df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]})
         index = Index(["a", "b"], name="baz")
-        concatted_named_from_keys = pd.concat([df, df], keys=index)
+        concatted_named_from_keys = concat([df, df], keys=index)
         expected_named = DataFrame(
             {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]},
             index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]),
@@ -61,12 +61,10 @@ def test_concat_named_keys(self):
         tm.assert_frame_equal(concatted_named_from_keys, expected_named)
 
         index_no_name = Index(["a", "b"], name=None)
-        concatted_named_from_names = pd.concat(
-            [df, df], keys=index_no_name, names=["baz"]
-        )
+        concatted_named_from_names = concat([df, df], keys=index_no_name, names=["baz"])
         tm.assert_frame_equal(concatted_named_from_names, expected_named)
 
-        concatted_unnamed = pd.concat([df, df], keys=index_no_name)
+        concatted_unnamed = concat([df, df], keys=index_no_name)
         expected_unnamed = DataFrame(
             {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]},
             index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]),
@@ -81,13 +79,13 @@ def test_concat_axis_parameter(self):
 
         # Index/row/0 DataFrame
         expected_index = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1])
 
-        concatted_index = pd.concat([df1, df2], axis="index")
+        concatted_index = concat([df1, df2], axis="index")
         tm.assert_frame_equal(concatted_index, expected_index)
 
-        concatted_row = pd.concat([df1, df2], axis="rows")
+        concatted_row = concat([df1, df2], axis="rows")
         tm.assert_frame_equal(concatted_row, expected_index)
 
-        concatted_0 = pd.concat([df1, df2], axis=0)
+        concatted_0 = concat([df1, df2], axis=0)
         tm.assert_frame_equal(concatted_0, expected_index)
 
         # Columns/1 DataFrame
@@ -95,10 +93,10 @@ def test_concat_axis_parameter(self):
             [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"]
         )
 
-        concatted_columns = pd.concat([df1, df2], axis="columns")
+        concatted_columns = concat([df1, df2], axis="columns")
         tm.assert_frame_equal(concatted_columns, expected_columns)
 
-        concatted_1 = pd.concat([df1, df2], axis=1)
+        concatted_1 = concat([df1, df2], axis=1)
         tm.assert_frame_equal(concatted_1, expected_columns)
 
         series1 = Series([0.1, 0.2])
@@ -107,13 +105,13 @@ def test_concat_axis_parameter(self):
 
         # Index/row/0 Series
         expected_index_series = Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1])
 
-        concatted_index_series = pd.concat([series1, series2], axis="index")
+        concatted_index_series = concat([series1, series2], axis="index")
         tm.assert_series_equal(concatted_index_series, expected_index_series)
 
-        concatted_row_series = pd.concat([series1, series2], axis="rows")
+        concatted_row_series = concat([series1, series2], axis="rows")
         tm.assert_series_equal(concatted_row_series, expected_index_series)
 
-        concatted_0_series = pd.concat([series1, series2], axis=0)
+        concatted_0_series = concat([series1, series2], axis=0)
         tm.assert_series_equal(concatted_0_series, expected_index_series)
 
         # Columns/1 Series
@@ -121,15 +119,15 @@ def test_concat_axis_parameter(self):
             [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1]
         )
 
-        concatted_columns_series = pd.concat([series1, series2], axis="columns")
+        concatted_columns_series = concat([series1, series2], axis="columns")
         tm.assert_frame_equal(concatted_columns_series, expected_columns_series)
 
-        concatted_1_series = pd.concat([series1, series2], axis=1)
+        concatted_1_series = concat([series1, series2], axis=1)
         tm.assert_frame_equal(concatted_1_series, expected_columns_series)
 
         # Testing ValueError
         with pytest.raises(ValueError, match="No axis named"):
-            pd.concat([series1, series2], axis="something")
+            concat([series1, series2], axis="something")
 
     def test_concat_numerical_names(self):
         # GH#15262, GH#12223
@@ -142,7 +140,7 @@ def test_concat_numerical_names(self):
                 )
             ),
         )
-        result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :]))
+        result = concat((df.iloc[:2, :], df.iloc[-2:, :]))
         expected = DataFrame(
             {"col": [0, 1, 7, 8]},
             dtype="int32",
@@ -155,7 +153,7 @@ def test_concat_numerical_names(self):
     def test_concat_astype_dup_col(self):
         # GH#23049
         df = DataFrame([{"a": "b"}])
-        df = pd.concat([df, df], axis=1)
+        df = concat([df, df], axis=1)
 
         result = df.astype("category")
         expected = DataFrame(
diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py
index 92181e7dffc50..332c3c8f30562 100644
--- a/pandas/tests/reshape/concat/test_datetimes.py
+++ b/pandas/tests/reshape/concat/test_datetimes.py
@@ -44,15 +44,15 @@ def test_concat_datetime_datetime64_frame(self):
         df1 = DataFrame({"date": ind, "test": range(10)})
 
         # it works!
-        pd.concat([df1, df2_obj])
+        concat([df1, df2_obj])
 
     def test_concat_datetime_timezone(self):
         # GH 18523
-        idx1 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris")
-        idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq="H")
+        idx1 = date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris")
+        idx2 = date_range(start=idx1[0], end=idx1[-1], freq="H")
         df1 = DataFrame({"a": [1, 2, 3]}, index=idx1)
         df2 = DataFrame({"b": [1, 2, 3]}, index=idx2)
-        result = pd.concat([df1, df2], axis=1)
+        result = concat([df1, df2], axis=1)
 
         exp_idx = (
             DatetimeIndex(
@@ -73,9 +73,9 @@ def test_concat_datetime_timezone(self):
 
         tm.assert_frame_equal(result, expected)
 
-        idx3 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo")
+        idx3 = date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo")
         df3 = DataFrame({"b": [1, 2, 3]}, index=idx3)
-        result = pd.concat([df1, df3], axis=1)
+        result = concat([df1, df3], axis=1)
 
         exp_idx = DatetimeIndex(
             [
@@ -104,9 +104,7 @@ def test_concat_datetime_timezone(self):
         tm.assert_frame_equal(result, expected)
 
         # GH 13783: Concat after resample
-        result = pd.concat(
-            [df1.resample("H").mean(), df2.resample("H").mean()], sort=True
-        )
+        result = concat([df1.resample("H").mean(), df2.resample("H").mean()], sort=True)
         expected = DataFrame(
             {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]},
             index=idx1.append(idx1),
@@ -116,14 +114,14 @@ def test_concat_datetime_timezone(self):
     def test_concat_datetimeindex_freq(self):
         # GH 3232
         # Monotonic index result
-        dr = pd.date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC")
+        dr = date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC")
         data = list(range(100))
         expected = DataFrame(data, index=dr)
-        result = pd.concat([expected[:50], expected[50:]])
+        result = concat([expected[:50], expected[50:]])
         tm.assert_frame_equal(result, expected)
 
         # Non-monotonic index result
-        result = pd.concat([expected[50:], expected[:50]])
+        result = concat([expected[50:], expected[:50]])
         expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50]))
         expected.index._data.freq = None
         tm.assert_frame_equal(result, expected)
@@ -179,21 +177,21 @@ def test_concat_NaT_series(self):
         # all NaT with tz
         expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]")
-        result = pd.concat([y, y], ignore_index=True)
+        result = concat([y, y], ignore_index=True)
         tm.assert_series_equal(result, expected)
 
         # without tz
-        x = Series(pd.date_range("20151124 08:00", "20151124 09:00", freq="1h"))
-        y = Series(pd.date_range("20151124 10:00", "20151124 11:00", freq="1h"))
+        x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h"))
+        y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h"))
         y[:] = pd.NaT
         expected = Series([x[0], x[1], pd.NaT, pd.NaT])
-        result = pd.concat([x, y], ignore_index=True)
+        result = concat([x, y], ignore_index=True)
         tm.assert_series_equal(result, expected)
 
         # all NaT without tz
         x[:] = pd.NaT
         expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]")
-        result = pd.concat([x, y], ignore_index=True)
+        result = concat([x, y], ignore_index=True)
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("tz", [None, "UTC"])
@@ -215,7 +213,7 @@ def test_concat_NaT_dataframes(self, tz):
             ]
         )
 
-        result = pd.concat([first, second], axis=0)
+        result = concat([first, second], axis=0)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("tz1", [None, "UTC"])
@@ -228,7 +226,7 @@ def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
         first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1))
         second = DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))
 
-        result = pd.concat([first, second], axis=0)
+        result = concat([first, second], axis=0)
         expected = DataFrame(Series([pd.NaT, pd.NaT, s], index=[0, 1, 0]))
         expected = expected.apply(lambda x: x.dt.tz_localize(tz2))
         if tz1 != tz2:
@@ -249,7 +247,7 @@ def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
                 1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2),
             }
         )
-        result = pd.concat([first, second], axis=1)
+        result = concat([first, second], axis=1)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize("tz1", [None, "UTC"])
@@ -278,7 +276,7 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
         if tz1 != tz2:
             expected = expected.astype(object)
 
-        result = pd.concat([first, second])
+        result = concat([first, second])
         tm.assert_frame_equal(result, expected)
 
 
@@ -306,7 +304,7 @@ def test_concat_tz_series(self):
         second = DataFrame([[datetime(2016, 1, 2)]])
         second[0] = second[0].dt.tz_localize("UTC")
 
-        result = pd.concat([first, second])
+        result = concat([first, second])
         assert result[0].dtype == "datetime64[ns, UTC]"
 
         # Concatenating two London times
@@ -316,7 +314,7 @@ def test_concat_tz_series(self):
         second = DataFrame([[datetime(2016, 1, 2)]])
         second[0] = second[0].dt.tz_localize("Europe/London")
 
-        result = pd.concat([first, second])
+        result = concat([first, second])
         assert result[0].dtype == "datetime64[ns, Europe/London]"
 
         # Concatenating 2+1 London times
@@ -326,7 +324,7 @@ def test_concat_tz_series(self):
         second = DataFrame([[datetime(2016, 1, 3)]])
         second[0] = second[0].dt.tz_localize("Europe/London")
 
-        result = pd.concat([first, second])
+        result = concat([first, second])
         assert result[0].dtype == "datetime64[ns, Europe/London]"
 
         # Concat'ing 1+2 London times
@@ -336,7 +334,7 @@ def test_concat_tz_series(self):
         second = DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
         second[0] = second[0].dt.tz_localize("Europe/London")
 
-        result = pd.concat([first, second])
+        result = concat([first, second])
         assert result[0].dtype == "datetime64[ns, Europe/London]"
 
     def test_concat_tz_series_tzlocal(self):
@@ -379,7 +377,7 @@ def test_concat_tz_frame(self):
         )
 
         # concat
-        df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
+        df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
         tm.assert_frame_equal(df2, df3)
 
     def test_concat_multiple_tzs(self):
@@ -393,15 +391,15 @@ def test_concat_multiple_tzs(self):
         df2 = DataFrame({"time": [ts2]})
         df3 = DataFrame({"time": [ts3]})
 
-        results = pd.concat([df1, df2]).reset_index(drop=True)
+        results = concat([df1, df2]).reset_index(drop=True)
         expected = DataFrame({"time": [ts1, ts2]}, dtype=object)
         tm.assert_frame_equal(results, expected)
 
-        results = pd.concat([df1, df3]).reset_index(drop=True)
+        results = concat([df1, df3]).reset_index(drop=True)
         expected = DataFrame({"time": [ts1, ts3]}, dtype=object)
         tm.assert_frame_equal(results, expected)
 
-        results = pd.concat([df2, df3]).reset_index(drop=True)
+        results = concat([df2, df3]).reset_index(drop=True)
         expected = DataFrame({"time": [ts2, ts3]})
         tm.assert_frame_equal(results, expected)
 
@@ -439,7 +437,7 @@ def test_concat_tz_not_aligned(self):
         ts = pd.to_datetime([1, 2]).tz_localize("UTC")
         a = DataFrame({"A": ts})
         b = DataFrame({"A": ts, "B": ts})
-        result = pd.concat([a, b], sort=True, ignore_index=True)
+        result = concat([a, b], sort=True, ignore_index=True)
         expected = DataFrame(
             {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)}
         )
@@ -467,7 +465,7 @@ def test_concat_tz_NaT(self, t1):
         df1 = DataFrame([[ts1, ts2]])
         df2 = DataFrame([[ts3]])
 
-        result = pd.concat([df1, df2])
+        result = concat([df1, df2])
         expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0])
 
         tm.assert_frame_equal(result, expected)
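A standalone sketch (not part of the patch; assumes pandas >= 1.0) of the tz-mismatch upcast the NaT tests above rely on: values that cannot share a single datetime64[ns, tz] dtype fall back to object.

    import pandas as pd
    from pandas import DataFrame, Timestamp, concat

    aware = DataFrame({"a": [Timestamp("2015-11-24", tz="UTC")]})
    naive = DataFrame({"a": [Timestamp("2015-11-24")]})
    # tz-aware mixed with tz-naive cannot keep a datetime64[ns, tz] dtype,
    # so the concatenated column is upcast to object.
    out = concat([aware, naive], ignore_index=True)
    assert out["a"].dtype == object
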
diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py
index 0e86cb0ae48c0..ab419e0481973 100644
--- a/pandas/tests/reshape/concat/test_empty.py
+++ b/pandas/tests/reshape/concat/test_empty.py
@@ -49,7 +49,7 @@ def test_concat_empty_series(self):
         # GH 11082
         s1 = Series([1, 2, 3], name="x")
         s2 = Series(name="y", dtype="float64")
-        res = pd.concat([s1, s2], axis=1)
+        res = concat([s1, s2], axis=1)
         exp = DataFrame(
             {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]},
             index=Index([0, 1, 2], dtype="O"),
@@ -58,7 +58,7 @@ def test_concat_empty_series(self):
 
         s1 = Series([1, 2, 3], name="x")
         s2 = Series(name="y", dtype="float64")
-        res = pd.concat([s1, s2], axis=0)
+        res = concat([s1, s2], axis=0)
         # name will be reset
         exp = Series([1, 2, 3])
         tm.assert_series_equal(res, exp)
@@ -66,7 +66,7 @@ def test_concat_empty_series(self):
         # empty Series with no name
         s1 = Series([1, 2, 3], name="x")
         s2 = Series(name=None, dtype="float64")
-        res = pd.concat([s1, s2], axis=1)
+        res = concat([s1, s2], axis=1)
         exp = DataFrame(
             {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
             columns=["x", 0],
@@ -109,7 +109,7 @@ def test_concat_empty_series_timelike(self, tz, values):
         ],
     )
     def test_concat_empty_series_dtypes(self, left, right, expected):
-        result = pd.concat([Series(dtype=left), Series(dtype=right)])
+        result = concat([Series(dtype=left), Series(dtype=right)])
         assert result.dtype == expected
 
     @pytest.mark.parametrize(
@@ -118,10 +118,10 @@ def test_concat_empty_series_dtypes_match_roundtrips(self, dtype):
         dtype = np.dtype(dtype)
 
-        result = pd.concat([Series(dtype=dtype)])
+        result = concat([Series(dtype=dtype)])
         assert result.dtype == dtype
 
-        result = pd.concat([Series(dtype=dtype), Series(dtype=dtype)])
+        result = concat([Series(dtype=dtype), Series(dtype=dtype)])
         assert result.dtype == dtype
 
     def test_concat_empty_series_dtypes_roundtrips(self):
@@ -164,13 +164,13 @@ def get_result_type(dtype, dtype2):
                 continue
 
             expected = get_result_type(dtype, dtype2)
-            result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
+            result = concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
             assert result.kind == expected
 
     def test_concat_empty_series_dtypes_triple(self):
         assert (
-            pd.concat(
+            concat(
                 [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
             ).dtype
             == np.object_
@@ -179,14 +179,14 @@ def test_concat_empty_series_dtypes_triple(self):
     def test_concat_empty_series_dtype_category_with_array(self):
         # GH#18515
         assert (
-            pd.concat(
+            concat(
                 [Series(np.array([]), dtype="category"), Series(dtype="float64")]
             ).dtype
             == "float64"
         )
 
     def test_concat_empty_series_dtypes_sparse(self):
-        result = pd.concat(
+        result = concat(
             [
                 Series(dtype="float64").astype("Sparse"),
                 Series(dtype="float64").astype("Sparse"),
@@ -194,14 +194,14 @@ def test_concat_empty_series_dtypes_sparse(self):
         )
         assert result.dtype == "Sparse[float64]"
 
-        result = pd.concat(
+        result = concat(
             [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")]
         )
         # TODO: release-note: concat sparse dtype
         expected = pd.SparseDtype(np.float64)
         assert result.dtype == expected
 
-        result = pd.concat(
+        result = concat(
             [Series(dtype="float64").astype("Sparse"), Series(dtype="object")]
         )
         # TODO: release-note: concat sparse dtype
@@ -212,7 +212,7 @@ def test_concat_empty_df_object_dtype(self):
         # GH 9149
         df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
         df_2 = DataFrame(columns=df_1.columns)
-        result = pd.concat([df_1, df_2], axis=0)
+        result = concat([df_1, df_2], axis=0)
         expected = df_1.astype(object)
         tm.assert_frame_equal(result, expected)
 
@@ -222,12 +222,12 @@ def test_concat_empty_dataframe_dtypes(self):
         df["b"] = df["b"].astype(np.int32)
         df["c"] = df["c"].astype(np.float64)
 
-        result = pd.concat([df, df])
+        result = concat([df, df])
         assert result["a"].dtype == np.bool_
         assert result["b"].dtype == np.int32
         assert result["c"].dtype == np.float64
 
-        result = pd.concat([df, df.astype(np.float64)])
+        result = concat([df, df.astype(np.float64)])
         assert result["a"].dtype == np.object_
         assert result["b"].dtype == np.float64
         assert result["c"].dtype == np.float64
@@ -239,7 +239,7 @@ def test_concat_inner_join_empty(self):
         df_expected = DataFrame({"a": []}, index=[], dtype="int64")
 
         for how, expected in [("inner", df_expected), ("outer", df_a)]:
-            result = pd.concat([df_a, df_empty], axis=1, join=how)
+            result = concat([df_a, df_empty], axis=1, join=how)
             tm.assert_frame_equal(result, expected)
 
     def test_empty_dtype_coerce(self):
diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py
index c822dab9b8cfc..bd845f73c7c69 100644
--- a/pandas/tests/reshape/concat/test_index.py
+++ b/pandas/tests/reshape/concat/test_index.py
@@ -60,7 +60,7 @@ def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out):
         frames = [
             DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"])
         ]
-        result = pd.concat(frames, axis=1)
+        result = concat(frames, axis=1)
 
         exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out)
         expected = DataFrame(
@@ -113,7 +113,7 @@ def test_default_index(self):
         # is_series and ignore_index
         s1 = Series([1, 2, 3], name="x")
         s2 = Series([4, 5, 6], name="y")
-        res = pd.concat([s1, s2], axis=1, ignore_index=True)
+        res = concat([s1, s2], axis=1, ignore_index=True)
         assert isinstance(res.columns, pd.RangeIndex)
         exp = DataFrame([[1, 4], [2, 5], [3, 6]])
         # use check_index_type=True to check the result have
@@ -123,7 +123,7 @@ def test_default_index(self):
         # is_series and all inputs have no names
         s1 = Series([1, 2, 3])
         s2 = Series([4, 5, 6])
-        res = pd.concat([s1, s2], axis=1, ignore_index=False)
+        res = concat([s1, s2], axis=1, ignore_index=False)
         assert isinstance(res.columns, pd.RangeIndex)
         exp = DataFrame([[1, 4], [2, 5], [3, 6]])
         exp.columns = pd.RangeIndex(2)
@@ -133,11 +133,11 @@ def test_default_index(self):
         df1 = DataFrame({"A": [1, 2], "B": [5, 6]})
         df2 = DataFrame({"A": [3, 4], "B": [7, 8]})
 
-        res = pd.concat([df1, df2], axis=0, ignore_index=True)
+        res = concat([df1, df2], axis=0, ignore_index=True)
         exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"])
         tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
 
-        res = pd.concat([df1, df2], axis=1, ignore_index=True)
+        res = concat([df1, df2], axis=1, ignore_index=True)
         exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
         tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
 
@@ -261,7 +261,7 @@ def test_concat_multiindex_dfs_with_deepcopy(self):
             names=["testname", None, None],
         )
         expected = DataFrame([[0], [1]], index=expected_index)
-        result_copy = pd.concat(deepcopy(example_dict), names=["testname"])
+        result_copy = concat(deepcopy(example_dict), names=["testname"])
         tm.assert_frame_equal(result_copy, expected)
-        result_no_copy = pd.concat(example_dict, names=["testname"])
+        result_no_copy = concat(example_dict, names=["testname"])
         tm.assert_frame_equal(result_no_copy, expected)
diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py
index 44e29f08f282e..34bba581b31c7 100644
--- a/pandas/tests/reshape/concat/test_series.py
+++ b/pandas/tests/reshape/concat/test_series.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
 
-import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -48,7 +47,7 @@ def test_concat_empty_and_non_empty_series_regression(self):
         s2 = Series([], dtype=object)
 
         expected = s1
-        result = pd.concat([s1, s2])
+        result = concat([s1, s2])
         tm.assert_series_equal(result, expected)
 
     def test_concat_series_axis1(self, sort=sort):
@@ -117,7 +116,7 @@ def test_concat_series_name_npscalar_tuple(self, s1name, s2name):
         # GH21015
         s1 = Series({"a": 1, "b": 2}, name=s1name)
         s2 = Series({"c": 5, "d": 6}, name=s2name)
-        result = pd.concat([s1, s2])
+        result = concat([s1, s2])
         expected = Series({"a": 1, "b": 2, "c": 5, "d": 6})
         tm.assert_series_equal(result, expected)
 
@@ -147,5 +146,5 @@ def test_concat_series_partial_columns_names(self):
     def test_concat_series_length_one_reversed(self, frame_or_series):
         # GH39401
         obj = frame_or_series([100])
-        result = pd.concat([obj.iloc[::-1]])
+        result = concat([obj.iloc[::-1]])
         tm.assert_equal(result, obj)
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 2ec94d4cebf5a..d31930aa233cd 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -836,15 +836,13 @@ def test_join_cross(input_col, output_cols):
 def test_join_multiindex_one_level(join_type):
     # GH#36909
     left = DataFrame(
-        data={"c": 3}, index=pd.MultiIndex.from_tuples([(1, 2)], names=("a", "b"))
-    )
-    right = DataFrame(
-        data={"d": 4}, index=pd.MultiIndex.from_tuples([(2,)], names=("b",))
+        data={"c": 3}, index=MultiIndex.from_tuples([(1, 2)], names=("a", "b"))
     )
+    right = DataFrame(data={"d": 4}, index=MultiIndex.from_tuples([(2,)], names=("b",)))
     result = left.join(right, how=join_type)
     expected = DataFrame(
         {"c": [3], "d": [4]},
-        index=pd.MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
+        index=MultiIndex.from_tuples([(2, 1)], names=["b", "a"]),
     )
     tm.assert_frame_equal(result, expected)
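To make the GH#36909 scenario above concrete, a minimal sketch (assumes pandas >= 1.2) of joining on the single shared index level "b":

    from pandas import DataFrame, MultiIndex

    left = DataFrame({"c": 3}, index=MultiIndex.from_tuples([(1, 2)], names=("a", "b")))
    right = DataFrame({"d": 4}, index=MultiIndex.from_tuples([(2,)], names=("b",)))
    # join aligns on the common "b" level; the result index carries levels ["b", "a"]
    print(left.join(right, how="inner"))
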
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index e1b1e80a29a43..4fa2865a9e320 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -134,7 +134,7 @@ def test_merge_inner_join_empty(self):
         # GH 15328
         df_empty = DataFrame()
         df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
-        result = pd.merge(df_empty, df_a, left_index=True, right_index=True)
+        result = merge(df_empty, df_a, left_index=True, right_index=True)
         expected = DataFrame({"a": []}, index=[], dtype="int64")
         tm.assert_frame_equal(result, expected)
 
@@ -152,7 +152,7 @@ def test_merge_non_string_columns(self):
         right = left.astype(float)
 
         expected = left
-        result = pd.merge(left, right)
+        result = merge(left, right)
         tm.assert_frame_equal(expected, result)
 
     def test_merge_index_as_on_arg(self):
@@ -459,7 +459,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg):
             dtype=object,
         )
 
-        result = pd.merge(left, right, how=join_type, **kwarg)
+        result = merge(left, right, how=join_type, **kwarg)
         tm.assert_frame_equal(result, exp_in)
 
     def test_merge_left_empty_right_notempty(self):
@@ -483,15 +483,15 @@ def test_merge_left_empty_right_notempty(self):
             exp_in.index = exp_in.index.astype(object)
 
         def check1(exp, kwarg):
-            result = pd.merge(left, right, how="inner", **kwarg)
+            result = merge(left, right, how="inner", **kwarg)
             tm.assert_frame_equal(result, exp)
-            result = pd.merge(left, right, how="left", **kwarg)
+            result = merge(left, right, how="left", **kwarg)
             tm.assert_frame_equal(result, exp)
 
         def check2(exp, kwarg):
-            result = pd.merge(left, right, how="right", **kwarg)
+            result = merge(left, right, how="right", **kwarg)
             tm.assert_frame_equal(result, exp)
-            result = pd.merge(left, right, how="outer", **kwarg)
+            result = merge(left, right, how="outer", **kwarg)
             tm.assert_frame_equal(result, exp)
 
         for kwarg in [
@@ -532,15 +532,15 @@ def test_merge_left_notempty_right_empty(self):
             exp_in.index = exp_in.index.astype(object)
 
         def check1(exp, kwarg):
-            result = pd.merge(left, right, how="inner", **kwarg)
+            result = merge(left, right, how="inner", **kwarg)
             tm.assert_frame_equal(result, exp)
-            result = pd.merge(left, right, how="right", **kwarg)
+            result = merge(left, right, how="right", **kwarg)
             tm.assert_frame_equal(result, exp)
 
         def check2(exp, kwarg):
-            result = pd.merge(left, right, how="left", **kwarg)
+            result = merge(left, right, how="left", **kwarg)
             tm.assert_frame_equal(result, exp)
-            result = pd.merge(left, right, how="outer", **kwarg)
+            result = merge(left, right, how="outer", **kwarg)
             tm.assert_frame_equal(result, exp)
 
         for kwarg in [
@@ -800,7 +800,7 @@ def test_merge_on_datetime64tz(self):
                 "value_y": [np.nan, 1, 2, 3],
             }
         )
-        result = pd.merge(left, right, on="key", how="outer")
+        result = merge(left, right, on="key", how="outer")
         tm.assert_frame_equal(result, expected)
 
         left = DataFrame(
@@ -824,7 +824,7 @@ def test_merge_on_datetime64tz(self):
                 + list(pd.date_range("20151011", periods=2, tz="US/Eastern")),
             }
         )
-        result = pd.merge(left, right, on="key", how="outer")
+        result = merge(left, right, on="key", how="outer")
         tm.assert_frame_equal(result, expected)
         assert result["value_x"].dtype == "datetime64[ns, US/Eastern]"
         assert result["value_y"].dtype == "datetime64[ns, US/Eastern]"
@@ -874,7 +874,7 @@ def test_merge_datetime64tz_with_dst_transition(self):
             }
         )
         df2["date"] = df2["date"].dt.tz_localize("UTC").dt.tz_convert("Europe/Madrid")
-        result = pd.merge(df1, df2, how="outer", on="date")
+        result = merge(df1, df2, how="outer", on="date")
         expected = DataFrame(
             {
                 "date": pd.date_range(
@@ -917,7 +917,7 @@ def test_merge_on_periods(self):
                 "value_y": [np.nan, 1, 2, 3],
             }
         )
-        result = pd.merge(left, right, on="key", how="outer")
+        result = merge(left, right, on="key", how="outer")
         tm.assert_frame_equal(result, expected)
 
         left = DataFrame(
@@ -936,7 +936,7 @@ def test_merge_on_periods(self):
                 "value_y": [pd.NaT] + list(exp_y),
             }
         )
-        result = pd.merge(left, right, on="key", how="outer")
+        result = merge(left, right, on="key", how="outer")
         tm.assert_frame_equal(result, expected)
         assert result["value_x"].dtype == "Period[D]"
         assert result["value_y"].dtype == "Period[D]"
@@ -1430,7 +1430,7 @@ def test_different(self, right_vals):
         # GH 9780
         # We allow merging on object and categorical cols and cast
         # categorical cols to object
-        result = pd.merge(left, right, on="A")
+        result = merge(left, right, on="A")
         assert is_object_dtype(result.A.dtype)
 
     @pytest.mark.parametrize("d1", [np.int64, np.int32, np.int16, np.int8, np.uint8])
@@ -1530,9 +1530,9 @@ def test_merge_incompat_infer_boolean_object(self):
         df2 = DataFrame({"key": [True, False]})
 
         expected = DataFrame({"key": [True, False]}, dtype=object)
-        result = pd.merge(df1, df2, on="key")
+        result = merge(df1, df2, on="key")
         tm.assert_frame_equal(result, expected)
-        result = pd.merge(df2, df1, on="key")
+        result = merge(df2, df1, on="key")
         tm.assert_frame_equal(result, expected)
 
         # with missing value
@@ -1540,9 +1540,9 @@ def test_merge_incompat_infer_boolean_object(self):
         df2 = DataFrame({"key": [True, False]})
 
         expected = DataFrame({"key": [True, False]}, dtype=object)
-        result = pd.merge(df1, df2, on="key")
+        result = merge(df1, df2, on="key")
         tm.assert_frame_equal(result, expected)
-        result = pd.merge(df2, df1, on="key")
+        result = merge(df2, df1, on="key")
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1564,9 +1564,9 @@ def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals):
         df1 = DataFrame({"A": df1_vals})
         df2 = DataFrame({"A": df2_vals})
 
-        result = pd.merge(df1, df2, on=["A"])
+        result = merge(df1, df2, on=["A"])
         assert is_object_dtype(result.A.dtype)
-        result = pd.merge(df2, df1, on=["A"])
+        result = merge(df2, df1, on=["A"])
         assert is_object_dtype(result.A.dtype)
 
     @pytest.mark.parametrize(
@@ -1605,7 +1605,7 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals):
         )
         msg = re.escape(msg)
         with pytest.raises(ValueError, match=msg):
-            pd.merge(df1, df2, on=["A"])
+            merge(df1, df2, on=["A"])
 
         # Check that error still raised when swapping order of dataframes
         msg = (
@@ -1615,7 +1615,7 @@ def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals):
         )
         msg = re.escape(msg)
         with pytest.raises(ValueError, match=msg):
-            pd.merge(df2, df1, on=["A"])
+            merge(df2, df1, on=["A"])
 
 
 @pytest.fixture
@@ -1642,7 +1642,7 @@ def right():
 class TestMergeCategorical:
     def test_identical(self, left):
         # merging on the same, should preserve dtypes
-        merged = pd.merge(left, left, on="X")
+        merged = merge(left, left, on="X")
         result = merged.dtypes.sort_index()
         expected = Series(
             [CategoricalDtype(categories=["foo", "bar"]), np.dtype("O"), np.dtype("O")],
@@ -1653,7 +1653,7 @@ def test_basic(self, left, right):
         # we have matching Categorical dtypes in X
         # so should preserve the merged column
-        merged = pd.merge(left, right, on="X")
+        merged = merge(left, right, on="X")
         result = merged.dtypes.sort_index()
         expected = Series(
             [
@@ -1680,7 +1680,7 @@ def test_merge_categorical(self):
                 "b": {0: "g", 1: "g", 2: "g", 3: "g", 4: "g"},
             }
         )
-        df = pd.merge(left, right, how="left", left_on="b", right_on="c")
+        df = merge(left, right, how="left", left_on="b", right_on="c")
 
         # object-object
         expected = df.copy()
@@ -1690,14 +1690,14 @@ def test_merge_categorical(self):
         # because we don't have any matching rows
         cright = right.copy()
         cright["d"] = cright["d"].astype("category")
-        result = pd.merge(left, cright, how="left", left_on="b", right_on="c")
+        result = merge(left, cright, how="left", left_on="b", right_on="c")
         expected["d"] = expected["d"].astype(CategoricalDtype(["null"]))
         tm.assert_frame_equal(result, expected)
 
         # cat-object
         cleft = left.copy()
         cleft["b"] = cleft["b"].astype("category")
-        result = pd.merge(cleft, cright, how="left", left_on="b", right_on="c")
+        result = merge(cleft, cright, how="left", left_on="b", right_on="c")
         tm.assert_frame_equal(result, expected)
 
         # cat-cat
@@ -1705,7 +1705,7 @@ def test_merge_categorical(self):
         cright["d"] = cright["d"].astype("category")
         cleft = left.copy()
         cleft["b"] = cleft["b"].astype("category")
-        result = pd.merge(cleft, cright, how="left", left_on="b", right_on="c")
+        result = merge(cleft, cright, how="left", left_on="b", right_on="c")
         tm.assert_frame_equal(result, expected)
 
     def tests_merge_categorical_unordered_equal(self):
@@ -1723,7 +1723,7 @@ def tests_merge_categorical_unordered_equal(self):
                 "Right": ["C1", "B1", "A1"],
             }
         )
-        result = pd.merge(df1, df2, on=["Foo"])
+        result = merge(df1, df2, on=["Foo"])
         expected = DataFrame(
             {
                 "Foo": Categorical(["A", "B", "C"]),
@@ -1737,7 +1737,7 @@ def test_other_columns(self, left, right):
         # non-merge columns should preserve if possible
         right = right.assign(Z=right.Z.astype("category"))
 
-        merged = pd.merge(left, right, on="X")
+        merged = merge(left, right, on="X")
         result = merged.dtypes.sort_index()
         expected = Series(
             [
@@ -1770,7 +1770,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right):
         assert is_categorical_dtype(left.X.values.dtype)
         # assert not left.X.values._categories_match_up_to_permutation(right.X.values)
 
-        merged = pd.merge(left, right, on="X", how=join_type)
+        merged = merge(left, right, on="X", how=join_type)
 
         result = merged.dtypes.sort_index()
         expected = Series(
@@ -1814,7 +1814,7 @@ def test_self_join_multiple_categories(self):
         df = df.apply(lambda x: x.astype("category"))
 
         # self-join should equal ourselves
-        result = pd.merge(df, df, on=list(df.columns))
+        result = merge(df, df, on=list(df.columns))
 
         tm.assert_frame_equal(result, df)
 
@@ -1840,14 +1840,14 @@ def test_dtype_on_categorical_dates(self):
             ],
             columns=["date", "num2", "num4"],
         )
-        result_outer = pd.merge(df, df2, how="outer", on=["date"])
+        result_outer = merge(df, df2, how="outer", on=["date"])
         tm.assert_frame_equal(result_outer, expected_outer)
 
         expected_inner = DataFrame(
             [[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]],
             columns=["date", "num2", "num4"],
        )
-        result_inner = pd.merge(df, df2, how="inner", on=["date"])
+        result_inner = merge(df, df2, how="inner", on=["date"])
         tm.assert_frame_equal(result_inner, expected_inner)
 
     @pytest.mark.parametrize("ordered", [True, False])
@@ -1875,7 +1875,7 @@ def test_merging_with_bool_or_int_cateorical_column(
 
     def test_merge_on_int_array(self):
         # GH 23020
         df = DataFrame({"A": Series([1, 2, np.nan], dtype="Int64"), "B": 1})
-        result = pd.merge(df, df, on="A")
+        result = merge(df, df, on="A")
         expected = DataFrame(
             {"A": Series([1, 2, np.nan], dtype="Int64"), "B_x": 1, "B_y": 1}
         )
@@ -1941,7 +1941,7 @@ class TestMergeOnIndexes:
         ],
     )
     def test_merge_on_indexes(self, left_df, right_df, how, sort, expected):
-        result = pd.merge(
+        result = merge(
             left_df, right_df, left_index=True, right_index=True, how=how, sort=sort
         )
         tm.assert_frame_equal(result, expected)
@@ -1988,23 +1988,19 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
     # GH 21220
     a = DataFrame(
         {"A": [1, 2, 3, 4]},
-        index=pd.MultiIndex.from_product(
-            [["a", "b"], [0, 1]], names=["outer", "inner"]
-        ),
+        index=MultiIndex.from_product([["a", "b"], [0, 1]], names=["outer", "inner"]),
     )
     b = Series(
         [1, 2, 3, 4],
-        index=pd.MultiIndex.from_product(
-            [["a", "b"], [1, 2]], names=["outer", "inner"]
-        ),
+        index=MultiIndex.from_product([["a", "b"], [1, 2]], names=["outer", "inner"]),
         name=nm,
     )
     expected = DataFrame(
         {"A": [2, 4], "B": [1, 3]},
-        index=pd.MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]),
+        index=MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]),
     )
     if nm is not None:
-        result = pd.merge(
+        result = merge(
             a,
             b,
             on=on,
@@ -2017,7 +2013,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
     else:
         msg = "Cannot merge a Series without a name"
         with pytest.raises(ValueError, match=msg):
-            result = pd.merge(
+            result = merge(
                 a,
                 b,
                 on=on,
@@ -2056,7 +2052,7 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols):
     result = a.merge(b, left_index=True, right_index=True, **kwargs)
     tm.assert_frame_equal(result, expected)
 
-    result = pd.merge(a, b, left_index=True, right_index=True, **kwargs)
+    result = merge(a, b, left_index=True, right_index=True, **kwargs)
     tm.assert_frame_equal(result, expected)
 
 
@@ -2102,7 +2098,7 @@ def test_merge_suffix_error(col1, col2, suffixes):
     # TODO: might reconsider current raise behaviour, see issue 24782
     msg = "columns overlap but no suffix specified"
     with pytest.raises(ValueError, match=msg):
-        pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
+        merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
 
 
 @pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}])
@@ -2111,7 +2107,7 @@ def test_merge_suffix_warns(suffixes):
     b = DataFrame({"b": [3, 4, 5]})
 
     with tm.assert_produces_warning(FutureWarning):
-        pd.merge(a, b, left_index=True, right_index=True, suffixes={"left", "right"})
+        merge(a, b, left_index=True, right_index=True, suffixes={"left", "right"})
 
 
 @pytest.mark.parametrize(
@@ -2126,7 +2122,7 @@ def test_merge_suffix_length_error(col1, col2, suffixes, msg):
     b = DataFrame({col2: [3, 4, 5]})
 
     with pytest.raises(ValueError, match=msg):
-        pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
+        merge(a, b, left_index=True, right_index=True, suffixes=suffixes)
 
 
 @pytest.mark.parametrize("cat_dtype", ["one", "two"])
@@ -2196,7 +2192,7 @@ def test_merge_on_cat_and_ext_array():
     left = right.copy()
     left["a"] = left["a"].astype("category")
 
-    result = pd.merge(left, right, how="inner", on="a")
+    result = merge(left, right, how="inner", on="a")
     expected = right.copy()
 
     tm.assert_frame_equal(result, expected)
@@ -2210,7 +2206,7 @@ def test_merge_multiindex_columns():
     letters = ["a", "b", "c", "d"]
     numbers = ["1", "2", "3"]
-    index = pd.MultiIndex.from_product((letters, numbers), names=["outer", "inner"])
+    index = MultiIndex.from_product((letters, numbers), names=["outer", "inner"])
 
     frame_x = DataFrame(columns=index)
     frame_x["id"] = ""
@@ -2225,7 +2221,7 @@ def test_merge_multiindex_columns():
     expected_labels = [letter + l_suf for letter in letters] + [
         letter + r_suf for letter in letters
     ]
-    expected_index = pd.MultiIndex.from_product(
+    expected_index = MultiIndex.from_product(
         [expected_labels, numbers], names=["outer", "inner"]
     )
     expected = DataFrame(columns=expected_index)
@@ -2240,7 +2236,7 @@ def test_merge_datetime_upcast_dtype():
     df2 = DataFrame(
         {"y": ["1", "2", "3"], "z": pd.to_datetime(["2000", "2001", "2002"])}
     )
-    result = pd.merge(df1, df2, how="left", on="y")
+    result = merge(df1, df2, how="left", on="y")
     expected = DataFrame(
         {
             "x": ["a", "b", "c"],
@@ -2387,7 +2383,7 @@ def test_merge_right_left_index():
     # GH#38616
    left = DataFrame({"x": [1, 1], "z": ["foo", "foo"]})
     right = DataFrame({"x": [1, 1], "z": ["foo", "foo"]})
-    result = pd.merge(left, right, how="right", left_index=True, right_on="x")
+    result = merge(left, right, how="right", left_index=True, right_on="x")
     expected = DataFrame(
         {
             "x": [1, 1],
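As a quick reference for the suffix tests above, a sketch (toy data, not from the patch) of how overlapping non-key columns are disambiguated:

    from pandas import DataFrame, merge

    a = DataFrame({"x": [1, 2, 3]})
    b = DataFrame({"x": [4, 5, 6]})
    # Overlapping columns receive the suffixes; passing suffixes=(None, None)
    # instead raises ValueError("columns overlap but no suffix specified").
    result = merge(a, b, left_index=True, right_index=True, suffixes=("_l", "_r"))
    assert list(result.columns) == ["x_l", "x_r"]
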
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index 5fa08904e3fcf..3f5bb9b84372c 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -47,14 +47,14 @@ def test_examples1(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 3, 7]}
         )
 
-        result = pd.merge_asof(left, right, on="a")
+        result = merge_asof(left, right, on="a")
         tm.assert_frame_equal(result, expected)
 
     def test_examples2(self):
         """ doc-string examples """
         trades = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.038",
@@ -72,7 +72,7 @@ def test_examples2(self):
 
         quotes = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -100,15 +100,13 @@ def test_examples2(self):
             columns=["time", "ticker", "bid", "ask"],
         )
 
-        pd.merge_asof(trades, quotes, on="time", by="ticker")
+        merge_asof(trades, quotes, on="time", by="ticker")
 
-        pd.merge_asof(
-            trades, quotes, on="time", by="ticker", tolerance=Timedelta("2ms")
-        )
+        merge_asof(trades, quotes, on="time", by="ticker", tolerance=Timedelta("2ms"))
 
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.038",
@@ -126,7 +124,7 @@ def test_examples2(self):
             columns=["time", "ticker", "price", "quantity", "bid", "ask"],
         )
 
-        result = pd.merge_asof(
+        result = merge_asof(
             trades,
             quotes,
             on="time",
@@ -147,7 +145,7 @@ def test_examples3(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, np.nan]}
         )
 
-        result = pd.merge_asof(left, right, on="a", direction="forward")
+        result = merge_asof(left, right, on="a", direction="forward")
         tm.assert_frame_equal(result, expected)
 
     def test_examples4(self):
@@ -161,7 +159,7 @@ def test_examples4(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, 7]}
         )
 
-        result = pd.merge_asof(left, right, on="a", direction="nearest")
+        result = merge_asof(left, right, on="a", direction="nearest")
         tm.assert_frame_equal(result, expected)
 
     def test_basic(self):
@@ -282,7 +280,7 @@ def test_multiby(self):
         # GH13936
         trades = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -301,7 +299,7 @@ def test_multiby(self):
 
         quotes = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -321,7 +319,7 @@ def test_multiby(self):
 
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -340,14 +338,14 @@ def test_multiby(self):
             columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"],
         )
 
-        result = pd.merge_asof(trades, quotes, on="time", by=["ticker", "exch"])
+        result = merge_asof(trades, quotes, on="time", by=["ticker", "exch"])
         tm.assert_frame_equal(result, expected)
 
     def test_multiby_heterogeneous_types(self):
         # GH13936
         trades = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -366,7 +364,7 @@ def test_multiby_heterogeneous_types(self):
 
         quotes = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -386,7 +384,7 @@ def test_multiby_heterogeneous_types(self):
 
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.023",
                         "20160525 13:30:00.023",
@@ -405,42 +403,42 @@ def test_multiby_heterogeneous_types(self):
             columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"],
         )
 
-        result = pd.merge_asof(trades, quotes, on="time", by=["ticker", "exch"])
+        result = merge_asof(trades, quotes, on="time", by=["ticker", "exch"])
         tm.assert_frame_equal(result, expected)
 
     def test_multiby_indexed(self):
         # GH15676
         left = pd.DataFrame(
             [
-                [pd.to_datetime("20160602"), 1, "a"],
-                [pd.to_datetime("20160602"), 2, "a"],
-                [pd.to_datetime("20160603"), 1, "b"],
-                [pd.to_datetime("20160603"), 2, "b"],
+                [to_datetime("20160602"), 1, "a"],
+                [to_datetime("20160602"), 2, "a"],
+                [to_datetime("20160603"), 1, "b"],
+                [to_datetime("20160603"), 2, "b"],
             ],
             columns=["time", "k1", "k2"],
         ).set_index("time")
 
         right = pd.DataFrame(
             [
-                [pd.to_datetime("20160502"), 1, "a", 1.0],
-                [pd.to_datetime("20160502"), 2, "a", 2.0],
-                [pd.to_datetime("20160503"), 1, "b", 3.0],
-                [pd.to_datetime("20160503"), 2, "b", 4.0],
+                [to_datetime("20160502"), 1, "a", 1.0],
+                [to_datetime("20160502"), 2, "a", 2.0],
+                [to_datetime("20160503"), 1, "b", 3.0],
+                [to_datetime("20160503"), 2, "b", 4.0],
             ],
             columns=["time", "k1", "k2", "value"],
         ).set_index("time")
 
         expected = pd.DataFrame(
             [
-                [pd.to_datetime("20160602"), 1, "a", 1.0],
-                [pd.to_datetime("20160602"), 2, "a", 2.0],
-                [pd.to_datetime("20160603"), 1, "b", 3.0],
-                [pd.to_datetime("20160603"), 2, "b", 4.0],
+                [to_datetime("20160602"), 1, "a", 1.0],
+                [to_datetime("20160602"), 2, "a", 2.0],
+                [to_datetime("20160603"), 1, "b", 3.0],
+                [to_datetime("20160603"), 2, "b", 4.0],
            ],
             columns=["time", "k1", "k2", "value"],
         ).set_index("time")
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left, right, left_index=True, right_index=True, by=["k1", "k2"]
         )
 
@@ -449,7 +447,7 @@ def test_multiby_indexed(self):
         with pytest.raises(
             MergeError, match="left_by and right_by must be same length"
         ):
-            pd.merge_asof(
+            merge_asof(
                 left,
                 right,
                 left_index=True,
@@ -629,7 +627,7 @@ def test_tolerance_forward(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]}
         )
 
-        result = pd.merge_asof(left, right, on="a", direction="forward", tolerance=1)
+        result = merge_asof(left, right, on="a", direction="forward", tolerance=1)
         tm.assert_frame_equal(result, expected)
 
     def test_tolerance_nearest(self):
@@ -642,7 +640,7 @@ def test_tolerance_nearest(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]}
         )
 
-        result = pd.merge_asof(left, right, on="a", direction="nearest", tolerance=1)
+        result = merge_asof(left, right, on="a", direction="nearest", tolerance=1)
         tm.assert_frame_equal(result, expected)
 
     def test_tolerance_tz(self):
@@ -650,7 +648,7 @@ def test_tolerance_tz(self):
         left = pd.DataFrame(
             {
                 "date": pd.date_range(
-                    start=pd.to_datetime("2016-01-02"),
+                    start=to_datetime("2016-01-02"),
                     freq="D",
                     periods=5,
                     tz=pytz.timezone("UTC"),
@@ -661,7 +659,7 @@ def test_tolerance_tz(self):
         right = pd.DataFrame(
             {
                 "date": pd.date_range(
-                    start=pd.to_datetime("2016-01-01"),
+                    start=to_datetime("2016-01-01"),
                     freq="D",
                     periods=5,
                     tz=pytz.timezone("UTC"),
@@ -669,12 +667,12 @@ def test_tolerance_tz(self):
                 "value2": list("ABCDE"),
             }
         )
-        result = pd.merge_asof(left, right, on="date", tolerance=Timedelta("1 day"))
+        result = merge_asof(left, right, on="date", tolerance=Timedelta("1 day"))
 
         expected = pd.DataFrame(
             {
                 "date": pd.date_range(
-                    start=pd.to_datetime("2016-01-02"),
+                    start=to_datetime("2016-01-02"),
                     freq="D",
                     periods=5,
                     tz=pytz.timezone("UTC"),
@@ -700,7 +698,7 @@ def test_tolerance_float(self):
             }
         )
 
-        result = pd.merge_asof(left, right, on="a", direction="nearest", tolerance=0.5)
+        result = merge_asof(left, right, on="a", direction="nearest", tolerance=0.5)
         tm.assert_frame_equal(result, expected)
 
     def test_index_tolerance(self):
@@ -709,7 +707,7 @@ def test_index_tolerance(self):
         trades = self.trades.set_index("time")
         quotes = self.quotes.set_index("time")
 
-        result = pd.merge_asof(
+        result = merge_asof(
             trades,
             quotes,
             left_index=True,
@@ -737,7 +735,7 @@ def test_allow_exact_matches_forward(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 7, 11]}
         )
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left, right, on="a", direction="forward", allow_exact_matches=False
         )
         tm.assert_frame_equal(result, expected)
@@ -752,7 +750,7 @@ def test_allow_exact_matches_nearest(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 3, 11]}
         )
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left, right, on="a", direction="nearest", allow_exact_matches=False
         )
         tm.assert_frame_equal(result, expected)
@@ -773,38 +771,38 @@ def test_allow_exact_matches_and_tolerance(self):
     def test_allow_exact_matches_and_tolerance2(self):
         # GH 13695
         df1 = pd.DataFrame(
-            {"time": pd.to_datetime(["2016-07-15 13:30:00.030"]), "username": ["bob"]}
+            {"time": to_datetime(["2016-07-15 13:30:00.030"]), "username": ["bob"]}
         )
         df2 = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"]
                 ),
                 "version": [1, 2],
             }
         )
 
-        result = pd.merge_asof(df1, df2, on="time")
+        result = merge_asof(df1, df2, on="time")
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(["2016-07-15 13:30:00.030"]),
+                "time": to_datetime(["2016-07-15 13:30:00.030"]),
                 "username": ["bob"],
                 "version": [2],
             }
         )
         tm.assert_frame_equal(result, expected)
 
-        result = pd.merge_asof(df1, df2, on="time", allow_exact_matches=False)
+        result = merge_asof(df1, df2, on="time", allow_exact_matches=False)
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(["2016-07-15 13:30:00.030"]),
+                "time": to_datetime(["2016-07-15 13:30:00.030"]),
                 "username": ["bob"],
                 "version": [1],
             }
         )
         tm.assert_frame_equal(result, expected)
 
-        result = pd.merge_asof(
+        result = merge_asof(
             df1,
             df2,
             on="time",
@@ -813,7 +811,7 @@ def test_allow_exact_matches_and_tolerance2(self):
         )
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(["2016-07-15 13:30:00.030"]),
+                "time": to_datetime(["2016-07-15 13:30:00.030"]),
                 "username": ["bob"],
                 "version": [np.nan],
             }
@@ -824,7 +822,7 @@ def test_allow_exact_matches_and_tolerance3(self):
         # GH 13709
         df1 = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"]
                 ),
                 "username": ["bob", "charlie"],
@@ -832,14 +830,14 @@ def test_allow_exact_matches_and_tolerance3(self):
         )
         df2 = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"]
                 ),
                 "version": [1, 2],
             }
         )
 
-        result = pd.merge_asof(
+        result = merge_asof(
             df1,
             df2,
             on="time",
@@ -848,7 +846,7 @@ def test_allow_exact_matches_and_tolerance3(self):
         )
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"]
                 ),
                 "username": ["bob", "charlie"],
@@ -867,7 +865,7 @@ def test_allow_exact_matches_and_tolerance_forward(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 6, 11]}
         )
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left,
             right,
             on="a",
@@ -887,7 +885,7 @@ def test_allow_exact_matches_and_tolerance_nearest(self):
             {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 4, 11]}
         )
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left,
             right,
             on="a",
@@ -924,7 +922,7 @@ def test_forward_by(self):
             }
         )
 
-        result = pd.merge_asof(left, right, on="a", by="b", direction="forward")
+        result = merge_asof(left, right, on="a", by="b", direction="forward")
         tm.assert_frame_equal(result, expected)
 
     def test_nearest_by(self):
@@ -954,14 +952,14 @@ def test_nearest_by(self):
             }
         )
 
-        result = pd.merge_asof(left, right, on="a", by="b", direction="nearest")
+        result = merge_asof(left, right, on="a", by="b", direction="nearest")
         tm.assert_frame_equal(result, expected)
 
     def test_by_int(self):
         # we specialize by type, so test that this is correct
         df1 = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.020",
                         "20160525 13:30:00.030",
@@ -978,7 +976,7 @@ def test_by_int(self):
 
         df2 = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.015",
                         "20160525 13:30:00.020",
@@ -996,11 +994,11 @@ def test_by_int(self):
             columns=["time", "key", "value2"],
         )
 
-        result = pd.merge_asof(df1, df2, on="time", by="key")
+        result = merge_asof(df1, df2, on="time", by="key")
 
         expected = pd.DataFrame(
             {
-                "time": pd.to_datetime(
+                "time": to_datetime(
                     [
                         "20160525 13:30:00.020",
                         "20160525 13:30:00.030",
@@ -1035,7 +1033,7 @@ def test_on_float(self):
 
         df1 = df1.sort_values("price").reset_index(drop=True)
 
-        result = pd.merge_asof(df1, df2, on="price")
+        result = merge_asof(df1, df2, on="price")
 
         expected = pd.DataFrame(
             {
@@ -1065,7 +1063,7 @@ def test_on_specialized_type(self, any_real_dtype):
         df2.value = dtype(df2.value)
 
         df1 = df1.sort_values("value").reset_index(drop=True)
-        result = pd.merge_asof(df1, df2, on="value")
+        result = merge_asof(df1, df2, on="value")
 
         expected = pd.DataFrame(
             {
@@ -1100,7 +1098,7 @@ def test_on_specialized_type_by_int(self, any_real_dtype):
         df2.value = dtype(df2.value)
 
         df1 = df1.sort_values("value").reset_index(drop=True)
-        result = pd.merge_asof(df1, df2, on="value", by="key")
+        result = merge_asof(df1, df2, on="value", by="key")
 
         expected = pd.DataFrame(
             {
@@ -1148,7 +1146,7 @@ def test_on_float_by_int(self):
         df1 = df1.sort_values("price").reset_index(drop=True)
         df2 = df2.sort_values("price").reset_index(drop=True)
 
-        result = pd.merge_asof(df1, df2, on="price", by="exch")
+        result = merge_asof(df1, df2, on="price", by="exch")
 
         expected = pd.DataFrame(
             {
@@ -1241,7 +1239,7 @@ def test_merge_by_col_tz_aware(self):
                 "values": ["b"],
             }
         )
-        result = pd.merge_asof(left, right, by="by_col", on="on_col")
+        result = merge_asof(left, right, by="by_col", on="on_col")
         expected = pd.DataFrame(
             [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]],
             columns=["by_col", "on_col", "values_x", "values_y"],
@@ -1266,7 +1264,7 @@ def test_by_mixed_tz_aware(self):
                 "value": ["b"],
             }
         )
-        result = pd.merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col")
+        result = merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col")
         expected = pd.DataFrame(
             [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]],
             columns=["by_col1", "by_col2", "on_col", "value_x"],
@@ -1304,7 +1302,7 @@ def test_timedelta_tolerance_nearest(self):
 
         expected["time"] = pd.to_timedelta(expected["time"], "ms")
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest"
         )
 
@@ -1323,7 +1321,7 @@ def test_int_type_tolerance(self, any_int_dtype):
         )
         expected["a"] = expected["a"].astype(any_int_dtype)
 
-        result = pd.merge_asof(left, right, on="a", tolerance=10)
+        result = merge_asof(left, right, on="a", tolerance=10)
         tm.assert_frame_equal(result, expected)
 
     def test_merge_index_column_tz(self):
@@ -1331,7 +1329,7 @@ def test_merge_index_column_tz(self):
         index = pd.date_range("2019-10-01", freq="30min", periods=5, tz="UTC")
         left = pd.DataFrame([0.9, 0.8, 0.7, 0.6], columns=["xyz"], index=index[1:])
         right = pd.DataFrame({"from_date": index, "abc": [2.46] * 4 + [2.19]})
-        result = pd.merge_asof(
+        result = merge_asof(
             left=left, right=right, left_index=True, right_on=["from_date"]
         )
         expected = pd.DataFrame(
@@ -1344,7 +1342,7 @@ def test_merge_index_column_tz(self):
         )
         tm.assert_frame_equal(result, expected)
 
-        result = pd.merge_asof(
+        result = merge_asof(
             left=right, right=left, right_index=True, left_on=["from_date"]
         )
         expected = pd.DataFrame(
@@ -1370,7 +1368,7 @@ def test_left_index_right_index_tolerance(self):
         expected = pd.DataFrame(
             {"val1": "foo", "val2": "bar"}, index=pd.DatetimeIndex(dr1)
         )
-        result = pd.merge_asof(
+        result = merge_asof(
             df1,
             df2,
             left_index=True,
@@ -1395,7 +1393,7 @@ def test_merge_asof_non_numerical_dtype(kwargs, data):
         MergeError,
         match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
     ):
-        pd.merge_asof(left, right, **kwargs)
+        merge_asof(left, right, **kwargs)
 
 
 def test_merge_asof_non_numerical_dtype_object():
@@ -1406,7 +1404,7 @@ def test_merge_asof_non_numerical_dtype_object():
         MergeError,
         match=r"Incompatible merge dtype, .*, both sides must have numeric dtype",
     ):
-        pd.merge_asof(
+        merge_asof(
             left,
             right,
             left_on="left_val1",
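For orientation, a small sketch of the direction and tolerance keywords the merge_asof tests above exercise (data made up):

    from pandas import DataFrame, merge_asof

    left = DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]})
    right = DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]})
    # backward (default): match the last right row with right.a <= left.a
    backward = merge_asof(left, right, on="a")
    # forward: match the first right row with right.a >= left.a, within tolerance=1
    forward = merge_asof(left, right, on="a", direction="forward", tolerance=1)
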
diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py
index 56ea3c9718a41..d9143549e127d 100644
--- a/pandas/tests/reshape/merge/test_multi.py
+++ b/pandas/tests/reshape/merge/test_multi.py
@@ -112,7 +112,7 @@ def test_merge_on_multikey(self, left, right, join_type):
         on_cols = ["key1", "key2"]
         result = left.join(right, on=on_cols, how=join_type).reset_index(drop=True)
 
-        expected = pd.merge(left, right.reset_index(), on=on_cols, how=join_type)
+        expected = merge(left, right.reset_index(), on=on_cols, how=join_type)
 
         tm.assert_frame_equal(result, expected)
 
@@ -120,7 +120,7 @@ def test_merge_on_multikey(self, left, right, join_type):
             drop=True
         )
 
-        expected = pd.merge(
+        expected = merge(
             left, right.reset_index(), on=on_cols, how=join_type, sort=True
         )
 
@@ -200,13 +200,13 @@ def test_merge_right_vs_left(self, left, right, sort):
 
     def test_merge_multiple_cols_with_mixed_cols_index(self):
         # GH29522
-        s = pd.Series(
+        s = Series(
             range(6),
             MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]),
             name="Amount",
         )
         df = DataFrame({"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0})
-        result = pd.merge(df, s.reset_index(), on=["lev1", "lev2"])
+        result = merge(df, s.reset_index(), on=["lev1", "lev2"])
         expected = DataFrame(
             {
                 "lev1": list("AAABBB"),
@@ -840,7 +840,7 @@ def test_join_multi_multi(
     ):
         # Multi-index join tests
         expected = (
-            pd.merge(
+            merge(
                 left_multi.reset_index(),
                 right_multi.reset_index(),
                 how=join_type,
@@ -861,7 +861,7 @@ def test_join_multi_empty_frames(
         right_multi = right_multi.drop(columns=right_multi.columns)
 
         expected = (
-            pd.merge(
+            merge(
                 left_multi.reset_index(),
                 right_multi.reset_index(),
                 how=join_type,
@@ -917,7 +917,7 @@ def test_single_common_level(self):
         )
 
         result = left.join(right)
-        expected = pd.merge(
+        expected = merge(
             left.reset_index(), right.reset_index(), on=["key"], how="inner"
         ).set_index(["key", "X", "Y"])
 
diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py
index 1ecb408d49813..e467dbb7d49b6 100644
--- a/pandas/tests/reshape/test_crosstab.py
+++ b/pandas/tests/reshape/test_crosstab.py
@@ -259,6 +259,8 @@ def test_margin_dropna(self):
         expected.columns = Index([3, 4, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
+    def test_margin_dropna2(self):
+
         df = DataFrame(
             {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
         )
@@ -268,6 +270,8 @@ def test_margin_dropna(self):
         expected.columns = Index([3.0, 4.0, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
+    def test_margin_dropna3(self):
+
         df = DataFrame(
             {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]}
         )
@@ -277,6 +281,7 @@ def test_margin_dropna(self):
         expected.columns = Index([3, 4, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
+    def test_margin_dropna4(self):
         # GH 12642
         # _add_margins raises KeyError: Level None not found
         # when margins=True and dropna=False
@@ -287,6 +292,7 @@ def test_margin_dropna(self):
         expected.columns = Index([3, 4, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
+    def test_margin_dropna5(self):
         df = DataFrame(
             {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]}
         )
@@ -296,6 +302,7 @@ def test_margin_dropna(self):
         expected.columns = Index([3.0, 4.0, "All"], name="b")
         tm.assert_frame_equal(actual, expected)
 
+    def test_margin_dropna6(self):
         a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object)
         b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object)
         c = np.array(
@@ -395,6 +402,12 @@ def test_crosstab_normalize(self):
             crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins
         )
 
+    def test_crosstab_normalize_arrays(self):
+        # GH#12578
+        df = DataFrame(
+            {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]}
+        )
+
         # Test arrays
         crosstab(
             [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2])
@@ -798,7 +811,7 @@ def test_categoricals(a_dtype, b_dtype):
     if not a_is_cat:
         expected = expected.loc[[0, 2, "All"]]
         expected["All"] = expected["All"].astype("int64")
-    print(result)
-    print(expected)
-    print(expected.loc[[0, 2, "All"]])
+    repr(result)
+    repr(expected)
+    repr(expected.loc[[0, 2, "All"]])
     tm.assert_frame_equal(result, expected)
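A compact sketch of the normalize option the crosstab tests above cover (toy data, not from the patch):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4]})
    pd.crosstab(df.a, df.b, normalize=True)       # each cell / grand total
    pd.crosstab(df.a, df.b, normalize="index")    # each row sums to 1.0
    pd.crosstab(df.a, df.b, normalize="columns")  # each column sums to 1.0
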
56326dd15bd9b..06159cf70b1ab 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -681,7 +681,7 @@ def test_cut_unordered_with_series_labels(): s = Series([1, 2, 3, 4, 5]) bins = Series([0, 2, 4, 6]) labels = Series(["a", "b", "c"]) - result = pd.cut(s, bins=bins, labels=labels, ordered=False) + result = cut(s, bins=bins, labels=labels, ordered=False) expected = Series(["a", "a", "b", "b", "c"], dtype="category") tm.assert_series_equal(result, expected) @@ -690,4 +690,4 @@ def test_cut_no_warnings(): df = DataFrame({"value": np.random.randint(0, 100, 20)}) labels = [f"{i} - {i + 9}" for i in range(0, 100, 10)] with tm.assert_produces_warning(False): - df["group"] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) + df["group"] = cut(df.value, range(0, 105, 10), right=False, labels=labels) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 53244569d0432..a950c648838ff 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -302,7 +302,7 @@ def test_pandas_dtypes(self, col): def test_preserve_category(self): # GH 15853 data = DataFrame({"A": [1, 2], "B": pd.Categorical(["X", "Y"])}) - result = pd.melt(data, ["B"], ["A"]) + result = melt(data, ["B"], ["A"]) expected = DataFrame( {"B": pd.Categorical(["X", "Y"]), "variable": ["A", "A"], "value": [1, 2]} ) @@ -668,7 +668,7 @@ def test_stubs(self): stubs = ["inc", "edu"] # TODO: unused? - df_long = pd.wide_to_long(df, stubs, i="id", j="age") # noqa + df_long = wide_to_long(df, stubs, i="id", j="age") # noqa assert stubs == ["inc", "edu"] @@ -1055,10 +1055,8 @@ def test_col_substring_of_stubname(self): "PA3": {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}, } wide_df = DataFrame.from_dict(wide_data) - expected = pd.wide_to_long( - wide_df, stubnames=["PA"], i=["node_id", "A"], j="time" - ) - result = pd.wide_to_long(wide_df, stubnames="PA", i=["node_id", "A"], j="time") + expected = wide_to_long(wide_df, stubnames=["PA"], i=["node_id", "A"], j="time") + result = wide_to_long(wide_df, stubnames="PA", i=["node_id", "A"], j="time") tm.assert_frame_equal(result, expected) def test_warn_of_column_name_value(self): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8d2b4f2b325c2..e345f4f4b5f7f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -197,7 +197,7 @@ def test_pivot_table_categorical(self): ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True ) df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - result = pd.pivot_table(df, values="values", index=["A", "B"], dropna=True) + result = pivot_table(df, values="values", index=["A", "B"], dropna=True) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) @@ -302,7 +302,7 @@ def test_pivot_with_interval_index_margins(self): } ) - pivot_tab = pd.pivot_table( + pivot_tab = pivot_table( df, index="C", columns="B", values="A", aggfunc="sum", margins=True ) @@ -409,7 +409,7 @@ def test_pivot_no_values(self): df = DataFrame( { "A": [1, 2, 3, 4, 5], - "dt": pd.date_range("2011-01-01", freq="D", periods=5), + "dt": date_range("2011-01-01", freq="D", periods=5), }, index=idx, ) @@ -492,7 +492,7 @@ def test_pivot_index_with_nan(self, method): # GH9491 df = DataFrame( { - "a": pd.date_range("2014-02-01", periods=6, freq="D"), + "a": date_range("2014-02-01", periods=6, freq="D"), "c": 100 + np.arange(6), } ) @@ -605,7 
+605,7 @@ def test_pivot_tz_in_values(self): df = df.set_index("ts").reset_index() mins = df.ts.map(lambda x: x.replace(hour=0, minute=0, second=0, microsecond=0)) - result = pd.pivot_table( + result = pivot_table( df.set_index("ts").reset_index(), values="ts", index=["uid"], @@ -1101,7 +1101,7 @@ def test_pivot_columns_lexsorted(self): iproduct = np.random.randint(0, len(products), n) items["Index"] = products["Index"][iproduct] items["Symbol"] = products["Symbol"][iproduct] - dr = pd.date_range(date(2000, 1, 1), date(2010, 12, 31)) + dr = date_range(date(2000, 1, 1), date(2010, 12, 31)) dates = dr[np.random.randint(0, len(dr), n)] items["Year"] = dates.year items["Month"] = dates.month @@ -1664,17 +1664,17 @@ def test_pivot_table_with_iterator_values(self): # GH 12017 aggs = {"D": "sum", "E": "mean"} - pivot_values_list = pd.pivot_table( + pivot_values_list = pivot_table( self.data, index=["A"], values=list(aggs.keys()), aggfunc=aggs ) - pivot_values_keys = pd.pivot_table( + pivot_values_keys = pivot_table( self.data, index=["A"], values=aggs.keys(), aggfunc=aggs ) tm.assert_frame_equal(pivot_values_keys, pivot_values_list) agg_values_gen = (value for value in aggs.keys()) - pivot_values_gen = pd.pivot_table( + pivot_values_gen = pivot_table( self.data, index=["A"], values=agg_values_gen, aggfunc=aggs ) tm.assert_frame_equal(pivot_values_gen, pivot_values_list) @@ -1749,7 +1749,7 @@ def test_margins_casted_to_float(self, observed): } ) - result = pd.pivot_table(df, index="D", margins=True) + result = pivot_table(df, index="D", margins=True) expected = DataFrame( {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, index=Index(["X", "Y", "All"], name="D"), @@ -1887,7 +1887,7 @@ def test_pivot_margins_name_unicode(self): # issue #13292 greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" frame = DataFrame({"foo": [1, 2, 3]}) - table = pd.pivot_table( + table = pivot_table( frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") @@ -2006,7 +2006,7 @@ def ret_sum(x): def ret_none(x): return np.nan - result = pd.pivot_table( + result = pivot_table( df, columns="fruit", aggfunc=[ret_sum, ret_none, ret_one], dropna=dropna ) @@ -2028,7 +2028,7 @@ def test_pivot_table_aggfunc_scalar_dropna(self, dropna): {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]} ) - result = pd.pivot_table(df, columns="A", aggfunc=np.mean, dropna=dropna) + result = pivot_table(df, columns="A", aggfunc=np.mean, dropna=dropna) data = [[2.5, np.nan], [1, np.nan]] col = Index(["one", "two"], name="A") @@ -2063,6 +2063,55 @@ def agg(arr): with pytest.raises(KeyError, match="notpresent"): foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) + def test_pivot_table_doctest_case(self): + # TODO: better name. 
the relevant characteristic is that + # the call to maybe_downcast_to_dtype(agged[v], data[v].dtype) in + # __internal_pivot_table has `agged[v]` a DataFrame instead of Series, + # i.e agged.columns is not unique + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + + table = pivot_table( + df, + values=["D", "E"], + index=["A", "C"], + aggfunc={"D": np.mean, "E": [min, max, np.mean]}, + ) + cols = MultiIndex.from_tuples( + [("D", "mean"), ("E", "max"), ("E", "mean"), ("E", "min")] + ) + index = MultiIndex.from_tuples( + [("bar", "large"), ("bar", "small"), ("foo", "large"), ("foo", "small")], + names=["A", "C"], + ) + vals = np.array( + [ + [5.5, 9.0, 7.5, 6.0], + [5.5, 9.0, 8.5, 8.0], + [2.0, 5.0, 4.5, 4.0], + [2.33333333, 6.0, 4.33333333, 2.0], + ] + ) + expected = DataFrame(vals, columns=cols, index=index) + tm.assert_frame_equal(table, expected) + class TestPivot: def test_pivot(self): diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index b9594a9c876c6..9f6cdbb81bd89 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -11,6 +11,7 @@ import pytest from pandas.compat import is_numpy_dev +from pandas.errors import OutOfBoundsTimedelta import pandas as pd from pandas import ( @@ -104,7 +105,7 @@ def test_td_add_timestamp_overflow(self): with pytest.raises(OverflowError, match=msg): Timestamp("1700-01-01") + Timedelta(13 * 19999, unit="D") - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timestamp("1700-01-01") + timedelta(days=13 * 19999) @pytest.mark.parametrize("op", [operator.add, ops.radd]) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 47b09280854de..ea4a56be6da48 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -200,7 +200,7 @@ def test_overflow_on_construction(): with pytest.raises(OverflowError, match=msg): Timedelta(7 * 19999, unit="D") - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): Timedelta(timedelta(days=13 * 19999)) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 6199e77e10166..5db159e1abb80 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -74,7 +74,7 @@ def get_expected(s, name): if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") - elif not is_list_like(result) or isinstance(result, pd.DataFrame): + elif not is_list_like(result) or isinstance(result, DataFrame): return result return Series(result, index=s.index, name=s.name) @@ -83,7 +83,7 @@ def compare(s, name): b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b - elif isinstance(a, pd.DataFrame): + elif isinstance(a, DataFrame): tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) @@ -180,7 +180,7 @@ def compare(s, name): assert result.dtype == object result = s.dt.total_seconds() - assert 
isinstance(result, pd.Series) + assert isinstance(result, Series) assert result.dtype == "float64" freq_result = s.dt.freq @@ -236,11 +236,11 @@ def get_dir(s): # 11295 # ambiguous time error on the conversions - s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") + s = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) - exp_values = pd.date_range( + exp_values = date_range( "2015-01-01", "2016-01-01", freq="T", tz="UTC" ).tz_convert("America/Chicago") # freq not preserved by tz_localize above @@ -297,7 +297,7 @@ def test_dt_round_tz(self): @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) def test_dt_round_tz_ambiguous(self, method): # GH 18946 round near "fall back" DST - df1 = pd.DataFrame( + df1 = DataFrame( [ pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True), pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True), @@ -634,7 +634,7 @@ def test_dt_accessor_invalid(self, ser): assert not hasattr(ser, "dt") def test_dt_accessor_updates_on_inplace(self): - s = Series(pd.date_range("2018-01-01", periods=10)) + s = Series(date_range("2018-01-01", periods=10)) s[2] = None return_value = s.fillna(pd.Timestamp("2018-01-01"), inplace=True) assert return_value is None @@ -680,7 +680,7 @@ def test_dt_timetz_accessor(self, tz_naive_fixture): ) def test_isocalendar(self, input_series, expected_output): result = pd.to_datetime(Series(input_series)).dt.isocalendar() - expected_frame = pd.DataFrame( + expected_frame = DataFrame( expected_output, columns=["year", "week", "day"], dtype="UInt32" ) tm.assert_frame_equal(result, expected_frame) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 1de6540217655..e4ba530d0741c 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -352,7 +352,7 @@ def test_indexing_over_size_cutoff_period_index(monkeypatch): monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) n = 1100 - idx = pd.period_range("1/1/2000", freq="T", periods=n) + idx = period_range("1/1/2000", freq="T", periods=n) assert idx._engine.over_size_threshold s = Series(np.random.randn(len(idx)), index=idx) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index e6dfafabbfec2..7642ccff31c6a 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -202,6 +202,38 @@ def test_getitem_slice_strings_with_datetimeindex(self): expected = ts[1:4] tm.assert_series_equal(result, expected) + def test_getitem_partial_str_slice_with_timedeltaindex(self): + rng = timedelta_range("1 day 10:11:12", freq="h", periods=500) + ser = Series(np.arange(len(rng)), index=rng) + + result = ser["5 day":"6 day"] + expected = ser.iloc[86:134] + tm.assert_series_equal(result, expected) + + result = ser["5 day":] + expected = ser.iloc[86:] + tm.assert_series_equal(result, expected) + + result = ser[:"6 day"] + expected = ser.iloc[:134] + tm.assert_series_equal(result, expected) + + def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self): + # higher reso + rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000) + ser = Series(np.arange(len(rng)), index=rng) + + result = ser["1 day 10:11:12":] + expected = ser.iloc[0:] + tm.assert_series_equal(result, expected) + + result = 
ser["1 day 10:11:12.001":] + expected = ser.iloc[1000:] + tm.assert_series_equal(result, expected) + + result = ser["1 days, 10:11:12.001001"] + assert result == ser.iloc[1001] + def test_getitem_slice_2d(self, datetime_series): # GH#30588 multi-dimensional indexing deprecated @@ -277,7 +309,7 @@ def test_getitem_slice_integers(self): class TestSeriesGetitemListLike: - @pytest.mark.parametrize("box", [list, np.array, Index, pd.Series]) + @pytest.mark.parametrize("box", [list, np.array, Index, Series]) def test_getitem_no_matches(self, box): # GH#33462 we expect the same behavior for list/ndarray/Index/Series ser = Series(["A", "B"]) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index cd5a7af1d5ec0..30c37113f6b8f 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -5,7 +5,6 @@ import numpy as np import pytest -import pandas as pd from pandas import ( DataFrame, IndexSlice, @@ -58,7 +57,7 @@ def test_basic_getitem_dt64tz_values(): # GH12089 # with tz for values ser = Series( - pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"] + date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"] ) expected = Timestamp("2011-01-01", tz="US/Eastern") result = ser.loc["a"] @@ -114,7 +113,7 @@ def test_getitem_setitem_integers(): def test_series_box_timestamp(): - rng = pd.date_range("20090415", "20090519", freq="B") + rng = date_range("20090415", "20090519", freq="B") ser = Series(rng) assert isinstance(ser[0], Timestamp) assert isinstance(ser.at[1], Timestamp) @@ -131,7 +130,7 @@ def test_series_box_timestamp(): def test_series_box_timedelta(): - rng = pd.timedelta_range("1 day 1 s", periods=5, freq="h") + rng = timedelta_range("1 day 1 s", periods=5, freq="h") ser = Series(rng) assert isinstance(ser[0], Timedelta) assert isinstance(ser.at[1], Timedelta) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 1e50fef55b4ec..799f3d257434d 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -475,7 +475,7 @@ def test_where_datetimelike_categorical(tz_naive_fixture): # GH#37682 tz = tz_naive_fixture - dr = pd.date_range("2001-01-01", periods=3, tz=tz)._with_freq(None) + dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None) lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT]) rvals = pd.Categorical([dr[0], pd.NaT, dr[2]]) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index cad5476d4861c..5686e6478772d 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -642,7 +642,7 @@ def test_interp_datetime64(self, method, tz_naive_fixture): def test_interp_pad_datetime64tz_values(self): # GH#27628 missing.interpolate_2d should handle datetimetz values - dti = pd.date_range("2015-04-05", periods=3, tz="US/Central") + dti = date_range("2015-04-05", periods=3, tz="US/Central") ser = Series(dti) ser[1] = pd.NaT result = ser.interpolate(method="pad") @@ -735,13 +735,13 @@ def test_series_interpolate_method_values(self): def test_series_interpolate_intraday(self): # #1698 - index = pd.date_range("1/1/2012", periods=4, freq="12D") + index = date_range("1/1/2012", periods=4, freq="12D") ts = Series([0, 12, 24, 36], index) new_index = index.append(index + pd.DateOffset(days=1)).sort_values() exp = 
ts.reindex(new_index).interpolate(method="time") - index = pd.date_range("1/1/2012", periods=4, freq="12H") + index = date_range("1/1/2012", periods=4, freq="12H") ts = Series([0, 12, 24, 36], index) new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() result = ts.reindex(new_index).interpolate(method="time") diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py index 60ec0a90e906f..73684e300ed77 100644 --- a/pandas/tests/series/methods/test_shift.py +++ b/pandas/tests/series/methods/test_shift.py @@ -353,14 +353,14 @@ def test_shift_preserve_freqstr(self, periods): # GH#21275 ser = Series( range(periods), - index=pd.date_range("2016-1-1 00:00:00", periods=periods, freq="H"), + index=date_range("2016-1-1 00:00:00", periods=periods, freq="H"), ) result = ser.shift(1, "2H") expected = Series( range(periods), - index=pd.date_range("2016-1-1 02:00:00", periods=periods, freq="H"), + index=date_range("2016-1-1 02:00:00", periods=periods, freq="H"), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index d70abe2311acd..4df6f52e0fff4 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -203,6 +203,20 @@ def test_sort_index_ascending_list(self): expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "ascending", + [ + None, + (True, None), + (False, "True"), + ], + ) + def test_sort_index_ascending_bad_value_raises(self, ascending): + ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9]) + match = 'For argument "ascending" expected type bool' + with pytest.raises(ValueError, match=match): + ser.sort_index(ascending=ascending) + class TestSeriesSortIndexKey: def test_sort_index_multiindex_key(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c2d0bf5975059..a69a693bb6203 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -69,6 +69,7 @@ class TestSeriesConstructors: ], ) def test_empty_constructor(self, constructor, check_index_type): + # TODO: share with frame test of the same name with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): expected = Series() result = constructor() @@ -310,6 +311,7 @@ def test_constructor_generator(self): exp = Series(range(10)) tm.assert_series_equal(result, exp) + # same but with non-default index gen = (i for i in range(10)) result = Series(gen, index=range(10, 20)) exp.index = range(10, 20) @@ -323,6 +325,7 @@ def test_constructor_map(self): exp = Series(range(10)) tm.assert_series_equal(result, exp) + # same but with non-default index m = map(lambda x: x, range(10)) result = Series(m, index=range(10, 20)) exp.index = range(10, 20) @@ -386,6 +389,7 @@ def test_constructor_categorical_with_coercion(self): str(df.values) str(df) + def test_constructor_categorical_with_coercion2(self): # GH8623 x = DataFrame( [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. 
Doe"]],
@@ -689,16 +693,16 @@ def test_constructor_pass_nan_nat(self):
         tm.assert_series_equal(Series([np.nan, np.nan]), exp)
         tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp)
 
-        exp = Series([pd.NaT, pd.NaT])
+        exp = Series([NaT, NaT])
         assert exp.dtype == "datetime64[ns]"
-        tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp)
-        tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp)
+        tm.assert_series_equal(Series([NaT, NaT]), exp)
+        tm.assert_series_equal(Series(np.array([NaT, NaT])), exp)
 
-        tm.assert_series_equal(Series([pd.NaT, np.nan]), exp)
-        tm.assert_series_equal(Series(np.array([pd.NaT, np.nan])), exp)
+        tm.assert_series_equal(Series([NaT, np.nan]), exp)
+        tm.assert_series_equal(Series(np.array([NaT, np.nan])), exp)
 
-        tm.assert_series_equal(Series([np.nan, pd.NaT]), exp)
-        tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp)
+        tm.assert_series_equal(Series([np.nan, NaT]), exp)
+        tm.assert_series_equal(Series(np.array([np.nan, NaT])), exp)
 
     def test_constructor_cast(self):
         msg = "could not convert string to float"
@@ -747,6 +751,7 @@ def test_constructor_datelike_coercion(self):
         assert s.iloc[1] == "NOV"
         assert s.dtype == object
 
+    def test_constructor_datelike_coercion2(self):
         # the dtype was being reset on the slicing and re-inferred to datetime
         # even though the blocks are mixed
         belly = "216 3T19".split()
@@ -760,6 +765,14 @@ def test_constructor_datelike_coercion(self):
         result = df.loc["216"]
         assert result.dtype == object
 
+    def test_constructor_mixed_int_and_timestamp(self, frame_or_series):
+        # specifically Timestamp with nanos, not datetimes
+        objs = [Timestamp(9), 10, NaT.value]
+        result = frame_or_series(objs, dtype="M8[ns]")
+
+        expected = frame_or_series([Timestamp(9), Timestamp(10), NaT])
+        tm.assert_equal(result, expected)
+
     def test_constructor_datetimes_with_nulls(self):
         # gh-15869
         for arr in [
@@ -790,6 +803,7 @@ def test_constructor_dtype_datetime64(self):
         assert isna(s[1])
         assert s.dtype == "M8[ns]"
 
+    def test_constructor_dtype_datetime64_10(self):
         # GH3416
         dates = [
             np.datetime64(datetime(2013, 1, 1)),
@@ -816,7 +830,7 @@ def test_constructor_dtype_datetime64(self):
         tm.assert_series_equal(result, expected)
 
         expected = Series(
-            [pd.NaT, datetime(2013, 1, 2), datetime(2013, 1, 3)], dtype="datetime64[ns]"
+            [NaT, datetime(2013, 1, 2), datetime(2013, 1, 3)], dtype="datetime64[ns]"
         )
         result = Series([np.nan] + dates[1:], dtype="datetime64[ns]")
         tm.assert_series_equal(result, expected)
@@ -842,6 +856,7 @@ def test_constructor_dtype_datetime64(self):
         expected = Series(dts.astype(np.int64))
         tm.assert_series_equal(result, expected)
 
+    def test_constructor_dtype_datetime64_9(self):
         # invalid dates can be held as object
         result = Series([datetime(2, 1, 1)])
         assert result[0] == datetime(2, 1, 1, 0, 0)
@@ -849,11 +864,13 @@ def test_constructor_dtype_datetime64(self):
         result = Series([datetime(3000, 1, 1)])
         assert result[0] == datetime(3000, 1, 1, 0, 0)
 
+    def test_constructor_dtype_datetime64_8(self):
         # don't mix types
         result = Series([Timestamp("20130101"), 1], index=["a", "b"])
         assert result["a"] == Timestamp("20130101")
         assert result["b"] == 1
 
+    def test_constructor_dtype_datetime64_7(self):
         # GH6529
         # coerce datetime64 non-ns properly
         dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M")
@@ -879,16 +896,18 @@ def test_constructor_dtype_datetime64(self):
         tm.assert_numpy_array_equal(series1.values, dates2)
         assert series1.dtype == object
 
+    def test_constructor_dtype_datetime64_6(self):
         # these will correctly infer a datetime
-        s =
Series([None, pd.NaT, "2013-08-05 15:30:00.000001"]) + s = Series([None, NaT, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" - s = Series([np.nan, pd.NaT, "2013-08-05 15:30:00.000001"]) + s = Series([np.nan, NaT, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" - s = Series([pd.NaT, None, "2013-08-05 15:30:00.000001"]) + s = Series([NaT, None, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" - s = Series([pd.NaT, np.nan, "2013-08-05 15:30:00.000001"]) + s = Series([NaT, np.nan, "2013-08-05 15:30:00.000001"]) assert s.dtype == "datetime64[ns]" + def test_constructor_dtype_datetime64_5(self): # tz-aware (UTC and other tz's) # GH 8411 dr = date_range("20130101", periods=3) @@ -898,18 +917,21 @@ def test_constructor_dtype_datetime64(self): dr = date_range("20130101", periods=3, tz="US/Eastern") assert str(Series(dr).iloc[0].tz) == "US/Eastern" + def test_constructor_dtype_datetime64_4(self): # non-convertible - s = Series([1479596223000, -1479590, pd.NaT]) + s = Series([1479596223000, -1479590, NaT]) assert s.dtype == "object" - assert s[2] is pd.NaT + assert s[2] is NaT assert "NaT" in str(s) + def test_constructor_dtype_datetime64_3(self): # if we passed a NaT it remains - s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) + s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT]) assert s.dtype == "object" - assert s[2] is pd.NaT + assert s[2] is NaT assert "NaT" in str(s) + def test_constructor_dtype_datetime64_2(self): # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) assert s.dtype == "object" @@ -933,7 +955,7 @@ def test_constructor_with_datetime_tz(self): assert isinstance(result, np.ndarray) assert result.dtype == "datetime64[ns]" - exp = pd.DatetimeIndex(result) + exp = DatetimeIndex(result) exp = exp.tz_localize("UTC").tz_convert(tz=s.dt.tz) tm.assert_index_equal(dr, exp) @@ -969,9 +991,10 @@ def test_constructor_with_datetime_tz(self): t = Series(date_range("20130101", periods=1000, tz="US/Eastern")) assert "datetime64[ns, US/Eastern]" in str(t) - result = pd.DatetimeIndex(s, freq="infer") + result = DatetimeIndex(s, freq="infer") tm.assert_index_equal(result, dr) + def test_constructor_with_datetime_tz4(self): # inference s = Series( [ @@ -982,6 +1005,7 @@ def test_constructor_with_datetime_tz(self): assert s.dtype == "datetime64[ns, US/Pacific]" assert lib.infer_dtype(s, skipna=True) == "datetime64" + def test_constructor_with_datetime_tz3(self): s = Series( [ Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), @@ -991,9 +1015,10 @@ def test_constructor_with_datetime_tz(self): assert s.dtype == "object" assert lib.infer_dtype(s, skipna=True) == "datetime" + def test_constructor_with_datetime_tz2(self): # with all NaT - s = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") - expected = Series(pd.DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) + s = Series(NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") + expected = Series(DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) tm.assert_series_equal(s, expected) @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) @@ -1010,7 +1035,7 @@ def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit): tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("arg", ["2013-01-01 00:00:00", pd.NaT, np.nan, None]) + @pytest.mark.parametrize("arg", ["2013-01-01 00:00:00", NaT, np.nan, None]) def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): # GH 17415: With 
naive string result = Series([arg], dtype="datetime64[ns, CET]") @@ -1223,14 +1248,6 @@ def test_constructor_dict_of_tuples(self): expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) tm.assert_series_equal(result, expected) - def test_constructor_set(self): - values = {1, 2, 3, 4, 5} - with pytest.raises(TypeError, match="'set' type is unordered"): - Series(values) - values = frozenset(values) - with pytest.raises(TypeError, match="'frozenset' type is unordered"): - Series(values) - # https://github.com/pandas-dev/pandas/issues/22698 @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") def test_fromDict(self): @@ -1294,7 +1311,7 @@ def test_constructor_dtype_timedelta64(self): td = Series([timedelta(days=1), np.nan], dtype="m8[ns]") assert td.dtype == "timedelta64[ns]" - td = Series([np.timedelta64(300000000), pd.NaT], dtype="m8[ns]") + td = Series([np.timedelta64(300000000), NaT], dtype="m8[ns]") assert td.dtype == "timedelta64[ns]" # improved inference @@ -1309,7 +1326,7 @@ def test_constructor_dtype_timedelta64(self): td = Series([np.timedelta64(300000000), np.nan]) assert td.dtype == "timedelta64[ns]" - td = Series([pd.NaT, np.timedelta64(300000000)]) + td = Series([NaT, np.timedelta64(300000000)]) assert td.dtype == "timedelta64[ns]" td = Series([np.timedelta64(1, "s")]) @@ -1341,13 +1358,13 @@ def test_constructor_dtype_timedelta64(self): assert td.dtype == "object" # these will correctly infer a timedelta - s = Series([None, pd.NaT, "1 Day"]) + s = Series([None, NaT, "1 Day"]) assert s.dtype == "timedelta64[ns]" - s = Series([np.nan, pd.NaT, "1 Day"]) + s = Series([np.nan, NaT, "1 Day"]) assert s.dtype == "timedelta64[ns]" - s = Series([pd.NaT, None, "1 Day"]) + s = Series([NaT, None, "1 Day"]) assert s.dtype == "timedelta64[ns]" - s = Series([pd.NaT, np.nan, "1 Day"]) + s = Series([NaT, np.nan, "1 Day"]) assert s.dtype == "timedelta64[ns]" # GH 16406 @@ -1598,7 +1615,7 @@ def test_constructor_dict_multiindex(self): _d = sorted(d.items()) result = Series(d) expected = Series( - [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d]) + [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index a91908f7fba52..96a69476ccbef 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -169,7 +169,7 @@ def test_repr_should_return_str(self): def test_repr_max_rows(self): # GH 6863 - with pd.option_context("max_rows", None): + with option_context("max_rows", None): str(Series(range(1001))) # should not raise exception def test_unicode_string_with_unicode(self): diff --git a/pandas/tests/series/test_unary.py b/pandas/tests/series/test_unary.py index 40d5e56203c6c..67bb89b42a56d 100644 --- a/pandas/tests/series/test_unary.py +++ b/pandas/tests/series/test_unary.py @@ -18,40 +18,35 @@ def test_invert(self): tm.assert_series_equal(-(ser < 0), ~(ser < 0)) @pytest.mark.parametrize( - "source, target", + "source, neg_target, abs_target", [ - ([1, 2, 3], [-1, -2, -3]), - ([1, 2, None], [-1, -2, None]), - ([-1, 0, 1], [1, 0, -1]), + ([1, 2, 3], [-1, -2, -3], [1, 2, 3]), + ([1, 2, None], [-1, -2, None], [1, 2, None]), ], ) - def test_unary_minus_nullable_int( - self, any_signed_nullable_int_dtype, source, target + def test_all_numeric_unary_operators( + self, any_nullable_numeric_dtype, source, neg_target, abs_target ): - dtype = any_signed_nullable_int_dtype + # 
GH38794 + dtype = any_nullable_numeric_dtype ser = Series(source, dtype=dtype) - result = -ser - expected = Series(target, dtype=dtype) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("source", [[1, 2, 3], [1, 2, None], [-1, 0, 1]]) - def test_unary_plus_nullable_int(self, any_signed_nullable_int_dtype, source): - dtype = any_signed_nullable_int_dtype - expected = Series(source, dtype=dtype) - result = +expected - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "source, target", - [ - ([1, 2, 3], [1, 2, 3]), - ([1, -2, None], [1, 2, None]), - ([-1, 0, 1], [1, 0, 1]), - ], - ) - def test_abs_nullable_int(self, any_signed_nullable_int_dtype, source, target): - dtype = any_signed_nullable_int_dtype - ser = Series(source, dtype=dtype) - result = abs(ser) - expected = Series(target, dtype=dtype) - tm.assert_series_equal(result, expected) + neg_result, pos_result, abs_result = -ser, +ser, abs(ser) + if dtype.startswith("U"): + neg_target = -Series(source, dtype=dtype) + else: + neg_target = Series(neg_target, dtype=dtype) + + abs_target = Series(abs_target, dtype=dtype) + + tm.assert_series_equal(neg_result, neg_target) + tm.assert_series_equal(pos_result, ser) + tm.assert_series_equal(abs_result, abs_target) + + @pytest.mark.parametrize("op", ["__neg__", "__abs__"]) + def test_unary_float_op_mask(self, float_ea_dtype, op): + dtype = float_ea_dtype + ser = Series([1.1, 2.2, 3.3], dtype=dtype) + result = getattr(ser, op)() + target = result.copy(deep=True) + ser[0] = None + tm.assert_series_equal(result, target) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 2b65655e7bdad..1a47b5b37e3d2 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -20,7 +20,10 @@ iNaT, parsing, ) -from pandas.errors import OutOfBoundsDatetime +from pandas.errors import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_datetime64_ns_dtype @@ -1675,12 +1678,14 @@ def test_to_datetime_overflow(self): # gh-17637 # we are overflowing Timedelta range here - msg = ( - "(Python int too large to convert to C long)|" - "(long too big to convert)|" - "(int too big to convert)" + msg = "|".join( + [ + "Python int too large to convert to C long", + "long too big to convert", + "int too big to convert", + ] ) - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) @pytest.mark.parametrize("cache", [True, False]) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index fbd7a36a75bf0..3e823844c7f56 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -142,6 +142,29 @@ def test_ewm_with_nat_raises(halflife_with_times): ser.ewm(com=0.1, halflife=halflife_with_times, times=times) +def test_ewm_with_times_getitem(halflife_with_times): + # GH 40164 + halflife = halflife_with_times + data = np.arange(10.0) + data[::2] = np.nan + times = date_range("2000", freq="D", periods=10) + df = DataFrame({"A": data, "B": data}) + result = df.ewm(halflife=halflife, times=times)["A"].mean() + expected = df.ewm(halflife=1.0)["A"].mean() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("arg", ["com", "halflife", "span", "alpha"]) +def test_ewm_getitem_attributes_retained(arg, adjust, ignore_na): + # GH 40164 + kwargs = {arg: 1, "adjust": adjust, 
"ignore_na": ignore_na} + ewm = DataFrame({"A": range(1), "B": range(1)}).ewm(**kwargs) + expected = {attr: getattr(ewm, attr) for attr in ewm._attributes} + ewm_slice = ewm["A"] + result = {attr: getattr(ewm, attr) for attr in ewm_slice._attributes} + assert result == expected + + def test_ewm_vol_deprecated(): ser = Series(range(1)) with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/util/_exceptions.py b/pandas/util/_exceptions.py index 5ca96a1f9989f..c31c421ee1445 100644 --- a/pandas/util/_exceptions.py +++ b/pandas/util/_exceptions.py @@ -31,7 +31,7 @@ def find_stack_level() -> int: if stack[n].function == "astype": break - while stack[n].function in ["astype", "apply", "_astype"]: + while stack[n].function in ["astype", "apply", "astype_array_safe", "astype_array"]: # e.g. # bump up Block.astype -> BlockManager.astype -> NDFrame.astype # bump up Datetime.Array.astype -> DatetimeIndex.astype diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 60a81ed63b005..087dccfadcce1 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -4,6 +4,7 @@ """ from typing import ( Iterable, + Sequence, Union, ) import warnings @@ -208,9 +209,39 @@ def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_ar validate_kwargs(fname, kwargs, compat_args) -def validate_bool_kwarg(value, arg_name): - """ Ensures that argument passed in arg_name is of type bool. """ - if not (is_bool(value) or value is None): +def validate_bool_kwarg(value, arg_name, none_allowed=True, int_allowed=False): + """ + Ensure that argument passed in arg_name can be interpreted as boolean. + + Parameters + ---------- + value : bool + Value to be validated. + arg_name : str + Name of the argument. To be reflected in the error message. + none_allowed : bool, default True + Whether to consider None to be a valid boolean. + int_allowed : bool, default False + Whether to consider integer value to be a valid boolean. + + Returns + ------- + value + The same value as input. + + Raises + ------ + ValueError + If the value is not a valid boolean. + """ + good_value = is_bool(value) + if none_allowed: + good_value = good_value or value is None + + if int_allowed: + good_value = good_value or isinstance(value, int) + + if not good_value: raise ValueError( f'For argument "{arg_name}" expected type bool, received ' f"type {type(value).__name__}." 
@@ -384,3 +415,14 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: if not all(0 <= qs <= 1 for qs in q_arr): raise ValueError(msg.format(q_arr / 100.0)) return q_arr + + +def validate_ascending( + ascending: Union[Union[bool, int], Sequence[Union[bool, int]]] = True, +): + """Validate ``ascending`` kwargs for ``sort_index`` method.""" + kwargs = {"none_allowed": False, "int_allowed": True} + if not isinstance(ascending, (list, tuple)): + return validate_bool_kwarg(ascending, "ascending", **kwargs) + + return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] diff --git a/requirements-dev.txt b/requirements-dev.txt index be60c90aef8aa..37adbbb8e671f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,7 +11,7 @@ cpplint flake8 flake8-comprehensions>=3.1.0 isort>=5.2.1 -mypy==0.800 +mypy==0.812 pre-commit>=2.9.2 pycodestyle pyupgrade diff --git a/setup.cfg b/setup.cfg index ce055f550a868..ca0673bd5fc34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -125,7 +125,7 @@ ignore-words-list = ba,blocs,coo,hist,nd,ser ignore-regex = https://(\w+\.)+ [coverage:run] -branch = False +branch = True omit = */tests/* pandas/_typing.py From 6bf4a8fa93eb48b8248a426c20eed35a2caa8a16 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 7 Mar 2021 18:12:20 -0800 Subject: [PATCH 04/10] restore RangeIndex.array --- pandas/core/construction.py | 14 +++++++++----- pandas/core/indexes/range.py | 9 +-------- pandas/core/reshape/merge.py | 4 ++-- pandas/core/series.py | 6 +++--- pandas/tests/construction/__init__.py | 0 .../tests/construction/test_extract_array.py | 18 ++++++++++++++++++ 6 files changed, 33 insertions(+), 18 deletions(-) create mode 100644 pandas/tests/construction/__init__.py create mode 100644 pandas/tests/construction/test_extract_array.py diff --git a/pandas/core/construction.py b/pandas/core/construction.py index bcfa238f6d0dd..7573b520b7141 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -371,7 +371,7 @@ def array( def extract_array( - obj: object, extract_numpy: bool = False, range_compat: bool = False + obj: object, extract_numpy: bool = False, extract_range: bool = False ) -> Union[Any, ArrayLike]: """ Extract the ndarray or ExtensionArray from a Series or Index. @@ -387,8 +387,9 @@ def extract_array( extract_numpy : bool, default False Whether to extract the ndarray from a PandasArray - range_compat : bool, default False - If we have a RangeIndex, return range._values if True, otherwise raise. + extract_range : bool, default False + If we have a RangeIndex, return range._values if True, otherwise + return unchanged. Returns ------- @@ -418,8 +419,11 @@ def extract_array( array([1, 2, 3]) """ if isinstance(obj, (ABCIndex, ABCSeries)): - if range_compat and isinstance(obj, ABCRangeIndex): - return obj._values + if isinstance(obj, ABCRangeIndex): + if extract_range: + return obj._values + return obj + obj = obj.array if extract_numpy and isinstance(obj, ABCPandasArray): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index b0697eee9e1a0..e501c4cb5348e 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -185,13 +185,6 @@ def _data(self): """ return np.arange(self.start, self.stop, self.step, dtype=np.int64) - @property - def array(self): - raise ValueError( - f"{type(self).__name__} has no single backing array. Use " - f"'{type(self).__name__}.to_numpy()' to get a NumPy array." 
- ) - @cache_readonly def _cached_int64index(self) -> Int64Index: return Int64Index._simple_new(self._data, name=self.name) @@ -897,7 +890,7 @@ def _arith_method(self, other, op): step = op # TODO: if other is a RangeIndex we may have more efficient options - other = extract_array(other, extract_numpy=True, range_compat=True) + other = extract_array(other, extract_numpy=True, extract_range=True) attrs = self._get_attributes_dict() left, right = self, other diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ce06fc55ee8e6..8ce9195e70080 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2058,8 +2058,8 @@ def _factorize_keys( (array([0, 1, 2]), array([0, 1]), 3) """ # Some pre-processing for non-ndarray lk / rk - lk = extract_array(lk, extract_numpy=True, range_compat=True) - rk = extract_array(rk, extract_numpy=True, range_compat=True) + lk = extract_array(lk, extract_numpy=True, extract_range=True) + rk = extract_array(rk, extract_numpy=True, extract_range=True) # TODO: if either is a RangeIndex, we can likely factorize more efficiently? if is_datetime64tz_dtype(lk.dtype) and is_datetime64tz_dtype(rk.dtype): diff --git a/pandas/core/series.py b/pandas/core/series.py index 8ed94edd395ca..e56773f239901 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5027,7 +5027,7 @@ def _cmp_method(self, other, op): raise ValueError("Can only compare identically-labeled Series objects") lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, range_compat=True) + rvalues = extract_array(other, extract_numpy=True, extract_range=True) res_values = ops.comparison_op(lvalues, rvalues, op) @@ -5038,7 +5038,7 @@ def _logical_method(self, other, op): self, other = ops.align_method_SERIES(self, other, align_asobject=True) lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, range_compat=True) + rvalues = extract_array(other, extract_numpy=True, extract_range=True) res_values = ops.logical_op(lvalues, rvalues, op) return self._construct_result(res_values, name=res_name) @@ -5048,7 +5048,7 @@ def _arith_method(self, other, op): self, other = ops.align_method_SERIES(self, other) lvalues = self._values - rvalues = extract_array(other, extract_numpy=True, range_compat=True) + rvalues = extract_array(other, extract_numpy=True, extract_range=True) result = ops.arithmetic_op(lvalues, rvalues, op) diff --git a/pandas/tests/construction/__init__.py b/pandas/tests/construction/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/construction/test_extract_array.py b/pandas/tests/construction/test_extract_array.py new file mode 100644 index 0000000000000..4dd3eda8c995c --- /dev/null +++ b/pandas/tests/construction/test_extract_array.py @@ -0,0 +1,18 @@ +from pandas import Index +import pandas._testing as tm +from pandas.core.construction import extract_array + + +def test_extract_array_rangeindex(): + ri = Index(range(5)) + + expected = ri._values + res = extract_array(ri, extract_numpy=True, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + res = extract_array(ri, extract_numpy=False, extract_range=True) + tm.assert_numpy_array_equal(res, expected) + + res = extract_array(ri, extract_numpy=True, extract_range=False) + tm.assert_index_equal(res, ri) + res = extract_array(ri, extract_numpy=False, extract_range=False) + tm.assert_index_equal(res, ri) From bc7c6281ef8e52d22304a03c9438be175348a9e2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 8 Mar 2021 
11:01:06 -0800 Subject: [PATCH 05/10] troubleshoot ci --- pandas/tests/extension/test_boolean.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 89991a459795e..3ef3beaa9c1b1 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -16,8 +16,6 @@ import numpy as np import pytest -from pandas.compat.numpy import is_numpy_dev - import pandas as pd import pandas._testing as tm from pandas.core.arrays.boolean import BooleanDtype @@ -322,7 +320,6 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df.groupby("A").apply(groupby_apply_op) df.groupby("A").B.apply(groupby_apply_op) - @pytest.mark.xfail(is_numpy_dev, reason="2021-03-02 #40144 expecting fix in numpy") def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) result = df.groupby("A").B.apply(lambda x: x.array) From c5977d65748c3a7f6d0c26a10840e2bfb36043bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 8 Mar 2021 12:25:44 -0800 Subject: [PATCH 06/10] Update pandas/core/construction.py Co-authored-by: Joris Van den Bossche --- pandas/core/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 7573b520b7141..c96b139b70fd8 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -388,7 +388,7 @@ def extract_array( Whether to extract the ndarray from a PandasArray extract_range : bool, default False - If we have a RangeIndex, return range._values if True, otherwise + If we have a RangeIndex, return range._values if True (which is a materialized integer ndarray), otherwise return unchanged. 
Returns From 217dd68edf8842e6e00ace9febe36ae38f9f7e4e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 8 Mar 2021 12:30:41 -0800 Subject: [PATCH 07/10] no longer need to skip array manager test --- pandas/tests/groupby/transform/test_transform.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index c4621d5fc0f8c..9350a3fcd3036 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -4,8 +4,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas.core.dtypes.common import ( ensure_platform_int, is_timedelta64_dtype, @@ -190,8 +188,6 @@ def test_transform_axis_1(request, transformation_func, using_array_manager): tm.assert_equal(result, expected) -# TODO(ArrayManager) groupby().transform returns DataFrame backed by BlockManager -@td.skip_array_manager_not_yet_implemented def test_transform_axis_ts(tsframe): # make sure that we are setting the axes From 5c24b7534d808c0b66196cef0e6539553305bcb3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 8 Mar 2021 12:53:40 -0800 Subject: [PATCH 08/10] flake8 fixup --- pandas/core/construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index c96b139b70fd8..18bd8ac97d756 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -388,8 +388,8 @@ def extract_array( Whether to extract the ndarray from a PandasArray extract_range : bool, default False - If we have a RangeIndex, return range._values if True (which is a materialized integer ndarray), otherwise - return unchanged. + If we have a RangeIndex, return range._values if True + (which is a materialized integer ndarray), otherwise return unchanged. 
Returns ------- From 20bb6fc01b53dd6277e80bdc96ee144831cde15e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Mar 2021 19:01:45 -0700 Subject: [PATCH 09/10] mypy fixup --- pandas/_testing/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 679c9a2b44b53..fdcefc622f373 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -209,7 +209,7 @@ def box_expected(expected, box_cls, transpose=True): if box_cls is pd.array: if isinstance(expected, pd.RangeIndex): # pd.array would return an IntegerArray - expected = PandasArray(expected._values) + expected = PandasArray(np.asarray(expected._values)) else: expected = pd.array(expected) elif box_cls is pd.Index: From 9dcf5f483cc199395c9cbfdf39576e380f7bf00a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 18 Mar 2021 11:31:46 -0700 Subject: [PATCH 10/10] revert xfail --- pandas/tests/extension/base/groupby.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index d93afef60561a..30b115b9dba6f 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -1,7 +1,5 @@ import pytest -from pandas.compat.numpy import is_numpy_dev - import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -75,10 +73,6 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): df.groupby("A").apply(groupby_apply_op) df.groupby("A").B.apply(groupby_apply_op) - # Non-strict bc these xpass on dt64tz, Period, Interval, JSON, PandasArray - @pytest.mark.xfail( - is_numpy_dev, reason="2021-03-02 #40144 expecting fix in numpy", strict=False - ) def test_groupby_apply_identity(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) result = df.groupby("A").B.apply(lambda x: x.array)
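
A closing sketch (not part of the patch series) of the extract_array semantics the final commits settle on: a RangeIndex now passes through unchanged unless the caller opts into materialization with extract_range=True. This assumes pandas built from this branch, with the signatures as patched above.

import pandas as pd
from pandas.core.construction import extract_array

ri = pd.RangeIndex(5)

# By default a RangeIndex is returned unchanged, so no int64 ndarray is
# materialized as a side effect of the ops plumbing.
assert extract_array(ri, extract_numpy=True) is ri

# Callers that genuinely need an ndarray (e.g. _factorize_keys or the
# Series arithmetic/comparison methods patched above) opt in explicitly.
vals = extract_array(ri, extract_numpy=True, extract_range=True)
print(vals)  # [0 1 2 3 4]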
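
Likewise, a hedged sketch of the user-facing sort_index validation exercised by test_sort_index_ascending_bad_value_raises above, assuming the validate_ascending path from pandas/util/_validators.py; the example values mirror that test.

import pandas as pd

ser = pd.Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9])

ser.sort_index(ascending=False)  # plain bools (and, internally, ints) still validate

try:
    ser.sort_index(ascending=None)  # None is now rejected up front
except ValueError as err:
    print(err)  # For argument "ascending" expected type bool, received type NoneType.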