Andy-Grigg
diff --git a/‎.github/workflows/posix.yml
+1-1 b/‎.github/workflows/posix.yml
+1-1
diff --git a/‎.pre-commit-config.yaml
+1-1 b/‎.pre-commit-config.yaml
+1-1
diff --git a/‎Dockerfile
+6-1 b/‎Dockerfile
+6-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+4-2 b/‎asv_bench/benchmarks/groupby.py
+4-2
diff --git a/‎asv_bench/benchmarks/indexing.py
+39-9 b/‎asv_bench/benchmarks/indexing.py
+39-9
diff --git a/‎asv_bench/benchmarks/reindex.py
+7-1 b/‎asv_bench/benchmarks/reindex.py
+7-1
diff --git a/‎doc/source/_static/style/footer_extended.png
12 KB b/‎doc/source/_static/style/footer_extended.png
12 KB
diff --git a/‎doc/source/_static/style/footer_simple.png
8.51 KB b/‎doc/source/_static/style/footer_simple.png
8.51 KB
diff --git a/‎doc/source/reference/arrays.rst
+89-16 b/‎doc/source/reference/arrays.rst
+89-16
diff --git a/‎doc/source/reference/general_functions.rst
-7 b/‎doc/source/reference/general_functions.rst
-7
@@ -28,7 +28,7 @@ jobs:
         pattern: ["not single_cpu", "single_cpu"]
         # Don't test pyarrow v2/3: Causes timeouts in read_csv engine
         # even if tests are skipped/xfailed
-        pyarrow_version: ["5", "6", "7"]
+        pyarrow_version: ["5", "7"]
         include:
           - env_file: actions-38-downstream_compat.yaml
             pattern: "not slow and not network and not single_cpu"
 
@@ -178,7 +178,7 @@ repos:
         language: python
         files: ^pandas/core/generic\.py$
     -   id: pandas-errors-documented
-        name: Ensure pandas errors are documented in doc/source/reference/general_utility_functions.rst
+        name: Ensure pandas errors are documented in doc/source/reference/testing.rst
         entry: python scripts/pandas_errors_documented.py
         language: python
         files: ^pandas/errors/__init__.py$
 
@@ -1,4 +1,4 @@
-FROM quay.io/condaforge/miniforge3:4.11.0-0
+FROM quay.io/condaforge/miniforge3
 
 # if you forked pandas, you can pass in your own GitHub username to use your fork
 # i.e. gh_username=myname
@@ -12,6 +12,11 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update \
     && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
     #
+    # Install tzdata and set the timezone to UTC (some tests read "/etc/localtime")
+    && apt-get -y install tzdata \
+    && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
+    && dpkg-reconfigure -f noninteractive tzdata \
+    #
     # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
     && apt-get -y install git iproute2 procps iproute2 lsb-release \
     #
 
@@ -18,6 +18,7 @@
 
 method_blocklist = {
     "object": {
+        "diff",
         "median",
         "prod",
         "sem",
@@ -405,7 +406,7 @@ class GroupByMethods:
 
     param_names = ["dtype", "method", "application", "ncols"]
     params = [
-        ["int", "float", "object", "datetime", "uint"],
+        ["int", "int16", "float", "object", "datetime", "uint"],
         [
             "all",
             "any",
@@ -417,6 +418,7 @@ class GroupByMethods:
             "cumprod",
             "cumsum",
             "describe",
+            "diff",
             "ffill",
             "first",
             "head",
@@ -478,7 +480,7 @@ def setup(self, dtype, method, application, ncols):
         values = rng.take(taker, axis=0)
         if dtype == "int":
             key = np.random.randint(0, size, size=size)
-        elif dtype == "uint":
+        elif dtype in ("int16", "uint"):
             key = np.random.randint(0, size, size=size, dtype=dtype)
         elif dtype == "float":
             key = np.concatenate(
 
@@ -204,11 +204,11 @@ class MultiIndexing:
     param_names = ["unique_levels"]
 
     def setup(self, unique_levels):
-        self.ndim = 2
+        self.nlevels = 2
         if unique_levels:
-            mi = MultiIndex.from_arrays([range(1000000)] * self.ndim)
+            mi = MultiIndex.from_arrays([range(1000000)] * self.nlevels)
         else:
-            mi = MultiIndex.from_product([range(1000)] * self.ndim)
+            mi = MultiIndex.from_product([range(1000)] * self.nlevels)
         self.df = DataFrame(np.random.randn(len(mi)), index=mi)
 
         self.tgt_slice = slice(200, 800)
@@ -232,27 +232,27 @@ def time_loc_partial_key_list(self, unique_levels):
     def time_loc_partial_key_scalar(self, unique_levels):
         self.df.loc[self.tgt_scalar, :]
 
-    def time_loc_partial_bool_indexer(self, unique_levels):
+    def time_loc_partial_key_bool_indexer(self, unique_levels):
         self.df.loc[self.tgt_bool_indexer, :]
 
     def time_loc_all_slices(self, unique_levels):
-        target = tuple([self.tgt_slice] * self.ndim)
+        target = tuple([self.tgt_slice] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_null_slices(self, unique_levels):
-        target = tuple([self.tgt_null_slice] * self.ndim)
+        target = tuple([self.tgt_null_slice] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_lists(self, unique_levels):
-        target = tuple([self.tgt_list] * self.ndim)
+        target = tuple([self.tgt_list] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_scalars(self, unique_levels):
-        target = tuple([self.tgt_scalar] * self.ndim)
+        target = tuple([self.tgt_scalar] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_all_bool_indexers(self, unique_levels):
-        target = tuple([self.tgt_bool_indexer] * self.ndim)
+        target = tuple([self.tgt_bool_indexer] * self.nlevels)
         self.df.loc[target, :]
 
     def time_loc_slice_plus_null_slice(self, unique_levels):
@@ -263,6 +263,18 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
         target = (self.tgt_null_slice, self.tgt_slice)
         self.df.loc[target, :]
 
+    def time_xs_level_0(self, unique_levels):
+        target = self.tgt_scalar
+        self.df.xs(target, level=0)
+
+    def time_xs_level_1(self, unique_levels):
+        target = self.tgt_scalar
+        self.df.xs(target, level=1)
+
+    def time_xs_full_key(self, unique_levels):
+        target = tuple([self.tgt_scalar] * self.nlevels)
+        self.df.xs(target)
+
 
 class IntervalIndexing:
     def setup_cache(self):
@@ -297,6 +309,24 @@ def time_get_indexer_mismatched_tz(self):
         self.dti.get_indexer(self.dti2)
 
 
+class SortedAndUnsortedDatetimeIndexLoc:
+    def setup(self):
+        dti = date_range("2016-01-01", periods=10000, tz="US/Pacific")
+        index = np.array(dti)
+
+        unsorted_index = index.copy()
+        unsorted_index[10] = unsorted_index[20]
+
+        self.df_unsorted = DataFrame(index=unsorted_index, data={"a": 1})
+        self.df_sort = DataFrame(index=index, data={"a": 1})
+
+    def time_loc_unsorted(self):
+        self.df_unsorted.loc["2016-6-11"]
+
+    def time_loc_sorted(self):
+        self.df_sort.loc["2016-6-11"]
+
+
 class CategoricalIndexIndexing:
 
     params = ["monotonic_incr", "monotonic_decr", "non_monotonic"]
 
@@ -28,16 +28,22 @@ def setup(self):
         index = MultiIndex.from_arrays([level1, level2])
         self.s = Series(np.random.randn(N * K), index=index)
         self.s_subset = self.s[::2]
+        self.s_subset_no_cache = self.s[::2].copy()
 
     def time_reindex_dates(self):
         self.df.reindex(self.rng_subset)
 
     def time_reindex_columns(self):
         self.df2.reindex(columns=self.df.columns[1:5])
 
-    def time_reindex_multiindex(self):
+    def time_reindex_multiindex_with_cache(self):
+        # Same index object on every call, so MultiIndex._values gets cached
         self.s.reindex(self.s_subset.index)
 
+    def time_reindex_multiindex_no_cache(self):
+        # Copy the index on every call to avoid MultiIndex._values getting cached
+        self.s.reindex(self.s_subset_no_cache.index.copy())
+
 
 class ReindexMethod:
 
 
@@ -6,6 +6,10 @@
 pandas arrays, scalars, and data types
 ======================================
 
+*******
+Objects
+*******
+
 .. currentmodule:: pandas
 
 For most data types, pandas uses NumPy arrays as the concrete
@@ -40,8 +44,8 @@ stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFra
 
 .. _api.arrays.datetime:
 
-Datetime data
--------------
+Datetimes
+---------
 
 NumPy cannot natively represent timezone-aware datetimes. pandas supports this
 with the :class:`arrays.DatetimeArray` extension array, which can hold timezone-naive
@@ -161,8 +165,8 @@ If the data are timezone-aware, then every value in the array must have the same
 
 .. _api.arrays.timedelta:
 
-Timedelta data
---------------
+Timedeltas
+----------
 
 NumPy can natively represent timedeltas. pandas provides :class:`Timedelta`
 for symmetry with :class:`Timestamp`.
@@ -216,8 +220,8 @@ A collection of :class:`Timedelta` may be stored in a :class:`TimedeltaArray`.
 
 .. _api.arrays.period:
 
-Timespan data
--------------
+Periods
+-------
 
 pandas represents spans of times as :class:`Period` objects.
 
@@ -284,8 +288,8 @@ Every period in a :class:`arrays.PeriodArray` must have the same ``freq``.
 
 .. _api.arrays.interval:
 
-Interval data
--------------
+Intervals
+---------
 
 Arbitrary intervals can be represented as :class:`Interval` objects.
 
@@ -379,8 +383,8 @@ pandas provides this through :class:`arrays.IntegerArray`.
 
 .. _api.arrays.categorical:
 
-Categorical data
-----------------
+Categoricals
+------------
 
 pandas defines a custom data type for representing data that can take only a
 limited, fixed set of values. The dtype of a :class:`Categorical` can be described by
@@ -444,8 +448,8 @@ data. See :ref:`api.series.cat` for more.
 
 .. _api.arrays.sparse:
 
-Sparse data
------------
+Sparse
+------
 
 Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may
 be stored efficiently as a :class:`arrays.SparseArray`.
@@ -469,8 +473,8 @@ and methods if the :class:`Series` contains sparse values. See
 
 .. _api.arrays.string:
 
-Text data
----------
+Strings
+-------
 
 When working with text data, where each valid element is a string or missing,
 we recommend using :class:`StringDtype` (with the alias ``"string"``).
@@ -494,8 +498,8 @@ See :ref:`api.series.str` for more.
 
 .. _api.arrays.bool:
 
-Boolean data with missing values
---------------------------------
+Nullable Boolean
+----------------
 
 The boolean dtype (with the alias ``"boolean"``) provides support for storing
 boolean data (``True``, ``False``) with missing values, which is not possible
@@ -525,3 +529,72 @@ with a bool :class:`numpy.ndarray`.
       DatetimeTZDtype.tz
       PeriodDtype.freq
       IntervalDtype.subtype
+
+*********
+Utilities
+*********
+
+Constructors
+------------
+.. autosummary::
+   :toctree: api/
+
+   api.types.union_categoricals
+   api.types.infer_dtype
+   api.types.pandas_dtype
+
+Data type introspection
+~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+    api.types.is_bool_dtype
+    api.types.is_categorical_dtype
+    api.types.is_complex_dtype
+    api.types.is_datetime64_any_dtype
+    api.types.is_datetime64_dtype
+    api.types.is_datetime64_ns_dtype
+    api.types.is_datetime64tz_dtype
+    api.types.is_extension_type
+    api.types.is_extension_array_dtype
+    api.types.is_float_dtype
+    api.types.is_int64_dtype
+    api.types.is_integer_dtype
+    api.types.is_interval_dtype
+    api.types.is_numeric_dtype
+    api.types.is_object_dtype
+    api.types.is_period_dtype
+    api.types.is_signed_integer_dtype
+    api.types.is_string_dtype
+    api.types.is_timedelta64_dtype
+    api.types.is_timedelta64_ns_dtype
+    api.types.is_unsigned_integer_dtype
+    api.types.is_sparse
+
+Iterable introspection
+~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+    api.types.is_dict_like
+    api.types.is_file_like
+    api.types.is_list_like
+    api.types.is_named_tuple
+    api.types.is_iterator
+
+Scalar introspection
+~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+    api.types.is_bool
+    api.types.is_categorical
+    api.types.is_complex
+    api.types.is_float
+    api.types.is_hashable
+    api.types.is_integer
+    api.types.is_interval
+    api.types.is_number
+    api.types.is_re
+    api.types.is_re_compilable
+    api.types.is_scalar
@@ -78,10 +78,3 @@ Hashing
 
    util.hash_array
    util.hash_pandas_object
-
-Testing
-~~~~~~~
-.. autosummary::
-   :toctree: api/
-
-   test