From 2b9ad6f772d33428d0a6e7e5c26ee542e082000d Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Fri, 19 Mar 2021 09:22:48 -0500 Subject: [PATCH 01/14] CI: run database tests only #39550 (#39666) --- .github/workflows/database.yml | 102 ++---------------- ...-37-locale.yaml => actions-37-db-min.yaml} | 1 + ...actions-37-cov.yaml => actions-37-db.yaml} | 0 3 files changed, 10 insertions(+), 93 deletions(-) rename ci/deps/{actions-37-locale.yaml => actions-37-db-min.yaml} (97%) rename ci/deps/{actions-37-cov.yaml => actions-37-db.yaml} (100%) diff --git a/.github/workflows/database.yml b/.github/workflows/database.yml index a30dbc048c03d..ba5a0a1fd0909 100644 --- a/.github/workflows/database.yml +++ b/.github/workflows/database.yml @@ -12,17 +12,19 @@ env: PYTEST_WORKERS: "auto" PANDAS_CI: 1 PATTERN: ((not slow and not network and not clipboard) or (single and db)) + COVERAGE: true jobs: - Linux_py37_locale: + Linux_py37_IO: runs-on: ubuntu-latest defaults: run: shell: bash -l {0} - env: - ENV_FILE: ci/deps/actions-37-locale.yaml - LOCALE_OVERRIDE: zh_CN.UTF-8 + strategy: + matrix: + ENV_FILE: [ci/deps/actions-37-db-min.yaml, ci/deps/actions-37-db.yaml] + fail-fast: false services: mysql: @@ -63,106 +65,20 @@ jobs: with: path: ~/conda_pkgs_dir key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('${{ env.ENV_FILE }}') }} + hashFiles('${{ matrix.ENV_FILE }}') }} - uses: conda-incubator/setup-miniconda@v2 with: activate-environment: pandas-dev channel-priority: strict - environment-file: ${{ env.ENV_FILE }} + environment-file: ${{ matrix.ENV_FILE }} use-only-tar-bz2: true - name: Build Pandas uses: ./.github/actions/build_pandas - name: Test - run: ci/run_tests.sh - if: always() - - - name: Build Version - run: pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd - - - name: Publish test results - uses: actions/upload-artifact@master - with: - name: Test results - path: test-data.xml - if: failure() - - - name: Print skipped tests - run: python ci/print_skipped.py - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v1 - with: - flags: unittests - name: codecov-pandas - fail_ci_if_error: false - - Linux_py37_cov: - runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} - - env: - ENV_FILE: ci/deps/actions-37-cov.yaml - PANDAS_TESTING_MODE: deprecate - COVERAGE: true - - services: - mysql: - image: mysql - env: - MYSQL_ALLOW_EMPTY_PASSWORD: yes - MYSQL_DATABASE: pandas - options: >- - --health-cmd "mysqladmin ping" - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 3306:3306 - - postgres: - image: postgres - env: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: pandas - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - - 5432:5432 - - steps: - - name: Checkout - uses: actions/checkout@v1 - - - name: Cache conda - uses: actions/cache@v1 - env: - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ - hashFiles('${{ env.ENV_FILE }}') }} - - - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: pandas-dev - channel-priority: strict - environment-file: ${{ env.ENV_FILE }} - use-only-tar-bz2: true - - - name: Build Pandas - uses: ./.github/actions/build_pandas - - - name: Test - run: ci/run_tests.sh + run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml -s --cov=pandas --cov-report=xml 
pandas/tests/io if: always() - name: Build Version diff --git a/ci/deps/actions-37-locale.yaml b/ci/deps/actions-37-db-min.yaml similarity index 97% rename from ci/deps/actions-37-locale.yaml rename to ci/deps/actions-37-db-min.yaml index 551308f1d5fac..1d3794576220a 100644 --- a/ci/deps/actions-37-locale.yaml +++ b/ci/deps/actions-37-db-min.yaml @@ -7,6 +7,7 @@ dependencies: # tools - cython>=0.29.21 - pytest>=5.0.1 + - pytest-cov - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/actions-37-cov.yaml b/ci/deps/actions-37-db.yaml similarity index 100% rename from ci/deps/actions-37-cov.yaml rename to ci/deps/actions-37-db.yaml From 69a4d60d20c4c05a4dbe489533b5c19cc95ca914 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 19 Mar 2021 14:47:39 +0000 Subject: [PATCH 02/14] no no-string-hints (#40516) --- .pre-commit-config.yaml | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aa8c2b74d7a7e..e3dd6b018b8aa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,10 +50,6 @@ repos: rev: 5.7.0 hooks: - id: isort -- repo: https://github.com/MarcoGorelli/no-string-hints - rev: v0.1.7 - hooks: - - id: no-string-hints - repo: https://github.com/asottile/pyupgrade rev: v2.10.0 hooks: @@ -111,12 +107,6 @@ repos: pandas/tests/io/excel/test_writers\.py |pandas/tests/io/pytables/common\.py |pandas/tests/io/pytables/test_store\.py$ - - id: no-pandas-api-types - name: Check code for instances of pd.api.types - entry: (pd|pandas)\.api\.types\. - language: pygrep - types: [python] - files: ^pandas/tests/ - id: non-standard-imports name: Check for non-standard imports language: pygrep @@ -128,6 +118,11 @@ repos: # Check for imports from collections.abc instead of `from collections import abc` |from\ collections\.abc\ import + + # Numpy + |from\ numpy\ import\ random + |from\ numpy\.random\ import + types: [python] - id: non-standard-imports-in-tests name: Check for non-standard imports in test suite language: pygrep @@ -143,26 +138,17 @@ repos: # Check for use of pandas.testing instead of tm |pd\.testing\. + + # Check for pd.api.types instead of from pandas.api.types import ... + |(pd|pandas)\.api\.types\. types: [python] files: ^pandas/tests/ - - id: non-standard-numpy-random-related-imports - name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py - language: pygrep - exclude: pandas/_testing.py + - id: np-bool-and-np-object + name: Check for use of np.bool/np.object instead of np.bool_/np.object_ entry: | (?x) - # Check for imports from np.random. 
instead of `from numpy import random` or `from numpy.random import ` - from\ numpy\ import\ random - |from\ numpy.random\ import - types: [python] - - id: np-bool - name: Check for use of np.bool instead of np.bool_ - entry: np\.bool[^_8] - language: pygrep - types_or: [python, cython, rst] - - id: np-object - name: Check for use of np.object instead of np.object_ - entry: np\.object[^_8] + np\.bool[^_8] + |np\.object[^_8] language: pygrep types_or: [python, cython, rst] - id: pip-to-conda From bbe34fc21131796918bc12aa9e12d74bc608c888 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 19 Mar 2021 09:46:36 -0700 Subject: [PATCH 03/14] REF: share to_native_types with ArrayManager (#40490) --- pandas/core/internals/array_manager.py | 3 +- pandas/core/internals/blocks.py | 158 ++++++++++++------------- 2 files changed, 81 insertions(+), 80 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index a417cd0e06872..34b3d83c066c2 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -88,6 +88,7 @@ from pandas.core.internals.blocks import ( ensure_block_shape, new_block, + to_native_types, ) if TYPE_CHECKING: @@ -634,7 +635,7 @@ def replace_list( ) def to_native_types(self, **kwargs): - return self.apply_with_block("to_native_types", **kwargs) + return self.apply(to_native_types, **kwargs) @property def is_mixed_type(self) -> bool: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3fd1ebaca19f0..99e54bace8915 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -97,6 +97,7 @@ FloatingArray, IntegerArray, PandasArray, + TimedeltaArray, ) from pandas.core.base import PandasObject import pandas.core.common as com @@ -260,9 +261,11 @@ def get_block_values_for_json(self) -> np.ndarray: # TODO(EA2D): reshape will be unnecessary with 2D EAs return np.asarray(self.values).reshape(self.shape) + @final @property def fill_value(self): - return np.nan + # Used in reindex_indexer + return na_value_for_dtype(self.dtype, compat=False) @property def mgr_locs(self) -> BlockPlacement: @@ -652,24 +655,11 @@ def should_store(self, value: ArrayLike) -> bool: """ return is_dtype_equal(value.dtype, self.dtype) + @final def to_native_types(self, na_rep="nan", quoting=None, **kwargs): """ convert to our native types format """ - values = self.values - - mask = isna(values) - itemsize = writers.word_len(na_rep) - - if not self.is_object and not quoting and itemsize: - values = values.astype(str) - if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: - # enlarge for the na_rep - values = values.astype(f" np.ndarray: def array_values(self) -> ExtensionArray: return self.values - def to_native_types(self, na_rep="nan", quoting=None, **kwargs): - """override to use ExtensionArray astype for the conversion""" - values = self.values - mask = isna(values) - - new_values = np.asarray(values.astype(object)) - new_values[mask] = na_rep - return self.make_block(new_values) - def take_nd( self, indexer, @@ -1808,41 +1784,6 @@ def is_bool(self): class FloatBlock(NumericBlock): __slots__ = () - def to_native_types( - self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs - ): - """ convert to our native types format """ - values = self.values - - # see gh-13418: no special formatting is desired at the - # output (important for appropriate 'quoting' behaviour), - # so do not pass it through the FloatArrayFormatter - if float_format is None and 
decimal == ".": - mask = isna(values) - - if not quoting: - values = values.astype(str) - else: - values = np.array(values, dtype="object") - - values[mask] = na_rep - values = values.astype(object, copy=False) - return self.make_block(values) - - from pandas.io.formats.format import FloatArrayFormatter - - formatter = FloatArrayFormatter( - values, - na_rep=na_rep, - float_format=float_format, - decimal=decimal, - quoting=quoting, - fixed_width=False, - ) - res = formatter.get_result_as_array() - res = res.astype(object, copy=False) - return self.make_block(res) - class NDArrayBackedExtensionBlock(HybridMixin, Block): """ @@ -1962,18 +1903,6 @@ def array_values(self): def _holder(self): return type(self.array_values()) - @property - def fill_value(self): - return na_value_for_dtype(self.dtype) - - def to_native_types(self, na_rep="NaT", **kwargs): - """ convert to our native types format """ - arr = self.array_values() - - result = arr._format_native_types(na_rep=na_rep, **kwargs) - result = result.astype(object, copy=False) - return self.make_block(result) - class DatetimeBlock(DatetimeLikeBlockMixin): __slots__ = () @@ -1999,7 +1928,6 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): internal_values = Block.internal_values _can_hold_element = DatetimeBlock._can_hold_element - to_native_types = DatetimeBlock.to_native_types diff = DatetimeBlock.diff where = DatetimeBlock.where putmask = DatetimeLikeBlockMixin.putmask @@ -2316,3 +2244,75 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: # We can't, and don't need to, reshape. values = np.asarray(values).reshape(1, -1) return values + + +def to_native_types( + values: ArrayLike, + *, + na_rep="nan", + quoting=None, + float_format=None, + decimal=".", + **kwargs, +) -> np.ndarray: + """ convert to our native types format """ + values = ensure_wrapped_if_datetimelike(values) + + if isinstance(values, (DatetimeArray, TimedeltaArray)): + result = values._format_native_types(na_rep=na_rep, **kwargs) + result = result.astype(object, copy=False) + return result + + elif isinstance(values, ExtensionArray): + mask = isna(values) + + new_values = np.asarray(values.astype(object)) + new_values[mask] = na_rep + return new_values + + elif values.dtype.kind == "f": + # see GH#13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == ".": + mask = isna(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype="object") + + values[mask] = na_rep + values = values.astype(object, copy=False) + return values + + from pandas.io.formats.format import FloatArrayFormatter + + formatter = FloatArrayFormatter( + values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + res = formatter.get_result_as_array() + res = res.astype(object, copy=False) + return res + + else: + + mask = isna(values) + itemsize = writers.word_len(na_rep) + + if values.dtype != _dtype_obj and not quoting and itemsize: + values = values.astype(str) + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: + # enlarge for the na_rep + values = values.astype(f" Date: Fri, 19 Mar 2021 17:51:28 +0100 Subject: [PATCH 04/14] PERF: optimize is_numeric_v_string_like (#40501) --- pandas/core/dtypes/common.py | 53 ++++-------------------------- pandas/core/dtypes/missing.py | 3 +- pandas/tests/dtypes/test_common.py | 13 
+------- 3 files changed, 8 insertions(+), 61 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 7a2d6468f1b63..32ea82d9c0402 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1100,7 +1100,7 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: # This exists to silence numpy deprecation warnings, see GH#29553 -def is_numeric_v_string_like(a, b): +def is_numeric_v_string_like(a: ArrayLike, b): """ Check if we are comparing a string-like object to a numeric ndarray. NumPy doesn't like to compare such objects, especially numeric arrays @@ -1108,7 +1108,7 @@ def is_numeric_v_string_like(a, b): Parameters ---------- - a : array-like, scalar + a : array-like The first object to check. b : array-like, scalar The second object to check. @@ -1120,16 +1120,8 @@ def is_numeric_v_string_like(a, b): Examples -------- - >>> is_numeric_v_string_like(1, 1) - False - >>> is_numeric_v_string_like("foo", "foo") - False - >>> is_numeric_v_string_like(1, "foo") # non-array numeric - False >>> is_numeric_v_string_like(np.array([1]), "foo") True - >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check - True >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) True >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) @@ -1142,17 +1134,15 @@ def is_numeric_v_string_like(a, b): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) - is_a_numeric_array = is_a_array and is_numeric_dtype(a) - is_b_numeric_array = is_b_array and is_numeric_dtype(b) - is_a_string_array = is_a_array and is_string_like_dtype(a) - is_b_string_array = is_b_array and is_string_like_dtype(b) + is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b") + is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b") + is_a_string_array = is_a_array and a.dtype.kind in ("S", "U") + is_b_string_array = is_b_array and b.dtype.kind in ("S", "U") - is_a_scalar_string_like = not is_a_array and isinstance(a, str) is_b_scalar_string_like = not is_b_array and isinstance(b, str) return ( (is_a_numeric_array and is_b_scalar_string_like) - or (is_b_numeric_array and is_a_scalar_string_like) or (is_a_numeric_array and is_b_string_array) or (is_b_numeric_array and is_a_string_array) ) @@ -1305,37 +1295,6 @@ def is_numeric_dtype(arr_or_dtype) -> bool: ) -def is_string_like_dtype(arr_or_dtype) -> bool: - """ - Check whether the provided array or dtype is of a string-like dtype. - - Unlike `is_string_dtype`, the object dtype is excluded because it - is a mixed dtype. - - Parameters - ---------- - arr_or_dtype : array-like - The array or dtype to check. - - Returns - ------- - boolean - Whether or not the array or dtype is of the string dtype. - - Examples - -------- - >>> is_string_like_dtype(str) - True - >>> is_string_like_dtype(object) - False - >>> is_string_like_dtype(np.array(['a', 'b'])) - True - >>> is_string_like_dtype(pd.Series([1, 2])) - False - """ - return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U")) - - def is_float_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a float dtype. 
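The speed-up in PATCH 04 comes from swapping the generic is_numeric_dtype/is_string_like_dtype helpers, which dispatch on arbitrary array-likes and dtypes, for direct checks of the ndarray's one-character dtype.kind code. A minimal standalone sketch of that check (the helper name here is illustrative, not part of the patch):

    import numpy as np

    def numeric_vs_string(a: np.ndarray, b: np.ndarray) -> bool:
        # dtype.kind is a one-character code: u/i/f/c/b are numeric,
        # S/U are bytes/str; comparing one of each is the case that
        # used to trigger numpy's elementwise DeprecationWarning.
        numeric = ("u", "i", "f", "c", "b")
        string = ("S", "U")
        return (a.dtype.kind in numeric and b.dtype.kind in string) or (
            a.dtype.kind in string and b.dtype.kind in numeric
        )

    assert numeric_vs_string(np.array([1, 2]), np.array(["foo"]))
    assert not numeric_vs_string(np.array([1]), np.array([2]))
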
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 59d6f9a51ed43..8c2cff21c114e 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -35,7 +35,6 @@ is_object_dtype, is_scalar, is_string_dtype, - is_string_like_dtype, needs_i8_conversion, ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -258,7 +257,7 @@ def _isna_string_dtype(values: np.ndarray, inf_as_na: bool) -> np.ndarray: dtype = values.dtype shape = values.shape - if is_string_like_dtype(dtype): + if dtype.kind in ("S", "U"): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 248798408381e..406aec9d4c16e 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -469,14 +469,11 @@ def test_is_datetime_or_timedelta_dtype(): def test_is_numeric_v_string_like(): - assert not com.is_numeric_v_string_like(1, 1) - assert not com.is_numeric_v_string_like(1, "foo") - assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), 1) assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array([1]), "foo") - assert com.is_numeric_v_string_like("foo", np.array([1])) assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) @@ -521,14 +518,6 @@ def test_is_numeric_dtype(): assert com.is_numeric_dtype(pd.Index([1, 2.0])) -def test_is_string_like_dtype(): - assert not com.is_string_like_dtype(object) - assert not com.is_string_like_dtype(pd.Series([1, 2])) - - assert com.is_string_like_dtype(str) - assert com.is_string_like_dtype(np.array(["a", "b"])) - - def test_is_float_dtype(): assert not com.is_float_dtype(str) assert not com.is_float_dtype(int) From b524462e1f88319912ee5ad91a45e6d1986c9dba Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 19 Mar 2021 18:15:26 +0100 Subject: [PATCH 05/14] CLN/PERF: remove catching of numpy deprecation warning in comparison_op (#40515) --- pandas/core/ops/array_ops.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 04737d91c0d4e..333bdbf57bab3 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -6,7 +6,6 @@ from functools import partial import operator from typing import Any -import warnings import numpy as np @@ -232,7 +231,7 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: """ # NB: We assume extract_array has already been called on left and right lvalues = ensure_wrapped_if_datetimelike(left) - rvalues = right + rvalues = ensure_wrapped_if_datetimelike(right) rvalues = lib.item_from_zerodim(rvalues) if isinstance(rvalues, list): @@ -267,10 +266,7 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) else: - with warnings.catch_warnings(): - # suppress warnings from numpy about element-wise comparison - warnings.simplefilter("ignore", DeprecationWarning) - res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) + res_values = _na_arithmetic_op(lvalues, rvalues, op, is_cmp=True) return res_values From b519386e17ab4dbcc53d1a67d636dbb31c93085e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: 
Fri, 19 Mar 2021 18:09:25 -0700 Subject: [PATCH 06/14] TYP: get_reverse_indexer, get_group_index_sorter (#40476) --- pandas/_libs/internals.pyx | 20 ++++++----- pandas/_libs/lib.pyx | 44 +++++++++++++++++------- pandas/core/groupby/ops.py | 6 ++-- pandas/core/indexes/base.py | 29 +++++++++++----- pandas/core/indexes/multi.py | 2 +- pandas/core/reshape/reshape.py | 2 +- pandas/core/sorting.py | 14 ++++++-- pandas/tests/internals/test_internals.py | 12 +++---- pandas/tests/libs/test_lib.py | 4 +-- 9 files changed, 88 insertions(+), 45 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 8b643c03b6a19..5352ca53e1b54 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -13,14 +13,16 @@ import numpy as np cimport numpy as cnp from numpy cimport ( - NPY_INT64, + NPY_INTP, int64_t, + intp_t, ndarray, ) cnp.import_array() from pandas._libs.algos import ensure_int64 + from pandas._libs.util cimport is_integer_object @@ -30,7 +32,7 @@ cdef class BlockPlacement: # __slots__ = '_as_slice', '_as_array', '_len' cdef: slice _as_slice - ndarray _as_array # Note: this still allows `None` + ndarray _as_array # Note: this still allows `None`; will be intp_t bint _has_slice, _has_array, _is_known_slice_like def __cinit__(self, val): @@ -53,12 +55,12 @@ cdef class BlockPlacement: self._as_slice = slc self._has_slice = True else: - arr = np.empty(0, dtype=np.int64) + arr = np.empty(0, dtype=np.intp) self._as_array = arr self._has_array = True else: # Cython memoryview interface requires ndarray to be writeable. - arr = np.require(val, dtype=np.int64, requirements='W') + arr = np.require(val, dtype=np.intp, requirements='W') assert arr.ndim == 1, arr.shape self._as_array = arr self._has_array = True @@ -125,8 +127,8 @@ cdef class BlockPlacement: if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) # NOTE: this is the C-optimized equivalent of - # `np.arange(start, stop, step, dtype=np.int64)` - self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INT64) + # `np.arange(start, stop, step, dtype=np.intp)` + self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INTP) self._has_array = True return self._as_array @@ -325,13 +327,13 @@ cdef slice_getitem(slice slc, ind): else: # NOTE: # this is the C-optimized equivalent of - # `np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind]` - return cnp.PyArray_Arange(s_start, s_stop, s_step, NPY_INT64)[ind] + # `np.arange(s_start, s_stop, s_step, dtype=np.intp)[ind]` + return cnp.PyArray_Arange(s_start, s_stop, s_step, NPY_INTP)[ind] @cython.boundscheck(False) @cython.wraparound(False) -cdef slice indexer_as_slice(int64_t[:] vals): +cdef slice indexer_as_slice(intp_t[:] vals): cdef: Py_ssize_t i, n, start, stop int64_t d diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1ff481553e413..fc3e1ecfb55c1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -451,7 +451,7 @@ def fast_zip(list ndarrays) -> ndarray[object]: return result -def get_reverse_indexer(const int64_t[:] indexer, Py_ssize_t length): +def get_reverse_indexer(const intp_t[:] indexer, Py_ssize_t length) -> ndarray: """ Reverse indexing operation. @@ -459,14 +459,25 @@ def get_reverse_indexer(const int64_t[:] indexer, Py_ssize_t length): indexer_inv[indexer[x]] = x - .. note:: If indexer is not unique, only first occurrence is accounted. 
+ Parameters + ---------- + indexer : np.ndarray[np.intp] + length : int + + Returns + ------- + np.ndarray[np.intp] + + Notes + ----- + If indexer is not unique, only first occurrence is accounted. """ cdef: Py_ssize_t i, n = len(indexer) - ndarray[int64_t] rev_indexer - int64_t idx + ndarray[intp_t] rev_indexer + intp_t idx - rev_indexer = np.empty(length, dtype=np.int64) + rev_indexer = np.empty(length, dtype=np.intp) rev_indexer[:] = -1 for i in range(n): idx = indexer[i] @@ -808,23 +819,32 @@ def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner, @cython.boundscheck(False) @cython.wraparound(False) -def get_level_sorter(const int64_t[:] label, const int64_t[:] starts): +def get_level_sorter( + ndarray[int64_t, ndim=1] codes, const intp_t[:] starts +) -> ndarray: """ Argsort for a single level of a multi-index, keeping the order of higher levels unchanged. `starts` points to starts of same-key indices w.r.t to leading levels; equivalent to: - np.hstack([label[starts[i]:starts[i+1]].argsort(kind='mergesort') + np.hstack([codes[starts[i]:starts[i+1]].argsort(kind='mergesort') + starts[i] for i in range(len(starts) - 1)]) + + Parameters + ---------- + codes : np.ndarray[int64_t, ndim=1] + starts : np.ndarray[intp, ndim=1] + + Returns + ------- + np.ndarray[np.int, ndim=1] """ cdef: - int64_t l, r - Py_ssize_t i - ndarray[int64_t, ndim=1] out = np.empty(len(label), dtype=np.int64) - ndarray[int64_t, ndim=1] label_arr = np.asarray(label) + Py_ssize_t i, l, r + ndarray[intp_t, ndim=1] out = np.empty(len(codes), dtype=np.intp) for i in range(len(starts) - 1): l, r = starts[i], starts[i + 1] - out[l:r] = l + label_arr[l:r].argsort(kind='mergesort') + out[l:r] = l + codes[l:r].argsort(kind='mergesort') return out diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 74e96015b4544..a222a8cc464fb 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -991,10 +991,10 @@ def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0): @cache_readonly def slabels(self): # Sorted labels - return algorithms.take_nd(self.labels, self.sort_idx, allow_fill=False) + return algorithms.take_nd(self.labels, self._sort_idx, allow_fill=False) @cache_readonly - def sort_idx(self): + def _sort_idx(self) -> np.ndarray: # np.ndarray[np.intp] # Counting sort indexer return get_group_index_sorter(self.labels, self.ngroups) @@ -1013,7 +1013,7 @@ def __iter__(self): @cache_readonly def sorted_data(self) -> FrameOrSeries: - return self.data.take(self.sort_idx, axis=self.axis) + return self.data.take(self._sort_idx, axis=self.axis) def _chop(self, sdata, slice_obj: slice) -> NDFrame: raise AbstractMethodError(self) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3a468758ab3fd..a5c0a5c6694e5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4135,13 +4135,22 @@ def _join_level( """ from pandas.core.indexes.multi import MultiIndex - def _get_leaf_sorter(labels): + def _get_leaf_sorter(labels: List[np.ndarray]) -> np.ndarray: """ Returns sorter for the inner most level while preserving the order of higher levels. + + Parameters + ---------- + labels : list[np.ndarray] + Each ndarray has signed integer dtype, not necessarily identical. 
+ + Returns + ------- + np.ndarray[np.intp] """ if labels[0].size == 0: - return np.empty(0, dtype="int64") + return np.empty(0, dtype=np.intp) if len(labels) == 1: return get_group_index_sorter(labels[0]) @@ -4154,7 +4163,7 @@ def _get_leaf_sorter(labels): starts = np.hstack(([True], tic, [True])).nonzero()[0] lab = ensure_int64(labels[-1]) - return lib.get_level_sorter(lab, ensure_int64(starts)) + return lib.get_level_sorter(lab, ensure_platform_int(starts)) if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): raise TypeError("Join on level between two MultiIndex objects is ambiguous") @@ -4189,12 +4198,12 @@ def _get_leaf_sorter(labels): join_index = left[left_indexer] else: - left_lev_indexer = ensure_int64(left_lev_indexer) + left_lev_indexer = ensure_platform_int(left_lev_indexer) rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) old_codes = left.codes[level] - new_lev_codes = algos.take_nd( - rev_indexer, old_codes[old_codes != -1], allow_fill=False - ) + + taker = old_codes[old_codes != -1] + new_lev_codes = rev_indexer.take(taker) new_codes = list(left.codes) new_codes[level] = new_lev_codes @@ -4204,6 +4213,7 @@ def _get_leaf_sorter(labels): if keep_order: # just drop missing values. o.w. keep order left_indexer = np.arange(len(left), dtype=np.intp) + left_indexer = cast(np.ndarray, left_indexer) mask = new_lev_codes != -1 if not mask.all(): new_codes = [lab[mask] for lab in new_codes] @@ -4213,11 +4223,12 @@ def _get_leaf_sorter(labels): if level == 0: # outer most level, take the fast route ngroups = 1 + new_lev_codes.max() left_indexer, counts = libalgos.groupsort_indexer( - new_lev_codes, ngroups + ensure_int64(new_lev_codes), ngroups ) # missing values are placed first; drop them! - left_indexer = left_indexer[counts[0] :] + # error: Value of type "Optional[ndarray]" is not indexable + left_indexer = left_indexer[counts[0] :] # type: ignore[index] new_codes = [lab[left_indexer] for lab in new_codes] else: # sort the leaves diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 97492f35232e3..31aa5e301d17c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1952,7 +1952,7 @@ def _sort_levels_monotonic(self) -> MultiIndex: lev = lev.take(indexer) # indexer to reorder the level codes - indexer = ensure_int64(indexer) + indexer = ensure_platform_int(indexer) ri = lib.get_reverse_indexer(indexer, len(indexer)) level_codes = algos.take_nd(ri, level_codes) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 6a0286b1c40ef..613669b8cc1d8 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -145,7 +145,7 @@ def _indexer_and_to_sort(self): ngroups = len(obs_ids) indexer = get_group_index_sorter(comp_index, ngroups) - + indexer = ensure_platform_int(indexer) return indexer, to_sort @cache_readonly diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 720643d3d98aa..10c13327c79d3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -582,6 +582,16 @@ def get_group_index_sorter( Both algorithms are `stable` sort and that is necessary for correctness of groupby operations. e.g. 
consider: df.groupby(key)[col].transform('first') + + Parameters + ---------- + group_index : np.ndarray + signed integer dtype + ngroups : int or None, default None + + Returns + ------- + np.ndarray[np.intp] """ if ngroups is None: # error: Incompatible types in assignment (expression has type "number[Any]", @@ -596,9 +606,9 @@ def get_group_index_sorter( ) if do_groupsort: sorter, _ = algos.groupsort_indexer(ensure_int64(group_index), ngroups) - return ensure_platform_int(sorter) else: - return group_index.argsort(kind="mergesort") + sorter = group_index.argsort(kind="mergesort") + return ensure_platform_int(sorter) def compress_group_index(group_index, sort: bool = True): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index c63d5271f1fae..ef1c3ec0c2860 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -259,7 +259,7 @@ def _check(blk): def test_mgr_locs(self): assert isinstance(self.fblock.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal( - self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64) + self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.intp) ) def test_attrs(self): @@ -277,7 +277,7 @@ def test_delete(self): newb.delete(0) assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal( - newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64) + newb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp) ) assert (newb.values[0] == 1).all() @@ -285,14 +285,14 @@ def test_delete(self): newb.delete(1) assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal( - newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64) + newb.mgr_locs.as_array, np.array([0, 4], dtype=np.intp) ) assert (newb.values[1] == 2).all() newb = self.fblock.copy() newb.delete(2) tm.assert_numpy_array_equal( - newb.mgr_locs.as_array, np.array([0, 2], dtype=np.int64) + newb.mgr_locs.as_array, np.array([0, 2], dtype=np.intp) ) assert (newb.values[1] == 1).all() @@ -665,7 +665,7 @@ def test_consolidate_ordering_issues(self, mgr): assert cons.nblocks == 1 assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) tm.assert_numpy_array_equal( - cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64) + cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.intp) ) def test_reindex_items(self): @@ -1095,7 +1095,7 @@ def test_slice_iter(self, slc, expected): ) def test_slice_to_array_conversion(self, slc, arr): tm.assert_numpy_array_equal( - BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.int64) + BlockPlacement(slc).as_array, np.asarray(arr, dtype=np.intp) ) def test_blockplacement_add(self): diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index 60c42878497c2..0532de9998c5f 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -197,9 +197,9 @@ def test_maybe_booleans_to_slice(self): assert result == slice(0, 0) def test_get_reverse_indexer(self): - indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) + indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.intp) result = lib.get_reverse_indexer(indexer, 5) - expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) + expected = np.array([4, 2, 3, 6, 7], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) From 7e71c3bac3b6dc694642ffdf650c1d4ae605c080 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 19 Mar 2021 18:10:02 -0700 Subject: [PATCH 07/14] CLN: factorize returns ndarray[intp], not 
int64 (#40474) --- pandas/_libs/hashtable.pyx | 26 +++++++++++----- pandas/_libs/hashtable_class_helper.pxi.in | 36 +++++++++++----------- pandas/_libs/join.pyx | 20 ++++++------ pandas/core/reshape/merge.py | 15 ++++++--- pandas/tests/libs/test_join.py | 16 +++++----- 5 files changed, 65 insertions(+), 48 deletions(-) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 1bbffaa7bb5d2..e402a4b7c0ccc 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -66,13 +66,18 @@ cdef class Factorizer: self.uniques = ObjectVector() self.count = 0 - def get_count(self): + def get_count(self) -> int: return self.count def factorize( self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None - ): + ) -> np.ndarray: """ + + Returns + ------- + np.ndarray[np.intp] + Examples -------- Factorize values with nans replaced by na_sentinel @@ -80,6 +85,9 @@ cdef class Factorizer: >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) array([ 0, 1, 20]) """ + cdef: + ndarray[intp_t] labels + if self.uniques.external_view_exists: uniques = ObjectVector() uniques.extend(self.uniques.to_array()) @@ -89,8 +97,6 @@ cdef class Factorizer: mask = (labels == na_sentinel) # sort on if sort: - if labels.dtype != np.intp: - labels = labels.astype(np.intp) sorter = self.uniques.to_array().argsort() reverse_indexer = np.empty(len(sorter), dtype=np.intp) reverse_indexer.put(sorter, np.arange(len(sorter))) @@ -119,8 +125,12 @@ cdef class Int64Factorizer: return self.count def factorize(self, const int64_t[:] values, sort=False, - na_sentinel=-1, na_value=None): + na_sentinel=-1, na_value=None) -> np.ndarray: """ + Returns + ------- + ndarray[intp_t] + Examples -------- Factorize values with nans replaced by na_sentinel @@ -128,6 +138,9 @@ cdef class Int64Factorizer: >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) array([ 0, 1, 20]) """ + cdef: + ndarray[intp_t] labels + if self.uniques.external_view_exists: uniques = Int64Vector() uniques.extend(self.uniques.to_array()) @@ -138,9 +151,6 @@ cdef class Int64Factorizer: # sort on if sort: - if labels.dtype != np.intp: - labels = labels.astype(np.intp) - sorter = self.uniques.to_array().argsort() reverse_indexer = np.empty(len(sorter), dtype=np.intp) reverse_indexer.put(sorter, np.arange(len(sorter))) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 0b6bb170cc531..6ace327ca3599 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -539,12 +539,12 @@ cdef class {{name}}HashTable(HashTable): ------- uniques : ndarray[{{dtype}}] Unique values of input, not sorted - labels : ndarray[int64] (if return_inverse=True) + labels : ndarray[intp_t] (if return_inverse=True) The labels from values to uniques """ cdef: Py_ssize_t i, idx, count = count_prior, n = len(values) - int64_t[:] labels + intp_t[:] labels int ret = 0 {{c_type}} val, na_value2 khiter_t k @@ -553,7 +553,7 @@ cdef class {{name}}HashTable(HashTable): uint8_t[:] mask_values if return_inverse: - labels = np.empty(n, dtype=np.int64) + labels = np.empty(n, dtype=np.intp) ud = uniques.data use_na_value = na_value is not None use_mask = mask is not None @@ -614,7 +614,7 @@ cdef class {{name}}HashTable(HashTable): labels[i] = idx if return_inverse: - return uniques.to_array(), np.asarray(labels) + return uniques.to_array(), labels.base # .base -> underlying ndarray return uniques.to_array() def unique(self, const {{dtype}}_t[:] values, bint 
return_inverse=False): @@ -633,7 +633,7 @@ cdef class {{name}}HashTable(HashTable): ------- uniques : ndarray[{{dtype}}] Unique values of input, not sorted - labels : ndarray[int64] (if return_inverse) + labels : ndarray[intp_t] (if return_inverse) The labels from values to uniques """ uniques = {{name}}Vector() @@ -668,7 +668,7 @@ cdef class {{name}}HashTable(HashTable): ------- uniques : ndarray[{{dtype}}] Unique values of input, not sorted - labels : ndarray[int64] + labels : ndarray[intp_t] The labels from values to uniques """ uniques_vector = {{name}}Vector() @@ -918,12 +918,12 @@ cdef class StringHashTable(HashTable): ------- uniques : ndarray[object] Unique values of input, not sorted - labels : ndarray[int64] (if return_inverse=True) + labels : ndarray[intp_t] (if return_inverse=True) The labels from values to uniques """ cdef: Py_ssize_t i, idx, count = count_prior, n = len(values) - int64_t[:] labels + intp_t[:] labels int64_t[:] uindexer int ret = 0 object val @@ -933,7 +933,7 @@ cdef class StringHashTable(HashTable): bint use_na_value if return_inverse: - labels = np.zeros(n, dtype=np.int64) + labels = np.zeros(n, dtype=np.intp) uindexer = np.empty(n, dtype=np.int64) use_na_value = na_value is not None @@ -972,13 +972,13 @@ cdef class StringHashTable(HashTable): uindexer[count] = i if return_inverse: self.table.vals[k] = count - labels[i] = count + labels[i] = count count += 1 elif return_inverse: # k falls into a previous bucket # only relevant in case we need to construct the inverse idx = self.table.vals[k] - labels[i] = idx + labels[i] = idx free(vecs) @@ -987,7 +987,7 @@ cdef class StringHashTable(HashTable): uniques.append(values[uindexer[i]]) if return_inverse: - return uniques.to_array(), np.asarray(labels) + return uniques.to_array(), labels.base # .base -> underlying ndarray return uniques.to_array() def unique(self, ndarray[object] values, bint return_inverse=False): @@ -1193,19 +1193,19 @@ cdef class PyObjectHashTable(HashTable): ------- uniques : ndarray[object] Unique values of input, not sorted - labels : ndarray[int64] (if return_inverse=True) + labels : ndarray[intp_t] (if return_inverse=True) The labels from values to uniques """ cdef: Py_ssize_t i, idx, count = count_prior, n = len(values) - int64_t[:] labels + intp_t[:] labels int ret = 0 object val khiter_t k bint use_na_value if return_inverse: - labels = np.empty(n, dtype=np.int64) + labels = np.empty(n, dtype=np.intp) use_na_value = na_value is not None for i in range(n): @@ -1240,7 +1240,7 @@ cdef class PyObjectHashTable(HashTable): labels[i] = idx if return_inverse: - return uniques.to_array(), np.asarray(labels) + return uniques.to_array(), labels.base # .base -> underlying ndarray return uniques.to_array() def unique(self, ndarray[object] values, bint return_inverse=False): @@ -1259,7 +1259,7 @@ cdef class PyObjectHashTable(HashTable): ------- uniques : ndarray[object] Unique values of input, not sorted - labels : ndarray[int64] (if return_inverse) + labels : ndarray[intp_t] (if return_inverse) The labels from values to uniques """ uniques = ObjectVector() @@ -1292,7 +1292,7 @@ cdef class PyObjectHashTable(HashTable): ------- uniques : ndarray[object] Unique values of input, not sorted - labels : ndarray[int64] + labels : ndarray[intp_t] The labels from values to uniques """ uniques_vector = ObjectVector() diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 1b79d68c13570..511b373bc7e1f 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -10,6 +10,7 @@ from numpy cimport ( 
int16_t, int32_t, int64_t, + intp_t, ndarray, uint8_t, uint16_t, @@ -20,6 +21,7 @@ from numpy cimport ( cnp.import_array() from pandas._libs.algos import ( + ensure_int64, ensure_platform_int, groupsort_indexer, take_1d_int64_int64, @@ -27,7 +29,7 @@ from pandas._libs.algos import ( @cython.boundscheck(False) -def inner_join(const int64_t[:] left, const int64_t[:] right, +def inner_join(const intp_t[:] left, const intp_t[:] right, Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 @@ -39,8 +41,8 @@ def inner_join(const int64_t[:] left, const int64_t[:] right, # NA group in location 0 - left_sorter, left_count = groupsort_indexer(left, max_groups) - right_sorter, right_count = groupsort_indexer(right, max_groups) + left_sorter, left_count = groupsort_indexer(ensure_int64(left), max_groups) + right_sorter, right_count = groupsort_indexer(ensure_int64(right), max_groups) with nogil: # First pass, determine size of result set, do not use the NA group @@ -78,7 +80,7 @@ def inner_join(const int64_t[:] left, const int64_t[:] right, @cython.boundscheck(False) -def left_outer_join(const int64_t[:] left, const int64_t[:] right, +def left_outer_join(const intp_t[:] left, const intp_t[:] right, Py_ssize_t max_groups, bint sort=True): cdef: Py_ssize_t i, j, k, count = 0 @@ -91,8 +93,8 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right, # NA group in location 0 - left_sorter, left_count = groupsort_indexer(left, max_groups) - right_sorter, right_count = groupsort_indexer(right, max_groups) + left_sorter, left_count = groupsort_indexer(ensure_int64(left), max_groups) + right_sorter, right_count = groupsort_indexer(ensure_int64(right), max_groups) with nogil: # First pass, determine size of result set, do not use the NA group @@ -151,7 +153,7 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right, @cython.boundscheck(False) -def full_outer_join(const int64_t[:] left, const int64_t[:] right, +def full_outer_join(const intp_t[:] left, const intp_t[:] right, Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 @@ -163,8 +165,8 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right, # NA group in location 0 - left_sorter, left_count = groupsort_indexer(left, max_groups) - right_sorter, right_count = groupsort_indexer(right, max_groups) + left_sorter, left_count = groupsort_indexer(ensure_int64(left), max_groups) + right_sorter, right_count = groupsort_indexer(ensure_int64(right), max_groups) with nogil: # First pass, determine size of result set, do not use the NA group diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c01bf3931b27a..3c1279d62b126 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1973,7 +1973,7 @@ def _get_single_indexer(join_key, index, sort: bool = False): left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) left_indexer, right_indexer = libjoin.left_outer_join( - ensure_int64(left_key), ensure_int64(right_key), count, sort=sort + left_key, right_key, count, sort=sort ) return left_indexer, right_indexer @@ -2029,9 +2029,9 @@ def _factorize_keys( Returns ------- - array + np.ndarray[np.intp] Left (resp. right if called with `key='right'`) labels, as enumerated type. - array + np.ndarray[np.intp] Right (resp. left if called with `key='right'`) labels, as enumerated type. int Number of unique elements in union of left and right labels. 
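For context on PATCH 07: the labels produced by _factorize_keys are consumed directly as positional indexers by the cython join routines, and numpy defines positional indexing in terms of np.intp, which is why the factorizers now return that dtype and the assertions below guard the invariant. A rough illustration of the invariant itself, using plain numpy rather than the internal hashtable factorizers:

    import numpy as np

    values = np.array(["a", "b", "a", "c"], dtype=object)
    uniques, labels = np.unique(values, return_inverse=True)

    # np.unique already returns intp inverse labels; indexing `uniques`
    # with them round-trips the original values, which is how the join
    # code consumes factorized keys.
    assert labels.dtype == np.intp
    assert (uniques[labels] == values).all()
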
@@ -2117,6 +2117,8 @@ def _factorize_keys( llab = rizer.factorize(lk) rlab = rizer.factorize(rk) + assert llab.dtype == np.intp, llab.dtype + assert rlab.dtype == np.intp, rlab.dtype count = rizer.get_count() @@ -2142,13 +2144,16 @@ def _factorize_keys( return llab, rlab, count -def _sort_labels(uniques: np.ndarray, left, right): +def _sort_labels( + uniques: np.ndarray, left: np.ndarray, right: np.ndarray +) -> tuple[np.ndarray, np.ndarray]: + # Both returned ndarrays are np.intp llength = len(left) labels = np.concatenate([left, right]) _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) - new_labels = ensure_int64(new_labels) + assert new_labels.dtype == np.intp new_left, new_right = new_labels[:llength], new_labels[llength:] return new_left, new_right diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py index 0bdb7b0e71e2d..f5426c71511bb 100644 --- a/pandas/tests/libs/test_join.py +++ b/pandas/tests/libs/test_join.py @@ -46,8 +46,8 @@ def test_outer_join_indexer(self, dtype): tm.assert_numpy_array_equal(rindexer, exp) def test_cython_left_outer_join(self): - left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) - right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) + left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp) + right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp) max_group = 5 ls, rs = left_outer_join(left, right, max_group) @@ -70,8 +70,8 @@ def test_cython_left_outer_join(self): tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_cython_right_outer_join(self): - left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) - right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) + left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp) + right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.intp) max_group = 5 rs, ls = left_outer_join(right, left, max_group) @@ -116,8 +116,8 @@ def test_cython_right_outer_join(self): tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_cython_inner_join(self): - left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) - right = np.array([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64) + left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.intp) + right = np.array([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.intp) max_group = 5 ls, rs = inner_join(left, right, max_group) @@ -256,10 +256,10 @@ def test_left_outer_join_bug(): 0, 2, ], - dtype=np.int64, + dtype=np.intp, ) - right = np.array([3, 1], dtype=np.int64) + right = np.array([3, 1], dtype=np.intp) max_groups = 4 lidx, ridx = libjoin.left_outer_join(left, right, max_groups, sort=False) From bd8c79fa04ea76cb28b11052fb97ff00775f8f6d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 20 Mar 2021 02:14:13 +0100 Subject: [PATCH 08/14] PERF: increase the minimum number of elements to use numexpr for ops from 1e4 to 1e6 (#40502) --- pandas/core/computation/expressions.py | 2 +- pandas/tests/test_expressions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 0dbe5e8d83741..4f14ea73d5a88 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -40,7 +40,7 @@ } # the minimum prod shape that we will use numexpr -_MIN_ELEMENTS = 10000 +_MIN_ELEMENTS = 1_000_000 def set_use_numexpr(v=True): diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 30f88ba5e76f6..96347ba5a733f 100644 --- 
a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -12,7 +12,7 @@ ) from pandas.core.computation import expressions as expr -_frame = DataFrame(np.random.randn(10000, 4), columns=list("ABCD"), dtype="float64") +_frame = DataFrame(np.random.randn(1000000, 4), columns=list("ABCD"), dtype="float64") _frame2 = DataFrame(np.random.randn(100, 4), columns=list("ABCD"), dtype="float64") _mixed = DataFrame( { From 3e6bc0c8ed88a5c5d238453e4548dc4f377b3fb7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 20 Mar 2021 02:15:23 +0100 Subject: [PATCH 09/14] [ArrayManager] TST: run all tests with separate not slow / slow build (#40495) --- .github/workflows/ci.yml | 45 ++++--------------- pandas/tests/reshape/concat/__init__.py | 4 ++ pandas/tests/reshape/test_crosstab.py | 3 ++ pandas/tests/reshape/test_pivot.py | 3 ++ pandas/tests/reshape/test_pivot_multilevel.py | 3 ++ 5 files changed, 21 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 014c666a17084..d6744f578560c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -141,6 +141,9 @@ jobs: data_manager: name: Test experimental data manager runs-on: ubuntu-latest + strategy: + matrix: + pattern: ["not slow and not network and not clipboard", "slow"] steps: - name: Checkout @@ -152,43 +155,11 @@ jobs: - name: Run tests env: PANDAS_DATA_MANAGER: array + PATTERN: ${{ matrix.pattern }} + PYTEST_WORKERS: "auto" run: | source activate pandas-dev + ci/run_tests.sh - pytest pandas/tests/frame/ - pytest pandas/tests/reductions/ - pytest pandas/tests/generic/test_generic.py - pytest pandas/tests/arithmetic/ - pytest pandas/tests/groupby/ - pytest pandas/tests/resample/ - pytest pandas/tests/reshape/merge - pytest pandas/tests/series/ - pytest pandas/tests/indexing/ - - pytest pandas/tests/test_* - pytest pandas/tests/api/ - pytest pandas/tests/apply/ - pytest pandas/tests/arrays/ - pytest pandas/tests/base/ - pytest pandas/tests/computation/ - pytest pandas/tests/config/ - pytest pandas/tests/dtypes/ - pytest pandas/tests/extension/ - pytest pandas/tests/generic/ - pytest pandas/tests/indexes/ - pytest pandas/tests/internals/ - pytest pandas/tests/io/test_* -m "not slow and not clipboard" - pytest pandas/tests/io/excel/ -m "not slow and not clipboard" - pytest pandas/tests/io/formats/ -m "not slow and not clipboard" - pytest pandas/tests/io/parser/ -m "not slow and not clipboard" - pytest pandas/tests/io/sas/ -m "not slow and not clipboard" - pytest pandas/tests/io/xml/ -m "not slow and not clipboard" - pytest pandas/tests/libs/ - pytest pandas/tests/plotting/ - pytest pandas/tests/scalar/ - pytest pandas/tests/strings/ - pytest pandas/tests/tools/ - pytest pandas/tests/tseries/ - pytest pandas/tests/tslibs/ - pytest pandas/tests/util/ - pytest pandas/tests/window/ + - name: Print skipped tests + run: python ci/print_skipped.py diff --git a/pandas/tests/reshape/concat/__init__.py b/pandas/tests/reshape/concat/__init__.py index e69de29bb2d1d..777923be02398 100644 --- a/pandas/tests/reshape/concat/__init__.py +++ b/pandas/tests/reshape/concat/__init__.py @@ -0,0 +1,4 @@ +import pandas.util._test_decorators as td + +# TODO(ArrayManager) concat axis=0 +pytestmark = td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index e467dbb7d49b6..5cc65feee869b 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -1,6 +1,8 @@ import numpy as 
np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import is_categorical_dtype from pandas import ( @@ -438,6 +440,7 @@ def test_crosstab_normalize_arrays(self): ) tm.assert_frame_equal(test_case, norm_sum) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat axis=0 def test_crosstab_with_empties(self): # Check handling of empties df = DataFrame( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index c50886ba43019..8d8a83c233444 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -8,6 +8,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Categorical, @@ -1197,6 +1199,7 @@ def test_pivot_table_with_margins_set_margin_name(self, margin_name): margins_name=margin_name, ) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat axis=0 def test_pivot_timegrouper(self): df = DataFrame( { diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index df2ae0d52c660..ab41a94d1ff25 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import ( Index, @@ -196,6 +198,7 @@ def test_pivot_list_like_columns( tm.assert_frame_equal(result, expected) +@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) concat axis=0 def test_pivot_multiindexed_rows_and_cols(): # GH 36360 From fd9354a715c6bb4f5bcd689c6312188eeea5561e Mon Sep 17 00:00:00 2001 From: attack68 <24256554+attack68@users.noreply.github.com> Date: Sat, 20 Mar 2021 02:17:05 +0100 Subject: [PATCH 10/14] DOC: Styler docs - split PR from #39720 (#40493) --- pandas/io/formats/style.py | 148 +++++++++++++++++++++++++------------ 1 file changed, 99 insertions(+), 49 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3abb39d2194c0..5ec2141028fa4 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -217,8 +217,10 @@ def set_tooltips( css_class: Optional[str] = None, ) -> Styler: """ - Add string based tooltips that will appear in the `Styler` HTML result. These - tooltips are applicable only to`` elements. + Set the DataFrame of strings on ``Styler`` generating ``:hover`` tooltips. + + These string based tooltips are only applicable to ```` HTML elements, + and cannot be used for column or index headers. .. versionadded:: 1.3.0 @@ -227,7 +229,7 @@ def set_tooltips( ttips : DataFrame DataFrame containing strings that will be translated to tooltips, mapped by identical column and index values that must exist on the underlying - `Styler` data. None, NaN values, and empty strings will be ignored and + Styler data. None, NaN values, and empty strings will be ignored and not affect the rendered HTML. props : list-like or str, optional List of (attr, value) tuples or a valid CSS string. If ``None`` adopts @@ -671,21 +673,33 @@ def format( def set_td_classes(self, classes: DataFrame) -> Styler: """ - Add string based CSS class names to data cells that will appear within the - `Styler` HTML result. These classes are added within specified `` elements. + Set the DataFrame of strings added to the ``class`` attribute of ```` + HTML elements. 
Parameters
----------
classes : DataFrame
    DataFrame containing strings that will be translated to CSS classes,
    mapped by identical column and index key values that must exist on the
    underlying Styler data. None, NaN values, and empty strings will
    be ignored and not affect the rendered HTML.

Returns
-------
self : Styler

See Also
--------
Styler.set_table_styles: Set the table styles included within the ``<style>``
    HTML element.

Examples
--------
[The doctest example for ``set_td_classes`` and its rendered-HTML output
were garbled in extraction; only stray table-cell fragments ("0", "1", "1")
survive, so the example is omitted here.]
' """ @@ -736,7 +750,7 @@ def set_td_classes(self, classes: DataFrame) -> Styler: def render(self, **kwargs) -> str: """ - Render the built up styles to HTML. + Render the ``Styler`` including all applied styles to HTML. Parameters ---------- @@ -753,7 +767,7 @@ def render(self, **kwargs) -> str: Notes ----- - ``Styler`` objects have defined the ``_repr_html_`` method + Styler objects have defined the ``_repr_html_`` method which automatically calls ``self.render()`` when it's the last item in a Notebook cell. When calling ``Styler.render()`` directly, wrap the result in ``IPython.display.HTML`` to view @@ -779,7 +793,7 @@ def render(self, **kwargs) -> str: def _update_ctx(self, attrs: DataFrame) -> None: """ - Update the state of the Styler for data cells. + Update the state of the ``Styler`` for data cells. Collects a mapping of {index_label: [('', ''), ..]}. @@ -839,7 +853,7 @@ def __deepcopy__(self, memo) -> Styler: def clear(self) -> None: """ - Reset the styler, removing any previously applied styles. + Reset the ``Styler``, removing any previously applied styles. Returns None. """ @@ -923,10 +937,11 @@ def apply( Parameters ---------- func : function - ``func`` should take a Series or DataFrame (depending - on ``axis``), and return an object with the same shape. - Must return a DataFrame with identical index and - column labels or an ndarray with same shape as input when ``axis=None``. + ``func`` should take a Series if ``axis`` in [0,1] and return an object + of same length, also with identical index if the object is a Series. + ``func`` should take a DataFrame if ``axis`` is ``None`` and return either + an ndarray with the same shape or a DataFrame with identical columns and + index. .. versionchanged:: 1.3.0 @@ -944,13 +959,16 @@ def apply( ------- self : Styler + See Also + -------- + Styler.where: Apply CSS-styles based on a conditional function elementwise. + Styler.applymap: Apply a CSS-styling function elementwise. + Notes ----- - The output of ``func`` should be elements having CSS style as string or, + The elements of the output of ``func`` should be CSS styles as strings, in the + format 'attribute: value; attribute2: value2; ...' or, if nothing is to be applied to that element, an empty string or ``None``. - The output shape must match the input, i.e. if - ``x`` is the input row, column, or table (depending on ``axis``), - then ``func(x).shape == x.shape`` should be ``True``. This is similar to ``DataFrame.apply``, except that ``axis=None`` applies the function to the entire DataFrame at once, @@ -1001,13 +1019,14 @@ def applymap(self, func: Callable, subset=None, **kwargs) -> Styler: See Also -------- - Styler.where: Updates the HTML representation with a style which is - selected in accordance with the return value of a function. + Styler.where: Apply CSS-styles based on a conditional function elementwise. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. Notes ----- - The output of ``func`` should be a CSS style as string or, if nothing is to be - applied, an empty string or ``None``. + The elements of the output of ``func`` should be CSS styles as strings, in the + format 'attribute: value; attribute2: value2; ...' or, + if nothing is to be applied to that element, an empty string or ``None``. Examples -------- @@ -1030,7 +1049,7 @@ def where( **kwargs, ) -> Styler: """ - Apply a function elementwise. + Apply CSS-styles based on a conditional function elementwise. 
Updates the HTML representation with a style which is selected in accordance with the return value of a function. @@ -1055,7 +1074,15 @@ def where( See Also -------- - Styler.applymap: Updates the HTML representation with the result. + Styler.applymap: Apply a CSS-styling function elementwise. + Styler.apply: Apply a CSS-styling function column-wise, row-wise, or table-wise. + + Examples + -------- + >>> def cond(v): + ... return v > 1 and v != 4 + >>> df = pd.DataFrame([[1, 2], [3, 4]]) + >>> df.style.where(cond, value='color:red;', other='font-size:2em;') """ if other is None: other = "" @@ -1092,10 +1119,9 @@ def set_precision(self, precision: int) -> Styler: def set_table_attributes(self, attributes: str) -> Styler: """ - Set the table attributes. + Set the table attributes added to the ```` HTML element. - These are the items that show up in the opening ``
`` tag - in addition to automatic (by default) id. + These are items in addition to automatic (by default) ``id`` attribute. Parameters ---------- @@ -1105,6 +1131,13 @@ def set_table_attributes(self, attributes: str) -> Styler: ------- self : Styler + See Also + -------- + Styler.set_table_styles: Set the table styles included within the ``