From 9ab6154a38d6f92e1a7769450bacfa917baa54c4 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 17 Mar 2021 16:01:45 -0700
Subject: [PATCH 1/3] Annotate, sqlalchemy compat, combine shape properties

---
 pandas/core/arrays/_mixins.py | 10 +++++++---
 pandas/core/indexes/base.py   |  7 +++----
 pandas/core/indexes/multi.py  |  9 ---------
 pandas/tests/io/test_sql.py   | 10 ++++++++--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py
index 848e467afb7b6..678e532f05772 100644
--- a/pandas/core/arrays/_mixins.py
+++ b/pandas/core/arrays/_mixins.py
@@ -8,12 +8,16 @@
     Type,
     TypeVar,
     Union,
+    cast,
 )
 
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import Shape
+from pandas._typing import (
+    F,
+    Shape,
+)
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import (
@@ -41,7 +45,7 @@
 )
 
 
-def ravel_compat(meth):
+def ravel_compat(meth: F) -> F:
     """
     Decorator to ravel a 2D array before passing it to a cython operation,
     then reshape the result to our own shape.
@@ -58,7 +62,7 @@ def method(self, *args, **kwargs):
         order = "F" if flags.f_contiguous else "C"
         return result.reshape(self.shape, order=order)
 
-    return method
+    return cast(F, method)
 
 
 class NDArrayBackedExtensionArray(ExtensionArray):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 3a468758ab3fd..0590b2d585401 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6130,15 +6130,14 @@ def _maybe_disable_logical_methods(self, opname: str_t):
             # This call will raise
             make_invalid_op(opname)(self)
 
+    @final
     @property
     def shape(self) -> Shape:
         """
         Return a tuple of the shape of the underlying data.
         """
-        # not using "(len(self), )" to return "correct" shape if the values
-        # consists of a >1 D array (see GH-27775)
-        # overridden in MultiIndex.shape to avoid materializing the values
-        return self._values.shape
+        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
+        return (len(self),)
 
 
 def ensure_index_from_sequences(sequences, names=None):
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 97492f35232e3..587e2ab232efb 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -752,15 +752,6 @@ def dtypes(self) -> Series:
             }
         )
 
-    @property
-    def shape(self) -> Shape:
-        """
-        Return a tuple of the shape of the underlying data.
-        """
-        # overriding the base Index.shape definition to avoid materializing
-        # the values (GH-27384, GH-27775)
-        return (len(self),)
-
     def __len__(self) -> int:
         return len(self.codes[0])
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 7d923e57834ea..a22f0cd8dff83 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1991,8 +1991,14 @@ def bar(connection, data):
         def main(connectable):
             with connectable.connect() as conn:
                 with conn.begin():
-                    foo_data = conn.run_callable(foo)
-                    conn.run_callable(bar, foo_data)
+                    if _gt14():
+                        # https://github.com/sqlalchemy/sqlalchemy/commit/
+                        #  00b5c10846e800304caa86549ab9da373b42fa5d#r48323973
+                        foo_data = foo(conn)
+                        bar(conn, foo_data)
+                    else:
+                        foo_data = conn.run_callable(foo)
+                        conn.run_callable(bar, foo_data)
 
         DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn)
         main(self.conn)

From 53738b99dca945917db0defa6a6e6f236cda6884 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 17 Mar 2021 17:38:44 -0700
Subject: [PATCH 2/3] annotations, docstrings

---
 pandas/_libs/intervaltree.pxi.in   | 2 ++
 pandas/_libs/lib.pyx               | 5 +++--
 pandas/_libs/ops.pyx               | 8 ++++----
 pandas/_libs/reshape.pyx           | 7 +++++--
 pandas/_libs/tslibs/nattype.pyx    | 2 +-
 pandas/_libs/tslibs/vectorized.pyx | 8 ++++----
 pandas/_libs/writers.pyx           | 2 +-
 pandas/core/indexes/range.py       | 6 +++---
 pandas/core/util/hashing.py        | 9 +++++----
 pandas/io/json/_normalize.py       | 2 +-
 10 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in
index a8728050f8071..1af5b23e3393f 100644
--- a/pandas/_libs/intervaltree.pxi.in
+++ b/pandas/_libs/intervaltree.pxi.in
@@ -238,6 +238,8 @@ NODE_CLASSES = {}
 {{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right,
       cmp_left_converse, cmp_right_converse, fused_prefix in nodes}}
 
+
+@cython.internal
 cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
     """Non-terminal node for an IntervalTree
 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 1ff481553e413..87d155953f514 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -1120,6 +1120,7 @@ except AttributeError:
     pass
 
 
+@cython.internal
 cdef class Seen:
     """
     Class for keeping track of the types of elements
@@ -2580,7 +2581,7 @@ def tuples_to_object_array(ndarray[object] tuples):
     return result
 
 
-def to_object_array_tuples(rows: object):
+def to_object_array_tuples(rows: object) -> np.ndarray:
     """
     Convert a list of tuples into an object array. Any subclass of
     tuple in `rows` will be casted to tuple.
@@ -2592,7 +2593,7 @@ def to_object_array_tuples(rows: object):
 
     Returns
     -------
-    numpy array of the object dtype.
+    np.ndarray[object, ndim=2]
     """
     cdef:
         Py_ssize_t i, j, n, k, tmp
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
index 1e51a578c44ea..ecb7041fb2c5a 100644
--- a/pandas/_libs/ops.pyx
+++ b/pandas/_libs/ops.pyx
@@ -32,7 +32,7 @@ from pandas._libs.util cimport (
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_compare(object[:] values, object val, object op):
+def scalar_compare(object[:] values, object val, object op) -> ndarray:
     """
     Compare each element of `values` array with the scalar `val`, with
     the comparison operation described by `op`.
@@ -114,7 +114,7 @@ def scalar_compare(object[:] values, object val, object op):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_compare(ndarray[object] left, ndarray[object] right, object op):
+def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
     """
     Compare the elements of `left` with the elements of `right` pointwise,
     with the comparison operation described by `op`.
@@ -180,7 +180,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def scalar_binop(object[:] values, object val, object op):
+def scalar_binop(object[:] values, object val, object op) -> ndarray:
     """
     Apply the given binary operator `op` between each element of the array
     `values` and the scalar `val`.
@@ -217,7 +217,7 @@ def scalar_binop(object[:] values, object val, object op):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_binop(object[:] left, object[:] right, object op):
+def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
     """
     Apply the given binary operator `op` pointwise to the elements of
     arrays `left` and `right`.
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 75dbb4b74aabd..05b255c40f4b2 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -41,7 +41,7 @@ ctypedef fused reshape_t:
 @cython.boundscheck(False)
 def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
             Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
-            reshape_t[:, :] new_values, uint8_t[:, :] new_mask):
+            reshape_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
     """
     Transform long values to wide new_values.
 
@@ -111,7 +111,10 @@ def explode(ndarray[object] values):
 
     Returns
     -------
-    tuple(values, counts)
+    ndarray[object]
+        result
+    ndarray[int64_t]
+        counts
     """
     cdef:
         Py_ssize_t i, j, count, n
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
index 2879528b2c501..d86d3261d404e 100644
--- a/pandas/_libs/tslibs/nattype.pyx
+++ b/pandas/_libs/tslibs/nattype.pyx
@@ -286,7 +286,7 @@ cdef class _NaT(datetime):
         # This allows Timestamp(ts.isoformat()) to always correctly roundtrip.
         return "NaT"
 
-    def __hash__(self):
+    def __hash__(self) -> int:
         return NPY_NAT
 
     @property
diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx
index 30d9f5e64b282..02bdae3a8dbac 100644
--- a/pandas/_libs/tslibs/vectorized.pyx
+++ b/pandas/_libs/tslibs/vectorized.pyx
@@ -90,7 +90,7 @@ def ints_to_pydatetime(
     object freq=None,
     bint fold=False,
     str box="datetime"
-):
+) -> np.ndarray:
     """
     Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp.
 
@@ -116,7 +116,7 @@ def ints_to_pydatetime(
 
     Returns
     -------
-    ndarray of dtype specified by box
+    ndarray[object] of type specified by box
     """
     cdef:
         Py_ssize_t i, n = len(arr)
@@ -223,7 +223,7 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts):
     return RESO_DAY
 
 
-def get_resolution(const int64_t[:] stamps, tzinfo tz=None):
+def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution:
     cdef:
         Py_ssize_t i, n = len(stamps)
         npy_datetimestruct dts
@@ -332,7 +332,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None):
+def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool:
     """
     Check if all of the given (nanosecond) timestamps are normalized to
     midnight, i.e. hour == minute == second == 0.  If the optional timezone
diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx
index 6adda1fe92044..9fbeb67aa35e9 100644
--- a/pandas/_libs/writers.pyx
+++ b/pandas/_libs/writers.pyx
@@ -77,7 +77,7 @@ def write_csv_rows(
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def convert_json_to_lines(arr: object) -> str:
+def convert_json_to_lines(arr: str) -> str:
     """
     replace comma separated json with line feeds, paying special attention
     to quotes & brackets
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index 456d87766bdb7..f37faa4ab844b 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -450,7 +450,7 @@ def take(
                 **kwargs,
             )
 
-    def tolist(self):
+    def tolist(self) -> list[int]:
         return list(self._range)
 
     @doc(Int64Index.__iter__)
@@ -494,13 +494,13 @@ def _minmax(self, meth: str):
 
         return self.start + self.step * no_steps
 
-    def min(self, axis=None, skipna=True, *args, **kwargs) -> int:
+    def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
         """The minimum value of the RangeIndex"""
         nv.validate_minmax_axis(axis)
         nv.validate_min(args, kwargs)
         return self._minmax("min")
 
-    def max(self, axis=None, skipna=True, *args, **kwargs) -> int:
+    def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
         """The maximum value of the RangeIndex"""
         nv.validate_minmax_axis(axis)
         nv.validate_max(args, kwargs)
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index 87be5c0997072..a175605347af1 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -56,7 +56,7 @@ def combine_hash_arrays(arrays: Iterator[np.ndarray], num_items: int) -> np.ndar
 
     Returns
     -------
-    np.ndarray[int64]
+    np.ndarray[uint64]
 
     Should be the same as CPython's tupleobject.c
     """
@@ -184,7 +184,7 @@ def hash_tuples(
 
     Returns
     -------
-    ndarray of hashed values array
+    ndarray[np.uint64] of hashed values
     """
     if not is_list_like(vals):
         raise TypeError("must be convertible to a list-of-tuples")
@@ -227,7 +227,7 @@ def _hash_categorical(cat: Categorical, encoding: str, hash_key: str) -> np.ndar
 
     Returns
     -------
-    ndarray of hashed values array, same size as len(c)
+    ndarray[np.uint64] of hashed values, same size as len(cat)
     """
     # Convert ExtensionArrays to ndarrays
     values = np.asarray(cat.categories._values)
@@ -274,7 +274,8 @@ def hash_array(
 
     Returns
     -------
-    1d uint64 numpy array of hash values, same length as the vals
+    ndarray[np.uint64, ndim=1]
+        hash values, same length as the vals
     """
     if not hasattr(vals, "dtype"):
         raise TypeError("must pass a ndarray-like")
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 75f133745e3a2..39d12c5b05c2f 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -27,7 +27,7 @@
 from pandas import DataFrame
 
 
-def convert_to_line_delimits(s):
+def convert_to_line_delimits(s: str) -> str:
     """
     Helper function that converts JSON lists to line delimited JSON.
     """

From 30dade01fd8e4fffc391c5414847d216e4918826 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 17 Mar 2021 18:44:03 -0700
Subject: [PATCH 3/3] docstring fixup

---
 pandas/core/util/hashing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index a175605347af1..375901bc3fb58 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -275,7 +275,7 @@ def hash_array(
     Returns
     -------
     ndarray[np.uint64, ndim=1]
-        hash values, same length as the vals
+        Hashed values, same length as the vals.
     """
     if not hasattr(vals, "dtype"):
         raise TypeError("must pass a ndarray-like")