From 9ab6154a38d6f92e1a7769450bacfa917baa54c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Mar 2021 16:01:45 -0700 Subject: [PATCH 1/3] Annotate, sqlalchemy compat, combine shape properties --- pandas/core/arrays/_mixins.py | 10 +++++++--- pandas/core/indexes/base.py | 7 +++---- pandas/core/indexes/multi.py | 9 --------- pandas/tests/io/test_sql.py | 10 ++++++++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 848e467afb7b6..678e532f05772 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -8,12 +8,16 @@ Type, TypeVar, Union, + cast, ) import numpy as np from pandas._libs import lib -from pandas._typing import Shape +from pandas._typing import ( + F, + Shape, +) from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -41,7 +45,7 @@ ) -def ravel_compat(meth): +def ravel_compat(meth: F) -> F: """ Decorator to ravel a 2D array before passing it to a cython operation, then reshape the result to our own shape. @@ -58,7 +62,7 @@ def method(self, *args, **kwargs): order = "F" if flags.f_contiguous else "C" return result.reshape(self.shape, order=order) - return method + return cast(F, method) class NDArrayBackedExtensionArray(ExtensionArray): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3a468758ab3fd..0590b2d585401 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6130,15 +6130,14 @@ def _maybe_disable_logical_methods(self, opname: str_t): # This call will raise make_invalid_op(opname)(self) + @final @property def shape(self) -> Shape: """ Return a tuple of the shape of the underlying data. """ - # not using "(len(self), )" to return "correct" shape if the values - # consists of a >1 D array (see GH-27775) - # overridden in MultiIndex.shape to avoid materializing the values - return self._values.shape + # See GH#27775, GH#27384 for history/reasoning in how this is defined. + return (len(self),) def ensure_index_from_sequences(sequences, names=None): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 97492f35232e3..587e2ab232efb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -752,15 +752,6 @@ def dtypes(self) -> Series: } ) - @property - def shape(self) -> Shape: - """ - Return a tuple of the shape of the underlying data. - """ - # overriding the base Index.shape definition to avoid materializing - # the values (GH-27384, GH-27775) - return (len(self),) - def __len__(self) -> int: return len(self.codes[0]) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 7d923e57834ea..a22f0cd8dff83 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1991,8 +1991,14 @@ def bar(connection, data): def main(connectable): with connectable.connect() as conn: with conn.begin(): - foo_data = conn.run_callable(foo) - conn.run_callable(bar, foo_data) + if _gt14(): + # https://github.com/sqlalchemy/sqlalchemy/commit/ + # 00b5c10846e800304caa86549ab9da373b42fa5d#r48323973 + foo_data = foo(conn) + bar(conn, foo_data) + else: + foo_data = conn.run_callable(foo) + conn.run_callable(bar, foo_data) DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) main(self.conn) From 53738b99dca945917db0defa6a6e6f236cda6884 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Mar 2021 17:38:44 -0700 Subject: [PATCH 2/3] annotations, docstrings --- pandas/_libs/intervaltree.pxi.in | 2 ++ pandas/_libs/lib.pyx | 5 +++-- pandas/_libs/ops.pyx | 8 ++++---- pandas/_libs/reshape.pyx | 7 +++++-- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/_libs/tslibs/vectorized.pyx | 8 ++++---- pandas/_libs/writers.pyx | 2 +- pandas/core/indexes/range.py | 6 +++--- pandas/core/util/hashing.py | 9 +++++---- pandas/io/json/_normalize.py | 2 +- 10 files changed, 29 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index a8728050f8071..1af5b23e3393f 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -238,6 +238,8 @@ NODE_CLASSES = {} {{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right, cmp_left_converse, cmp_right_converse, fused_prefix in nodes}} + +@cython.internal cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode: """Non-terminal node for an IntervalTree diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1ff481553e413..87d155953f514 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1120,6 +1120,7 @@ except AttributeError: pass +@cython.internal cdef class Seen: """ Class for keeping track of the types of elements @@ -2580,7 +2581,7 @@ def tuples_to_object_array(ndarray[object] tuples): return result -def to_object_array_tuples(rows: object): +def to_object_array_tuples(rows: object) -> np.ndarray: """ Convert a list of tuples into an object array. Any subclass of tuple in `rows` will be casted to tuple. @@ -2592,7 +2593,7 @@ def to_object_array_tuples(rows: object): Returns ------- - numpy array of the object dtype. + np.ndarray[object, ndim=2] """ cdef: Py_ssize_t i, j, n, k, tmp diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 1e51a578c44ea..ecb7041fb2c5a 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -32,7 +32,7 @@ from pandas._libs.util cimport ( @cython.wraparound(False) @cython.boundscheck(False) -def scalar_compare(object[:] values, object val, object op): +def scalar_compare(object[:] values, object val, object op) -> ndarray: """ Compare each element of `values` array with the scalar `val`, with the comparison operation described by `op`. @@ -114,7 +114,7 @@ def scalar_compare(object[:] values, object val, object op): @cython.wraparound(False) @cython.boundscheck(False) -def vec_compare(ndarray[object] left, ndarray[object] right, object op): +def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray: """ Compare the elements of `left` with the elements of `right` pointwise, with the comparison operation described by `op`. @@ -180,7 +180,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): @cython.wraparound(False) @cython.boundscheck(False) -def scalar_binop(object[:] values, object val, object op): +def scalar_binop(object[:] values, object val, object op) -> ndarray: """ Apply the given binary operator `op` between each element of the array `values` and the scalar `val`. @@ -217,7 +217,7 @@ def scalar_binop(object[:] values, object val, object op): @cython.wraparound(False) @cython.boundscheck(False) -def vec_binop(object[:] left, object[:] right, object op): +def vec_binop(object[:] left, object[:] right, object op) -> ndarray: """ Apply the given binary operator `op` pointwise to the elements of arrays `left` and `right`. diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index 75dbb4b74aabd..05b255c40f4b2 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -41,7 +41,7 @@ ctypedef fused reshape_t: @cython.boundscheck(False) def unstack(reshape_t[:, :] values, const uint8_t[:] mask, Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width, - reshape_t[:, :] new_values, uint8_t[:, :] new_mask): + reshape_t[:, :] new_values, uint8_t[:, :] new_mask) -> None: """ Transform long values to wide new_values. @@ -111,7 +111,10 @@ def explode(ndarray[object] values): Returns ------- - tuple(values, counts) + ndarray[object] + result + ndarray[int64_t] + counts """ cdef: Py_ssize_t i, j, count, n diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 2879528b2c501..d86d3261d404e 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -286,7 +286,7 @@ cdef class _NaT(datetime): # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. return "NaT" - def __hash__(self): + def __hash__(self) -> int: return NPY_NAT @property diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 30d9f5e64b282..02bdae3a8dbac 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -90,7 +90,7 @@ def ints_to_pydatetime( object freq=None, bint fold=False, str box="datetime" -): +) -> np.ndarray: """ Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp. @@ -116,7 +116,7 @@ def ints_to_pydatetime( Returns ------- - ndarray of dtype specified by box + ndarray[object] of type specified by box """ cdef: Py_ssize_t i, n = len(arr) @@ -223,7 +223,7 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY -def get_resolution(const int64_t[:] stamps, tzinfo tz=None): +def get_resolution(const int64_t[:] stamps, tzinfo tz=None) -> Resolution: cdef: Py_ssize_t i, n = len(stamps) npy_datetimestruct dts @@ -332,7 +332,7 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t @cython.wraparound(False) @cython.boundscheck(False) -def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None): +def is_date_array_normalized(const int64_t[:] stamps, tzinfo tz=None) -> bool: """ Check if all of the given (nanosecond) timestamps are normalized to midnight, i.e. hour == minute == second == 0. If the optional timezone diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 6adda1fe92044..9fbeb67aa35e9 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -77,7 +77,7 @@ def write_csv_rows( @cython.boundscheck(False) @cython.wraparound(False) -def convert_json_to_lines(arr: object) -> str: +def convert_json_to_lines(arr: str) -> str: """ replace comma separated json with line feeds, paying special attention to quotes & brackets diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 456d87766bdb7..f37faa4ab844b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -450,7 +450,7 @@ def take( **kwargs, ) - def tolist(self): + def tolist(self) -> list[int]: return list(self._range) @doc(Int64Index.__iter__) @@ -494,13 +494,13 @@ def _minmax(self, meth: str): return self.start + self.step * no_steps - def min(self, axis=None, skipna=True, *args, **kwargs) -> int: + def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: """The minimum value of the RangeIndex""" nv.validate_minmax_axis(axis) nv.validate_min(args, kwargs) return self._minmax("min") - def max(self, axis=None, skipna=True, *args, **kwargs) -> int: + def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: """The maximum value of the RangeIndex""" nv.validate_minmax_axis(axis) nv.validate_max(args, kwargs) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 87be5c0997072..a175605347af1 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -56,7 +56,7 @@ def combine_hash_arrays(arrays: Iterator[np.ndarray], num_items: int) -> np.ndar Returns ------- - np.ndarray[int64] + np.ndarray[uint64] Should be the same as CPython's tupleobject.c """ @@ -184,7 +184,7 @@ def hash_tuples( Returns ------- - ndarray of hashed values array + ndarray[np.uint64] of hashed values """ if not is_list_like(vals): raise TypeError("must be convertible to a list-of-tuples") @@ -227,7 +227,7 @@ def _hash_categorical(cat: Categorical, encoding: str, hash_key: str) -> np.ndar Returns ------- - ndarray of hashed values array, same size as len(c) + ndarray[np.uint64] of hashed values, same size as len(c) """ # Convert ExtensionArrays to ndarrays values = np.asarray(cat.categories._values) @@ -274,7 +274,8 @@ def hash_array( Returns ------- - 1d uint64 numpy array of hash values, same length as the vals + ndarray[np.uint64, ndim=1] + hash values, same length as the vals """ if not hasattr(vals, "dtype"): raise TypeError("must pass a ndarray-like") diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 75f133745e3a2..39d12c5b05c2f 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -27,7 +27,7 @@ from pandas import DataFrame -def convert_to_line_delimits(s): +def convert_to_line_delimits(s: str) -> str: """ Helper function that converts JSON lists to line delimited JSON. """ From 30dade01fd8e4fffc391c5414847d216e4918826 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 17 Mar 2021 18:44:03 -0700 Subject: [PATCH 3/3] docstring fixup --- pandas/core/util/hashing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index a175605347af1..375901bc3fb58 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -275,7 +275,7 @@ def hash_array( Returns ------- ndarray[np.uint64, ndim=1] - hash values, same length as the vals + Hashed values, same length as the vals. """ if not hasattr(vals, "dtype"): raise TypeError("must pass a ndarray-like")