Skip to content

TYP: mostly core.arrays, some core.indexes #40545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 2, 2021
12 changes: 6 additions & 6 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -411,15 +411,15 @@ cdef class {{name}}HashTable(HashTable):
k = kh_get_{{dtype}}(self.table, ckey)
return k != self.table.n_buckets

def sizeof(self, deep=False):
def sizeof(self, deep: bool = False) -> int:
""" return the size of my table in bytes """
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
for_pairs = self.table.n_buckets * (sizeof({{dtype}}_t) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

def get_state(self):
def get_state(self) -> dict[str, int]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should be consistent about this pattern, IIRC we use Dict[str, int] ? cc @simonjayhawkins ok either way, we should have a rule and convert

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC there was no objection to using the new syntax, just not ready to convert all existing annotations yet. I recall a discussion about this and I _think_ the outcome was that we would wait till closer to 1.3 to use pyupgrade to reduce the potential conflicts with backports, but I can't find the relevant discussion. @MarcoGorelli

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was #39930

Removing `--keep-runtime-typing` will upgrade to newer syntax in files with `from __future__ import annotations`

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh yes. it was because mypy in 1.2.x is on an older version. #39538 (review)

nbd. can fixup the backports if necessary to use the older syntax. so imo can do whenever.

""" returns infos about the state of the hashtable"""
return {
'n_buckets' : self.table.n_buckets,
Expand Down Expand Up @@ -747,14 +747,14 @@ cdef class StringHashTable(HashTable):
kh_destroy_str(self.table)
self.table = NULL

def sizeof(self, deep=False):
def sizeof(self, deep: bool = False) -> int:
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
for_pairs = self.table.n_buckets * (sizeof(char *) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

def get_state(self):
def get_state(self) -> dict[str, int]:
""" returns infos about the state of the hashtable"""
return {
'n_buckets' : self.table.n_buckets,
Expand Down Expand Up @@ -1079,15 +1079,15 @@ cdef class PyObjectHashTable(HashTable):
k = kh_get_pymap(self.table, <PyObject*>key)
return k != self.table.n_buckets

def sizeof(self, deep=False):
def sizeof(self, deep: bool = False) -> int:
""" return the size of my table in bytes """
overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
for_pairs = self.table.n_buckets * (sizeof(PyObject *) + # keys
sizeof(Py_ssize_t)) # vals
return overhead + for_flags + for_pairs

def get_state(self):
def get_state(self) -> dict[str, int]:
"""
returns infos about the current state of the hashtable like size,
number of buckets and so on.
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/ops.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray:


def maybe_convert_bool(ndarray[object] arr,
true_values=None, false_values=None):
true_values=None, false_values=None) -> ndarray:
cdef:
Py_ssize_t i, n
ndarray[uint8_t] result
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,9 @@ cdef convert_to_timedelta64(object ts, str unit):

@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"):
def array_to_timedelta64(
ndarray[object] values, str unit=None, str errors="raise"
) -> ndarray:
"""
Convert an ndarray to an array of timedeltas. If errors == 'coerce',
coerce non-convertible objects to NaT. Otherwise, raise.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1619,7 +1619,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}


def diff(arr, n: int, axis: int = 0, stacklevel=3):
def diff(arr, n: int, axis: int = 0, stacklevel: int = 3):
"""
difference of n between self,
analogous to s-s.shift(n)
Expand Down Expand Up @@ -1865,7 +1865,7 @@ def safe_sort(
return ordered, ensure_platform_int(new_codes)


def _sort_mixed(values):
def _sort_mixed(values) -> np.ndarray:
""" order ints before strings in 1d arrays, safe in py3 """
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
nums = np.sort(values[~str_pos])
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ def ndim(self) -> int:

@cache_readonly
def size(self) -> int:
# error: Incompatible return value type (got "number", expected "int")
return np.prod(self.shape) # type: ignore[return-value]
return self._ndarray.size

@cache_readonly
def nbytes(self) -> int:
Expand Down Expand Up @@ -190,7 +189,7 @@ def equals(self, other) -> bool:
return False
return bool(array_equivalent(self._ndarray, other._ndarray))

def _values_for_argsort(self):
def _values_for_argsort(self) -> np.ndarray:
return self._ndarray

# Signature of "argmin" incompatible with supertype "ExtensionArray"
Expand Down
16 changes: 9 additions & 7 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,9 @@ def _from_inferred_categories(
if true_values is None:
true_values = ["True", "TRUE", "true"]

cats = cats.isin(true_values)
# error: Incompatible types in assignment (expression has type
# "ndarray", variable has type "Index")
cats = cats.isin(true_values) # type: ignore[assignment]

if known_categories:
# Recode from observation order to dtype.categories order.
Expand Down Expand Up @@ -1444,7 +1446,7 @@ def memory_usage(self, deep: bool = False) -> int:
"""
return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep)

def isna(self):
def isna(self) -> np.ndarray:
"""
Detect missing values

Expand All @@ -1465,7 +1467,7 @@ def isna(self):

isnull = isna

def notna(self):
def notna(self) -> np.ndarray:
"""
Inverse of isna

Expand Down Expand Up @@ -1731,7 +1733,7 @@ def view(self, dtype=None):
raise NotImplementedError(dtype)
return self._from_backing_data(self._ndarray)

def to_dense(self):
def to_dense(self) -> np.ndarray:
"""
Return my 'dense' representation

Expand Down Expand Up @@ -1804,14 +1806,14 @@ def __contains__(self, key) -> bool:
"""
# if key is a NaN, check if any NaN is in self.
if is_valid_na_for_dtype(key, self.categories.dtype):
return self.isna().any()
return bool(self.isna().any())

return contains(self, key, container=self._codes)

# ------------------------------------------------------------------
# Rendering Methods

def _formatter(self, boxed=False):
def _formatter(self, boxed: bool = False):
# Defer to CategoricalFormatter's formatter.
return None

Expand Down Expand Up @@ -1889,7 +1891,7 @@ def _repr_footer(self) -> str:
info = self._repr_categories_info()
return f"Length: {len(self)}\n{info}"

def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str:
def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str:
from pandas.io.formats import format as fmt

formatter = fmt.CategoricalFormatter(
Expand Down
29 changes: 16 additions & 13 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None):
"""
raise AbstractMethodError(self)

def _formatter(self, boxed=False):
def _formatter(self, boxed: bool = False):
# TODO: Remove Datetime & DatetimeTZ formatters.
return "'{}'".format

Expand All @@ -354,7 +354,7 @@ def __getitem__(
result._freq = self._get_getitem_freq(key)
return result

def _get_getitem_freq(self, key):
def _get_getitem_freq(self, key) -> Optional[BaseOffset]:
"""
Find the `freq` attribute to assign to the result of a __getitem__ lookup.
"""
Expand Down Expand Up @@ -406,7 +406,7 @@ def _maybe_clear_freq(self):
# DatetimeArray and TimedeltaArray
pass

def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True):
# Some notes on cases we don't have to handle here in the base class:
# 1. PeriodArray.astype handles period -> period
# 2. DatetimeArray.astype handles conversion between tz.
Expand Down Expand Up @@ -545,7 +545,7 @@ def _values_for_factorize(self):

@classmethod
def _from_factorized(
cls: Type[DatetimeLikeArrayT], values, original
cls: Type[DatetimeLikeArrayT], values, original: DatetimeLikeArrayT
) -> DatetimeLikeArrayT:
return cls(values, dtype=original.dtype)

Expand Down Expand Up @@ -939,7 +939,7 @@ def freq(self, value):
self._freq = value

@property
def freqstr(self):
def freqstr(self) -> Optional[str]:
"""
Return the frequency object as a string if its set, otherwise None.
"""
Expand All @@ -948,7 +948,7 @@ def freqstr(self):
return self.freq.freqstr

@property # NB: override with cache_readonly in immutable subclasses
def inferred_freq(self):
def inferred_freq(self) -> Optional[str]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't we have a Freq typing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we do, but this is more specific

"""
Tries to return a string representing a frequency guess,
generated by infer_freq. Returns None if it can't autodetect the
Expand All @@ -963,8 +963,11 @@ def inferred_freq(self):

@property # NB: override with cache_readonly in immutable subclasses
def _resolution_obj(self) -> Optional[Resolution]:
freqstr = self.freqstr
if freqstr is None:
return None
try:
return Resolution.get_reso_from_freq(self.freqstr)
return Resolution.get_reso_from_freq(freqstr)
except KeyError:
return None

Expand Down Expand Up @@ -1241,7 +1244,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
)
return result

def _time_shift(self, periods, freq=None):
def _time_shift(self, periods: int, freq=None):
"""
Shift each value by `periods`.

Expand Down Expand Up @@ -1440,7 +1443,7 @@ def __isub__(self, other):
# --------------------------------------------------------------
# Reductions

def min(self, *, axis=None, skipna=True, **kwargs):
def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs):
"""
Return the minimum value of the Array or minimum along
an axis.
Expand Down Expand Up @@ -1469,7 +1472,7 @@ def min(self, *, axis=None, skipna=True, **kwargs):
result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)

def max(self, *, axis=None, skipna=True, **kwargs):
def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs):
"""
Return the maximum value of the Array or maximum along
an axis.
Expand Down Expand Up @@ -1500,7 +1503,7 @@ def max(self, *, axis=None, skipna=True, **kwargs):
result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
return self._wrap_reduction_result(axis, result)

def mean(self, *, skipna=True, axis: Optional[int] = 0):
def mean(self, *, skipna: bool = True, axis: Optional[int] = 0):
"""
Return the mean value of the Array.

Expand Down Expand Up @@ -1568,7 +1571,7 @@ class DatelikeOps(DatetimeLikeArrayMixin):
URL="https://docs.python.org/3/library/datetime.html"
"#strftime-and-strptime-behavior"
)
def strftime(self, date_format):
def strftime(self, date_format: str) -> np.ndarray:
"""
Convert to Index using specified date_format.

Expand Down Expand Up @@ -1760,7 +1763,7 @@ def all(self, *, axis: Optional[int] = None, skipna: bool = True):
# --------------------------------------------------------------
# Frequency Methods

def _maybe_clear_freq(self):
def _maybe_clear_freq(self) -> None:
self._freq = None

def _with_freq(self, freq):
Expand Down
Loading