diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 255fd85531d14..d9bde2a471e06 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -171,17 +171,17 @@ cdef class IndexEngine: raise KeyError(val) - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the sizeof our mapping """ if not self.is_mapping_populated: return 0 return self.mapping.sizeof(deep=deep) - def __sizeof__(self): + def __sizeof__(self) -> int: return self.sizeof() @property - def is_unique(self): + def is_unique(self) -> bool: if self.need_unique_check: self._do_unique_check() @@ -193,14 +193,14 @@ cdef class IndexEngine: self._ensure_mapping_populated() @property - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: if self.need_monotonic_check: self._do_monotonic_check() return self.monotonic_inc == 1 @property - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: if self.need_monotonic_check: self._do_monotonic_check() @@ -243,7 +243,7 @@ cdef class IndexEngine: hash(val) @property - def is_mapping_populated(self): + def is_mapping_populated(self) -> bool: return self.mapping is not None cdef inline _ensure_mapping_populated(self): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2c9f632e8bc24..7760c8ec397a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -692,7 +692,12 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): def value_counts( - values, sort=True, ascending=False, normalize=False, bins=None, dropna=True + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, ): """ Compute a histogram of the counts of non-null values. @@ -700,22 +705,21 @@ def value_counts( Parameters ---------- values : ndarray (1-d) - sort : boolean, default True + sort : bool, default True Sort by values - ascending : boolean, default False + ascending : bool, default False Sort in ascending order - normalize: boolean, default False + normalize: bool, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN Returns ------- - value_counts : Series - + Series """ from pandas.core.series import Series, Index diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a2d48b5100a2e..3b6ac25e7c6b4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1340,7 +1340,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": return self[mask] - def _setop(op_name, sort=None): + def _setop(op_name: str, sort=None): @SetopCheck(op_name=op_name) def func(self, other, sort=sort): result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 09b80d1b3a9ac..5dd4cc946572c 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -7,7 +7,7 @@ from pandas._config import get_option -from pandas._libs import iNaT, lib, tslibs +from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -53,7 +53,7 @@ def __init__(self, *dtypes): super().__init__() self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) - def check(self, obj): + def check(self, obj) -> bool: return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) def __call__(self, f): @@ -128,7 +128,7 @@ def f(values, axis=None, skipna=True, **kwds): return f -def _bn_ok_dtype(dt, name): +def _bn_ok_dtype(dt, name: str) -> bool: # Bottleneck chokes on datetime64 if not is_object_dtype(dt) and not ( is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt) @@ -149,7 +149,7 @@ def _bn_ok_dtype(dt, name): return False -def _has_infs(result): +def _has_infs(result) -> bool: if isinstance(result, np.ndarray): if result.dtype == "f8": return lib.has_infs_f8(result.ravel()) @@ -176,19 +176,22 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): return -np.inf else: if fill_value_typ is None: - return tslibs.iNaT + return iNaT else: if fill_value_typ == "+inf": # need the max int here return _int64_max else: - return tslibs.iNaT + return iNaT def _maybe_get_mask( values: np.ndarray, skipna: bool, mask: Optional[np.ndarray] ) -> Optional[np.ndarray]: - """ This function will compute a mask iff it is necessary. Otherwise, + """ + Compute a mask if and only if necessary. + + This function will compute a mask iff it is necessary. Otherwise, return the provided mask (potentially None) when a mask does not need to be computed. @@ -214,7 +217,6 @@ def _maybe_get_mask( Returns ------- Optional[np.ndarray] - """ if mask is None: @@ -346,7 +348,7 @@ def _wrap_results(result, dtype, fill_value=None): assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan - result = tslibs.Timestamp(result, tz=tz) + result = Timestamp(result, tz=tz) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -358,21 +360,22 @@ def _wrap_results(result, dtype, fill_value=None): if np.fabs(result) > _int64_max: raise ValueError("overflow in timedelta operation") - result = tslibs.Timedelta(result, unit="ns") + result = Timedelta(result, unit="ns") else: result = result.astype("m8[ns]").view(dtype) return result -def _na_for_min_count(values, axis): - """Return the missing value for `values` +def _na_for_min_count(values, axis: Optional[int]): + """ + Return the missing value for `values`. Parameters ---------- values : ndarray axis : int or None - axis for the reduction + axis for the reduction, required if values.ndim > 1. Returns ------- @@ -388,13 +391,14 @@ def _na_for_min_count(values, axis): if values.ndim == 1: return fill_value else: + assert axis is not None # assertion to make mypy happy result_shape = values.shape[:axis] + values.shape[axis + 1 :] result = np.empty(result_shape, dtype=values.dtype) result.fill(fill_value) return result -def nanany(values, axis=None, skipna=True, mask=None): +def nanany(values, axis=None, skipna: bool = True, mask=None): """ Check if any elements along an axis evaluate to True. @@ -426,7 +430,7 @@ def nanany(values, axis=None, skipna=True, mask=None): return values.any(axis) -def nanall(values, axis=None, skipna=True, mask=None): +def nanall(values, axis=None, skipna: bool = True, mask=None): """ Check if all elements along an axis evaluate to True. @@ -1195,7 +1199,7 @@ def _maybe_null_out( else: # GH12941, use None to auto cast null result[null_mask] = None - elif result is not tslibs.NaT: + elif result is not NaT: if mask is not None: null_mask = mask.size - mask.sum() else: diff --git a/pandas/io/common.py b/pandas/io/common.py index 0b8594bbbd3e4..0bef14e4999c7 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -569,11 +569,11 @@ def __iter__(self) -> "MMapWrapper": return self def __next__(self) -> str: - newline = self.mmap.readline() + newbytes = self.mmap.readline() # readline returns bytes, not str, but Python's CSV reader # expects str, so convert the output to str before continuing - newline = newline.decode("utf-8") + newline = newbytes.decode("utf-8") # mmap doesn't raise if reading past the allocated # data but instead returns an empty string, so raise diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 3be36663bac79..66a186161e01b 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -9,7 +9,8 @@ class _ODFReader(_BaseExcelReader): - """Read tables out of OpenDocument formatted files + """ + Read tables out of OpenDocument formatted files. Parameters ----------