CLN: Add types in a handful of places (pandas-dev#29178)

jbrockmendel · proost · commit ea9114f65cc9 · 2019-12-20T01:09:07.000+09:00
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -171,17 +171,17 @@ cdef class IndexEngine:
 
         raise KeyError(val)
 
-    def sizeof(self, deep=False):
+    def sizeof(self, deep: bool = False) -> int:
         """ return the sizeof our mapping """
         if not self.is_mapping_populated:
             return 0
         return self.mapping.sizeof(deep=deep)
 
-    def __sizeof__(self):
+    def __sizeof__(self) -> int:
         return self.sizeof()
 
     @property
-    def is_unique(self):
+    def is_unique(self) -> bool:
         if self.need_unique_check:
             self._do_unique_check()
 
@@ -193,14 +193,14 @@ cdef class IndexEngine:
         self._ensure_mapping_populated()
 
     @property
-    def is_monotonic_increasing(self):
+    def is_monotonic_increasing(self) -> bool:
         if self.need_monotonic_check:
             self._do_monotonic_check()
 
         return self.monotonic_inc == 1
 
     @property
-    def is_monotonic_decreasing(self):
+    def is_monotonic_decreasing(self) -> bool:
         if self.need_monotonic_check:
             self._do_monotonic_check()
 
@@ -243,7 +243,7 @@ cdef class IndexEngine:
         hash(val)
 
     @property
-    def is_mapping_populated(self):
+    def is_mapping_populated(self) -> bool:
         return self.mapping is not None
 
     cdef inline _ensure_mapping_populated(self):
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -692,30 +692,34 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
 
 
 def value_counts(
-    values, sort=True, ascending=False, normalize=False, bins=None, dropna=True
+    values,
+    sort: bool = True,
+    ascending: bool = False,
+    normalize: bool = False,
+    bins=None,
+    dropna: bool = True,
 ):
     """
     Compute a histogram of the counts of non-null values.
 
     Parameters
     ----------
     values : ndarray (1-d)
-    sort : boolean, default True
+    sort : bool, default True
         Sort by values
-    ascending : boolean, default False
+    ascending : bool, default False
         Sort in ascending order
-    normalize: boolean, default False
+    normalize: bool, default False
         If True then compute a relative histogram
     bins : integer, optional
         Rather than count values, group them into half-open bins,
         convenience for pd.cut, only works with numeric data
-    dropna : boolean, default True
+    dropna : bool, default True
         Don't include counts of NaN
 
     Returns
     -------
-    value_counts : Series
-
+    Series
     """
     from pandas.core.series import Series, Index
 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -1340,7 +1340,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex":
 
         return self[mask]
 
-    def _setop(op_name, sort=None):
+    def _setop(op_name: str, sort=None):
         @SetopCheck(op_name=op_name)
         def func(self, other, sort=sort):
             result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort)
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -7,7 +7,7 @@
 
 from pandas._config import get_option
 
-from pandas._libs import iNaT, lib, tslibs
+from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
 from pandas.compat._optional import import_optional_dependency
 
 from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask
@@ -53,7 +53,7 @@ def __init__(self, *dtypes):
         super().__init__()
         self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)
 
-    def check(self, obj):
+    def check(self, obj) -> bool:
         return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes)
 
     def __call__(self, f):
@@ -128,7 +128,7 @@ def f(values, axis=None, skipna=True, **kwds):
         return f
 
 
-def _bn_ok_dtype(dt, name):
+def _bn_ok_dtype(dt, name: str) -> bool:
     # Bottleneck chokes on datetime64
     if not is_object_dtype(dt) and not (
         is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt)
@@ -149,7 +149,7 @@ def _bn_ok_dtype(dt, name):
     return False
 
 
-def _has_infs(result):
+def _has_infs(result) -> bool:
     if isinstance(result, np.ndarray):
         if result.dtype == "f8":
             return lib.has_infs_f8(result.ravel())
@@ -176,19 +176,22 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
                 return -np.inf
     else:
         if fill_value_typ is None:
-            return tslibs.iNaT
+            return iNaT
         else:
             if fill_value_typ == "+inf":
                 # need the max int here
                 return _int64_max
             else:
-                return tslibs.iNaT
+                return iNaT
 
 
 def _maybe_get_mask(
     values: np.ndarray, skipna: bool, mask: Optional[np.ndarray]
 ) -> Optional[np.ndarray]:
-    """ This function will compute a mask iff it is necessary. Otherwise,
+    """
+    Compute a mask if and only if necessary.
+
+    This function will compute a mask iff it is necessary. Otherwise,
     return the provided mask (potentially None) when a mask does not need to be
     computed.
 
@@ -214,7 +217,6 @@ def _maybe_get_mask(
     Returns
     -------
     Optional[np.ndarray]
-
     """
 
     if mask is None:
@@ -346,7 +348,7 @@ def _wrap_results(result, dtype, fill_value=None):
             assert not isna(fill_value), "Expected non-null fill_value"
             if result == fill_value:
                 result = np.nan
-            result = tslibs.Timestamp(result, tz=tz)
+            result = Timestamp(result, tz=tz)
         else:
             result = result.view(dtype)
     elif is_timedelta64_dtype(dtype):
@@ -358,21 +360,22 @@ def _wrap_results(result, dtype, fill_value=None):
             if np.fabs(result) > _int64_max:
                 raise ValueError("overflow in timedelta operation")
 
-            result = tslibs.Timedelta(result, unit="ns")
+            result = Timedelta(result, unit="ns")
         else:
             result = result.astype("m8[ns]").view(dtype)
 
     return result
 
 
-def _na_for_min_count(values, axis):
-    """Return the missing value for `values`
+def _na_for_min_count(values, axis: Optional[int]):
+    """
+    Return the missing value for `values`.
 
     Parameters
     ----------
     values : ndarray
     axis : int or None
-        axis for the reduction
+        axis for the reduction, required if values.ndim > 1.
 
     Returns
     -------
@@ -388,13 +391,14 @@ def _na_for_min_count(values, axis):
     if values.ndim == 1:
         return fill_value
     else:
+        assert axis is not None  # assertion to make mypy happy
         result_shape = values.shape[:axis] + values.shape[axis + 1 :]
         result = np.empty(result_shape, dtype=values.dtype)
         result.fill(fill_value)
         return result
 
 
-def nanany(values, axis=None, skipna=True, mask=None):
+def nanany(values, axis=None, skipna: bool = True, mask=None):
     """
     Check if any elements along an axis evaluate to True.
 
@@ -426,7 +430,7 @@ def nanany(values, axis=None, skipna=True, mask=None):
     return values.any(axis)
 
 
-def nanall(values, axis=None, skipna=True, mask=None):
+def nanall(values, axis=None, skipna: bool = True, mask=None):
     """
     Check if all elements along an axis evaluate to True.
 
@@ -1195,7 +1199,7 @@ def _maybe_null_out(
             else:
                 # GH12941, use None to auto cast null
                 result[null_mask] = None
-    elif result is not tslibs.NaT:
+    elif result is not NaT:
         if mask is not None:
             null_mask = mask.size - mask.sum()
         else:
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -569,11 +569,11 @@ def __iter__(self) -> "MMapWrapper":
         return self
 
     def __next__(self) -> str:
-        newline = self.mmap.readline()
+        newbytes = self.mmap.readline()
 
         # readline returns bytes, not str, but Python's CSV reader
         # expects str, so convert the output to str before continuing
-        newline = newline.decode("utf-8")
+        newline = newbytes.decode("utf-8")
 
         # mmap doesn't raise if reading past the allocated
         # data but instead returns an empty string, so raise
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
@@ -9,7 +9,8 @@
 
 
 class _ODFReader(_BaseExcelReader):
-    """Read tables out of OpenDocument formatted files
+    """
+    Read tables out of OpenDocument formatted files.
 
     Parameters
     ----------