From ea5f60f6ba0cb5f51347646808f8cdcc7b06e55d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 22 Oct 2019 19:01:00 -0700 Subject: [PATCH 1/7] add typing --- pandas/core/algorithms.py | 18 ++++++++++------- pandas/core/indexes/interval.py | 2 +- pandas/core/nanops.py | 36 ++++++++++++++++++--------------- pandas/io/common.py | 4 ++-- pandas/io/excel/_odfreader.py | 5 +++-- 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2c9f632e8bc24..7760c8ec397a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -692,7 +692,12 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): def value_counts( - values, sort=True, ascending=False, normalize=False, bins=None, dropna=True + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, ): """ Compute a histogram of the counts of non-null values. @@ -700,22 +705,21 @@ def value_counts( Parameters ---------- values : ndarray (1-d) - sort : boolean, default True + sort : bool, default True Sort by values - ascending : boolean, default False + ascending : bool, default False Sort in ascending order - normalize: boolean, default False + normalize: bool, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN Returns ------- - value_counts : Series - + Series """ from pandas.core.series import Series, Index diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a2d48b5100a2e..3b6ac25e7c6b4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1340,7 +1340,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": return self[mask] - def _setop(op_name, sort=None): + def _setop(op_name: str, sort=None): @SetopCheck(op_name=op_name) def func(self, other, sort=sort): result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 09b80d1b3a9ac..50b7e97608908 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -7,7 +7,7 @@ from pandas._config import get_option -from pandas._libs import iNaT, lib, tslibs +from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -53,7 +53,7 @@ def __init__(self, *dtypes): super().__init__() self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) - def check(self, obj): + def check(self, obj) -> bool: return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) def __call__(self, f): @@ -128,7 +128,7 @@ def f(values, axis=None, skipna=True, **kwds): return f -def _bn_ok_dtype(dt, name): +def _bn_ok_dtype(dt, name: str) -> bool: # Bottleneck chokes on datetime64 if not is_object_dtype(dt) and not ( is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt) @@ -149,7 +149,7 @@ def _bn_ok_dtype(dt, name): return False -def _has_infs(result): +def _has_infs(result) -> bool: if isinstance(result, np.ndarray): if result.dtype == "f8": return lib.has_infs_f8(result.ravel()) @@ -176,19 +176,22 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): return -np.inf else: if 
fill_value_typ is None: - return tslibs.iNaT + return iNaT else: if fill_value_typ == "+inf": # need the max int here return _int64_max else: - return tslibs.iNaT + return iNaT def _maybe_get_mask( values: np.ndarray, skipna: bool, mask: Optional[np.ndarray] ) -> Optional[np.ndarray]: - """ This function will compute a mask iff it is necessary. Otherwise, + """ + Compute a mask iff necessary. + + This function will compute a mask iff it is necessary. Otherwise, return the provided mask (potentially None) when a mask does not need to be computed. @@ -214,7 +217,6 @@ def _maybe_get_mask( Returns ------- Optional[np.ndarray] - """ if mask is None: @@ -346,7 +348,7 @@ def _wrap_results(result, dtype, fill_value=None): assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan - result = tslibs.Timestamp(result, tz=tz) + result = Timestamp(result, tz=tz) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -358,21 +360,22 @@ def _wrap_results(result, dtype, fill_value=None): if np.fabs(result) > _int64_max: raise ValueError("overflow in timedelta operation") - result = tslibs.Timedelta(result, unit="ns") + result = Timedelta(result, unit="ns") else: result = result.astype("m8[ns]").view(dtype) return result -def _na_for_min_count(values, axis): - """Return the missing value for `values` +def _na_for_min_count(values, axis: Optional[int]): + """ + Return the missing value for `values`. Parameters ---------- values : ndarray axis : int or None - axis for the reduction + axis for the reduction, required if values.ndim > 1. Returns ------- @@ -388,13 +391,14 @@ def _na_for_min_count(values, axis): if values.ndim == 1: return fill_value else: + assert axis is not None # assertion to make mypy happy result_shape = values.shape[:axis] + values.shape[axis + 1 :] result = np.empty(result_shape, dtype=values.dtype) result.fill(fill_value) return result -def nanany(values, axis=None, skipna=True, mask=None): +def nanany(values, axis=None, skipna: bool = True, mask=None): """ Check if any elements along an axis evaluate to True. @@ -426,7 +430,7 @@ def nanany(values, axis=None, skipna=True, mask=None): return values.any(axis) -def nanall(values, axis=None, skipna=True, mask=None): +def nanall(values, axis=None, skipna: bool = True, mask=None): """ Check if all elements along an axis evaluate to True. 
@@ -1195,7 +1199,7 @@ def _maybe_null_out( else: # GH12941, use None to auto cast null result[null_mask] = None - elif result is not tslibs.NaT: + elif result is not NaT: if mask is not None: null_mask = mask.size - mask.sum() else: diff --git a/pandas/io/common.py b/pandas/io/common.py index 0b8594bbbd3e4..0bef14e4999c7 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -569,11 +569,11 @@ def __iter__(self) -> "MMapWrapper": return self def __next__(self) -> str: - newline = self.mmap.readline() + newbytes = self.mmap.readline() # readline returns bytes, not str, but Python's CSV reader # expects str, so convert the output to str before continuing - newline = newline.decode("utf-8") + newline = newbytes.decode("utf-8") # mmap doesn't raise if reading past the allocated # data but instead returns an empty string, so raise diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 3be36663bac79..dd6a8c38e1a55 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -9,7 +9,8 @@ class _ODFReader(_BaseExcelReader): - """Read tables out of OpenDocument formatted files + """ + Read tables out of OpenDocument formatted files. Parameters ---------- @@ -60,7 +61,7 @@ def get_sheet_by_name(self, name: str): if table.getAttribute("name") == name: return table - raise ValueError("sheet {name} not found".format(name)) + raise ValueError("sheet {name} not found".format(name=name)) def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: """Parse an ODF Table into a list of lists From a05642d01818543b27dc5763a1d1322aef424b1c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 22 Oct 2019 20:59:17 -0700 Subject: [PATCH 2/7] add types in libindex --- pandas/_libs/index.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 144d555258c50..666fdb3dfeee3 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -170,17 +170,17 @@ cdef class IndexEngine: raise KeyError(val) - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the sizeof our mapping """ if not self.is_mapping_populated: return 0 return self.mapping.sizeof(deep=deep) - def __sizeof__(self): + def __sizeof__(self) -> int: return self.sizeof() @property - def is_unique(self): + def is_unique(self) -> bool: if self.need_unique_check: self._do_unique_check() @@ -192,14 +192,14 @@ cdef class IndexEngine: self._ensure_mapping_populated() @property - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: if self.need_monotonic_check: self._do_monotonic_check() return self.monotonic_inc == 1 @property - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: if self.need_monotonic_check: self._do_monotonic_check() @@ -242,7 +242,7 @@ cdef class IndexEngine: hash(val) @property - def is_mapping_populated(self): + def is_mapping_populated(self) -> bool: return self.mapping is not None cdef inline _ensure_mapping_populated(self): From 651f4b30abeaa6e780f2367562b529b69377a0fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 07:40:28 -0700 Subject: [PATCH 3/7] expand iff --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 50b7e97608908..5dd4cc946572c 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -189,7 +189,7 @@ def _maybe_get_mask( values: np.ndarray, skipna: bool, mask: 
Optional[np.ndarray] ) -> Optional[np.ndarray]: """ - Compute a mask iff necessary. + Compute a mask if and only if necessary. This function will compute a mask iff it is necessary. Otherwise, return the provided mask (potentially None) when a mask does not need to be From e31cea4b6c3b7046ab40e85692d27bb059717074 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 09:10:23 -0700 Subject: [PATCH 4/7] annotate value_counts return type --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7760c8ec397a9..7f08186b766ab 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -698,7 +698,7 @@ def value_counts( normalize: bool = False, bins=None, dropna: bool = True, -): +) -> "Series": """ Compute a histogram of the counts of non-null values. From 93fa8a04c33c33554d26dff3609810e8ad125437 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 10:15:38 -0700 Subject: [PATCH 5/7] revert annotation that failed mypy --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7f08186b766ab..7760c8ec397a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -698,7 +698,7 @@ def value_counts( normalize: bool = False, bins=None, dropna: bool = True, -) -> "Series": +): """ Compute a histogram of the counts of non-null values. From 208c7aac9e52781afcbe7983fe1935308f3fb0ef Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 18:03:34 -0700 Subject: [PATCH 6/7] revert --- pandas/io/excel/_odfreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index dd6a8c38e1a55..66a186161e01b 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -61,7 +61,7 @@ def get_sheet_by_name(self, name: str): if table.getAttribute("name") == name: return table - raise ValueError("sheet {name} not found".format(name=name)) + raise ValueError("sheet {name} not found".format(name)) def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: """Parse an ODF Table into a list of lists From 633c583a4d14fb56e60a542664ded82cdb0504a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 24 Oct 2019 19:03:43 -0700 Subject: [PATCH 7/7] dummy commit to force CI
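
Note on patches 4/7 and 5/7: the `-> "Series"` return annotation on `value_counts` was added and then reverted because it failed the mypy check, presumably because `Series` is imported only inside the function body (`from pandas.core.series import Series, Index`), so the string annotation cannot be resolved at module scope. Below is a minimal sketch, not taken from the patch, of one way such a forward reference can be made to type-check using a `TYPE_CHECKING`-guarded import; the function name `value_counts_sketch` is hypothetical and only illustrates the pattern.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Import seen only by static type checkers; skipped at runtime,
        # so it does not reintroduce a circular import.
        from pandas import Series


    def value_counts_sketch(values, sort: bool = True, dropna: bool = True) -> "Series":
        # Runtime import deferred into the function body, mirroring the
        # pattern used in pandas/core/algorithms.py.
        from pandas import Series

        return Series(values).value_counts(sort=sort, dropna=dropna)

The string form "Series" keeps the annotation lazy: the type checker resolves it against the guarded import, while the interpreter never evaluates it.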