From f34108d93a1cafbb13d336428cc5eacfde8a8aa0 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 24 Sep 2019 16:59:35 -0700
Subject: [PATCH 1/2] CLN: assorted type annotations and cleanups in algorithms/hashing

---
 pandas/core/algorithms.py                |  8 +++-----
 pandas/core/util/hashing.py              | 16 ++++++++++------
 pandas/tests/groupby/test_categorical.py |  2 +-
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 6e73e1636a75b..8458509541b48 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -176,7 +176,6 @@ def _reconstruct_data(values, dtype, original):
     -------
     Index for extension types, otherwise ndarray casted to dtype
     """
-    from pandas import Index
 
     if is_extension_array_dtype(dtype):
         values = dtype.construct_array_type()._from_sequence(values)
@@ -184,7 +183,7 @@ def _reconstruct_data(values, dtype, original):
         values = values.astype(dtype)
 
         # we only support object dtypes bool Index
-        if isinstance(original, Index):
+        if isinstance(original, ABCIndexClass):
             values = values.astype(object)
     elif dtype is not None:
         values = values.astype(dtype)
@@ -833,7 +832,7 @@ def duplicated(values, keep="first"):
     return f(values, keep=keep)
 
 
-def mode(values, dropna=True):
+def mode(values, dropna: bool = True):
     """
     Returns the mode(s) of an array.
 
@@ -1888,7 +1887,7 @@ def searchsorted(arr, value, side="left", sorter=None):
 }
 
 
-def diff(arr, n, axis=0):
+def diff(arr, n: int, axis: int = 0):
     """
     difference of n between self,
     analogous to s-s.shift(n)
@@ -1904,7 +1903,6 @@ def diff(arr, n, axis=0):
     Returns
     -------
     shifted
-
     """
 
     n = int(n)
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index bcdbf0855cbb4..bb5f8aace3774 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -26,7 +26,7 @@
 _default_hash_key = "0123456789123456"
 
 
-def _combine_hash_arrays(arrays, num_items):
+def _combine_hash_arrays(arrays, num_items: int):
     """
     Parameters
     ----------
@@ -55,7 +55,11 @@ def _combine_hash_arrays(arrays, num_items):
 
 
 def hash_pandas_object(
-    obj, index=True, encoding="utf8", hash_key=None, categorize=True
+    obj,
+    index: bool = True,
+    encoding: str = "utf8",
+    hash_key=None,
+    categorize: bool = True,
 ):
     """
     Return a data hash of the Index/Series/DataFrame.
@@ -179,7 +183,7 @@ def hash_tuples(vals, encoding="utf8", hash_key=None):
     return h
 
 
-def hash_tuple(val, encoding="utf8", hash_key=None):
+def hash_tuple(val, encoding: str = "utf8", hash_key=None):
     """
     Hash a single tuple efficiently
 
@@ -201,7 +205,7 @@ def hash_tuple(val, encoding="utf8", hash_key=None):
     return h
 
 
-def _hash_categorical(c, encoding, hash_key):
+def _hash_categorical(c, encoding: str, hash_key: str):
     """
     Hash a Categorical by hashing its categories, and then mapping the codes
     to the hashes
@@ -239,7 +243,7 @@ def _hash_categorical(c, encoding, hash_key):
     return result
 
 
-def hash_array(vals, encoding="utf8", hash_key=None, categorize=True):
+def hash_array(vals, encoding: str = "utf8", hash_key=None, categorize: bool = True):
     """
     Given a 1d array, return an array of deterministic integers.
 
@@ -317,7 +321,7 @@ def hash_array(vals, encoding="utf8", hash_key=None, categorize=True):
     return vals
 
 
-def _hash_scalar(val, encoding="utf8", hash_key=None):
+def _hash_scalar(val, encoding: str = "utf8", hash_key=None):
     """
     Hash scalar value
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index e09af3fd48ee6..fcc0aa3b1c015 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -782,7 +782,7 @@ def test_categorical_no_compress():
 
 def test_sort():
 
-    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: flake8
+    # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby  # noqa: E501
     # This should result in a properly sorted Series so that the plot
     # has a sorted x axis
     # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')

From 3250e378cb8631ef10c1b409582763032b681c64 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 25 Sep 2019 07:53:29 -0700
Subject: [PATCH 2/2] mypy fixups

---
 pandas/core/algorithms.py   | 16 +++++++++-------
 pandas/core/util/hashing.py |  5 ++++-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8458509541b48..002bbcc63d04f 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1933,13 +1933,15 @@ def diff(arr, n: int, axis: int = 0):
         f = _diff_special[arr.dtype.name]
         f(arr, out_arr, n, axis)
     else:
-        res_indexer = [slice(None)] * arr.ndim
-        res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
-        res_indexer = tuple(res_indexer)
-
-        lag_indexer = [slice(None)] * arr.ndim
-        lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
-        lag_indexer = tuple(lag_indexer)
+        # mypy rejects rebinding a list name to a tuple, so _res_indexer is the
+        #  mutable list and res_indexer its tuple form; likewise for lag_indexer.
+        _res_indexer = [slice(None)] * arr.ndim
+        _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
+        res_indexer = tuple(_res_indexer)
+
+        _lag_indexer = [slice(None)] * arr.ndim
+        _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
+        lag_indexer = tuple(_lag_indexer)
 
         # need to make sure that we account for na for datelike/timedelta
         # we don't actually want to subtract these i8 numbers
diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py
index bb5f8aace3774..4bcc53606aeca 100644
--- a/pandas/core/util/hashing.py
+++ b/pandas/core/util/hashing.py
@@ -129,7 +129,10 @@ def hash_pandas_object(
                 for _ in [None]
             )
             num_items += 1
-            hashes = itertools.chain(hashes, index_hash_generator)
+
+            # wrap the chain so `hashes` stays a generator, which keeps mypy happy
+            _hashes = itertools.chain(hashes, index_hash_generator)
+            hashes = (x for x in _hashes)
         h = _combine_hash_arrays(hashes, num_items)
 
         h = Series(h, index=obj.index, dtype="uint64", copy=False)