From f34108d93a1cafbb13d336428cc5eacfde8a8aa0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 24 Sep 2019 16:59:35 -0700 Subject: [PATCH 1/2] CLN --- pandas/core/algorithms.py | 8 +++----- pandas/core/util/hashing.py | 16 ++++++++++------ pandas/tests/groupby/test_categorical.py | 2 +- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6e73e1636a75b..8458509541b48 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -176,7 +176,6 @@ def _reconstruct_data(values, dtype, original): ------- Index for extension types, otherwise ndarray casted to dtype """ - from pandas import Index if is_extension_array_dtype(dtype): values = dtype.construct_array_type()._from_sequence(values) @@ -184,7 +183,7 @@ def _reconstruct_data(values, dtype, original): values = values.astype(dtype) # we only support object dtypes bool Index - if isinstance(original, Index): + if isinstance(original, ABCIndexClass): values = values.astype(object) elif dtype is not None: values = values.astype(dtype) @@ -833,7 +832,7 @@ def duplicated(values, keep="first"): return f(values, keep=keep) -def mode(values, dropna=True): +def mode(values, dropna: bool = True): """ Returns the mode(s) of an array. @@ -1888,7 +1887,7 @@ def searchsorted(arr, value, side="left", sorter=None): } -def diff(arr, n, axis=0): +def diff(arr, n: int, axis: int = 0): """ difference of n between self, analogous to s-s.shift(n) @@ -1904,7 +1903,6 @@ def diff(arr, n, axis=0): Returns ------- shifted - """ n = int(n) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index bcdbf0855cbb4..bb5f8aace3774 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -26,7 +26,7 @@ _default_hash_key = "0123456789123456" -def _combine_hash_arrays(arrays, num_items): +def _combine_hash_arrays(arrays, num_items: int): """ Parameters ---------- @@ -55,7 +55,11 @@ def _combine_hash_arrays(arrays, num_items): def hash_pandas_object( - obj, index=True, encoding="utf8", hash_key=None, categorize=True + obj, + index: bool = True, + encoding: str = "utf8", + hash_key=None, + categorize: bool = True, ): """ Return a data hash of the Index/Series/DataFrame. @@ -179,7 +183,7 @@ def hash_tuples(vals, encoding="utf8", hash_key=None): return h -def hash_tuple(val, encoding="utf8", hash_key=None): +def hash_tuple(val, encoding: str = "utf8", hash_key=None): """ Hash a single tuple efficiently @@ -201,7 +205,7 @@ def hash_tuple(val, encoding="utf8", hash_key=None): return h -def _hash_categorical(c, encoding, hash_key): +def _hash_categorical(c, encoding: str, hash_key: str): """ Hash a Categorical by hashing its categories, and then mapping the codes to the hashes @@ -239,7 +243,7 @@ def _hash_categorical(c, encoding, hash_key): return result -def hash_array(vals, encoding="utf8", hash_key=None, categorize=True): +def hash_array(vals, encoding: str = "utf8", hash_key=None, categorize: bool = True): """ Given a 1d array, return an array of deterministic integers. @@ -317,7 +321,7 @@ def hash_array(vals, encoding="utf8", hash_key=None, categorize=True): return vals -def _hash_scalar(val, encoding="utf8", hash_key=None): +def _hash_scalar(val, encoding: str = "utf8", hash_key=None): """ Hash scalar value diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e09af3fd48ee6..fcc0aa3b1c015 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -782,7 +782,7 @@ def test_categorical_no_compress(): def test_sort(): - # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # noqa: flake8 + # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # noqa: E501 # This should result in a properly sorted Series so that the plot # has a sorted x axis # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') From 3250e378cb8631ef10c1b409582763032b681c64 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 25 Sep 2019 07:53:29 -0700 Subject: [PATCH 2/2] mypy fixups --- pandas/core/algorithms.py | 16 +++++++++------- pandas/core/util/hashing.py | 5 ++++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8458509541b48..002bbcc63d04f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1933,13 +1933,15 @@ def diff(arr, n: int, axis: int = 0): f = _diff_special[arr.dtype.name] f(arr, out_arr, n, axis) else: - res_indexer = [slice(None)] * arr.ndim - res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) - res_indexer = tuple(res_indexer) - - lag_indexer = [slice(None)] * arr.ndim - lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) - lag_indexer = tuple(lag_indexer) + # To keep mypy happy, _res_indexer is a list while res_indexer is + # a tuple, ditto for lag_indexer. + _res_indexer = [slice(None)] * arr.ndim + _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) + res_indexer = tuple(_res_indexer) + + _lag_indexer = [slice(None)] * arr.ndim + _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) + lag_indexer = tuple(_lag_indexer) # need to make sure that we account for na for datelike/timedelta # we don't actually want to subtract these i8 numbers diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index bb5f8aace3774..4bcc53606aeca 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -129,7 +129,10 @@ def hash_pandas_object( for _ in [None] ) num_items += 1 - hashes = itertools.chain(hashes, index_hash_generator) + + # keep `hashes` specifically a generator to keep mypy happy + _hashes = itertools.chain(hashes, index_hash_generator) + hashes = (x for x in _hashes) h = _combine_hash_arrays(hashes, num_items) h = Series(h, index=obj.index, dtype="uint64", copy=False)