CLN: Assorted typings #28604

Merged (3 commits) on Oct 1, 2019
24 changes: 12 additions & 12 deletions pandas/core/algorithms.py
@@ -176,15 +176,14 @@ def _reconstruct_data(values, dtype, original):
-------
Index for extension types, otherwise ndarray casted to dtype
"""
- from pandas import Index

if is_extension_array_dtype(dtype):
values = dtype.construct_array_type()._from_sequence(values)
elif is_bool_dtype(dtype):
values = values.astype(dtype)

# we only support object dtypes bool Index
- if isinstance(original, Index):
+ if isinstance(original, ABCIndexClass):
values = values.astype(object)
elif dtype is not None:
values = values.astype(dtype)
@@ -833,7 +832,7 @@ def duplicated(values, keep="first"):
return f(values, keep=keep)


- def mode(values, dropna=True):
+ def mode(values, dropna: bool = True):
"""
Returns the mode(s) of an array.

@@ -1888,7 +1887,7 @@ def searchsorted(arr, value, side="left", sorter=None):
}


- def diff(arr, n, axis=0):
+ def diff(arr, n: int, axis: int = 0):
"""
difference of n between self,
analogous to s-s.shift(n)
@@ -1904,7 +1903,6 @@ def diff(arr, n, axis=0):
Returns
-------
shifted

"""

n = int(n)
@@ -1935,13 +1933,15 @@ def diff(arr, n, axis=0):
f = _diff_special[arr.dtype.name]
f(arr, out_arr, n, axis)
else:
- res_indexer = [slice(None)] * arr.ndim
- res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
- res_indexer = tuple(res_indexer)

- lag_indexer = [slice(None)] * arr.ndim
- lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
- lag_indexer = tuple(lag_indexer)
+ # To keep mypy happy, _res_indexer is a list while res_indexer is
+ # a tuple, ditto for lag_indexer.
+ _res_indexer = [slice(None)] * arr.ndim
+ _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
+ res_indexer = tuple(_res_indexer)

+ _lag_indexer = [slice(None)] * arr.ndim
+ _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None)
+ lag_indexer = tuple(_lag_indexer)

# need to make sure that we account for na for datelike/timedelta
# we don't actually want to subtract these i8 numbers
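For context on the indexer rename in the hunk above: mypy pins a variable's type to whatever it infers from the first assignment, so reassigning a name that starts life as a list of slices to a tuple of slices produces an "Incompatible types in assignment" error once the function body is type-checked. A minimal standalone sketch of the pattern (not the pandas code itself, just an illustration):

```python
import numpy as np

def shifted_view(arr: np.ndarray, n: int, axis: int = 0) -> np.ndarray:
    # mypy infers this throwaway name as a list of slices...
    _indexer = [slice(None)] * arr.ndim
    _indexer[axis] = slice(n, None) if n >= 0 else slice(None, n)
    # ...and the tuple gets a different name, so no variable is ever
    # reassigned from list to tuple, which is what mypy would reject.
    indexer = tuple(_indexer)
    return arr[indexer]
```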
21 changes: 14 additions & 7 deletions pandas/core/util/hashing.py
@@ -26,7 +26,7 @@
_default_hash_key = "0123456789123456"


- def _combine_hash_arrays(arrays, num_items):
+ def _combine_hash_arrays(arrays, num_items: int):
"""
Parameters
----------
@@ -55,7 +55,11 @@ def _combine_hash_arrays(arrays, num_items):


def hash_pandas_object(
- obj, index=True, encoding="utf8", hash_key=None, categorize=True
+ obj,
+ index: bool = True,
+ encoding: str = "utf8",
+ hash_key=None,
+ categorize: bool = True,
Member

Adding type hints here is causing pandas/core/util/hashing.py:132: error: Incompatible types in assignment (expression has type "chain[Any]", variable has type "Generator[Any, None, None]") further down, as the body is now checked.
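A minimal, hypothetical reproduction of the kind of error being described (standalone sketch, not the pandas code):

```python
import itertools
from typing import Iterable

def combine(items: Iterable[str]) -> None:
    # mypy infers `hashes` as Generator[int, None, None] from this first assignment.
    hashes = (hash(x) for x in items)
    # Reassigning the same name to an itertools.chain object then fails with an
    # "Incompatible types in assignment" error, since chain is not a Generator.
    hashes = itertools.chain(hashes, iter([0]))
    for h in hashes:
        print(h)
```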

Member

It looks like the conda environment now has mypy 0.720. Result!

Fix is here, 304351e, if you want to include it in the PR. (Can't use py3.6 variable annotations though.)
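The contents of 304351e aren't shown here, but a fix along those lines, spelled as a type comment rather than a py3.6 variable annotation, might look roughly like this (hypothetical sketch):

```python
import itertools
from typing import Iterator, List

def combine(items: List[str]) -> Iterator[int]:
    # The type comment widens the variable's declared type up front, so the
    # later reassignment to an itertools.chain object still type-checks.
    hashes = (hash(x) for x in items)  # type: Iterator[int]
    hashes = itertools.chain(hashes, iter([0]))
    return hashes
```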

):
"""
Return a data hash of the Index/Series/DataFrame.
@@ -125,7 +129,10 @@ def hash_pandas_object(
for _ in [None]
)
num_items += 1
- hashes = itertools.chain(hashes, index_hash_generator)

+ # keep `hashes` specifically a generator to keep mypy happy
+ _hashes = itertools.chain(hashes, index_hash_generator)
+ hashes = (x for x in _hashes)
Contributor

You can also do a cast here (and in the change above); not sure what is more idiomatic and what we have settled on.

cc @simonjayhawkins
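For reference, one way the cast approach could look, casting the first assignment so the variable is typed as a plain Iterator (an illustrative sketch only, not what the PR does):

```python
import itertools
from typing import Iterator, List, cast

def combine(items: List[str]) -> Iterator[int]:
    # cast() has no runtime effect; it only tells mypy to treat the generator
    # as Iterator[int], so the later chain() reassignment is compatible.
    hashes = cast(Iterator[int], (hash(x) for x in items))
    hashes = itertools.chain(hashes, iter([0]))
    return hashes
```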

Member

> you can also do a cast here

Our type hints policy is "..., the use of cast is strongly discouraged. Where applicable a refactor of the code to appease static analysis is preferable". https://dev.pandas.io/docs/development/contributing.html#style-guidelines

Personally I'm not a fan of changing the runtime behavior of code just to appease mypy unless it's absolutely necessary or it makes for cleaner code. (This applies to both our policy and the changes in this PR.)

The problem in this function is that mypy considers the initial assignment to be the definition of a variable (https://mypy.readthedocs.io/en/stable/type_inference_and_annotations.html#type-inference), which occurs on L114. mypy infers the type as Generator[Any, None, None].

It is possible to override the inferred type of a variable by using a variable type annotation: https://mypy.readthedocs.io/en/stable/type_inference_and_annotations.html#explicit-types-for-variables

This is the approach I took to silence this mypy error in 304351e. I prefer this as it has no effect on the runtime behavior.
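For illustration, the variable-annotation form of that approach looks like this (py3.6 syntax shown for clarity; as noted above, this codebase would need the equivalent type comment instead):

```python
import itertools
from typing import Iterator

values = ["a", "b", "c"]
# An explicit annotation overrides the type mypy would otherwise infer from
# the right-hand side, and has no effect on runtime behavior.
hashes: Iterator[int] = (hash(v) for v in values)
hashes = itertools.chain(hashes, iter([0]))
```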

With regard to using a leading underscore to avoid Incompatible types in assignment errors, my preference would be a trailing underscore. In PEP 8, a trailing underscore is used by convention to avoid conflicts with Python keywords, so I see more similarity to that conflict-avoidance idiom here than to the leading-underscore idiom. Not sure what's best, but that's just my POV if we want to choose a consistent pattern moving forward.
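Concretely, the two naming patterns under discussion look like this (purely illustrative):

```python
import itertools

values = ["a", "b", "c"]

# Leading underscore on the helper name, as in this PR:
_hashes = itertools.chain((hash(v) for v in values), iter([0]))
hashes = (x for x in _hashes)

# Trailing underscore, the alternative suggested here:
hashes_ = itertools.chain((hash(v) for v in values), iter([0]))
hashes = (x for x in hashes_)
```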

Member Author

It looks like the main actionable suggestion here is to change _hashes to hashes_; is that right, @simonjayhawkins? I'm fine with that.

h = _combine_hash_arrays(hashes, num_items)

h = Series(h, index=obj.index, dtype="uint64", copy=False)
@@ -179,7 +186,7 @@ def hash_tuples(vals, encoding="utf8", hash_key=None):
return h


- def hash_tuple(val, encoding="utf8", hash_key=None):
+ def hash_tuple(val, encoding: str = "utf8", hash_key=None):
"""
Hash a single tuple efficiently

@@ -201,7 +208,7 @@ def hash_tuple(val, encoding="utf8", hash_key=None):
return h


- def _hash_categorical(c, encoding, hash_key):
+ def _hash_categorical(c, encoding: str, hash_key: str):
"""
Hash a Categorical by hashing its categories, and then mapping the codes
to the hashes
@@ -239,7 +246,7 @@ def _hash_categorical(c, encoding, hash_key):
return result


- def hash_array(vals, encoding="utf8", hash_key=None, categorize=True):
+ def hash_array(vals, encoding: str = "utf8", hash_key=None, categorize: bool = True):
"""
Given a 1d array, return an array of deterministic integers.

@@ -317,7 +324,7 @@ def hash_array(vals, encoding="utf8", hash_key=None, categorize=True):
return vals


- def _hash_scalar(val, encoding="utf8", hash_key=None):
+ def _hash_scalar(val, encoding: str = "utf8", hash_key=None):
"""
Hash scalar value
