
Commit 28710b3

jbrockmendel authored and proost committed

CLN: type annotations (pandas-dev#29333)

1 parent 76df310 · commit 28710b3

File tree: 4 files changed, +43 -28 lines changed

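The cleanup applies the same pattern in every file: annotate keyword arguments whose defaults already fix the type (bool/int/str) and add obvious return annotations, leaving runtime behavior unchanged. A rough, hypothetical illustration of that pattern (not pandas code):

# Hypothetical illustration of the annotation style used in this commit:
# the defaults already imply the types, the annotations just make them explicit.
def describe_bins(n: int = 3, right: bool = True, duplicates: str = "raise") -> str:
    side = "right-closed" if right else "left-closed"
    return f"{n} {side} bins (duplicates={duplicates!r})"


print(describe_bins())                # 3 right-closed bins (duplicates='raise')
print(describe_bins(5, right=False))  # 5 left-closed bins (duplicates='raise')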

pandas/core/algorithms.py

+1 -1

@@ -1089,7 +1089,7 @@ def nsmallest(self):
         return self.compute("nsmallest")
 
     @staticmethod
-    def is_valid_dtype_n_method(dtype):
+    def is_valid_dtype_n_method(dtype) -> bool:
         """
         Helper function to determine if dtype is valid for
         nsmallest/nlargest methods
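For context, `is_valid_dtype_n_method` is the dtype guard for the SelectN machinery behind `Series.nsmallest`/`Series.nlargest`; the `-> bool` annotation only documents the return type it already had. A short usage sketch of the public entry points, with illustrative data:

import pandas as pd

s = pd.Series([10, 3, 7, 1, 5])

# Both methods route through pandas/core/algorithms.py, where the dtype is
# first validated by is_valid_dtype_n_method.
print(s.nsmallest(2))  # values 1 and 3, original index labels kept
print(s.nlargest(2))   # values 10 and 7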

pandas/core/dtypes/cast.py

+3 -3

@@ -495,7 +495,7 @@ def _ensure_dtype_type(value, dtype):
     return dtype.type(value)
 
 
-def infer_dtype_from(val, pandas_dtype=False):
+def infer_dtype_from(val, pandas_dtype: bool = False):
     """
     interpret the dtype from a scalar or array. This is a convenience
     routines to infer dtype from a scalar or an array
@@ -512,7 +512,7 @@ def infer_dtype_from(val, pandas_dtype=False):
     return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
 
 
-def infer_dtype_from_scalar(val, pandas_dtype=False):
+def infer_dtype_from_scalar(val, pandas_dtype: bool = False):
     """
     interpret the dtype from a scalar
 
@@ -587,7 +587,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
     return dtype, val
 
 
-def infer_dtype_from_array(arr, pandas_dtype=False):
+def infer_dtype_from_array(arr, pandas_dtype: bool = False):
     """
     infer the dtype from a scalar or array
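These `infer_dtype_from*` helpers are private to `pandas.core.dtypes.cast`; the annotation records that `pandas_dtype` is a flag choosing between plain numpy dtypes and pandas extension dtypes. A hedged sketch of how they are called, against the signature shown in this diff (the private API has changed in later pandas versions):

import pandas as pd
from pandas.core.dtypes.cast import infer_dtype_from_scalar

# Each helper returns a (dtype, value) pair inferred from its input.
print(infer_dtype_from_scalar(1))    # an int64 dtype and the value 1
print(infer_dtype_from_scalar(1.5))  # a float64 dtype and the value 1.5

# With pandas_dtype=True, extension dtypes such as DatetimeTZDtype can be
# returned for tz-aware scalars instead of falling back to object dtype.
ts = pd.Timestamp("2019-01-01", tz="UTC")
print(infer_dtype_from_scalar(ts, pandas_dtype=True))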

pandas/core/reshape/tile.py

+24 -15

@@ -1,8 +1,6 @@
 """
 Quantilization functions and related stuff
 """
-from functools import partial
-
 import numpy as np
 
 from pandas._libs import Timedelta, Timestamp
@@ -38,12 +36,12 @@
 def cut(
     x,
     bins,
-    right=True,
+    right: bool = True,
     labels=None,
-    retbins=False,
-    precision=3,
-    include_lowest=False,
-    duplicates="raise",
+    retbins: bool = False,
+    precision: int = 3,
+    include_lowest: bool = False,
+    duplicates: str = "raise",
 ):
     """
     Bin values into discrete intervals.
@@ -275,7 +273,14 @@ def cut(
     )
 
 
-def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"):
+def qcut(
+    x,
+    q,
+    labels=None,
+    retbins: bool = False,
+    precision: int = 3,
+    duplicates: str = "raise",
+):
     """
     Quantile-based discretization function. Discretize variable into
     equal-sized buckets based on rank or based on sample quantiles. For example
@@ -355,12 +360,12 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates="raise"):
 def _bins_to_cuts(
     x,
     bins,
-    right=True,
+    right: bool = True,
     labels=None,
-    precision=3,
-    include_lowest=False,
+    precision: int = 3,
+    include_lowest: bool = False,
     dtype=None,
-    duplicates="raise",
+    duplicates: str = "raise",
 ):
 
     if duplicates not in ["raise", "drop"]:
@@ -498,13 +503,15 @@ def _convert_bin_to_datelike_type(bins, dtype):
     return bins
 
 
-def _format_labels(bins, precision, right=True, include_lowest=False, dtype=None):
+def _format_labels(
+    bins, precision, right: bool = True, include_lowest: bool = False, dtype=None
+):
     """ based on the dtype, return our labels """
 
     closed = "right" if right else "left"
 
     if is_datetime64tz_dtype(dtype):
-        formatter = partial(Timestamp, tz=dtype.tz)
+        formatter = lambda x: Timestamp(x, tz=dtype.tz)
         adjust = lambda x: x - Timedelta("1ns")
     elif is_datetime64_dtype(dtype):
         formatter = Timestamp
@@ -556,7 +563,9 @@ def _preprocess_for_cut(x):
     return x_is_series, series_index, name, x
 
 
-def _postprocess_for_cut(fac, bins, retbins, x_is_series, series_index, name, dtype):
+def _postprocess_for_cut(
+    fac, bins, retbins: bool, x_is_series, series_index, name, dtype
+):
     """
     handles post processing for the cut method where
     we combine the index information if the originally passed
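`cut` and `qcut` are the public faces of this module, so the annotated defaults (`right`, `retbins`, `precision`, `include_lowest`, `duplicates`) are the ones users see in the docs. A quick usage sketch with made-up data:

import pandas as pd

ages = [2, 15, 27, 38, 45, 63, 80]

# Explicit bin edges; right=True (the default) keeps intervals right-closed.
print(pd.cut(ages, bins=[0, 18, 65, 100], labels=["minor", "adult", "senior"]))

# Quantile-based bins: roughly equal-sized buckets, here quartiles.
print(pd.qcut(ages, q=4, precision=1))

# retbins=True also returns the computed bin edges.
cats, edges = pd.cut(ages, bins=3, retbins=True)

The `partial(Timestamp, tz=dtype.tz)` to `lambda x: Timestamp(x, tz=dtype.tz)` change in `_format_labels` is behavior-preserving: both produce a one-argument callable that builds a tz-aware Timestamp, and the lambda lets the `functools` import be dropped.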

pandas/core/sorting.py

+15 -9

@@ -20,7 +20,7 @@
 _INT64_MAX = np.iinfo(np.int64).max
 
 
-def get_group_index(labels, shape, sort, xnull):
+def get_group_index(labels, shape, sort: bool, xnull: bool):
     """
     For the particular label_list, gets the offsets into the hypothetical list
     representing the totally ordered cartesian product of all possible label
@@ -48,7 +48,7 @@ def get_group_index(labels, shape, sort, xnull):
     labels are equal at all location.
     """
 
-    def _int64_cut_off(shape):
+    def _int64_cut_off(shape) -> int:
        acc = 1
        for i, mul in enumerate(shape):
            acc *= int(mul)
@@ -125,7 +125,7 @@ def get_compressed_ids(labels, sizes):
     return compress_group_index(ids, sort=True)
 
 
-def is_int64_overflow_possible(shape):
+def is_int64_overflow_possible(shape) -> bool:
     the_prod = 1
     for x in shape:
         the_prod *= int(x)
@@ -153,7 +153,7 @@ def decons_group_index(comp_labels, shape):
     return label_list[::-1]
 
 
-def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
+def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool):
     """
     reconstruct labels from observed group ids
 
@@ -177,7 +177,7 @@ def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull):
     return [i8copy(lab[i]) for lab in labels]
 
 
-def indexer_from_factorized(labels, shape, compress=True):
+def indexer_from_factorized(labels, shape, compress: bool = True):
     ids = get_group_index(labels, shape, sort=True, xnull=False)
 
     if not compress:
@@ -235,7 +235,7 @@ def lexsort_indexer(keys, orders=None, na_position="last"):
     return indexer_from_factorized(labels, shape)
 
 
-def nargsort(items, kind="quicksort", ascending=True, na_position="last"):
+def nargsort(items, kind="quicksort", ascending: bool = True, na_position="last"):
     """
     This is intended to be a drop-in replacement for np.argsort which
     handles NaNs. It adds ascending and na_position parameters.
@@ -325,7 +325,7 @@ def get_indexer_dict(label_list, keys):
 # sorting levels...cleverly?
 
 
-def get_group_index_sorter(group_index, ngroups):
+def get_group_index_sorter(group_index, ngroups: int):
     """
     algos.groupsort_indexer implements `counting sort` and it is at least
     O(ngroups), where
@@ -350,7 +350,7 @@ def get_group_index_sorter(group_index, ngroups):
         return group_index.argsort(kind="mergesort")
 
 
-def compress_group_index(group_index, sort=True):
+def compress_group_index(group_index, sort: bool = True):
     """
     Group_index is offsets into cartesian product of all possible labels. This
     space can be huge, so this function compresses it, by computing offsets
@@ -391,7 +391,13 @@ def _reorder_by_uniques(uniques, labels):
     return uniques, labels
 
 
-def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False, verify=True):
+def safe_sort(
+    values,
+    labels=None,
+    na_sentinel: int = -1,
+    assume_unique: bool = False,
+    verify: bool = True,
+):
     """
     Sort ``values`` and reorder corresponding ``labels``.
     ``values`` should be unique if ``labels`` is not None.
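The `pandas.core.sorting` helpers are internal, so these annotations mostly document expectations (`sort`, `xnull`, `compress` are flags; `ngroups` and `na_sentinel` are ints). A hedged sketch of `safe_sort`, the most self-contained of them, against the signature in this diff (the function has since moved and its signature has changed in newer pandas versions):

import numpy as np
from pandas.core.sorting import safe_sort

values = np.array([30, 10, 20])
labels = np.array([2, 0, 1, 1, -1])  # -1 is the na_sentinel for missing entries

# safe_sort returns the sorted values and, when labels are given, the labels
# remapped so they still refer to the same underlying values.
ordered, new_labels = safe_sort(values, labels, na_sentinel=-1)
print(ordered)     # [10 20 30]
print(new_labels)  # remapped positions into the sorted array, -1 preserved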
