From ea5f60f6ba0cb5f51347646808f8cdcc7b06e55d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 22 Oct 2019 19:01:00 -0700 Subject: [PATCH 1/7] add typing --- pandas/core/algorithms.py | 18 ++++++++++------- pandas/core/indexes/interval.py | 2 +- pandas/core/nanops.py | 36 ++++++++++++++++++--------------- pandas/io/common.py | 4 ++-- pandas/io/excel/_odfreader.py | 5 +++-- 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2c9f632e8bc24..7760c8ec397a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -692,7 +692,12 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): def value_counts( - values, sort=True, ascending=False, normalize=False, bins=None, dropna=True + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, ): """ Compute a histogram of the counts of non-null values. @@ -700,22 +705,21 @@ def value_counts( Parameters ---------- values : ndarray (1-d) - sort : boolean, default True + sort : bool, default True Sort by values - ascending : boolean, default False + ascending : bool, default False Sort in ascending order - normalize: boolean, default False + normalize: bool, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN Returns ------- - value_counts : Series - + Series """ from pandas.core.series import Series, Index diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a2d48b5100a2e..3b6ac25e7c6b4 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1340,7 +1340,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": return self[mask] - def _setop(op_name, sort=None): + def _setop(op_name: str, sort=None): @SetopCheck(op_name=op_name) def func(self, other, sort=sort): result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 09b80d1b3a9ac..50b7e97608908 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -7,7 +7,7 @@ from pandas._config import get_option -from pandas._libs import iNaT, lib, tslibs +from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -53,7 +53,7 @@ def __init__(self, *dtypes): super().__init__() self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) - def check(self, obj): + def check(self, obj) -> bool: return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) def __call__(self, f): @@ -128,7 +128,7 @@ def f(values, axis=None, skipna=True, **kwds): return f -def _bn_ok_dtype(dt, name): +def _bn_ok_dtype(dt, name: str) -> bool: # Bottleneck chokes on datetime64 if not is_object_dtype(dt) and not ( is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt) @@ -149,7 +149,7 @@ def _bn_ok_dtype(dt, name): return False -def _has_infs(result): +def _has_infs(result) -> bool: if isinstance(result, np.ndarray): if result.dtype == "f8": return lib.has_infs_f8(result.ravel()) @@ -176,19 +176,22 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): return -np.inf else: if 
fill_value_typ is None: - return tslibs.iNaT + return iNaT else: if fill_value_typ == "+inf": # need the max int here return _int64_max else: - return tslibs.iNaT + return iNaT def _maybe_get_mask( values: np.ndarray, skipna: bool, mask: Optional[np.ndarray] ) -> Optional[np.ndarray]: - """ This function will compute a mask iff it is necessary. Otherwise, + """ + Compute a mask iff necessary. + + This function will compute a mask iff it is necessary. Otherwise, return the provided mask (potentially None) when a mask does not need to be computed. @@ -214,7 +217,6 @@ def _maybe_get_mask( Returns ------- Optional[np.ndarray] - """ if mask is None: @@ -346,7 +348,7 @@ def _wrap_results(result, dtype, fill_value=None): assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan - result = tslibs.Timestamp(result, tz=tz) + result = Timestamp(result, tz=tz) else: result = result.view(dtype) elif is_timedelta64_dtype(dtype): @@ -358,21 +360,22 @@ def _wrap_results(result, dtype, fill_value=None): if np.fabs(result) > _int64_max: raise ValueError("overflow in timedelta operation") - result = tslibs.Timedelta(result, unit="ns") + result = Timedelta(result, unit="ns") else: result = result.astype("m8[ns]").view(dtype) return result -def _na_for_min_count(values, axis): - """Return the missing value for `values` +def _na_for_min_count(values, axis: Optional[int]): + """ + Return the missing value for `values`. Parameters ---------- values : ndarray axis : int or None - axis for the reduction + axis for the reduction, required if values.ndim > 1. Returns ------- @@ -388,13 +391,14 @@ def _na_for_min_count(values, axis): if values.ndim == 1: return fill_value else: + assert axis is not None # assertion to make mypy happy result_shape = values.shape[:axis] + values.shape[axis + 1 :] result = np.empty(result_shape, dtype=values.dtype) result.fill(fill_value) return result -def nanany(values, axis=None, skipna=True, mask=None): +def nanany(values, axis=None, skipna: bool = True, mask=None): """ Check if any elements along an axis evaluate to True. @@ -426,7 +430,7 @@ def nanany(values, axis=None, skipna=True, mask=None): return values.any(axis) -def nanall(values, axis=None, skipna=True, mask=None): +def nanall(values, axis=None, skipna: bool = True, mask=None): """ Check if all elements along an axis evaluate to True. 
@@ -1195,7 +1199,7 @@ def _maybe_null_out( else: # GH12941, use None to auto cast null result[null_mask] = None - elif result is not tslibs.NaT: + elif result is not NaT: if mask is not None: null_mask = mask.size - mask.sum() else: diff --git a/pandas/io/common.py b/pandas/io/common.py index 0b8594bbbd3e4..0bef14e4999c7 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -569,11 +569,11 @@ def __iter__(self) -> "MMapWrapper": return self def __next__(self) -> str: - newline = self.mmap.readline() + newbytes = self.mmap.readline() # readline returns bytes, not str, but Python's CSV reader # expects str, so convert the output to str before continuing - newline = newline.decode("utf-8") + newline = newbytes.decode("utf-8") # mmap doesn't raise if reading past the allocated # data but instead returns an empty string, so raise diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 3be36663bac79..dd6a8c38e1a55 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -9,7 +9,8 @@ class _ODFReader(_BaseExcelReader): - """Read tables out of OpenDocument formatted files + """ + Read tables out of OpenDocument formatted files. Parameters ---------- @@ -60,7 +61,7 @@ def get_sheet_by_name(self, name: str): if table.getAttribute("name") == name: return table - raise ValueError("sheet {name} not found".format(name)) + raise ValueError("sheet {name} not found".format(name=name)) def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: """Parse an ODF Table into a list of lists From a05642d01818543b27dc5763a1d1322aef424b1c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 22 Oct 2019 20:59:17 -0700 Subject: [PATCH 2/7] add types in libindex --- pandas/_libs/index.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 144d555258c50..666fdb3dfeee3 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -170,17 +170,17 @@ cdef class IndexEngine: raise KeyError(val) - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the sizeof our mapping """ if not self.is_mapping_populated: return 0 return self.mapping.sizeof(deep=deep) - def __sizeof__(self): + def __sizeof__(self) -> int: return self.sizeof() @property - def is_unique(self): + def is_unique(self) -> bool: if self.need_unique_check: self._do_unique_check() @@ -192,14 +192,14 @@ cdef class IndexEngine: self._ensure_mapping_populated() @property - def is_monotonic_increasing(self): + def is_monotonic_increasing(self) -> bool: if self.need_monotonic_check: self._do_monotonic_check() return self.monotonic_inc == 1 @property - def is_monotonic_decreasing(self): + def is_monotonic_decreasing(self) -> bool: if self.need_monotonic_check: self._do_monotonic_check() @@ -242,7 +242,7 @@ cdef class IndexEngine: hash(val) @property - def is_mapping_populated(self): + def is_mapping_populated(self) -> bool: return self.mapping is not None cdef inline _ensure_mapping_populated(self): From 651f4b30abeaa6e780f2367562b529b69377a0fd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 07:40:28 -0700 Subject: [PATCH 3/7] expand iff --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 50b7e97608908..5dd4cc946572c 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -189,7 +189,7 @@ def _maybe_get_mask( values: np.ndarray, skipna: bool, mask: 
Optional[np.ndarray] ) -> Optional[np.ndarray]: """ - Compute a mask iff necessary. + Compute a mask if and only if necessary. This function will compute a mask iff it is necessary. Otherwise, return the provided mask (potentially None) when a mask does not need to be From e31cea4b6c3b7046ab40e85692d27bb059717074 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 09:10:23 -0700 Subject: [PATCH 4/7] annotate value_counts return type --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7760c8ec397a9..7f08186b766ab 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -698,7 +698,7 @@ def value_counts( normalize: bool = False, bins=None, dropna: bool = True, -): +) -> "Series": """ Compute a histogram of the counts of non-null values. From 93fa8a04c33c33554d26dff3609810e8ad125437 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 10:15:38 -0700 Subject: [PATCH 5/7] revert annotation that failed mypy --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7f08186b766ab..7760c8ec397a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -698,7 +698,7 @@ def value_counts( normalize: bool = False, bins=None, dropna: bool = True, -) -> "Series": +): """ Compute a histogram of the counts of non-null values. From 208c7aac9e52781afcbe7983fe1935308f3fb0ef Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 23 Oct 2019 18:03:34 -0700 Subject: [PATCH 6/7] revert --- pandas/io/excel/_odfreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index dd6a8c38e1a55..66a186161e01b 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -61,7 +61,7 @@ def get_sheet_by_name(self, name: str): if table.getAttribute("name") == name: return table - raise ValueError("sheet {name} not found".format(name=name)) + raise ValueError("sheet {name} not found".format(name)) def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: """Parse an ODF Table into a list of lists From 633c583a4d14fb56e60a542664ded82cdb0504a4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 24 Oct 2019 19:03:43 -0700 Subject: [PATCH 7/7] dummy commit to force CI
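
Note on patches 4/7 and 5/7: the `-> "Series"` return annotation on `value_counts` was added and then reverted because it failed the mypy check, presumably because `Series` is imported only inside the function body (`from pandas.core.series import Series, Index`), so the string annotation cannot be resolved at module scope. Below is a minimal sketch, not taken from the patch, of one way such a forward reference can be made to type-check using a `TYPE_CHECKING`-guarded import; the function name `value_counts_sketch` is hypothetical and only illustrates the pattern.

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Import seen only by static type checkers; skipped at runtime,
        # so it does not reintroduce a circular import.
        from pandas import Series


    def value_counts_sketch(values, sort: bool = True, dropna: bool = True) -> "Series":
        # Runtime import deferred into the function body, mirroring the
        # pattern used in pandas/core/algorithms.py.
        from pandas import Series

        return Series(values).value_counts(sort=sort, dropna=dropna)

The string form "Series" keeps the annotation lazy: the type checker resolves it against the guarded import, while the interpreter never evaluates it.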