-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: Add types in a handful of places #29178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
ea5f60f
a2b536f
a05642d
9ce83db
651f4b3
decd5e2
e31cea4
93fa8a0
11d3b1d
208c7aa
7cb7ceb
c6bf30c
633c583
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -692,30 +692,34 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): | |||||
|
||||||
|
||||||
def value_counts( | ||||||
values, sort=True, ascending=False, normalize=False, bins=None, dropna=True | ||||||
values, | ||||||
sort: bool = True, | ||||||
ascending: bool = False, | ||||||
normalize: bool = False, | ||||||
bins=None, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(unless docstring is wrong) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. docstring says int. should it be bool? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No this is a typo on my end There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Meant Optional[int] though? Can do in a follow up |
||||||
dropna: bool = True, | ||||||
): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can annotate return as "Series". Not sure it will type check but still helpful I think |
||||||
""" | ||||||
Compute a histogram of the counts of non-null values. | ||||||
|
||||||
Parameters | ||||||
---------- | ||||||
values : ndarray (1-d) | ||||||
sort : boolean, default True | ||||||
sort : bool, default True | ||||||
Sort by values | ||||||
ascending : boolean, default False | ||||||
ascending : bool, default False | ||||||
Sort in ascending order | ||||||
normalize: boolean, default False | ||||||
normalize: bool, default False | ||||||
If True then compute a relative histogram | ||||||
bins : integer, optional | ||||||
Rather than count values, group them into half-open bins, | ||||||
convenience for pd.cut, only works with numeric data | ||||||
dropna : boolean, default True | ||||||
dropna : bool, default True | ||||||
Don't include counts of NaN | ||||||
|
||||||
Returns | ||||||
------- | ||||||
value_counts : Series | ||||||
|
||||||
Series | ||||||
""" | ||||||
from pandas.core.series import Series, Index | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1340,7 +1340,7 @@ def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex": | |
|
||
return self[mask] | ||
|
||
def _setop(op_name, sort=None): | ||
def _setop(op_name: str, sort=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was going to suggest annotating Can annotate the return here at least as |
||
@SetopCheck(op_name=op_name) | ||
def func(self, other, sort=sort): | ||
result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
|
||
from pandas._config import get_option | ||
|
||
from pandas._libs import iNaT, lib, tslibs | ||
from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib | ||
from pandas.compat._optional import import_optional_dependency | ||
|
||
from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask | ||
|
@@ -53,7 +53,7 @@ def __init__(self, *dtypes): | |
super().__init__() | ||
self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) | ||
|
||
def check(self, obj): | ||
def check(self, obj) -> bool: | ||
return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) | ||
|
||
def __call__(self, f): | ||
|
@@ -128,7 +128,7 @@ def f(values, axis=None, skipna=True, **kwds): | |
return f | ||
|
||
|
||
def _bn_ok_dtype(dt, name): | ||
def _bn_ok_dtype(dt, name: str) -> bool: | ||
# Bottleneck chokes on datetime64 | ||
if not is_object_dtype(dt) and not ( | ||
is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt) | ||
|
@@ -149,7 +149,7 @@ def _bn_ok_dtype(dt, name): | |
return False | ||
|
||
|
||
def _has_infs(result): | ||
def _has_infs(result) -> bool: | ||
if isinstance(result, np.ndarray): | ||
if result.dtype == "f8": | ||
return lib.has_infs_f8(result.ravel()) | ||
|
@@ -176,19 +176,22 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): | |
return -np.inf | ||
else: | ||
if fill_value_typ is None: | ||
return tslibs.iNaT | ||
return iNaT | ||
else: | ||
if fill_value_typ == "+inf": | ||
# need the max int here | ||
return _int64_max | ||
else: | ||
return tslibs.iNaT | ||
return iNaT | ||
|
||
|
||
def _maybe_get_mask( | ||
values: np.ndarray, skipna: bool, mask: Optional[np.ndarray] | ||
) -> Optional[np.ndarray]: | ||
""" This function will compute a mask iff it is necessary. Otherwise, | ||
""" | ||
Compute a mask iff necessary. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. iff -> if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. iff --> "if and only if" |
||
|
||
This function will compute a mask iff it is necessary. Otherwise, | ||
return the provided mask (potentially None) when a mask does not need to be | ||
computed. | ||
|
||
|
@@ -214,7 +217,6 @@ def _maybe_get_mask( | |
Returns | ||
------- | ||
Optional[np.ndarray] | ||
|
||
""" | ||
|
||
if mask is None: | ||
|
@@ -346,7 +348,7 @@ def _wrap_results(result, dtype, fill_value=None): | |
assert not isna(fill_value), "Expected non-null fill_value" | ||
if result == fill_value: | ||
result = np.nan | ||
result = tslibs.Timestamp(result, tz=tz) | ||
result = Timestamp(result, tz=tz) | ||
else: | ||
result = result.view(dtype) | ||
elif is_timedelta64_dtype(dtype): | ||
|
@@ -358,21 +360,22 @@ def _wrap_results(result, dtype, fill_value=None): | |
if np.fabs(result) > _int64_max: | ||
raise ValueError("overflow in timedelta operation") | ||
|
||
result = tslibs.Timedelta(result, unit="ns") | ||
result = Timedelta(result, unit="ns") | ||
else: | ||
result = result.astype("m8[ns]").view(dtype) | ||
|
||
return result | ||
|
||
|
||
def _na_for_min_count(values, axis): | ||
"""Return the missing value for `values` | ||
def _na_for_min_count(values, axis: Optional[int]): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be = None then There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we're requiring the caller to pass something though |
||
""" | ||
Return the missing value for `values`. | ||
|
||
Parameters | ||
---------- | ||
values : ndarray | ||
axis : int or None | ||
axis for the reduction | ||
axis for the reduction, required if values.ndim > 1. | ||
|
||
Returns | ||
------- | ||
|
@@ -388,13 +391,14 @@ def _na_for_min_count(values, axis): | |
if values.ndim == 1: | ||
return fill_value | ||
else: | ||
assert axis is not None # assertion to make mypy happy | ||
result_shape = values.shape[:axis] + values.shape[axis + 1 :] | ||
result = np.empty(result_shape, dtype=values.dtype) | ||
result.fill(fill_value) | ||
return result | ||
|
||
|
||
def nanany(values, axis=None, skipna=True, mask=None): | ||
def nanany(values, axis=None, skipna: bool = True, mask=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if applicable here but you could use the Annotated |
||
""" | ||
Check if any elements along an axis evaluate to True. | ||
|
||
|
@@ -426,7 +430,7 @@ def nanany(values, axis=None, skipna=True, mask=None): | |
return values.any(axis) | ||
|
||
|
||
def nanall(values, axis=None, skipna=True, mask=None): | ||
def nanall(values, axis=None, skipna: bool = True, mask=None): | ||
""" | ||
Check if all elements along an axis evaluate to True. | ||
|
||
|
@@ -1195,7 +1199,7 @@ def _maybe_null_out( | |
else: | ||
# GH12941, use None to auto cast null | ||
result[null_mask] = None | ||
elif result is not tslibs.NaT: | ||
elif result is not NaT: | ||
if mask is not None: | ||
null_mask = mask.size - mask.sum() | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,8 @@ | |
|
||
|
||
class _ODFReader(_BaseExcelReader): | ||
"""Read tables out of OpenDocument formatted files | ||
""" | ||
Read tables out of OpenDocument formatted files. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -60,7 +61,7 @@ def get_sheet_by_name(self, name: str): | |
if table.getAttribute("name") == name: | ||
return table | ||
|
||
raise ValueError("sheet {name} not found".format(name)) | ||
raise ValueError("sheet {name} not found".format(name=name)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. closes #27676 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is being address in #27677 with more detail (i.e. tests) so would rather revert here and let that PR take care of it, especially since it is their first contribution |
||
|
||
def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: | ||
"""Parse an ODF Table into a list of lists | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can type this as ndarray I think
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the
name = getattr(values, "name", None)
suggests that it could be a Series or IndexThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think
AnyArrayLike
from pandas._typing is applicable here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
im hoping to refactor such that this function (as many as possible in this module, really) doesn't handle Series or Index so prefer not to make this official
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good