
TYP: fix ignores #40389


Merged
merged 10 commits on Mar 12, 2021
41 changes: 18 additions & 23 deletions pandas/core/groupby/groupby.py
@@ -46,6 +46,7 @@ class providing the base-class of operations.
)
import pandas._libs.groupby as libgroupby
from pandas._typing import (
ArrayLike,
F,
FrameOrSeries,
FrameOrSeriesUnion,
@@ -68,7 +69,6 @@ class providing the base-class of operations.
ensure_float,
is_bool_dtype,
is_datetime64_dtype,
is_extension_array_dtype,
is_integer_dtype,
is_numeric_dtype,
is_object_dtype,
@@ -85,6 +85,7 @@ class providing the base-class of operations.
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
)
from pandas.core.base import (
DataError,
@@ -2265,37 +2266,31 @@ def quantile(self, q=0.5, interpolation: str = "linear"):
"""
from pandas import concat

def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
def pre_processor(vals: ArrayLike) -> Tuple[np.ndarray, Optional[np.dtype]]:
if is_object_dtype(vals):
raise TypeError(
"'quantile' cannot be performed against 'object' dtypes!"
)

inference = None
inference: Optional[np.dtype] = None
Member:

the return type is Tuple[np.ndarray, Optional[Type]] in the function signature. so that can also now be narrowed.

Member:

was needed anyway for mypy fixup
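
For context, a quick illustration (not part of the diff) of why Optional[np.dtype] now fits every branch: the rewritten pre_processor always stores an np.dtype instance in inference, where the old code mixed a type object (np.int64) with plain strings.

import numpy as np

# Every inference value the new code assigns is an np.dtype instance, so the
# narrowed Optional[np.dtype] annotation holds without any ignore comments.
assert isinstance(np.dtype(np.int64), np.dtype)
assert isinstance(np.dtype("datetime64[ns]"), np.dtype)
assert isinstance(np.dtype("timedelta64[ns]"), np.dtype)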

if is_integer_dtype(vals.dtype):
if is_extension_array_dtype(vals.dtype):
# error: "ndarray" has no attribute "to_numpy"
vals = vals.to_numpy( # type: ignore[attr-defined]
dtype=float, na_value=np.nan
)
inference = np.int64
elif is_bool_dtype(vals.dtype) and is_extension_array_dtype(vals.dtype):
# error: "ndarray" has no attribute "to_numpy"
vals = vals.to_numpy( # type: ignore[attr-defined]
dtype=float, na_value=np.nan
)
if isinstance(vals, ExtensionArray):
Member:

the function signature has vals: np.ndarray. also needs updating
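
As an aside, here is the reason the isinstance check removes the ignores, shown in isolation (the helper name is made up for the example): isinstance narrows the union for mypy, while is_extension_array_dtype performs the same runtime test but gives the type checker no information.

from typing import Union

import numpy as np

from pandas.api.extensions import ExtensionArray


def to_float_ndarray(vals: Union[np.ndarray, ExtensionArray]) -> np.ndarray:
    # In this branch mypy knows vals is an ExtensionArray, so .to_numpy()
    # resolves without the old "ndarray has no attribute to_numpy" ignore.
    if isinstance(vals, ExtensionArray):
        return vals.to_numpy(dtype=float, na_value=np.nan)
    return vals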

out = vals.to_numpy(dtype=float, na_value=np.nan)
else:
out = vals
inference = np.dtype(np.int64)
elif is_bool_dtype(vals.dtype) and isinstance(vals, ExtensionArray):
out = vals.to_numpy(dtype=float, na_value=np.nan)
elif is_datetime64_dtype(vals.dtype):
# error: Incompatible types in assignment (expression has type
# "str", variable has type "Optional[Type[int64]]")
inference = "datetime64[ns]" # type: ignore[assignment]
vals = np.asarray(vals).astype(float)
inference = np.dtype("datetime64[ns]")
out = np.asarray(vals).astype(float)
elif is_timedelta64_dtype(vals.dtype):
# error: Incompatible types in assignment (expression has type "str",
# variable has type "Optional[Type[signedinteger[Any]]]")
inference = "timedelta64[ns]" # type: ignore[assignment]
vals = np.asarray(vals).astype(float)
inference = np.dtype("timedelta64[ns]")
out = np.asarray(vals).astype(float)
else:
out = np.asarray(vals)

return vals, inference
return out, inference

def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
if inference:
23 changes: 8 additions & 15 deletions pandas/core/groupby/ops.py
@@ -65,6 +65,7 @@
is_timedelta64_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCCategoricalIndex
from pandas.core.dtypes.missing import (
isna,
@@ -522,7 +523,7 @@ def _disallow_invalid_ops(self, values: ArrayLike, how: str):
@final
def _ea_wrap_cython_operation(
self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs
) -> Tuple[np.ndarray, Optional[List[str]]]:
) -> np.ndarray:
"""
If we have an ExtensionArray, unwrap, call _cython_operation, and
re-wrap if appropriate.
@@ -539,10 +540,7 @@ def _ea_wrap_cython_operation(
)
if how in ["rank"]:
# preserve float64 dtype

# error: Incompatible return value type (got "ndarray", expected
# "Tuple[ndarray, Optional[List[str]]]")
return res_values # type: ignore[return-value]
return res_values

res_values = res_values.astype("i8", copy=False)
result = type(orig_values)(res_values, dtype=orig_values.dtype)
@@ -555,14 +553,11 @@ def _ea_wrap_cython_operation(
kind, values, how, axis, min_count, **kwargs
)
dtype = maybe_cast_result_dtype(orig_values.dtype, how)
if is_extension_array_dtype(dtype):
# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
# attribute "construct_array_type"
cls = dtype.construct_array_type() # type: ignore[union-attr]
if isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
return cls._from_sequence(res_values, dtype=dtype)
# error: Incompatible return value type (got "ndarray", expected
# "Tuple[ndarray, Optional[List[str]]]")
return res_values # type: ignore[return-value]

return res_values

elif is_float_dtype(values.dtype):
# FloatingArray
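
The isinstance(dtype, ExtensionDtype) change above applies the same narrowing idea at the dtype level: once mypy knows the dtype is an ExtensionDtype, construct_array_type() resolves without the union-attr ignore. A minimal sketch with an illustrative function name:

from typing import Union

import numpy as np

from pandas.api.extensions import ExtensionArray, ExtensionDtype


def wrap_cython_result(
    res_values: np.ndarray, dtype: Union[np.dtype, ExtensionDtype]
) -> Union[np.ndarray, ExtensionArray]:
    # isinstance() narrows dtype to ExtensionDtype, so construct_array_type()
    # and _from_sequence() type-check without ignores.
    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        return cls._from_sequence(res_values, dtype=dtype)
    return res_values
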
@@ -599,9 +594,7 @@ def _cython_operation(
self._disallow_invalid_ops(values, how)

if is_extension_array_dtype(values.dtype):
# error: Incompatible return value type (got "Tuple[ndarray,
# Optional[List[str]]]", expected "ndarray")
return self._ea_wrap_cython_operation( # type: ignore[return-value]
return self._ea_wrap_cython_operation(
kind, values, how, axis, min_count, **kwargs
)

9 changes: 8 additions & 1 deletion pandas/core/indexes/base.py
@@ -3876,7 +3876,14 @@ def _reindex_non_unique(self, target):
# --------------------------------------------------------------------
# Join Methods

def join(self, other, how="left", level=None, return_indexers=False, sort=False):
def join(
self,
other,
how: str_t = "left",
level=None,
return_indexers: bool = False,
sort: bool = False,
):
"""
Compute join_index and indexers to conform data
structures to the new index.
7 changes: 6 additions & 1 deletion pandas/core/indexes/datetimelike.py
@@ -827,7 +827,12 @@ def _union(self, other, sort):
_join_precedence = 10

def join(
self, other, how: str = "left", level=None, return_indexers=False, sort=False
self,
other,
how: str = "left",
level=None,
return_indexers: bool = False,
sort: bool = False,
):
"""
See Index.join
19 changes: 14 additions & 5 deletions pandas/core/reshape/reshape.py
@@ -2,9 +2,11 @@

import itertools
from typing import (
TYPE_CHECKING,
List,
Optional,
Union,
cast,
)

import numpy as np
@@ -44,6 +46,9 @@
get_group_index_sorter,
)

if TYPE_CHECKING:
from pandas.core.arrays import ExtensionArray


class _Unstacker:
"""
@@ -942,11 +947,11 @@ def _get_dummies_1d(
data,
prefix,
prefix_sep="_",
dummy_na=False,
sparse=False,
drop_first=False,
dummy_na: bool = False,
sparse: bool = False,
drop_first: bool = False,
dtype: Optional[Dtype] = None,
):
) -> DataFrame:
from pandas.core.reshape.concat import concat

# Series avoids inconsistent NaN handling
@@ -1029,6 +1034,8 @@ def get_empty_frame(data) -> DataFrame:
sparse_series.append(Series(data=sarr, index=index, name=col))

out = concat(sparse_series, axis=1, copy=False)
# TODO: overload concat with Literal for axis
out = cast(DataFrame, out)
return out

else:
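
A rough sketch of what the TODO above (overloading concat with a Literal axis) might look like; these overloads are hypothetical, not part of this PR, and the real pandas.concat signature has many more parameters. With them, mypy would know that axis=1 always yields a DataFrame and the cast above would become unnecessary.

from typing import List, Literal, overload

import pandas as pd
from pandas import DataFrame, Series


@overload
def concat(objs: List[Series], axis: Literal[0] = ...) -> Series: ...
@overload
def concat(objs: List[Series], axis: Literal[1]) -> DataFrame: ...
def concat(objs, axis=0):
    # Thin stand-in for the sketch; the real implementation lives in
    # pandas.core.reshape.concat.
    return pd.concat(objs, axis=axis)
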
@@ -1045,7 +1052,9 @@ def get_empty_frame(data) -> DataFrame:
return DataFrame(dummy_mat, index=index, columns=dummy_cols)


def _reorder_for_extension_array_stack(arr, n_rows: int, n_columns: int):
def _reorder_for_extension_array_stack(
arr: ExtensionArray, n_rows: int, n_columns: int
) -> ExtensionArray:
Member:

probably should be a typevar. but also ok to leave as is for now if not needed as this is internal
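
A rough sketch of the TypeVar variant suggested here (not applied in this PR; the TypeVar name and the body shown are illustrative):

from typing import TypeVar

import numpy as np

from pandas.api.extensions import ExtensionArray

E = TypeVar("E", bound=ExtensionArray)


def _reorder_for_extension_array_stack(arr: E, n_rows: int, n_columns: int) -> E:
    # A TypeVar bound to ExtensionArray expresses that the result is the same
    # concrete subclass as the input, which a plain ExtensionArray return
    # annotation cannot.
    idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel()
    return arr.take(idx)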

"""
Re-orders the values when stacking multiple extension-arrays.

3 changes: 1 addition & 2 deletions pandas/core/sorting.py
@@ -43,7 +43,6 @@
_INT64_MAX = np.iinfo(np.int64).max


# error: Function "numpy.array" is not valid as a type
def get_indexer_indexer(
target: Index,
level: Union[str, int, List[str], List[int]],
@@ -52,7 +51,7 @@ def get_indexer_indexer(
na_position: str,
sort_remaining: bool,
key: IndexKeyFunc,
) -> Optional[np.array]: # type: ignore[valid-type]
) -> Optional[np.ndarray]:
Member:

you found the bonus low hanger! 😄
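
For the record, the distinction behind the removed ignore, shown in isolation (hypothetical function): np.array is a factory function rather than a class, so it is not valid as a type annotation, while np.ndarray is the actual array type.

from typing import Optional

import numpy as np


def maybe_indexer(needs_sort: bool) -> Optional[np.ndarray]:
    # Annotating with np.ndarray (the class) is valid; annotating with
    # np.array (a function) is what triggered the old "not valid as a type"
    # error.
    return np.argsort(np.array([3, 1, 2])) if needs_sort else None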

"""
Helper method that return the indexer according to input parameters for
the sort_index method of DataFrame and Series.
16 changes: 5 additions & 11 deletions pandas/core/tools/datetimes.py
@@ -534,25 +534,19 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: Optional[str]) -> Index:
# GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
# because it expects an ndarray argument
if isinstance(arg, IntegerArray):
result = arg.astype(f"datetime64[{unit}]")
arr = arg.astype(f"datetime64[{unit}]")
tz_parsed = None
else:
result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)

if errors == "ignore":
# Index constructor _may_ infer to DatetimeIndex

# error: Incompatible types in assignment (expression has type "Index", variable
# has type "ExtensionArray")
result = Index(result, name=name) # type: ignore[assignment]
result = Index(arr, name=name)
else:
# error: Incompatible types in assignment (expression has type "DatetimeIndex",
# variable has type "ExtensionArray")
result = DatetimeIndex(result, name=name) # type: ignore[assignment]
result = DatetimeIndex(arr, name=name)

if not isinstance(result, DatetimeIndex):
# error: Incompatible return value type (got "ExtensionArray", expected "Index")
return result # type: ignore[return-value]
return result

# GH#23758: We may still need to localize the result with tz
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
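
The renaming in this hunk reflects a pattern that removes several ignores in this PR: when a value changes type, give it a new name instead of reusing one variable, so mypy can infer a single type per name. Roughly (the function and parameter names here are illustrative):

import numpy as np

from pandas import DatetimeIndex, Index


def wrap_unit_result(arr: np.ndarray, name, errors: str) -> Index:
    # arr keeps the ndarray type for its whole lifetime; the wrapped index is
    # bound to a separate name, so neither assignment needs an
    # "incompatible types in assignment" ignore.
    if errors == "ignore":
        result = Index(arr, name=name)
    else:
        result = DatetimeIndex(arr, name=name)
    return result
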
7 changes: 3 additions & 4 deletions pandas/core/tools/numeric.py
@@ -1,3 +1,5 @@
from typing import Optional

import numpy as np

from pandas._libs import lib
@@ -164,13 +166,10 @@ def to_numeric(arg, errors="raise", downcast=None):

# GH33013: for IntegerArray & FloatingArray extract non-null values for casting
# save mask to reconstruct the full array after casting
mask: Optional[np.ndarray] = None
if isinstance(values, NumericArray):
mask = values._mask
values = values._data[~mask]
else:
# error: Incompatible types in assignment (expression has type "None", variable
# has type "ndarray")
mask = None # type: ignore[assignment]

values_dtype = getattr(values, "dtype", None)
if is_numeric_dtype(values_dtype):
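
The mask fix above uses another small pattern worth naming: annotate the variable as Optional at its first assignment, so the None default and the later ndarray assignment both type-check without an ignore. In isolation (illustrative helper):

from typing import Optional, Tuple

import numpy as np


def split_values_and_mask(values) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    # Declaring the annotation up front gives mask the type
    # Optional[np.ndarray] in both branches, so no ignore is needed.
    mask: Optional[np.ndarray] = None
    data = np.asarray(values)
    if hasattr(values, "_mask"):
        mask = np.asarray(values._mask)
        data = np.asarray(values._data)[~mask]
    return data, mask
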
6 changes: 2 additions & 4 deletions pandas/core/tools/timedeltas.py
@@ -165,7 +165,7 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None):
arg = np.array(list(arg), dtype=object)

try:
value = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
except ValueError:
if errors == "ignore":
return arg
@@ -181,7 +181,5 @@ def _convert_listlike(arg, unit=None, errors="raise", name=None):

from pandas import TimedeltaIndex

# error: Incompatible types in assignment (expression has type "TimedeltaIndex",
# variable has type "ndarray")
value = TimedeltaIndex(value, unit="ns", name=name) # type: ignore[assignment]
value = TimedeltaIndex(td64arr, unit="ns", name=name)
Member:

could also return TimedeltaIndex ... without the intermediate assignment, but the td64arr naming is also an improvement anyway.

return value