Skip to content

TYP: fix ignores #40452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 17, 2021
2 changes: 1 addition & 1 deletion pandas/_libs/writers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def write_csv_rows(
data_index : ndarray
nlevels : int
cols : ndarray
writer : object
writer : _csv.writer
"""
# In crude testing, N>100 yields little marginal improvement
cdef:
Expand Down
15 changes: 4 additions & 11 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,10 @@ def quantile_ea_compat(
mask = np.asarray(values.isna())
mask = np.atleast_2d(mask)

# error: Incompatible types in assignment (expression has type "ndarray", variable
# has type "ExtensionArray")
values, fill_value = values._values_for_factorize() # type: ignore[assignment]
# error: No overload variant of "atleast_2d" matches argument type "ExtensionArray"
values = np.atleast_2d(values) # type: ignore[call-overload]

# error: Argument 1 to "quantile_with_mask" has incompatible type "ExtensionArray";
# expected "ndarray"
result = quantile_with_mask(
values, mask, fill_value, qs, interpolation, axis # type: ignore[arg-type]
)
arr, fill_value = values._values_for_factorize()
arr = np.atleast_2d(arr)

result = quantile_with_mask(arr, mask, fill_value, qs, interpolation, axis)

if not is_sparse(orig.dtype):
# shape[0] should be 1 as long as EAs are 1D
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/array_algos/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ def re_replacer(s):
f = np.vectorize(re_replacer, otypes=[values.dtype])

if mask is None:
# error: Invalid index type "slice" for "ExtensionArray"; expected type
# "Union[int, ndarray]"
values[:] = f(values) # type: ignore[index]
values[:] = f(values)
else:
values[mask] = f(values[mask])
3 changes: 0 additions & 3 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,6 @@ def take_1d(

Note: similarly to `take_nd`, this function assumes that the indexer is
a valid(ated) indexer with no out of bound indices.

TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
be removed again if we don't end up with ArrayManager.
"""
if not isinstance(arr, np.ndarray):
# ExtensionArray -> dispatch to their method
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def __getitem__(
"""
raise AbstractMethodError(self)

def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any) -> None:
"""
Set one or more values inplace.

Expand Down
19 changes: 19 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
tzinfo,
)
from typing import (
TYPE_CHECKING,
Optional,
Union,
cast,
overload,
)
import warnings

Expand Down Expand Up @@ -79,6 +81,9 @@
Tick,
)

if TYPE_CHECKING:
from typing import Literal

_midnight = time(0, 0)


Expand Down Expand Up @@ -1909,6 +1914,20 @@ def std(
# Constructor Helpers


@overload
def sequence_to_datetimes(
data, allow_object: Literal[False] = ..., require_iso8601: bool = ...
) -> DatetimeArray:
...


@overload
def sequence_to_datetimes(
data, allow_object: Literal[True] = ..., require_iso8601: bool = ...
) -> Union[np.ndarray, DatetimeArray]:
...


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With these Literal[True]/Literal[False] overloads, we may also need to add an extra bool case, i.e. duplicate the function signature as an additional overload (follow-on OK, as it may not be needed if this is internal); see #40200 and #40197

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may not be needed if this is internal

Yeah, this shouldn't be user-facing.

def sequence_to_datetimes(
data, allow_object: bool = False, require_iso8601: bool = False
) -> Union[np.ndarray, DatetimeArray]:
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Tuple,
Type,
Union,
cast,
)

import numpy as np
Expand Down Expand Up @@ -485,7 +486,7 @@ def _cmp_method(self, other, op):
# TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
return BooleanArray._from_sequence(result.to_pandas().values)

def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any) -> None:
"""Set one or more values inplace.

Parameters
Expand All @@ -509,6 +510,8 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
key = check_array_indexer(self, key)

if is_integer(key):
key = cast(int, key)

if not is_scalar(value):
raise ValueError("Must pass scalars with scalar indexer")
elif isna(value):
Expand All @@ -518,8 +521,7 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:

# Slice data and insert in-between
new_data = [
# error: Slice index must be an integer or None
*self._data[0:key].chunks, # type: ignore[misc]
*self._data[0:key].chunks,
pa.array([value], type=pa.string()),
*self._data[(key + 1) :].chunks,
]
Expand All @@ -530,11 +532,11 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
# This is probably extremely slow.

# Convert all possible input key types to an array of integers
if is_bool_dtype(key):
if isinstance(key, slice):
key_array = np.array(range(len(self))[key])
elif is_bool_dtype(key):
# TODO(ARROW-9430): Directly support setitem(booleans)
key_array = np.argwhere(key).flatten()
elif isinstance(key, slice):
key_array = np.array(range(len(self))[key])
else:
# TODO(ARROW-9431): Directly support setitem(integers)
key_array = np.asanyarray(key)
Expand Down
108 changes: 34 additions & 74 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
from pandas._typing import (
AnyArrayLike,
ArrayLike,
Dtype,
DtypeObj,
Expand Down Expand Up @@ -407,27 +406,16 @@ def maybe_cast_result(

assert not is_scalar(result)

if (
is_extension_array_dtype(dtype)
and not is_categorical_dtype(dtype)
and dtype.kind != "M"
):
# We have to special case categorical so as not to upcast
# things like counts back to categorical

# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
# attribute "construct_array_type"
cls = dtype.construct_array_type() # type: ignore[union-attr]
# error: Argument "dtype" to "maybe_cast_to_extension_array" has incompatible
# type "Union[dtype[Any], ExtensionDtype]"; expected "Optional[ExtensionDtype]"
result = maybe_cast_to_extension_array(
cls, result, dtype=dtype # type: ignore[arg-type]
)
if isinstance(dtype, ExtensionDtype):
if not is_categorical_dtype(dtype) and dtype.kind != "M":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic has changed here slightly. Is there a latent bug here that needs a test?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looking some more, maybe the type annotations for maybe_downcast_to_dtype are incorrect?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is only called from one place, and that is with numeric_only=True (we ought to remove that kwarg), and the two cases excluded here don't have is_numeric_dtype(dtype), the behavior will end up being the same before and after.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looking some more, maybe the type annotations for maybe_downcast_to_dtype are incorrect?

It does look sketchy, yeah.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and the two cases excluded here don't have is_numeric_dtype(dtype), the behavior will end up being the same before and after

so it does.

# We have to special case categorical so as not to upcast
# things like counts back to categorical

elif numeric_only and is_numeric_dtype(dtype) or not numeric_only:
# error: Argument 2 to "maybe_downcast_to_dtype" has incompatible type
# "Union[dtype[Any], ExtensionDtype]"; expected "Union[str, dtype[Any]]"
result = maybe_downcast_to_dtype(result, dtype) # type: ignore[arg-type]
cls = dtype.construct_array_type()
result = maybe_cast_to_extension_array(cls, result, dtype=dtype)

elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only:
result = maybe_downcast_to_dtype(result, dtype)

return result

Expand Down Expand Up @@ -549,17 +537,23 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray:
new_dtype = ensure_dtype_can_hold_na(result.dtype)

if new_dtype != result.dtype:
# error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible
# type "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any],
# None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any,
# Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
result = result.astype(new_dtype, copy=True) # type: ignore[arg-type]
result = result.astype(new_dtype, copy=True)

np.place(result, mask, np.nan)

return result


@overload
def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
...


@overload
def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype:
...


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could perhaps create a _DtypeObjT TypeVar (local to the module if we don't need it elsewhere) instead of an overload here, since ensure_dtype_can_hold_na is type-preserving.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered that and ended up not pulling the trigger; I would be OK with it if you feel strongly about it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you feel strongly about it

I wouldn't block on this, so i'll leave it to you.

I won't be able to fully review this PR till tomorrow, so the comments so far are just some bits that stood out.

def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
"""
If we have a dtype that cannot hold NA values, find the best match that can.
Expand Down Expand Up @@ -636,9 +630,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):

kinds = ["i", "u", "f", "c", "m", "M"]
if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds:
# error: Incompatible types in assignment (expression has type
# "Union[dtype[Any], ExtensionDtype]", variable has type "dtype[Any]")
dtype = ensure_dtype_can_hold_na(dtype) # type: ignore[assignment]
dtype = ensure_dtype_can_hold_na(dtype)
fv = na_value_for_dtype(dtype)
return dtype, fv

Expand Down Expand Up @@ -1471,7 +1463,7 @@ def soft_convert_objects(


def convert_dtypes(
input_array: AnyArrayLike,
input_array: ArrayLike,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now inconsistent with the docstring

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

convert_string: bool = True,
convert_integer: bool = True,
convert_boolean: bool = True,
Expand All @@ -1483,7 +1475,7 @@ def convert_dtypes(

Parameters
----------
input_array : ExtensionArray, Index, Series or np.ndarray
input_array : ExtensionArray or np.ndarray
convert_string : bool, default True
Whether object dtypes should be converted to ``StringDtype()``.
convert_integer : bool, default True
Expand Down Expand Up @@ -1707,15 +1699,10 @@ def maybe_cast_to_datetime(
# GH 25843: Remove tz information since the dtype
# didn't specify one

# error: Item "ndarray" of "Union[ndarray, DatetimeArray]"
# has no attribute "tz"
if dta.tz is not None: # type: ignore[union-attr]
if dta.tz is not None:
# equiv: dta.view(dtype)
# Note: NOT equivalent to dta.astype(dtype)

# error: Item "ndarray" of "Union[ndarray,
# DatetimeArray]" has no attribute "tz_localize"
dta = dta.tz_localize(None) # type: ignore[union-attr]
dta = dta.tz_localize(None)
value = dta
elif is_datetime64tz:
dtype = cast(DatetimeTZDtype, dtype)
Expand All @@ -1725,38 +1712,19 @@ def maybe_cast_to_datetime(
# be localized to the timezone.
is_dt_string = is_string_dtype(value.dtype)
dta = sequence_to_datetimes(value, allow_object=False)
# error: Item "ndarray" of "Union[ndarray, DatetimeArray]"
# has no attribute "tz"
if dta.tz is not None: # type: ignore[union-attr]
# error: Argument 1 to "astype" of
# "_ArrayOrScalarCommon" has incompatible type
# "Union[dtype[Any], ExtensionDtype, None]"; expected
# "Union[dtype[Any], None, type, _SupportsDType, str,
# Union[Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
# Any]]]"
value = dta.astype(
dtype, copy=False # type: ignore[arg-type]
)
if dta.tz is not None:
value = dta.astype(dtype, copy=False)
elif is_dt_string:
# Strings here are naive, so directly localize
# equiv: dta.astype(dtype) # though deprecated

# error: Item "ndarray" of "Union[ndarray,
# DatetimeArray]" has no attribute "tz_localize"
value = dta.tz_localize( # type: ignore[union-attr]
dtype.tz
)
value = dta.tz_localize(dtype.tz)
else:
# Numeric values are UTC at this point,
# so localize and convert
# equiv: Series(dta).astype(dtype) # though deprecated

# error: Item "ndarray" of "Union[ndarray,
# DatetimeArray]" has no attribute "tz_localize"
value = dta.tz_localize( # type: ignore[union-attr]
"UTC"
).tz_convert(dtype.tz)
value = dta.tz_localize("UTC").tz_convert(dtype.tz)
elif is_timedelta64:
# if successful, we get a ndarray[td64ns]
value, _ = sequence_to_td64ns(value)
Expand Down Expand Up @@ -1789,14 +1757,12 @@ def maybe_cast_to_datetime(
elif value.dtype == object:
value = maybe_infer_to_datetimelike(value)

elif not isinstance(value, ABCExtensionArray):
elif isinstance(value, list):
# only do this if we have an array and the dtype of the array is not
# setup already we are not an integer/object, so don't bother with this
# conversion

# error: Argument 1 to "maybe_infer_to_datetimelike" has incompatible type
# "Union[ExtensionArray, List[Any]]"; expected "Union[ndarray, List[Any]]"
value = maybe_infer_to_datetimelike(value) # type: ignore[arg-type]
value = maybe_infer_to_datetimelike(value)

return value

Expand Down Expand Up @@ -1974,10 +1940,8 @@ def construct_1d_arraylike_from_scalar(
except OutOfBoundsDatetime:
dtype = np.dtype(object)

if is_extension_array_dtype(dtype):
# error: Item "dtype" of "Union[dtype, ExtensionDtype]" has no
# attribute "construct_array_type"
cls = dtype.construct_array_type() # type: ignore[union-attr]
if isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
subarr = cls._from_sequence([value] * length, dtype=dtype)

else:
Expand All @@ -1994,11 +1958,7 @@ def construct_1d_arraylike_from_scalar(
elif dtype.kind in ["M", "m"]:
value = maybe_unbox_datetimelike(value, dtype)

# error: Argument "dtype" to "empty" has incompatible type
# "Union[dtype, ExtensionDtype]"; expected "Union[dtype, None, type,
# _SupportsDtype, str, Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DtypeDict, Tuple[Any, Any]]"
subarr = np.empty(length, dtype=dtype) # type: ignore[arg-type]
subarr = np.empty(length, dtype=dtype)
subarr.fill(value)

return subarr
Expand Down
Loading