Skip to content

TYP: Typing changes for ExtensionArray.astype #41251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Sep 6, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,13 @@ def _get_ilevel_values(index, level):
# skip exact index checking when `check_categorical` is False
if check_exact and check_categorical:
if not left.equals(right):
diff = (
np.sum((left._values != right._values).astype(int)) * 100.0 / len(left)
)
# error: Value of type variable "_Number" of "sum" cannot be
# "Union[ExtensionArray, ndarray, Any]"
thesum = np.sum(
(left._values != right._values).astype(int)
) # type: ignore[type-var]
# error: Unsupported operand types for * ("ExtensionArray" and "float")
diff = thesum * 100.0 / len(left) # type: ignore[operator]
msg = f"{obj} values are different ({np.round(diff, 5)} %)"
raise_assert_detail(obj, msg, left, right)
else:
Expand Down
27 changes: 22 additions & 5 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Sequence,
TypeVar,
cast,
overload,
)

import numpy as np
Expand All @@ -26,6 +27,7 @@
ArrayLike,
Dtype,
FillnaOptions,
NpDtype,
PositionalIndexer,
Shape,
)
Expand Down Expand Up @@ -516,7 +518,15 @@ def nbytes(self) -> int:
# Additional Methods
# ------------------------------------------------------------------------

def astype(self, dtype, copy=True):
@overload
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dt64/td64 generally go to DTA/TDA

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think so. When you pass a numpy dtype, you get a np.ndarray:

>>> a=pd.array([np.datetime64("2021-03-15"), np.datetime64("2021-04-05")])
>>> a
<DatetimeArray>
['2021-03-15 00:00:00', '2021-04-05 00:00:00']
Length: 2, dtype: datetime64[ns]
>>> b=a.astype(np.datetime64)
>>> b
array(['2021-03-15T00:00:00.000000000', '2021-04-05T00:00:00.000000000'],
      dtype='datetime64[ns]')
>>> type(b)
<class 'numpy.ndarray'>

...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could add an overload for ExtensionDtype, and this overload could then be for str (keeping the union return type) if type_t[Union[str, float, int, complex, bool, object]] is moved as suggested?

The accepted strings for numpy are already known? If so, we should eventually be able to remove the union return type.

...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array with 'dtype'.

Expand All @@ -531,8 +541,9 @@ def astype(self, dtype, copy=True):

Returns
-------
array : ndarray
NumPy ndarray with 'dtype' for its dtype.
array : ArrayLike
An ExtensionArray if dtype StringDtype or same as that of underlying array.
Otherwise a NumPy ndarray with 'dtype' for its dtype.
"""
from pandas.core.arrays.string_ import StringDtype

Expand All @@ -548,7 +559,11 @@ def astype(self, dtype, copy=True):
# allow conversion to StringArrays
return dtype.construct_array_type()._from_sequence(self, copy=False)

return np.array(self, dtype=dtype, copy=copy)
# error: Argument "dtype" to "array" has incompatible type
# "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None, type,
# _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
return np.array(self, dtype=dtype, copy=copy) # type: ignore[arg-type]

def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
"""
Expand Down Expand Up @@ -933,7 +948,9 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
The values returned by this method are also used in
:func:`pandas.util.hash_pandas_object`.
"""
return self.astype(object), np.nan
# error: Incompatible return value type (got "Tuple[Union[ExtensionArray,
# ndarray], float]", expected "Tuple[ndarray, Any]")
return self.astype(object), np.nan # type: ignore[return-value]

def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
"""
Expand Down
17 changes: 15 additions & 2 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

import numbers
from typing import TYPE_CHECKING
from typing import (
TYPE_CHECKING,
overload,
)
import warnings

import numpy as np
Expand All @@ -13,6 +16,7 @@
from pandas._typing import (
ArrayLike,
Dtype,
NpDtype,
type_t,
)
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -392,7 +396,16 @@ def reconstruct(x):
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value)

def astype(self, dtype, copy: bool = True) -> ArrayLike:
@overload
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
def astype(self, dtype: NpDtype, copy: bool = ...) -> np.ndarray:

and elsewhere

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed in next commit

...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:

"""
Cast to a NumPy array or ExtensionArray with 'dtype'.

Expand Down
9 changes: 9 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TypeVar,
Union,
cast,
overload,
)
from warnings import (
catch_warnings,
Expand Down Expand Up @@ -487,6 +488,14 @@ def _constructor(self) -> type[Categorical]:
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
return Categorical(scalars, dtype=dtype, copy=copy)

@overload
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think with dt64/td64 this could return DTA/TDA?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't see that:

>>> s = pd.Series(pd.to_datetime(["2021-03-15 12:05", "2021-04-05 5:10"]), dtype="category")
>>> s
0   2021-03-15 12:05:00
1   2021-04-05 05:10:00
dtype: category
Categories (2, datetime64[ns]): [2021-03-15 12:05:00, 2021-04-05 05:10:00]
>>> s.dtype.categories.dtype
dtype('<M8[ns]')
>>> type(s.astype(s.dtype.categories.dtype).values)
<class 'numpy.ndarray'>

...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Coerce this type to another dtype
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from typing import overload
import warnings

import numpy as np
Expand All @@ -10,7 +11,9 @@
)
from pandas._typing import (
ArrayLike,
Dtype,
DtypeObj,
NpDtype,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import cache_readonly
Expand Down Expand Up @@ -271,7 +274,15 @@ def _from_sequence_of_strings(
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

def astype(self, dtype, copy: bool = True) -> ArrayLike:
@overload
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.

Expand Down
12 changes: 11 additions & 1 deletion pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from typing import overload
import warnings

import numpy as np
Expand All @@ -13,6 +14,7 @@
ArrayLike,
Dtype,
DtypeObj,
NpDtype,
)
from pandas.compat.numpy import function as nv
from pandas.util._decorators import cache_readonly
Expand Down Expand Up @@ -335,7 +337,15 @@ def _from_sequence_of_strings(
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

def astype(self, dtype, copy: bool = True) -> ArrayLike:
@overload
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
"""
Cast to a NumPy array or ExtensionArray with 'dtype'.

Expand Down
9 changes: 9 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
Any,
Sequence,
TypeVar,
overload,
)

import numpy as np
Expand Down Expand Up @@ -301,6 +302,14 @@ def to_numpy( # type: ignore[override]
data = self._data.astype(dtype, copy=copy)
return data

@overload
def astype(self, dtype: NpDtype, copy: bool = True) -> np.ndarray:
...

@overload
def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
...

def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
dtype = pandas_dtype(dtype)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1497,7 +1497,7 @@ def _bool_agg(self, val_test, skipna):
Shared func to call any / all Cython GroupBy implementations.
"""

def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
def objs_to_bool(vals: ArrayLike) -> tuple[ArrayLike, type]:
if is_object_dtype(vals):
vals = np.array([bool(x) for x in vals])
elif isinstance(vals, BaseMaskedArray):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,9 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
"""
values = self.values
if dtype == _dtype_obj:
values = values.astype(object)
# error: Incompatible types in assignment (expression has type
# "Union[ExtensionArray, ndarray]", variable has type "ExtensionArray")
values = values.astype(object) # type: ignore[assignment]
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(values).reshape(self.shape)

Expand Down