-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
TYP: core.arrays.numpy_ #31348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TYP: core.arrays.numpy_ #31348
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
import numbers | ||
from typing import Union | ||
from typing import Optional, Tuple, Type, Union | ||
|
||
import numpy as np | ||
from numpy.lib.mixins import NDArrayOperatorsMixin | ||
|
@@ -34,54 +34,66 @@ class PandasDtype(ExtensionDtype): | |
|
||
Parameters | ||
---------- | ||
dtype : numpy.dtype | ||
dtype : object | ||
Object to be converted to a NumPy data type object. | ||
|
||
See Also | ||
-------- | ||
numpy.dtype | ||
""" | ||
|
||
_metadata = ("_dtype",) | ||
|
||
def __init__(self, dtype): | ||
dtype = np.dtype(dtype) | ||
self._dtype = dtype | ||
self._type = dtype.type | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche does turning this into properties have perf implications we need to worry about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is for consistency with name that was changed in #31037 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK thanks. That was the perf affect I had in mind, but pre-coffee i had it backwards. |
||
def __init__(self, dtype: object): | ||
self._dtype = np.dtype(dtype) | ||
|
||
def __repr__(self) -> str: | ||
return f"PandasDtype({repr(self.name)})" | ||
|
||
@property | ||
def numpy_dtype(self): | ||
"""The NumPy dtype this PandasDtype wraps.""" | ||
def numpy_dtype(self) -> np.dtype: | ||
""" | ||
The NumPy dtype this PandasDtype wraps. | ||
""" | ||
return self._dtype | ||
|
||
@property | ||
def name(self): | ||
def name(self) -> str: | ||
""" | ||
A bit-width name for this data-type. | ||
""" | ||
return self._dtype.name | ||
|
||
@property | ||
def type(self): | ||
return self._type | ||
def type(self) -> Type[np.generic]: | ||
""" | ||
The type object used to instantiate a scalar of this NumPy data-type. | ||
""" | ||
return self._dtype.type | ||
|
||
@property | ||
def _is_numeric(self): | ||
def _is_numeric(self) -> bool: | ||
# exclude object, str, unicode, void. | ||
return self.kind in set("biufc") | ||
|
||
@property | ||
def _is_boolean(self): | ||
def _is_boolean(self) -> bool: | ||
return self.kind == "b" | ||
|
||
@classmethod | ||
def construct_from_string(cls, string): | ||
def construct_from_string(cls, string: str) -> "PandasDtype": | ||
try: | ||
return cls(np.dtype(string)) | ||
dtype = np.dtype(string) | ||
except TypeError as err: | ||
if not isinstance(string, str): | ||
msg = f"'construct_from_string' expects a string, got {type(string)}" | ||
else: | ||
msg = f"Cannot construct a 'PandasDtype' from '{string}'" | ||
raise TypeError(msg) from err | ||
return cls(dtype) | ||
|
||
@classmethod | ||
def construct_array_type(cls): | ||
def construct_array_type(cls) -> Type["PandasArray"]: | ||
""" | ||
Return the array type associated with this dtype. | ||
|
||
|
@@ -92,12 +104,17 @@ def construct_array_type(cls): | |
return PandasArray | ||
|
||
@property | ||
def kind(self): | ||
def kind(self) -> str: | ||
""" | ||
A character code (one of 'biufcmMOSUV') identifying the general kind of data. | ||
""" | ||
return self._dtype.kind | ||
|
||
@property | ||
def itemsize(self): | ||
"""The element size of this data-type object.""" | ||
def itemsize(self) -> int: | ||
""" | ||
The element size of this data-type object. | ||
""" | ||
return self._dtype.itemsize | ||
|
||
|
||
|
@@ -155,7 +172,7 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False) | |
self._dtype = PandasDtype(values.dtype) | ||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, dtype=None, copy=False): | ||
def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray": | ||
if isinstance(dtype, PandasDtype): | ||
dtype = dtype._dtype | ||
|
||
|
@@ -165,18 +182,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): | |
return cls(result) | ||
|
||
@classmethod | ||
def _from_factorized(cls, values, original): | ||
def _from_factorized(cls, values, original) -> "PandasArray": | ||
return cls(values) | ||
|
||
@classmethod | ||
def _concat_same_type(cls, to_concat): | ||
def _concat_same_type(cls, to_concat) -> "PandasArray": | ||
return cls(np.concatenate(to_concat)) | ||
|
||
# ------------------------------------------------------------------------ | ||
# Data | ||
|
||
@property | ||
def dtype(self): | ||
def dtype(self) -> PandasDtype: | ||
return self._dtype | ||
|
||
# ------------------------------------------------------------------------ | ||
|
@@ -187,7 +204,7 @@ def __array__(self, dtype=None) -> np.ndarray: | |
|
||
_HANDLED_TYPES = (np.ndarray, numbers.Number) | ||
|
||
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): | ||
def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs): | ||
# Lightly modified version of | ||
# https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\ | ||
# numpy.lib.mixins.NDArrayOperatorsMixin.html | ||
|
@@ -242,7 +259,7 @@ def __getitem__(self, item): | |
result = type(self)(result) | ||
return result | ||
|
||
def __setitem__(self, key, value): | ||
def __setitem__(self, key, value) -> None: | ||
value = extract_array(value, extract_numpy=True) | ||
|
||
scalar_key = lib.is_scalar(key) | ||
|
@@ -263,10 +280,12 @@ def __len__(self) -> int: | |
def nbytes(self) -> int: | ||
return self._ndarray.nbytes | ||
|
||
def isna(self): | ||
def isna(self) -> np.ndarray: | ||
return isna(self._ndarray) | ||
|
||
def fillna(self, value=None, method=None, limit=None): | ||
def fillna( | ||
self, value=None, method: Optional[str] = None, limit: Optional[int] = None, | ||
) -> "PandasArray": | ||
# TODO(_values_for_fillna): remove this | ||
value, method = validate_fillna_kwargs(value, method) | ||
|
||
|
@@ -293,7 +312,7 @@ def fillna(self, value=None, method=None, limit=None): | |
new_values = self.copy() | ||
return new_values | ||
|
||
def take(self, indices, allow_fill=False, fill_value=None): | ||
def take(self, indices, allow_fill=False, fill_value=None) -> "PandasArray": | ||
if fill_value is None: | ||
# Primarily for subclasses | ||
fill_value = self.dtype.na_value | ||
|
@@ -302,16 +321,16 @@ def take(self, indices, allow_fill=False, fill_value=None): | |
) | ||
return type(self)(result) | ||
|
||
def copy(self): | ||
def copy(self) -> "PandasArray": | ||
return type(self)(self._ndarray.copy()) | ||
|
||
def _values_for_argsort(self): | ||
def _values_for_argsort(self) -> np.ndarray: | ||
return self._ndarray | ||
|
||
def _values_for_factorize(self): | ||
def _values_for_factorize(self) -> Tuple[np.ndarray, int]: | ||
return self._ndarray, -1 | ||
|
||
def unique(self): | ||
def unique(self) -> "PandasArray": | ||
return type(self)(unique(self._ndarray)) | ||
|
||
# ------------------------------------------------------------------------ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i see how this is more accurate than just "numpy.dtype", but can we be more specific here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it's the types accepted by np.dtype which is more permissive than just another np.dtype. Object is straight out of the numpy docs.
do you want to restrict this as before, or establish the actual types accepted by numpy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if its straight out of the numpy docs, then sounds fine