Skip to content

TYP: core.arrays.numpy_ #31348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 31, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 51 additions & 32 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numbers
from typing import Union
from typing import Optional, Tuple, Type, Union

import numpy as np
from numpy.lib.mixins import NDArrayOperatorsMixin
Expand Down Expand Up @@ -34,54 +34,66 @@ class PandasDtype(ExtensionDtype):

Parameters
----------
dtype : numpy.dtype
dtype : object
Object to be converted to a NumPy data type object.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see how this is more accurate than just "numpy.dtype", but can we be more specific here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the set of types accepted by np.dtype, which is more permissive than just another np.dtype. The "object" wording is taken straight from the NumPy docs.

Do you want to restrict this as before, or establish the actual types accepted by NumPy?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's straight out of the NumPy docs, then that sounds fine.


See Also
--------
numpy.dtype
"""

_metadata = ("_dtype",)

def __init__(self, dtype):
dtype = np.dtype(dtype)
self._dtype = dtype
self._type = dtype.type
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jorisvandenbossche does turning this into properties have perf implications we need to worry about?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for consistency with `name`, which was changed in #31037.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, thanks. That was the perf effect I had in mind, but pre-coffee I had it backwards.

def __init__(self, dtype: object):
self._dtype = np.dtype(dtype)

def __repr__(self) -> str:
return f"PandasDtype({repr(self.name)})"

@property
def numpy_dtype(self):
"""The NumPy dtype this PandasDtype wraps."""
def numpy_dtype(self) -> np.dtype:
"""
The NumPy dtype this PandasDtype wraps.
"""
return self._dtype

@property
def name(self):
def name(self) -> str:
"""
A bit-width name for this data-type.
"""
return self._dtype.name

@property
def type(self):
return self._type
def type(self) -> Type[np.generic]:
"""
The type object used to instantiate a scalar of this NumPy data-type.
"""
return self._dtype.type

@property
def _is_numeric(self):
def _is_numeric(self) -> bool:
# exclude object, str, unicode, void.
return self.kind in set("biufc")

@property
def _is_boolean(self):
def _is_boolean(self) -> bool:
return self.kind == "b"

@classmethod
def construct_from_string(cls, string):
def construct_from_string(cls, string: str) -> "PandasDtype":
try:
return cls(np.dtype(string))
dtype = np.dtype(string)
except TypeError as err:
if not isinstance(string, str):
msg = f"'construct_from_string' expects a string, got {type(string)}"
else:
msg = f"Cannot construct a 'PandasDtype' from '{string}'"
raise TypeError(msg) from err
return cls(dtype)

@classmethod
def construct_array_type(cls):
def construct_array_type(cls) -> Type["PandasArray"]:
"""
Return the array type associated with this dtype.

Expand All @@ -92,12 +104,17 @@ def construct_array_type(cls):
return PandasArray

@property
def kind(self):
def kind(self) -> str:
"""
A character code (one of 'biufcmMOSUV') identifying the general kind of data.
"""
return self._dtype.kind

@property
def itemsize(self):
"""The element size of this data-type object."""
def itemsize(self) -> int:
"""
The element size of this data-type object.
"""
return self._dtype.itemsize


Expand Down Expand Up @@ -155,7 +172,7 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False)
self._dtype = PandasDtype(values.dtype)

@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray":
if isinstance(dtype, PandasDtype):
dtype = dtype._dtype

Expand All @@ -165,18 +182,18 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
return cls(result)

@classmethod
def _from_factorized(cls, values, original):
def _from_factorized(cls, values, original) -> "PandasArray":
return cls(values)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(cls, to_concat) -> "PandasArray":
return cls(np.concatenate(to_concat))

# ------------------------------------------------------------------------
# Data

@property
def dtype(self):
def dtype(self) -> PandasDtype:
return self._dtype

# ------------------------------------------------------------------------
Expand All @@ -187,7 +204,7 @@ def __array__(self, dtype=None) -> np.ndarray:

_HANDLED_TYPES = (np.ndarray, numbers.Number)

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):
# Lightly modified version of
# https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\
# numpy.lib.mixins.NDArrayOperatorsMixin.html
Expand Down Expand Up @@ -242,7 +259,7 @@ def __getitem__(self, item):
result = type(self)(result)
return result

def __setitem__(self, key, value):
def __setitem__(self, key, value) -> None:
value = extract_array(value, extract_numpy=True)

scalar_key = lib.is_scalar(key)
Expand All @@ -263,10 +280,12 @@ def __len__(self) -> int:
def nbytes(self) -> int:
return self._ndarray.nbytes

def isna(self):
def isna(self) -> np.ndarray:
return isna(self._ndarray)

def fillna(self, value=None, method=None, limit=None):
def fillna(
self, value=None, method: Optional[str] = None, limit: Optional[int] = None,
) -> "PandasArray":
# TODO(_values_for_fillna): remove this
value, method = validate_fillna_kwargs(value, method)

Expand All @@ -293,7 +312,7 @@ def fillna(self, value=None, method=None, limit=None):
new_values = self.copy()
return new_values

def take(self, indices, allow_fill=False, fill_value=None):
def take(self, indices, allow_fill=False, fill_value=None) -> "PandasArray":
if fill_value is None:
# Primarily for subclasses
fill_value = self.dtype.na_value
Expand All @@ -302,16 +321,16 @@ def take(self, indices, allow_fill=False, fill_value=None):
)
return type(self)(result)

def copy(self):
def copy(self) -> "PandasArray":
return type(self)(self._ndarray.copy())

def _values_for_argsort(self):
def _values_for_argsort(self) -> np.ndarray:
return self._ndarray

def _values_for_factorize(self):
def _values_for_factorize(self) -> Tuple[np.ndarray, int]:
return self._ndarray, -1

def unique(self):
def unique(self) -> "PandasArray":
return type(self)(unique(self._ndarray))

# ------------------------------------------------------------------------
Expand Down