-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Split fastpath IntegerArray constructor and general purpose constructor #22070
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b0ea2f1
c5de1f2
5e8c1a4
90a9c13
2fc70e1
3738d1c
5948ae5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass | ||
from pandas.core.dtypes.common import ( | ||
is_integer, is_scalar, is_float, | ||
is_bool_dtype, | ||
is_float_dtype, | ||
is_integer_dtype, | ||
is_object_dtype, | ||
|
@@ -76,7 +77,7 @@ def construct_from_string(cls, string): | |
"'{}'".format(cls, string)) | ||
|
||
|
||
def to_integer_array(values, dtype=None): | ||
def integer_array(values, dtype=None, copy=False): | ||
""" | ||
Infer and return an integer array of the values. | ||
|
||
|
@@ -85,6 +86,7 @@ def to_integer_array(values, dtype=None): | |
values : 1D list-like | ||
dtype : dtype, optional | ||
dtype to coerce | ||
copy : boolean, default False | ||
|
||
Returns | ||
------- | ||
|
@@ -94,7 +96,8 @@ def to_integer_array(values, dtype=None): | |
------ | ||
TypeError if incompatible types | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there still a reference to to_integer_array? (I see it in the diff, but I also see that you changed it above.) |
||
return IntegerArray(values, dtype=dtype, copy=False) | ||
values, mask = coerce_to_array(values, dtype=dtype, copy=copy) | ||
return IntegerArray(values, mask) | ||
|
||
|
||
def safe_cast(values, dtype, copy): | ||
|
@@ -133,6 +136,11 @@ def coerce_to_array(values, dtype, mask=None, copy=False): | |
------- | ||
tuple of (values, mask) | ||
""" | ||
# if values is an integer numpy array, preserve its dtype | |
if dtype is None and hasattr(values, 'dtype'): | ||
if is_integer_dtype(values.dtype): | ||
dtype = values.dtype | ||
|
||
if dtype is not None: | ||
if not issubclass(type(dtype), _IntegerDtype): | ||
try: | ||
|
@@ -174,10 +182,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): | |
|
||
# infer dtype if needed | ||
if dtype is None: | ||
if is_integer_dtype(values): | ||
dtype = values.dtype | ||
else: | ||
dtype = np.dtype('int64') | ||
dtype = np.dtype('int64') | ||
else: | ||
dtype = dtype.type | ||
|
||
|
@@ -197,47 +202,62 @@ def coerce_to_array(values, dtype, mask=None, copy=False): | |
|
||
class IntegerArray(ExtensionArray, ExtensionOpsMixin): | ||
""" | ||
We represent an IntegerArray with 2 numpy arrays | ||
Array of integer (optional missing) values. | ||
|
||
We represent an IntegerArray with 2 numpy arrays: | ||
|
||
- data: contains a numpy integer array of the appropriate dtype | ||
- mask: a boolean array holding a mask on the data, False is missing | ||
- mask: a boolean array holding a mask on the data, True is missing | ||
|
||
To construct an IntegerArray from generic array-like input, use | ||
``integer_array`` function instead. | ||
|
||
Parameters | ||
---------- | ||
values : integer 1D numpy array | ||
mask : boolean 1D numpy array | ||
copy : bool, default False | ||
|
||
Returns | ||
------- | ||
IntegerArray | ||
|
||
""" | ||
|
||
@cache_readonly | ||
def dtype(self): | ||
return _dtypes[str(self._data.dtype)] | ||
|
||
def __init__(self, values, mask=None, dtype=None, copy=False): | ||
""" | ||
Parameters | ||
---------- | ||
values : 1D list-like / IntegerArray | ||
mask : 1D list-like, optional | ||
dtype : subclass of _IntegerDtype, optional | ||
copy : bool, default False | ||
def __init__(self, values, mask, copy=False): | ||
if not (isinstance(values, np.ndarray) | ||
and is_integer_dtype(values.dtype)): | ||
raise TypeError("values should be integer numpy array. Use " | ||
"the 'integer_array' function instead") | ||
if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)): | ||
raise TypeError("mask should be boolean numpy array. Use " | ||
"the 'integer_array' function instead") | ||
|
||
Returns | ||
------- | ||
IntegerArray | ||
""" | ||
self._data, self._mask = coerce_to_array( | ||
values, dtype=dtype, mask=mask, copy=copy) | ||
if copy: | ||
values = values.copy() | ||
mask = mask.copy() | ||
|
||
self._data = values | ||
self._mask = mask | ||
|
||
@classmethod | ||
def _from_sequence(cls, scalars, dtype=None, copy=False): | ||
return cls(scalars, dtype=dtype, copy=copy) | ||
return integer_array(scalars, dtype=dtype, copy=copy) | ||
|
||
@classmethod | ||
def _from_factorized(cls, values, original): | ||
return cls(values, dtype=original.dtype) | ||
return integer_array(values, dtype=original.dtype) | ||
|
||
def __getitem__(self, item): | ||
if is_integer(item): | ||
if self._mask[item]: | ||
return self.dtype.na_value | ||
return self._data[item] | ||
return type(self)(self._data[item], | ||
mask=self._mask[item], | ||
dtype=self.dtype) | ||
return type(self)(self._data[item], self._mask[item]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jreback additional question: what do you think of writing `IntegerArray(...)` instead of `type(self)(...)`? Python perfectly allows that (and it is the same here, as we don't subclass this one further), and I personally find that easier to read. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like using |
||
|
||
def _coerce_to_ndarray(self): | ||
""" | ||
|
@@ -294,7 +314,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): | |
result[fill_mask] = fill_value | ||
mask = mask ^ fill_mask | ||
|
||
return type(self)(result, mask=mask, dtype=self.dtype, copy=False) | ||
return type(self)(result, mask, copy=False) | ||
|
||
def copy(self, deep=False): | ||
data, mask = self._data, self._mask | ||
|
@@ -304,7 +324,7 @@ def copy(self, deep=False): | |
else: | ||
data = data.copy() | ||
mask = mask.copy() | ||
return type(self)(data, mask, dtype=self.dtype, copy=False) | ||
return type(self)(data, mask, copy=False) | ||
|
||
def __setitem__(self, key, value): | ||
_is_scalar = is_scalar(value) | ||
|
@@ -356,7 +376,7 @@ def _na_value(self): | |
def _concat_same_type(cls, to_concat): | ||
data = np.concatenate([x._data for x in to_concat]) | ||
mask = np.concatenate([x._mask for x in to_concat]) | ||
return cls(data, mask=mask, dtype=to_concat[0].dtype) | ||
return cls(data, mask) | ||
|
||
def astype(self, dtype, copy=True): | ||
"""Cast to a NumPy array or IntegerArray with 'dtype'. | ||
|
@@ -386,8 +406,7 @@ def astype(self, dtype, copy=True): | |
if isinstance(dtype, _IntegerDtype): | ||
result = self._data.astype(dtype.numpy_dtype, | ||
casting='same_kind', copy=False) | ||
return type(self)(result, mask=self._mask, | ||
dtype=dtype, copy=False) | ||
return type(self)(result, mask=self._mask, copy=False) | ||
|
||
# coerce | ||
data = self._coerce_to_ndarray() | ||
|
@@ -523,7 +542,7 @@ def _maybe_mask_result(self, result, mask, other, op_name): | |
result[mask] = np.nan | ||
return result | ||
|
||
return type(self)(result, mask=mask, dtype=self.dtype, copy=False) | ||
return type(self)(result, mask, copy=False) | ||
|
||
@classmethod | ||
def _create_arithmetic_method(cls, op): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you update the docstring?