-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: deduplicate __setitem__ and _reduce on masked arrays #34187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,10 +8,17 @@ | |
from pandas.util._decorators import doc | ||
|
||
from pandas.core.dtypes.base import ExtensionDtype | ||
from pandas.core.dtypes.common import is_integer, is_object_dtype, is_string_dtype | ||
from pandas.core.dtypes.common import ( | ||
is_integer, | ||
is_object_dtype, | ||
is_scalar, | ||
is_string_dtype, | ||
) | ||
from pandas.core.dtypes.missing import isna, notna | ||
|
||
from pandas.core import nanops | ||
from pandas.core.algorithms import _factorize_array, take | ||
from pandas.core.array_algos import masked_reductions | ||
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin | ||
from pandas.core.indexers import check_array_indexer | ||
|
||
|
@@ -77,6 +84,23 @@ def __getitem__(self, item): | |
|
||
return type(self)(self._data[item], self._mask[item]) | ||
|
||
def _coerce_to_array(self, values) -> Tuple[np.ndarray, np.ndarray]: | ||
raise AbstractMethodError(self) | ||
|
||
def __setitem__(self, key, value) -> None: | ||
_is_scalar = is_scalar(value) | ||
if _is_scalar: | ||
value = [value] | ||
value, mask = self._coerce_to_array(value) | ||
|
||
if _is_scalar: | ||
value = value[0] | ||
mask = mask[0] | ||
|
||
key = check_array_indexer(self, key) | ||
self._data[key] = value | ||
self._mask[key] = mask | ||
|
||
def __iter__(self): | ||
for i in range(len(self)): | ||
if self._mask[i]: | ||
|
@@ -305,3 +329,24 @@ def value_counts(self, dropna: bool = True) -> "Series": | |
counts = IntegerArray(counts, mask) | ||
|
||
return Series(counts, index=index) | ||
|
||
def _reduce(self, name: str, skipna: bool = True, **kwargs): | ||
data = self._data | ||
mask = self._mask | ||
|
||
if name in {"sum", "prod", "min", "max"}: | ||
op = getattr(masked_reductions, name) | ||
return op(data, mask, skipna=skipna, **kwargs) | ||
|
||
# coerce to a nan-aware float if needed | ||
# (we explicitly use NaN within reductions) | ||
if self._hasna: | ||
data = self.to_numpy("float64", na_value=np.nan) | ||
|
||
op = getattr(nanops, "nan" + name) | ||
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) | ||
|
||
if np.isnan(result): | ||
return libmissing.NA | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as a follow up, should this be self.dtype.na_value instead of hardcoded in the base class. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I personally like having it explicit as well (all subclasses use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, we have
so this is not consistent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could also remove that property, from a quick search, that is only used once in the arrays code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, if not generally on an array class, I think self.dtype.na_value is generally clearer. This is not the only case where libmissing.NA is used, so we should use one ore the other. I accept your argument about being explicit but also prefer the the case of allowing the class to be extensible. YAGNI favours your proposal. |
||
|
||
return result |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
orthogranal to this PR but wondering if we even need this any more or could just use abc.abstractmethod decorator
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We generally use this instead of the decorator in the EA base class, because the decorator requires it to actually be an abstract base class.
See
pandas/pandas/core/arrays/base.py
Lines 128 to 131 in 45fee32