Skip to content

REF: deduplicate nullable arrays _cmp_method #44548

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 0 additions & 48 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
TYPE_CHECKING,
overload,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -44,7 +43,6 @@
BaseMaskedArray,
BaseMaskedDtype,
)
from pandas.core.ops import invalid_comparison

if TYPE_CHECKING:
import pyarrow
Expand Down Expand Up @@ -622,52 +620,6 @@ def _logical_method(self, other, op):
# expected "ndarray"
return BooleanArray(result, mask) # type: ignore[arg-type]

def _cmp_method(self, other, op):
from pandas.arrays import (
FloatingArray,
IntegerArray,
)

if isinstance(other, (IntegerArray, FloatingArray)):
return NotImplemented
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comparing the implementations, I think this is the only thing that's actually different / would change for BooleanArray. But since the implementations are otherwise equal, I suppose that doesn't matter?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that was the same thought I had when looking at these


mask = None

if isinstance(other, BooleanArray):
other, mask = other._data, other._mask

elif is_list_like(other):
other = np.asarray(other)
if other.ndim > 1:
raise NotImplementedError("can only perform ops with 1-d structures")
if len(self) != len(other):
raise ValueError("Lengths must match to compare")

if other is libmissing.NA:
# numpy does not handle pd.NA well as "other" scalar (it returns
# a scalar False instead of an array)
result = np.zeros_like(self._data)
mask = np.ones_like(self._data)
else:
# numpy may show a FutureWarning on invalid elementwise comparisons
# (that is the category filtered just below); this will raise in the future
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
with np.errstate(all="ignore"):
method = getattr(self._data, f"__{op.__name__}__")
result = method(other)

if result is NotImplemented:
result = invalid_comparison(self._data, other, op)

# nans propagate
if mask is None:
mask = self._mask.copy()
else:
mask = self._mask | mask

return BooleanArray(result, mask, copy=False)

def _arith_method(self, other, op):
mask = None
op_name = op.__name__
Expand Down
54 changes: 1 addition & 53 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
from __future__ import annotations

from typing import overload
import warnings

import numpy as np

from pandas._libs import (
lib,
missing as libmissing,
)
from pandas._libs import lib
from pandas._typing import (
ArrayLike,
AstypeArg,
Expand All @@ -24,7 +20,6 @@
is_datetime64_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
pandas_dtype,
)
Expand All @@ -39,7 +34,6 @@
NumericArray,
NumericDtype,
)
from pandas.core.ops import invalid_comparison
from pandas.core.tools.numeric import to_numeric


Expand Down Expand Up @@ -337,52 +331,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
def _values_for_argsort(self) -> np.ndarray:
    """
    Return the values used as the sort key for argsort.

    Returns
    -------
    np.ndarray
        ``self._data`` itself; no copy is made and the mask is not consulted.
    """
    sort_values = self._data
    return sort_values

def _cmp_method(self, other, op):
    """
    Compare this array elementwise with ``other`` and return a BooleanArray.

    Parameters
    ----------
    other : scalar, list-like, BooleanArray, IntegerArray or FloatingArray
        Right-hand operand. Masked arrays contribute their own mask; other
        list-likes are converted with ``np.asarray``.
    op : callable
        Comparison operator. Its ``__name__`` selects the dunder method
        (``__eq__``, ``__lt__``, ...) that is called on ``self._data``.

    Returns
    -------
    BooleanArray
        Comparison result, masked wherever either operand is masked, or
        everywhere when ``other`` is ``pd.NA``.

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a list-like ``other`` does not have the same length as ``self``.
    """
    from pandas.arrays import (
        BooleanArray,
        IntegerArray,
    )

    mask = None

    if isinstance(other, (BooleanArray, IntegerArray, FloatingArray)):
        other, mask = other._data, other._mask

    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        # Reject mismatched lengths up front instead of handing them to
        # numpy's elementwise comparison, where the failure is not reported
        # as a length error.
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    if other is libmissing.NA:
        # numpy does not handle pd.NA well as "other" scalar (it returns
        # a scalar False instead of an array)
        # This may be fixed by NA.__array_ufunc__. Revisit this check
        # once that's implemented.
        result = np.zeros(self._data.shape, dtype="bool")
        mask = np.ones(self._data.shape, dtype="bool")
    else:
        with warnings.catch_warnings():
            # numpy may show a FutureWarning:
            #     elementwise comparison failed; returning scalar instead,
            #     but in the future will perform elementwise comparison
            # before returning NotImplemented. We fall back to the correct
            # behavior today, so that should be fine to ignore.
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            with np.errstate(all="ignore"):
                method = getattr(self._data, f"__{op.__name__}__")
                result = method(other)

        if result is NotImplemented:
            result = invalid_comparison(self._data, other, op)

    # nans propagate: a position is masked if it is masked in either operand
    if mask is None:
        mask = self._mask.copy()
    else:
        mask = self._mask | mask

    return BooleanArray(result, mask)

def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Reduce the array with the ``"sum"`` operation.

    ``kwargs`` is first handed to ``nv.validate_sum``; the reduction itself
    is delegated to the parent class's ``_reduce`` with ``skipna``,
    ``min_count`` and ``axis`` forwarded unchanged.
    """
    nv.validate_sum((), kwargs)
    parent_reduce = super()._reduce
    return parent_reduce("sum", skipna=skipna, min_count=min_count, axis=axis)
Expand Down
54 changes: 1 addition & 53 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from __future__ import annotations

from typing import overload
import warnings

import numpy as np

from pandas._libs import (
iNaT,
lib,
missing as libmissing,
)
from pandas._typing import (
ArrayLike,
Expand All @@ -30,23 +28,18 @@
is_float,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.missing import isna

from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.masked import (
BaseMaskedArray,
BaseMaskedDtype,
)
from pandas.core.arrays.masked import BaseMaskedDtype
from pandas.core.arrays.numeric import (
NumericArray,
NumericDtype,
)
from pandas.core.ops import invalid_comparison
from pandas.core.tools.numeric import to_numeric


Expand Down Expand Up @@ -418,51 +411,6 @@ def _values_for_argsort(self) -> np.ndarray:
data[self._mask] = data.min() - 1
return data

def _cmp_method(self, other, op):
    """
    Compare this array elementwise with ``other`` and return a BooleanArray.

    Parameters
    ----------
    other : scalar, list-like or BaseMaskedArray
        Right-hand operand. A masked array contributes its own mask; any
        other list-like is converted with ``np.asarray``.
    op : callable
        Comparison operator. Its ``__name__`` selects the dunder method
        that is called on ``self._data``.

    Returns
    -------
    BooleanArray

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a list-like ``other`` does not have the same length as ``self``.
    """
    from pandas.core.arrays import BooleanArray

    other_mask = None

    if isinstance(other, BaseMaskedArray):
        other_mask = other._mask
        other = other._data
    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        if len(self) != len(other):
            raise ValueError("Lengths must match to compare")

    if other is libmissing.NA:
        # numpy returns a scalar False (not an array) when handed pd.NA as
        # the "other" scalar, so build the all-masked answer by hand.
        # NA.__array_ufunc__ may make this unnecessary; revisit once that
        # is implemented.
        shape = self._data.shape
        values = np.zeros(shape, dtype="bool")
        return BooleanArray(values, self._mask | np.ones(shape, dtype="bool"))

    dunder_name = f"__{op.__name__}__"
    with warnings.catch_warnings():
        # numpy may emit a FutureWarning ("elementwise comparison failed;
        # returning scalar instead ...") before returning NotImplemented.
        # The fallback below already gives the correct behavior, so the
        # warning is safe to silence.
        warnings.filterwarnings("ignore", "elementwise", FutureWarning)
        with np.errstate(all="ignore"):
            values = getattr(self._data, dunder_name)(other)

    if values is NotImplemented:
        values = invalid_comparison(self._data, other, op)

    # nans propagate: masked in either operand means masked in the result
    if other_mask is None:
        combined_mask = self._mask.copy()
    else:
        combined_mask = self._mask | other_mask

    return BooleanArray(values, combined_mask)

def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs):
    """
    Reduce the array with the ``"sum"`` operation.

    ``kwargs`` is passed to ``nv.validate_sum`` before the work is handed
    to the parent class's ``_reduce``.
    """
    nv.validate_sum((), kwargs)
    reduce_options = {"skipna": skipna, "min_count": min_count, "axis": axis}
    return super()._reduce("sum", **reduce_options)
Expand Down
48 changes: 48 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
TypeVar,
overload,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -40,6 +41,7 @@
is_dtype_equal,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_scalar,
is_string_dtype,
Expand All @@ -66,6 +68,7 @@
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.indexers import check_array_indexer
from pandas.core.ops import invalid_comparison

if TYPE_CHECKING:
from pandas import Series
Expand Down Expand Up @@ -482,6 +485,51 @@ def _hasna(self) -> bool:
# error: Incompatible return value type (got "bool_", expected "bool")
return self._mask.any() # type: ignore[return-value]

def _cmp_method(self, other, op) -> BooleanArray:
    """
    Shared elementwise comparison for masked arrays.

    Parameters
    ----------
    other : scalar, list-like or BaseMaskedArray
        Right-hand operand. A masked array is split into its data and mask;
        any other list-like is converted with ``np.asarray``.
    op : callable
        Comparison operator. Its ``__name__`` selects the dunder method
        that is called on ``self._data``.

    Returns
    -------
    BooleanArray
        Built with ``copy=False`` from the comparison result and the
        combined mask.

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a list-like ``other`` does not have the same length as ``self``.
    """
    from pandas.core.arrays import BooleanArray

    rhs = other
    rhs_mask = None

    if isinstance(rhs, BaseMaskedArray):
        rhs, rhs_mask = rhs._data, rhs._mask
    elif is_list_like(rhs):
        rhs = np.asarray(rhs)
        if rhs.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        if len(self) != len(rhs):
            raise ValueError("Lengths must match to compare")

    if rhs is libmissing.NA:
        # Comparing against pd.NA through numpy yields a scalar False rather
        # than an array, so the fully-masked result is constructed directly.
        # NA.__array_ufunc__ may fix this; revisit once it is implemented.
        shape = self._data.shape
        outcome = np.zeros(shape, dtype="bool")
        rhs_mask = np.ones(shape, dtype="bool")
    else:
        with warnings.catch_warnings(), np.errstate(all="ignore"):
            # numpy may raise a FutureWarning about a failed elementwise
            # comparison before returning NotImplemented; the fallback
            # below handles that case, so the warning is ignored.
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            outcome = getattr(self._data, f"__{op.__name__}__")(rhs)

        if outcome is NotImplemented:
            outcome = invalid_comparison(self._data, rhs, op)

    # nans propagate: combine our mask with the other operand's, if any
    merged_mask = self._mask.copy() if rhs_mask is None else self._mask | rhs_mask

    return BooleanArray(outcome, merged_mask, copy=False)

def isna(self) -> np.ndarray:
    """
    Return a copy of the mask array.

    Returns
    -------
    np.ndarray
        The result of ``self._mask.copy()``, so the caller receives a new
        object rather than the stored mask itself.
    """
    mask_copy = self._mask.copy()
    return mask_copy

Expand Down