diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 82f9280870d59..bf78a3cdefbdd 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -637,7 +637,7 @@ def pad_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): @cython.boundscheck(False) @cython.wraparound(False) -def pad_2d_inplace(numeric_object_t[:, :] values, const uint8_t[:, :] mask, limit=None): +def pad_2d_inplace(numeric_object_t[:, :] values, uint8_t[:, :] mask, limit=None): cdef: Py_ssize_t i, j, N, K numeric_object_t val @@ -656,10 +656,11 @@ def pad_2d_inplace(numeric_object_t[:, :] values, const uint8_t[:, :] mask, limi val = values[j, 0] for i in range(N): if mask[j, i]: - if fill_count >= lim: + if fill_count >= lim or i == 0: continue fill_count += 1 values[j, i] = val + mask[j, i] = False else: fill_count = 0 val = values[j, i] @@ -759,7 +760,7 @@ def backfill_inplace(numeric_object_t[:] values, uint8_t[:] mask, limit=None): def backfill_2d_inplace(numeric_object_t[:, :] values, - const uint8_t[:, :] mask, + uint8_t[:, :] mask, limit=None): pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 01bb3d50c0da7..66a3152de1499 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -3,7 +3,10 @@ for missing values. """ -from typing import Callable +from typing import ( + Callable, + Optional, +) import numpy as np @@ -19,6 +22,7 @@ def _sumprod( *, skipna: bool = True, min_count: int = 0, + axis: Optional[int] = None, ): """ Sum or product for 1D masked array. @@ -36,36 +40,55 @@ def _sumprod( min_count : int, default 0 The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. 
+ axis : int, optional, default None """ if not skipna: - if mask.any() or check_below_min_count(values.shape, None, min_count): + if mask.any(axis=axis) or check_below_min_count(values.shape, None, min_count): return libmissing.NA else: - return func(values) + return func(values, axis=axis) else: - if check_below_min_count(values.shape, mask, min_count): + if check_below_min_count(values.shape, mask, min_count) and ( + axis is None or values.ndim == 1 + ): return libmissing.NA - return func(values, where=~mask) + + return func(values, where=~mask, axis=axis) def sum( - values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0 + values: np.ndarray, + mask: np.ndarray, + *, + skipna: bool = True, + min_count: int = 0, + axis: Optional[int] = None, ): return _sumprod( - np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count + np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis ) def prod( - values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0 + values: np.ndarray, + mask: np.ndarray, + *, + skipna: bool = True, + min_count: int = 0, + axis: Optional[int] = None, ): return _sumprod( - np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count + np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count, axis=axis ) def _minmax( - func: Callable, values: np.ndarray, mask: np.ndarray, *, skipna: bool = True + func: Callable, + values: np.ndarray, + mask: np.ndarray, + *, + skipna: bool = True, + axis: Optional[int] = None, ): """ Reduction for 1D masked array. @@ -80,6 +103,7 @@ def _minmax( Boolean numpy array (True values indicate missing values). skipna : bool, default True Whether to skip NA. 
+ axis : int, optional, default None """ if not skipna: if mask.any() or not values.size: @@ -96,14 +120,27 @@ def _minmax( return libmissing.NA -def min(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): - return _minmax(np.min, values=values, mask=mask, skipna=skipna) +def min( + values: np.ndarray, + mask: np.ndarray, + *, + skipna: bool = True, + axis: Optional[int] = None, +): + return _minmax(np.min, values=values, mask=mask, skipna=skipna, axis=axis) -def max(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): - return _minmax(np.max, values=values, mask=mask, skipna=skipna) +def max( + values: np.ndarray, + mask: np.ndarray, + *, + skipna: bool = True, + axis: Optional[int] = None, +): + return _minmax(np.max, values=values, mask=mask, skipna=skipna, axis=axis) +# TODO: axis kwarg def mean(values: np.ndarray, mask: np.ndarray, skipna: bool = True): if not values.size or mask.all(): return libmissing.NA diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index e43e66fed8957..3769c686da029 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -298,27 +298,6 @@ def _wrap_reduction_result(self, axis: int | None, result): return self._box_func(result) return self._from_backing_data(result) - # ------------------------------------------------------------------------ - - def __repr__(self) -> str: - if self.ndim == 1: - return super().__repr__() - - from pandas.io.formats.printing import format_object_summary - - # the short repr has no trailing newline, while the truncated - # repr does. 
So we include a newline in our template, and strip - # any trailing newlines from format_object_summary - lines = [ - format_object_summary(x, self._formatter(), indent_for_name=False).rstrip( - ", \n" - ) - for x in self - ] - data = ",\n".join(lines) - class_name = f"<{type(self).__name__}>" - return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" - # ------------------------------------------------------------------------ # __array_function__ methods diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 99c4944a1cfa7..bf54f7166e14d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1209,6 +1209,9 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # ------------------------------------------------------------------------ def __repr__(self) -> str: + if self.ndim > 1: + return self._repr_2d() + from pandas.io.formats.printing import format_object_summary # the short repr has no trailing newline, while the truncated @@ -1220,6 +1223,22 @@ def __repr__(self) -> str: class_name = f"<{type(self).__name__}>\n" return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + def _repr_2d(self) -> str: + from pandas.io.formats.printing import format_object_summary + + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + lines = [ + format_object_summary(x, self._formatter(), indent_for_name=False).rstrip( + ", \n" + ) + for x in self + ] + data = ",\n".join(lines) + class_name = f"<{type(self).__name__}>" + return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" + def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: """ Formatting function for scalar values. 
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 1df7c191bdb68..58e7abbbe1ddd 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -21,6 +21,7 @@ npt, type_t, ) +from pandas.compat.numpy import function as nv from pandas.core.dtypes.common import ( is_bool_dtype, @@ -245,10 +246,8 @@ def coerce_to_array( if mask_values is not None: mask = mask | mask_values - if values.ndim != 1: - raise ValueError("values must be a 1D list-like") - if mask.ndim != 1: - raise ValueError("mask must be a 1D list-like") + if values.shape != mask.shape: + raise ValueError("values.shape and mask.shape must match") return values, mask @@ -447,6 +446,144 @@ def _values_for_argsort(self) -> np.ndarray: data[self._mask] = -1 return data + def any(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): + """ + Return whether any element is True. + + Returns False unless there is at least one element that is True. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic <boolean.kleene>` + is used as for logical operations. + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be False, as for an empty array. + If `skipna` is False, the result will still be True if there is + at least one element that is True, otherwise NA will be returned + if there are NA's present. + axis : int or None, default 0 + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.any : Numpy version of this method. + BooleanArray.all : Return whether all elements are True. 
+ + Examples + -------- + The result indicates whether any element is True (and by default + skips NAs): + + >>> pd.array([True, False, True]).any() + True + >>> pd.array([True, False, pd.NA]).any() + True + >>> pd.array([False, False, pd.NA]).any() + False + >>> pd.array([], dtype="boolean").any() + False + >>> pd.array([pd.NA], dtype="boolean").any() + False + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, False, pd.NA]).any(skipna=False) + True + >>> pd.array([False, False, pd.NA]).any(skipna=False) + <NA> + """ + kwargs.pop("axis", None) + nv.validate_any((), kwargs) + + values = self._data.copy() + np.putmask(values, self._mask, False) + result = values.any(axis=axis) + + if skipna: + return result + else: + if result or self.size == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value + + def all(self, *, skipna: bool = True, axis: int | None = 0, **kwargs): + """ + Return whether all elements are True. + + Returns True unless there is at least one element that is False. + By default, NAs are skipped. If ``skipna=False`` is specified and + missing values are present, similar :ref:`Kleene logic <boolean.kleene>` + is used as for logical operations. + + Parameters + ---------- + skipna : bool, default True + Exclude NA values. If the entire array is NA and `skipna` is + True, then the result will be True, as for an empty array. + If `skipna` is False, the result will still be False if there is + at least one element that is False, otherwise NA will be returned + if there are NA's present. + axis : int or None, default 0 + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + bool or :attr:`pandas.NA` + + See Also + -------- + numpy.all : Numpy version of this method. + BooleanArray.any : Return whether any element is True. 
+ + Examples + -------- + The result indicates whether all elements are True (and by default + skips NAs): + + >>> pd.array([True, True, pd.NA]).all() + True + >>> pd.array([True, False, pd.NA]).all() + False + >>> pd.array([], dtype="boolean").all() + True + >>> pd.array([pd.NA], dtype="boolean").all() + True + + With ``skipna=False``, the result can be NA if this is logically + required (whether ``pd.NA`` is True or False influences the result): + + >>> pd.array([True, True, pd.NA]).all(skipna=False) + <NA> + >>> pd.array([True, False, pd.NA]).all(skipna=False) + False + """ + kwargs.pop("axis", None) + nv.validate_all((), kwargs) + + values = self._data.copy() + np.putmask(values, self._mask, True) + result = values.all(axis=axis) + + if skipna: + return result + else: + if not result or self.size == 0 or not self._mask.any(): + return result + else: + return self.dtype.na_value + def _logical_method(self, other, op): assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 066f6ebdfcaa6..6d6cc03a1c83e 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -385,21 +385,21 @@ def _cmp_method(self, other, op): return BooleanArray(result, mask) - def sum(self, *, skipna=True, min_count=0, **kwargs): + def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): nv.validate_sum((), kwargs) - return super()._reduce("sum", skipna=skipna, min_count=min_count) + return super()._reduce("sum", skipna=skipna, min_count=min_count, axis=axis) - def prod(self, *, skipna=True, min_count=0, **kwargs): + def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): nv.validate_prod((), kwargs) - return super()._reduce("prod", skipna=skipna, min_count=min_count) + return super()._reduce("prod", skipna=skipna, min_count=min_count, axis=axis) - def min(self, *, skipna=True, **kwargs): + def min(self, *, skipna=True, axis: int | None = 
0, **kwargs): nv.validate_min((), kwargs) - return super()._reduce("min", skipna=skipna) + return super()._reduce("min", skipna=skipna, axis=axis) - def max(self, *, skipna=True, **kwargs): + def max(self, *, skipna=True, axis: int | None = 0, **kwargs): nv.validate_max((), kwargs) - return super()._reduce("max", skipna=skipna) + return super()._reduce("max", skipna=skipna, axis=axis) def _maybe_mask_result(self, result, mask, other, op_name: str): """ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 078adeb11d3fb..4d59832655162 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -458,21 +458,21 @@ def _cmp_method(self, other, op): return BooleanArray(result, mask) - def sum(self, *, skipna=True, min_count=0, **kwargs): + def sum(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): nv.validate_sum((), kwargs) - return super()._reduce("sum", skipna=skipna, min_count=min_count) + return super()._reduce("sum", skipna=skipna, min_count=min_count, axis=axis) - def prod(self, *, skipna=True, min_count=0, **kwargs): + def prod(self, *, skipna=True, min_count=0, axis: int | None = 0, **kwargs): nv.validate_prod((), kwargs) - return super()._reduce("prod", skipna=skipna, min_count=min_count) + return super()._reduce("prod", skipna=skipna, min_count=min_count, axis=axis) - def min(self, *, skipna=True, **kwargs): + def min(self, *, skipna=True, axis: int | None = 0, **kwargs): nv.validate_min((), kwargs) - return super()._reduce("min", skipna=skipna) + return super()._reduce("min", skipna=skipna, axis=axis) - def max(self, *, skipna=True, **kwargs): + def max(self, *, skipna=True, axis: int | None = 0, **kwargs): nv.validate_max((), kwargs) - return super()._reduce("max", skipna=skipna) + return super()._reduce("max", skipna=skipna, axis=axis) def _maybe_mask_result(self, result, mask, other, op_name: str): """ diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 
6a03456673604..0247cd717edec 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -22,6 +22,7 @@ Scalar, ScalarIndexer, SequenceIndexer, + Shape, npt, type_t, ) @@ -34,10 +35,10 @@ from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( + is_bool, is_bool_dtype, is_dtype_equal, is_float_dtype, - is_integer, is_integer_dtype, is_object_dtype, is_scalar, @@ -120,6 +121,10 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): # The value used to fill '_data' to avoid upcasting _internal_fill_value: Scalar + # our underlying data and mask are each ndarrays + _data: np.ndarray + _mask: np.ndarray + # Fill values used for any/all _truthy_value = Scalar # bool(_truthy_value) = True _falsey_value = Scalar # bool(_falsey_value) = False @@ -131,12 +136,8 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): "mask should be boolean numpy array. Use " "the 'pd.array' function instead" ) - if values.ndim != 1: - raise ValueError("values must be a 1D array") - if mask.ndim != 1: - raise ValueError("mask must be a 1D array") if values.shape != mask.shape: - raise ValueError("values and mask must have same shape") + raise ValueError("values.shape must match mask.shape") if copy: values = values.copy() @@ -160,14 +161,16 @@ def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArra def __getitem__( self: BaseMaskedArrayT, item: PositionalIndexer ) -> BaseMaskedArrayT | Any: - if is_integer(item): - if self._mask[item]: + item = check_array_indexer(self, item) + + newmask = self._mask[item] + if is_bool(newmask): + # This is a scalar indexing + if newmask: return self.dtype.na_value return self._data[item] - item = check_array_indexer(self, item) - - return type(self)(self._data[item], self._mask[item]) + return type(self)(self._data[item], newmask) @doc(ExtensionArray.fillna) def fillna( @@ -187,13 +190,13 @@ def fillna( if mask.any(): if method is not None: - func = 
missing.get_fill_func(method) + func = missing.get_fill_func(method, ndim=self.ndim) new_values, new_mask = func( - self._data.copy(), + self._data.copy().T, limit=limit, - mask=mask.copy(), + mask=mask.copy().T, ) - return type(self)(new_values, new_mask.view(np.bool_)) + return type(self)(new_values.T, new_mask.view(np.bool_).T) else: # fill with value new_values = self.copy() @@ -220,15 +223,52 @@ def __setitem__(self, key, value) -> None: self._mask[key] = mask def __iter__(self): - for i in range(len(self)): - if self._mask[i]: - yield self.dtype.na_value - else: - yield self._data[i] + if self.ndim == 1: + for i in range(len(self)): + if self._mask[i]: + yield self.dtype.na_value + else: + yield self._data[i] + else: + for i in range(len(self)): + yield self[i] def __len__(self) -> int: return len(self._data) + @property + def shape(self) -> Shape: + return self._data.shape + + @property + def ndim(self) -> int: + return self._data.ndim + + def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT: + data = self._data.swapaxes(axis1, axis2) + mask = self._mask.swapaxes(axis1, axis2) + return type(self)(data, mask) + + def delete(self: BaseMaskedArrayT, loc, axis: int = 0) -> BaseMaskedArrayT: + data = np.delete(self._data, loc, axis=axis) + mask = np.delete(self._mask, loc, axis=axis) + return type(self)(data, mask) + + def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT: + data = self._data.reshape(*args, **kwargs) + mask = self._mask.reshape(*args, **kwargs) + return type(self)(data, mask) + + def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT: + # TODO: need to make sure we have the same order for data/mask + data = self._data.ravel(*args, **kwargs) + mask = self._mask.ravel(*args, **kwargs) + return type(self)(data, mask) + + @property + def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT: + return type(self)(self._data.T, self._mask.T) + def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return 
type(self)(~self._data, self._mask.copy()) @@ -454,10 +494,12 @@ def nbytes(self) -> int: @classmethod def _concat_same_type( - cls: type[BaseMaskedArrayT], to_concat: Sequence[BaseMaskedArrayT] + cls: type[BaseMaskedArrayT], + to_concat: Sequence[BaseMaskedArrayT], + axis: int = 0, ) -> BaseMaskedArrayT: - data = np.concatenate([x._data for x in to_concat]) - mask = np.concatenate([x._mask for x in to_concat]) + data = np.concatenate([x._data for x in to_concat], axis=axis) + mask = np.concatenate([x._mask for x in to_concat], axis=axis) return cls(data, mask) def take( @@ -466,15 +508,22 @@ def take( *, allow_fill: bool = False, fill_value: Scalar | None = None, + axis: int = 0, ) -> BaseMaskedArrayT: # we always fill with 1 internally # to avoid upcasting data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value result = take( - self._data, indexer, fill_value=data_fill_value, allow_fill=allow_fill + self._data, + indexer, + fill_value=data_fill_value, + allow_fill=allow_fill, + axis=axis, ) - mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill) + mask = take( + self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis + ) # if we are filling # we only fill where the indexer is null @@ -593,7 +642,8 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if name in {"sum", "prod", "min", "max", "mean"}: op = getattr(masked_reductions, name) - return op(data, mask, skipna=skipna, **kwargs) + result = op(data, mask, skipna=skipna, **kwargs) + return result # coerce to a nan-aware float if needed # (we explicitly use NaN within reductions) diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index c5301a3bd3683..e1990dc064a84 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -152,6 +152,18 @@ def _arith_method(self, other, op): _HANDLED_TYPES = (np.ndarray, numbers.Number) + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + result 
= super()._reduce(name, skipna=skipna, **kwargs) + if isinstance(result, np.ndarray): + axis = kwargs["axis"] + if skipna: + # we only retain mask for all-NA rows/columns + mask = self._mask.all(axis=axis) + else: + mask = self._mask.any(axis=axis) + return type(self)(result, mask=mask) + return result + def __neg__(self): return type(self)(-self._data, self._mask.copy()) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8d150c8f6ad3d..d93fa4bbdd7fc 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -319,7 +319,9 @@ def __init__(self, values, copy=False): def _validate(self): """Validate that we only store NA or strings.""" - if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True): + if len(self._ndarray) and not lib.is_string_array( + self._ndarray.ravel("K"), skipna=True + ): raise ValueError("StringArray requires a sequence of strings or pandas.NA") if self._ndarray.dtype != "object": raise ValueError( @@ -447,9 +449,11 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy) - def _reduce(self, name: str, *, skipna: bool = True, **kwargs): + def _reduce( + self, name: str, *, skipna: bool = True, axis: int | None = 0, **kwargs + ): if name in ["min", "max"]: - return getattr(self, name)(skipna=skipna) + return getattr(self, name)(skipna=skipna, axis=axis) raise TypeError(f"Cannot perform reduction '{name}' with string dtype") diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index c9e96c437964f..f080bf7e03412 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -27,10 +27,10 @@ def test_boolean_array_constructor(): with pytest.raises(TypeError, match="mask should be boolean numpy array"): BooleanArray(values, None) - with pytest.raises(ValueError, match="values must be a 1D array"): + with pytest.raises(ValueError, match="values.shape 
must match mask.shape"): BooleanArray(values.reshape(1, -1), mask) - with pytest.raises(ValueError, match="mask must be a 1D array"): + with pytest.raises(ValueError, match="values.shape must match mask.shape"): BooleanArray(values, mask.reshape(1, -1)) @@ -183,10 +183,10 @@ def test_coerce_to_array(): values = np.array([True, False, True, False], dtype="bool") mask = np.array([False, False, False, True], dtype="bool") - with pytest.raises(ValueError, match="values must be a 1D list-like"): + with pytest.raises(ValueError, match="values.shape and mask.shape must match"): coerce_to_array(values.reshape(1, -1)) - with pytest.raises(ValueError, match="mask must be a 1D list-like"): + with pytest.raises(ValueError, match="values.shape and mask.shape must match"): coerce_to_array(values, mask=mask.reshape(1, -1)) diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index b80d2a3586b3b..b4a817cbc37ec 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -4,6 +4,11 @@ import numpy as np import pytest +from pandas.compat import ( + IS64, + is_platform_windows, +) + import pandas as pd from pandas.tests.extension.base.base import BaseExtensionTests @@ -194,9 +199,23 @@ def test_reductions_2d_axis0(self, data, method, request): if method in ["sum", "prod"] and data.dtype.kind in ["i", "u"]: # FIXME: kludge if data.dtype.kind == "i": - dtype = pd.Int64Dtype() + if is_platform_windows() or not IS64: + # FIXME: kludge for 32bit builds + if result.dtype.itemsize == 4: + dtype = pd.Int32Dtype() + else: + dtype = pd.Int64Dtype() + else: + dtype = pd.Int64Dtype() else: - dtype = pd.UInt64Dtype() + if is_platform_windows() or not IS64: + # FIXME: kludge for 32bit builds + if result.dtype.itemsize == 4: + dtype = pd.UInt32Dtype() + else: + dtype = pd.UInt64Dtype() + else: + dtype = pd.UInt64Dtype() expected = data.astype(dtype) assert type(expected) == type(data), type(expected) diff --git 
a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 9260c342caa6b..9c4bf76b27c14 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -393,3 +393,7 @@ class TestUnaryOps(base.BaseUnaryOpsTests): class TestParsing(base.BaseParsingTests): pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index 173bc2d05af2f..500c2fbb74d17 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -223,3 +223,7 @@ class TestPrinting(base.BasePrintingTests): class TestParsing(base.BaseParsingTests): pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 2cf4f8e415770..344b0be20fc7b 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -254,3 +254,7 @@ class TestPrinting(base.BasePrintingTests): class TestParsing(base.BaseParsingTests): pass + + +class Test2DCompat(base.Dim2CompatTests): + pass diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 3d0edb70d1ced..af86c359c4c00 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -19,6 +19,7 @@ import pytest import pandas as pd +from pandas.core.arrays import ArrowStringArray from pandas.core.arrays.string_ import StringDtype from pandas.tests.extension import base @@ -186,3 +187,13 @@ class TestPrinting(base.BasePrintingTests): class TestGroupBy(base.BaseGroupbyTests): pass + + +class Test2DCompat(base.Dim2CompatTests): + @pytest.fixture(autouse=True) + def arrow_not_supported(self, data, request): + if isinstance(data, ArrowStringArray): + mark = pytest.mark.xfail( + reason="2D support not implemented for ArrowStringArray" + ) + request.node.add_marker(mark)