diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bccf74d4f610b..69f565daa2daf 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1019,6 +1019,7 @@ Numeric - Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) - Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) - Bug in :class:`DataFrame` cumulative operations (e.g. cumsum, cummax) incorrect casting to object-dtype (:issue:`19296`) +- Bug in ``DataFrame.__invert__`` (``~`` operator) where dtypes were lost for a :class:`DataFrame` with mixed dtypes (:issue:`31183`) - Bug in :class:`~DataFrame.diff` losing the dtype for extension types (:issue:`30889`) - Bug in :class:`DataFrame.diff` raising an ``IndexError`` when one of the columns was a nullable integer dtype (:issue:`30967`) @@ -1169,6 +1170,7 @@ ExtensionArray - Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`).
- Bug where nullable integers could not be compared to strings (:issue:`28930`) - Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) +- Bug in dtype being lost in ``__invert__`` (``~`` operator) for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) Other diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 47605413ff1a6..5eaed70721592 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -50,6 +50,10 @@ def __iter__(self): def __len__(self) -> int: return len(self._data) + def __invert__(self): + # Copy the mask so the result does not share self._mask with this array. + return type(self)(~self._data, self._mask.copy()) + def to_numpy( self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default, ): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6c04212e26924..cdaf12392dc66 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1393,8 +1393,9 @@ def __invert__(self): # inv fails with 0 len return self - arr = operator.inv(com.values_from_object(self)) - return self.__array_wrap__(arr) + new_data = self._data.apply(operator.invert) + result = self._constructor(new_data).__finalize__(self) + return result def __nonzero__(self): raise ValueError( diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 722fe152e6a85..17a817116c6bf 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2233,7 +2233,7 @@ def check_bool_indexer(index: Index, key) -> np.ndarray: result = result.astype(bool)._values else: if is_sparse(result): - result = result.to_dense() + result = np.asarray(result) result = check_bool_array_indexer(index, result) return result diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 73652da78654f..bf7d275e4ff7b 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -476,6 +476,14 @@ def test_invert(fill_value):
expected = SparseArray(~arr, fill_value=not fill_value) tm.assert_sp_array_equal(result, expected) + result = ~pd.Series(sparray) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = ~pd.DataFrame({"A": sparray}) + expected = pd.DataFrame({"A": expected}) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("fill_value", [0, np.nan]) @pytest.mark.parametrize("op", [operator.pos, operator.neg]) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py index 200446f79af8a..cb9b07db4a0df 100644 --- a/pandas/tests/arrays/test_boolean.py +++ b/pandas/tests/arrays/test_boolean.py @@ -471,6 +471,24 @@ def test_ufunc_reduce_raises(values): np.add.reduce(a) +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) + + class TestLogicalOps(BaseOpsUtil): def test_numpy_scalars_ok(self, all_logical_operators): a = pd.array([True, False, None], dtype="boolean") diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 090df35bd94c9..e2b6ea0304f6a 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -49,7 +49,12 @@ class TestMyDtype(BaseDtypeTests): from .io import BaseParsingTests # noqa from .methods import BaseMethodsTests # noqa from .missing import BaseMissingTests # noqa -from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # 
noqa +from .ops import ( # noqa + BaseArithmeticOpsTests, + BaseComparisonOpsTests, + BaseOpsUtil, + BaseUnaryOpsTests, +) from .printing import BasePrintingTests # noqa from .reduce import ( # noqa BaseBooleanReduceTests, diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 20d06ef2e5647..0609f19c8e0c3 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -168,3 +168,11 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): assert result is NotImplemented else: raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + + +class BaseUnaryOpsTests(BaseOpsUtil): + def test_invert(self, data): + s = pd.Series(data, name="name") + result = ~s + expected = pd.Series(~data, name="name") + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index c489445d8512a..0c6b187eac1fc 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -342,6 +342,10 @@ class TestPrinting(base.BasePrintingTests): pass +class TestUnaryOps(base.BaseUnaryOpsTests): + pass + + # TODO parsing not yet supported # class TestParsing(base.BaseParsingTests): # pass diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index c727cb398d53e..55f1216a0efd7 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -61,6 +61,27 @@ def test_invert(self, float_frame): tm.assert_frame_equal(-(df < 0), ~(df < 0)) + def test_invert_mixed(self): + shape = (10, 5) + df = pd.concat( + [ + pd.DataFrame(np.zeros(shape, dtype="bool")), + pd.DataFrame(np.zeros(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + result = ~df + expected = pd.concat( + [ + pd.DataFrame(np.ones(shape, dtype="bool")), + pd.DataFrame(-np.ones(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + 
tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "df", [