diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index d93b5fbc83312..66a7b3ff6e824 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -320,7 +320,8 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: @classmethod def _from_factorized(cls, values, original: "BooleanArray") -> "BooleanArray": - return cls._from_sequence(values, dtype=original.dtype) + mask = values == -1 + return cls(values.astype(bool, copy=False), mask) _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index ec21898852888..3e82e9d9fa37f 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -324,3 +324,10 @@ def test_transpose(self, data): self.assert_frame_equal(result, expected) self.assert_frame_equal(np.transpose(np.transpose(df)), df) self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) + + def test_factorize_roundtrip(self, data): + # GH#32673 + values = data._values_for_factorize()[0] + result = type(data)._from_factorized(values, data) + + self.assert_equal(result, data) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 1f026e405dc17..d576228674968 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -65,7 +65,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): @classmethod def _from_factorized(cls, values, original): - return cls([UserDict(x) for x in values if x != ()]) + return cls( + [UserDict(x) if x != () else original.dtype.na_value for x in values] + ) def __getitem__(self, item): if isinstance(item, numbers.Integral): diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 3aa188098620d..38666a1709092 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py 
@@ -4,6 +4,7 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd +import pandas._testing as tm from pandas.core.arrays import DatetimeArray from pandas.tests.extension import base @@ -201,6 +202,13 @@ def test_unstack(self, obj): result = ser.unstack(0) self.assert_equal(result, expected) + def test_factorize_roundtrip(self, data): + # GH#32673, for DTA we don't preserve freq + values = data._values_for_factorize()[0] + result = type(data)._from_factorized(values, data) + + tm.assert_numpy_array_equal(result.asi8, data.asi8) + class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): pass