diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 95363e598a06c..146e6fcf89755 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -905,7 +905,6 @@ def value_counts(self, dropna: bool = True) -> Series: Index, Series, ) - from pandas.arrays import IntegerArray # compute counts on the data with no nans data = self._data[~self._mask] @@ -925,9 +924,6 @@ def value_counts(self, dropna: bool = True) -> Series: index = index.astype(self.dtype) - mask = np.zeros(len(counts), dtype="bool") - counts = IntegerArray(counts, mask) - return Series(counts, index=index) @doc(ExtensionArray.equals) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index b5432f0d1346c..56dd03203ddef 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -485,7 +485,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: def value_counts(self, dropna: bool = True): from pandas import value_counts - result = value_counts(self._ndarray, dropna=dropna).astype("Int64") + result = value_counts(self._ndarray, dropna=dropna) result.index = result.index.astype(self.dtype) return result diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index e39ebd3afd2ff..1d467e1d72295 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -566,7 +566,7 @@ def value_counts(self, dropna: bool = True) -> Series: index = Index(type(self)(values)) - return Series(counts, index=index).astype("Int64") + return Series(counts, index=index) def astype(self, dtype, copy: bool = True): dtype = pandas_dtype(dtype) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 8e9112b531fad..0718fca0af821 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -92,12 +92,12 @@ def test_ufunc_reduce_raises(values): def test_value_counts_na(): arr = pd.array([True, False, pd.NA], dtype="boolean") result = arr.value_counts(dropna=False) - expected = pd.Series([1, 1, 1], index=arr, dtype="Int64") + expected = pd.Series([1, 1, 1], index=arr) assert expected.index.dtype == arr.dtype tm.assert_series_equal(result, expected) result = arr.value_counts(dropna=True) - expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64") + expected = pd.Series([1, 1], index=arr[:-1]) assert expected.index.dtype == arr.dtype tm.assert_series_equal(result, expected) @@ -105,7 +105,7 @@ def test_value_counts_na(): def test_value_counts_with_normalize(): ser = pd.Series([True, False, pd.NA], dtype="boolean") result = ser.value_counts(normalize=True) - expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64") / 2 + expected = pd.Series([1, 1], index=ser[:-1]) / 2 assert expected.index.dtype == "boolean" tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index fbdf419811e24..e44d33c5d1455 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -100,11 +100,11 @@ def test_value_counts_na(): result = arr.value_counts(dropna=False) idx = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype) assert idx.dtype == arr.dtype - expected = pd.Series([2, 1, 1], index=idx, dtype="Int64") + expected = pd.Series([2, 1, 1], index=idx) tm.assert_series_equal(result, expected) result = arr.value_counts(dropna=True) - expected = pd.Series([2, 1], index=idx[:-1], dtype="Int64") + expected = pd.Series([2, 1], index=idx[:-1]) tm.assert_series_equal(result, expected) @@ -113,14 +113,14 @@ def test_value_counts_empty(): result = ser.value_counts() idx = pd.Index([], dtype="Float64") assert idx.dtype == "Float64" - expected = pd.Series([], index=idx, dtype="Int64") + expected = pd.Series([], index=idx, dtype="int64") tm.assert_series_equal(result, expected) def test_value_counts_with_normalize(): ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64") result = ser.value_counts(normalize=True) - expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + expected = pd.Series([2, 1], index=ser[:2]) / 3 assert expected.index.dtype == ser.dtype tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 73c8d4e6b1aed..88bceed148ac0 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -111,11 +111,11 @@ def test_value_counts_na(): result = arr.value_counts(dropna=False) ex_index = pd.Index([1, 2, pd.NA], dtype="Int64") assert ex_index.dtype == "Int64" - expected = pd.Series([2, 1, 1], index=ex_index, dtype="Int64") + expected = pd.Series([2, 1, 1], index=ex_index) tm.assert_series_equal(result, expected) result = arr.value_counts(dropna=True) - expected = pd.Series([2, 1], index=arr[:2], dtype="Int64") + expected = pd.Series([2, 1], index=arr[:2]) assert expected.index.dtype == arr.dtype tm.assert_series_equal(result, expected) @@ -125,8 +125,8 @@ def test_value_counts_empty(): ser = pd.Series([], dtype="Int64") result = ser.value_counts() idx = pd.Index([], dtype=ser.dtype) + expected = pd.Series([], index=idx, dtype="int64") assert idx.dtype == ser.dtype - expected = pd.Series([], index=idx, dtype="Int64") tm.assert_series_equal(result, expected) @@ -134,7 +134,7 @@ def test_value_counts_with_normalize(): # GH 33172 ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64") result = ser.value_counts(normalize=True) - expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + expected = pd.Series([2, 1], index=ser[:2]) / 3 assert expected.index.dtype == ser.dtype tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 8b2aea5c2e2e1..42c9a50d7fdf7 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -481,18 +481,18 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2): def test_value_counts_na(dtype): arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype) result = arr.value_counts(dropna=False) - expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]], dtype="Int64") + expected = pd.Series([2, 1, 1], index=arr[[0, 1, 3]]) tm.assert_series_equal(result, expected) result = arr.value_counts(dropna=True) - expected = pd.Series([2, 1], index=arr[:2], dtype="Int64") + expected = pd.Series([2, 1], index=arr[:2]) tm.assert_series_equal(result, expected) def test_value_counts_with_normalize(dtype): ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype) result = ser.value_counts(normalize=True) - expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 + expected = pd.Series([2, 1], index=ser[:2]) / 3 tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 6379dfe2efefe..f8bdf081e5c1f 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -50,10 +50,6 @@ def test_value_counts_with_normalize(self, data): expected = pd.Series(0.0, index=result.index) expected[result > 0] = 1 / len(values) - if isinstance(data.dtype, pd.core.dtypes.dtypes.BaseMaskedDtype): - # TODO(GH#44692): avoid special-casing - expected = expected.astype("Float64") - self.assert_series_equal(result, expected) def test_count(self, data_missing): diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 73682620b8353..0d81bc441bf8b 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -156,14 +156,10 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): class TestMethods(base.BaseMethodsTests): - @pytest.mark.skip(reason="returns nullable") + @pytest.mark.xfail(reason="returns nullable") def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) - @pytest.mark.skip(reason="returns nullable") - def test_value_counts_with_normalize(self, data): - pass - class TestCasting(base.BaseCastingTests): pass