From 4405a53863b5c8a7daaef997831a5638e1602d0d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 30 Nov 2021 11:37:12 -0800
Subject: [PATCH 1/2] API: value_counts consistently returns int64 dtype

---
 pandas/core/arrays/masked.py                   |  4 ----
 pandas/core/arrays/string_.py                  |  2 +-
 pandas/core/arrays/string_arrow.py             |  2 +-
 pandas/tests/arrays/boolean/test_function.py   |  6 +++---
 pandas/tests/arrays/floating/test_function.py  |  8 ++++----
 pandas/tests/arrays/integer/test_function.py   |  8 ++++----
 pandas/tests/arrays/string_/test_string.py     |  6 +++---
 pandas/tests/extension/decimal/test_decimal.py |  3 ---
 pandas/tests/extension/test_boolean.py         |  8 --------
 pandas/tests/extension/test_floating.py        | 18 +-----------------
 pandas/tests/extension/test_integer.py         | 18 +-----------------
 pandas/tests/extension/test_string.py          |  8 +-------
 12 files changed, 19 insertions(+), 72 deletions(-)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index cc61fb4d93ffc..32844ae929c46 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -656,7 +656,6 @@ def value_counts(self, dropna: bool = True) -> Series:
             Index,
             Series,
         )
-        from pandas.arrays import IntegerArray
 
         # compute counts on the data with no nans
         data = self._data[~self._mask]
@@ -680,9 +679,6 @@ def value_counts(self, dropna: bool = True) -> Series:
                 dtype=object,
             )
 
-        mask = np.zeros(len(counts), dtype="bool")
-        counts = IntegerArray(counts, mask)
-
         return Series(counts, index=index)
 
     @doc(ExtensionArray.equals)
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index df71501d55b20..3dcd9738ee532 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -470,7 +470,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
     def value_counts(self, dropna: bool = True):
         from pandas import value_counts
 
-        return value_counts(self._ndarray, dropna=dropna).astype("Int64")
+        return value_counts(self._ndarray, dropna=dropna)
 
     def memory_usage(self, deep: bool = False) -> int:
         result = self._ndarray.nbytes
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index b1daf0e393ef0..eb64712a45422 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -622,7 +622,7 @@ def value_counts(self, dropna: bool = True) -> Series:
         # Index cannot hold ExtensionArrays yet
         index = Index(type(self)(values)).astype(object)
 
-        return Series(counts, index=index).astype("Int64")
+        return Series(counts, index=index)
 
     def astype(self, dtype, copy=True):
         dtype = pandas_dtype(dtype)
diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py
index 2f1a3121cdf5b..6ede1de399911 100644
--- a/pandas/tests/arrays/boolean/test_function.py
+++ b/pandas/tests/arrays/boolean/test_function.py
@@ -77,18 +77,18 @@ def test_ufunc_reduce_raises(values):
 def test_value_counts_na():
     arr = pd.array([True, False, pd.NA], dtype="boolean")
     result = arr.value_counts(dropna=False)
-    expected = pd.Series([1, 1, 1], index=[True, False, pd.NA], dtype="Int64")
+    expected = pd.Series([1, 1, 1], index=[True, False, pd.NA])
     tm.assert_series_equal(result, expected)
 
     result = arr.value_counts(dropna=True)
-    expected = pd.Series([1, 1], index=[True, False], dtype="Int64")
+    expected = pd.Series([1, 1], index=[True, False])
     tm.assert_series_equal(result, expected)
 
 
 def test_value_counts_with_normalize():
     ser = pd.Series([True, False, pd.NA], dtype="boolean")
     result = ser.value_counts(normalize=True)
-    expected = pd.Series([1, 1], index=[True, False], dtype="Float64") / 2
+    expected = pd.Series([1, 1], index=[True, False]) / 2
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py
index ff84116fa1b18..882f0a210424c 100644
--- a/pandas/tests/arrays/floating/test_function.py
+++ b/pandas/tests/arrays/floating/test_function.py
@@ -97,11 +97,11 @@ def test_stat_method(pandasmethname, kwargs):
 def test_value_counts_na():
     arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
     result = arr.value_counts(dropna=False)
-    expected = pd.Series([2, 1, 1], index=[0.1, 0.2, pd.NA], dtype="Int64")
+    expected = pd.Series([2, 1, 1], index=[0.1, 0.2, pd.NA])
     tm.assert_series_equal(result, expected)
 
     result = arr.value_counts(dropna=True)
-    expected = pd.Series([2, 1], index=[0.1, 0.2], dtype="Int64")
+    expected = pd.Series([2, 1], index=[0.1, 0.2])
     tm.assert_series_equal(result, expected)
 
 
@@ -109,14 +109,14 @@ def test_value_counts_empty():
     ser = pd.Series([], dtype="Float64")
     result = ser.value_counts()
     idx = pd.Index([], dtype="object")
-    expected = pd.Series([], index=idx, dtype="Int64")
+    expected = pd.Series([], index=idx, dtype="int64")
     tm.assert_series_equal(result, expected)
 
 
 def test_value_counts_with_normalize():
     ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
     result = ser.value_counts(normalize=True)
-    expected = pd.Series([2, 1], index=[0.1, 0.2], dtype="Float64") / 3
+    expected = pd.Series([2, 1], index=[0.1, 0.2]) / 3
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
index 3d8c93fbd507f..f4f4d5a2fac02 100644
--- a/pandas/tests/arrays/integer/test_function.py
+++ b/pandas/tests/arrays/integer/test_function.py
@@ -108,11 +108,11 @@ def test_stat_method(pandasmethname, kwargs):
 def test_value_counts_na():
     arr = pd.array([1, 2, 1, pd.NA], dtype="Int64")
     result = arr.value_counts(dropna=False)
-    expected = pd.Series([2, 1, 1], index=[1, 2, pd.NA], dtype="Int64")
+    expected = pd.Series([2, 1, 1], index=[1, 2, pd.NA])
     tm.assert_series_equal(result, expected)
 
     result = arr.value_counts(dropna=True)
-    expected = pd.Series([2, 1], index=[1, 2], dtype="Int64")
+    expected = pd.Series([2, 1], index=[1, 2])
     tm.assert_series_equal(result, expected)
 
 
@@ -122,7 +122,7 @@ def test_value_counts_empty():
     result = ser.value_counts()
     # TODO: The dtype of the index seems wrong (it's int64 for non-empty)
     idx = pd.Index([], dtype="object")
-    expected = pd.Series([], index=idx, dtype="Int64")
+    expected = pd.Series([], index=idx, dtype="int64")
     tm.assert_series_equal(result, expected)
 
 
@@ -130,7 +130,7 @@ def test_value_counts_with_normalize():
     # GH 33172
     ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
     result = ser.value_counts(normalize=True)
-    expected = pd.Series([2, 1], index=[1, 2], dtype="Float64") / 3
+    expected = pd.Series([2, 1], index=[1, 2]) / 3
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index c330e959ad5bf..997ac267831a6 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -466,18 +466,18 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage2):
 def test_value_counts_na(dtype):
     arr = pd.array(["a", "b", "a", pd.NA], dtype=dtype)
     result = arr.value_counts(dropna=False)
-    expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA], dtype="Int64")
+    expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA])
     tm.assert_series_equal(result, expected)
 
     result = arr.value_counts(dropna=True)
-    expected = pd.Series([2, 1], index=["a", "b"], dtype="Int64")
+    expected = pd.Series([2, 1], index=["a", "b"])
     tm.assert_series_equal(result, expected)
 
 
 def test_value_counts_with_normalize(dtype):
     ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype)
     result = ser.value_counts(normalize=True)
-    expected = pd.Series([2, 1], index=["a", "b"], dtype="Float64") / 3
+    expected = pd.Series([2, 1], index=["a", "b"]) / 3
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 53416b6a3e9db..f969e60e5244f 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -148,9 +148,6 @@ def test_value_counts(self, all_data, dropna, request):
 
         tm.assert_series_equal(result, expected)
 
-    def test_value_counts_with_normalize(self, data):
-        return super().test_value_counts_with_normalize(data)
-
 
 class TestCasting(base.BaseCastingTests):
     pass
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
index 05455905860d2..2f68477411ec8 100644
--- a/pandas/tests/extension/test_boolean.py
+++ b/pandas/tests/extension/test_boolean.py
@@ -216,14 +216,6 @@ def test_searchsorted(self, data_for_sorting, as_series):
         sorter = np.array([1, 0])
         assert data_for_sorting.searchsorted(a, sorter=sorter) == 0
 
-    @pytest.mark.skip(reason="uses nullable integer")
-    def test_value_counts(self, all_data, dropna):
-        return super().test_value_counts(all_data, dropna)
-
-    @pytest.mark.skip(reason="uses nullable integer")
-    def test_value_counts_with_normalize(self, data):
-        pass
-
     def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
         # override because there are only 2 unique values
 
diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
index 2b08c5b7be450..da4258c31b393 100644
--- a/pandas/tests/extension/test_floating.py
+++ b/pandas/tests/extension/test_floating.py
@@ -172,23 +172,7 @@ class TestMissing(base.BaseMissingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="uses nullable integer")
-    def test_value_counts(self, all_data, dropna):
-        all_data = all_data[:10]
-        if dropna:
-            other = np.array(all_data[~all_data.isna()])
-        else:
-            other = all_data
-
-        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
-        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
-        expected.index = expected.index.astype(all_data.dtype)
-
-        self.assert_series_equal(result, expected)
-
-    @pytest.mark.skip(reason="uses nullable integer")
-    def test_value_counts_with_normalize(self, data):
-        pass
+    pass
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index 7d343aab3c7a0..0eaebc23b9834 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -195,23 +195,7 @@ class TestMissing(base.BaseMissingTests):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="uses nullable integer")
-    def test_value_counts(self, all_data, dropna):
-        all_data = all_data[:10]
-        if dropna:
-            other = np.array(all_data[~all_data.isna()])
-        else:
-            other = all_data
-
-        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
-        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
-        expected.index = expected.index.astype(all_data.dtype)
-
-        self.assert_series_equal(result, expected)
-
-    @pytest.mark.skip(reason="uses nullable integer")
-    def test_value_counts_with_normalize(self, data):
-        pass
+    pass
 
 
 class TestCasting(base.BaseCastingTests):
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 5049116a9320e..64793b098d57f 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -152,13 +152,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="returns nullable")
-    def test_value_counts(self, all_data, dropna):
-        return super().test_value_counts(all_data, dropna)
-
-    @pytest.mark.skip(reason="returns nullable")
-    def test_value_counts_with_normalize(self, data):
-        pass
+    pass
 
 
 class TestCasting(base.BaseCastingTests):

From cdae1c522b9ed825b488f2cedba3f0cd35a86e21 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 14 Feb 2022 09:59:40 -0800
Subject: [PATCH 2/2] TST: update extension value_counts tests for int64 counts

---
 pandas/tests/extension/test_floating.py | 1 -
 pandas/tests/extension/test_integer.py  | 1 -
 pandas/tests/extension/test_string.py   | 6 +-----
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py
index ce49567c50594..f6ffe51fd93d4 100644
--- a/pandas/tests/extension/test_floating.py
+++ b/pandas/tests/extension/test_floating.py
@@ -183,7 +183,6 @@ def test_value_counts(self, all_data, dropna):
 
         result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
         expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
-        expected = expected.astype("Int64")
         expected.index = expected.index.astype(all_data.dtype)
 
         self.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index 83745cc3f66d8..c4b95d8da01e8 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -206,7 +206,6 @@ def test_value_counts(self, all_data, dropna):
 
         result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
         expected = pd.Series(other).value_counts(dropna=dropna).sort_index()
-        expected = expected.astype("Int64")
         expected.index = expected.index.astype(all_data.dtype)
 
         self.assert_series_equal(result, expected)
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index 4256142556894..2484433449feb 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -158,14 +158,10 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
 
 
 class TestMethods(base.BaseMethodsTests):
-    @pytest.mark.skip(reason="returns nullable")
+    @pytest.mark.xfail(reason="returns nullable")
     def test_value_counts(self, all_data, dropna):
         return super().test_value_counts(all_data, dropna)
 
-    @pytest.mark.skip(reason="returns nullable")
-    def test_value_counts_with_normalize(self, data):
-        pass
-
 
 class TestCasting(base.BaseCastingTests):
     pass