From 6e15159c30a92c725ac276155c9eb63e29db2f0e Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 3 Jan 2020 19:05:53 -0800
Subject: [PATCH 1/5] REF: EA value_counts -> _value_counts

---
 pandas/core/algorithms.py             |  5 +++--
 pandas/core/arrays/boolean.py         | 16 ++++++--------
 pandas/core/arrays/categorical.py     | 29 ++++++++++++++++----------
 pandas/core/arrays/datetimelike.py    | 30 +++++++++++++--------------
 pandas/core/arrays/integer.py         | 15 ++++++--------
 pandas/core/arrays/interval.py        |  9 ++++++++
 pandas/core/arrays/sparse/array.py    | 18 +++++++---------
 pandas/core/arrays/string_.py         |  5 +++--
 pandas/tests/arrays/test_datetimes.py |  7 +++++--
 9 files changed, 72 insertions(+), 62 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 42cfd9d54ac19..b0ff79b047006 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -705,8 +705,9 @@ def value_counts(
         if is_extension_array_dtype(values):
 
             # handle Categorical and sparse,
-            result = Series(values)._values.value_counts(dropna=dropna)
-            result.name = name
+            arr = extract_array(values)
+            index, counts = arr._value_counts(dropna=dropna)
+            result = Series(counts, index=index, name=name)
             counts = result.values
 
         else:
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 102150b1cbce1..0e12e631c46f2 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -539,11 +539,9 @@ def astype(self, dtype, copy=True):
         data = self._coerce_to_ndarray(na_value=na_value)
         return astype_nansafe(data, dtype, copy=False)
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Returns a Series containing counts of each category.
-
-        Every category will have an entry, even those with a count of 0.
+        Return a tuple describing the counts for each value.
 
         Parameters
         ----------
@@ -552,15 +550,14 @@ def value_counts(self, dropna=True):
 
         Returns
         -------
-        counts : Series
+        index : BooleanArray
+        values : ndarray[int64]
 
         See Also
         --------
         Series.value_counts
-
         """
-
-        from pandas import Index, Series
+        from pandas import Index
 
         # compute counts on the data with no nans
         data = self._data[~self._mask]
@@ -582,8 +579,7 @@ def value_counts(self, dropna=True):
             index = Index(
                 np.concatenate([index, np.array([np.nan], dtype=object)]), dtype=object
             )
-
-        return Series(array, index=index)
+        return index, array
 
     def _values_for_argsort(self) -> np.ndarray:
         """
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f4e75364ae932..93a354a47ab0f 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1423,9 +1423,9 @@ def dropna(self):
 
         return result
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Return a Series containing counts of each category.
+        Return a tuple describing the counts of each category.
 
         Every category will have an entry, even those with a count of 0.
 
@@ -1436,17 +1436,21 @@ def value_counts(self, dropna=True):
 
         Returns
         -------
-        counts : Series
+        index : Categorical
+        values : ndarray[int64]
 
         See Also
         --------
         Series.value_counts
         """
-        from pandas import Series, CategoricalIndex
 
-        code, cat = self._codes, self.categories
-        ncat, mask = len(cat), 0 <= code
-        ix, clean = np.arange(ncat), mask.all()
+        code = self._values_for_factorize()[0]
+        mask = 0 <= code
+        clean = mask.all()
+
+        cat = self.categories
+        ncat = len(cat)
+        ix = np.arange(ncat)
 
         if dropna or clean:
             obs = code if clean else code[mask]
@@ -1455,9 +1459,8 @@ def value_counts(self, dropna=True):
             count = np.bincount(np.where(mask, code, ncat))
             ix = np.append(ix, -1)
 
-        ix = self._constructor(ix, dtype=self.dtype, fastpath=True)
-
-        return Series(count, index=CategoricalIndex(ix), dtype="int64")
+        index = self._constructor(ix, dtype=self.dtype, fastpath=True)
+        return index, count
 
     def _internal_get_values(self):
         """
@@ -2323,7 +2326,11 @@ def describe(self):
         description: `DataFrame`
             A dataframe with frequency and counts by category.
         """
-        counts = self.value_counts(dropna=False)
+        from pandas import Series
+
+        index, values = self._value_counts(dropna=False)
+        counts = Series(values, index=index)
+
         freqs = counts / float(counts.sum())
 
         from pandas.core.reshape.concat import concat
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 2bdd9acaeb70f..814e5dddd756c 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -679,33 +679,33 @@ def repeat(self, repeats, *args, **kwargs):
         values = self._data.repeat(repeats)
         return type(self)(values.view("i8"), dtype=self.dtype)
 
-    def value_counts(self, dropna=False):
+    def _value_counts(self, dropna: bool = False):
         """
-        Return a Series containing counts of unique values.
+        Return an array of unique values and an array of their counts.
 
         Parameters
         ----------
-        dropna : bool, default True
-            Don't include counts of NaT values.
+        dropna : bool, default False
 
         Returns
         -------
-        Series
+        ExtensionArray
+        ndarray[int64]
         """
-        from pandas import Series, Index
-
         if dropna:
-            values = self[~self.isna()]._data
+            values = self[~self.isna()]
         else:
-            values = self._data
+            values = self
 
-        cls = type(self)
+        arg = values._values_for_factorize()[0]
 
-        result = value_counts(values, sort=False, dropna=dropna)
-        index = Index(
-            cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name
-        )
-        return Series(result.values, index=index, name=result.name)
+        result = value_counts(arg, sort=False, dropna=False)
+
+        freq = self.freq if is_period_dtype(self) else None
+        new_index = type(self)(result.index, dtype=self.dtype, freq=freq)
+        counts = result.values
+
+        return new_index, counts
 
     def map(self, mapper):
         # TODO(GH-23179): Add ExtensionArray.map
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 0922f4ac6f71d..71bf72fe76f53 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -578,11 +578,9 @@ def _ndarray_values(self) -> np.ndarray:
         """
         return self._data
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Returns a Series containing counts of each category.
-
-        Every category will have an entry, even those with a count of 0.
+        Return a tuple describing the counts for each value.
 
         Parameters
         ----------
@@ -591,15 +589,15 @@ def value_counts(self, dropna=True):
 
         Returns
         -------
-        counts : Series
+        index : IntegerArray
+        values : ndarray[int64]
 
         See Also
         --------
         Series.value_counts
-
         """
 
-        from pandas import Index, Series
+        from pandas import Index
 
         # compute counts on the data with no nans
         data = self._data[~self._mask]
@@ -624,8 +622,7 @@ def value_counts(self, dropna=True):
                 ),
                 dtype=object,
             )
-
-        return Series(array, index=index)
+        return index, array
 
     def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
         # TODO: https://github.com/pandas-dev/pandas/issues/30037
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index cea059fb22be1..27bd2ba75d3fb 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -800,6 +800,14 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
 
         return self._shallow_copy(left_take, right_take)
 
+    def _value_counts(self, dropna=True):
+        # TODO: implement this in a non-naive way!
+
+        arg = self._values_for_factorize()[0]
+        result = value_counts(arg, dropna=dropna)
+        return result.index, result.values
+
+    '''
     def value_counts(self, dropna=True):
         """
         Returns a Series containing counts of each interval.
@@ -819,6 +827,7 @@ def value_counts(self, dropna=True):
         """
         # TODO: implement this is a non-naive way!
         return value_counts(np.asarray(self), dropna=dropna)
+    '''
 
     # Formatting
 
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index adf10642f337a..504f0f635ea3f 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -34,7 +34,7 @@
     is_string_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCSparseArray
+from pandas.core.dtypes.generic import ABCSeries, ABCSparseArray
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
 import pandas.core.algorithms as algos
@@ -696,20 +696,19 @@ def factorize(self, na_sentinel=-1):
         uniques = SparseArray(uniques, dtype=self.dtype)
         return codes, uniques
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Returns a Series containing counts of unique values.
+        Return an array of unique values and an array of their counts.
 
         Parameters
         ----------
-        dropna : boolean, default True
-            Don't include counts of NaN, even if NaN is in sp_values.
+        dropna : bool, default True
 
         Returns
         -------
-        counts : Series
+        ndarray
+        ndarray[int64]
         """
-        from pandas import Index, Series
 
         keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
         fcounts = self.sp_index.ngaps
@@ -728,10 +727,7 @@ def value_counts(self, dropna=True):
                     keys = np.insert(keys, 0, self.fill_value)
                     counts = np.insert(counts, 0, fcounts)
 
-        if not isinstance(keys, ABCIndexClass):
-            keys = Index(keys)
-        result = Series(counts, index=keys)
-        return result
+        return keys, counts
 
     # --------
     # Indexing
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index de254f662bb32..49d0ad057cbf9 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -261,10 +261,11 @@ def astype(self, dtype, copy=True):
     def _reduce(self, name, skipna=True, **kwargs):
         raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
 
-    def value_counts(self, dropna=False):
+    def _value_counts(self, dropna=False):
         from pandas import value_counts
 
-        return value_counts(self._ndarray, dropna=dropna)
+        result = value_counts(self._ndarray, dropna=dropna)
+        return result.index, result.values
 
     # Overrride parent because we have different return types.
     @classmethod
diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
index bca629ae32270..4428d8027bcfe 100644
--- a/pandas/tests/arrays/test_datetimes.py
+++ b/pandas/tests/arrays/test_datetimes.py
@@ -214,13 +214,16 @@ def test_value_counts_preserves_tz(self):
         dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
         arr = DatetimeArray(dti).repeat([4, 3])
 
-        result = arr.value_counts()
+        index, values = arr._value_counts()
+        result = pd.Series(values, index=index)
 
         # Note: not tm.assert_index_equal, since `freq`s do not match
         assert result.index.equals(dti)
 
         arr[-2] = pd.NaT
-        result = arr.value_counts()
+        index, values = arr._value_counts()
+        result = pd.Series(values, index=index)
+
         expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
         tm.assert_series_equal(result, expected)
 

From 2a469d79cabb87f9d70cc01bccd8e0c4a1b92673 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 3 Jan 2020 19:14:58 -0800
Subject: [PATCH 2/5] remove docstringed-out code

---
 pandas/core/arrays/interval.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 27bd2ba75d3fb..ea4cd4ba630c5 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -807,28 +807,6 @@ def _value_counts(self, dropna=True):
         result = value_counts(arg, dropna=dropna)
         return result.index, result.values
 
-    '''
-    def value_counts(self, dropna=True):
-        """
-        Returns a Series containing counts of each interval.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of NaN.
-
-        Returns
-        -------
-        counts : Series
-
-        See Also
-        --------
-        Series.value_counts
-        """
-        # TODO: implement this is a non-naive way!
-        return value_counts(np.asarray(self), dropna=dropna)
-    '''
-
     # Formatting
 
     def _format_data(self):

From e785b7eaa7ce5e6f340f39e688041f12c99b4ec6 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 3 Jan 2020 20:27:04 -0800
Subject: [PATCH 3/5] troubleshoot 32 bit build

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index a38acdc460868..d351a26c6b256 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1444,7 +1444,7 @@ def _value_counts(self, dropna=True):
         Series.value_counts
         """
 
-        code = self._values_for_factorize()[0]
+        code = self._codes
         mask = 0 <= code
         clean = mask.all()
 

From 614069019b064cce6eb7da26d8a3d1a84215216f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 4 Jan 2020 08:08:52 -0800
Subject: [PATCH 4/5] restore cast

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d351a26c6b256..d0cdcdd48763d 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1460,7 +1460,7 @@ def _value_counts(self, dropna=True):
             ix = np.append(ix, -1)
 
         index = self._constructor(ix, dtype=self.dtype, fastpath=True)
-        return index, count
+        return index, count.astype(np.int64)
 
     def _internal_get_values(self):
         """

From 18571563779f6ea9bbbaa4b34bde6103d0919a31 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 6 Jan 2020 08:38:56 -0800
Subject: [PATCH 5/5] mypy fixup

---
 pandas/core/arrays/datetimelike.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 814e5dddd756c..3ab6bcb7759a9 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -702,7 +702,8 @@ def _value_counts(self, dropna: bool = False):
         result = value_counts(arg, sort=False, dropna=False)
 
         freq = self.freq if is_period_dtype(self) else None
-        new_index = type(self)(result.index, dtype=self.dtype, freq=freq)
+        idx = result.index
+        new_index = type(self)(idx, dtype=self.dtype, freq=freq)  # type: ignore
         counts = result.values
 
         return new_index, counts