pandas-dev · jbrockmendel · Jan 4, 2020 · Jan 4, 2020 · Jan 4, 2020 · Jan 4, 2020
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -708,8 +708,9 @@ def value_counts(
         if is_extension_array_dtype(values):
 
             # handle Categorical and sparse,
-            result = Series(values)._values.value_counts(dropna=dropna)
-            result.name = name
+            arr = extract_array(values)
+            index, counts = arr._value_counts(dropna=dropna)
+            result = Series(counts, index=index, name=name)
             counts = result.values
 
         else:

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -528,11 +528,9 @@ def astype(self, dtype, copy=True):
         data = self._coerce_to_ndarray(na_value=na_value)
         return astype_nansafe(data, dtype, copy=False)
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Returns a Series containing counts of each category.
-
-        Every category will have an entry, even those with a count of 0.
+        Return a tuple describing the counts for each value.
 
         Parameters
         ----------
@@ -541,15 +539,14 @@ def value_counts(self, dropna=True):
 
         Returns
         -------
-        counts : Series
+        index : Index
+        values : ndarray[int64]
 
         See Also
         --------
         Series.value_counts
-
         """
-
-        from pandas import Index, Series
+        from pandas import Index
 
         # compute counts on the data with no nans
         data = self._data[~self._mask]
@@ -571,8 +568,7 @@ def value_counts(self, dropna=True):
             index = Index(
                 np.concatenate([index, np.array([np.nan], dtype=object)]), dtype=object
             )
-
-        return Series(array, index=index)
+        return index, array
 
     def _values_for_argsort(self) -> np.ndarray:
         """

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1423,9 +1423,9 @@ def dropna(self):
 
         return result
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Return a Series containing counts of each category.
+        Return a tuple describing the counts of each category.
 
         Every category will have an entry, even those with a count of 0.
 
@@ -1436,17 +1436,21 @@ def value_counts(self, dropna=True):
 
         Returns
         -------
-        counts : Series
+        index : Categorical
+        values : ndarray[int64]
 
         See Also
         --------
         Series.value_counts
         """
-        from pandas import Series, CategoricalIndex
 
-        code, cat = self._codes, self.categories
-        ncat, mask = len(cat), 0 <= code
-        ix, clean = np.arange(ncat), mask.all()
+        code = self._codes
+        mask = 0 <= code
+        clean = mask.all()
+
+        cat = self.categories
+        ncat = len(cat)
+        ix = np.arange(ncat)
 
         if dropna or clean:
             obs = code if clean else code[mask]
@@ -1455,9 +1459,8 @@ def value_counts(self, dropna=True):
             count = np.bincount(np.where(mask, code, ncat))
             ix = np.append(ix, -1)
 
-        ix = self._constructor(ix, dtype=self.dtype, fastpath=True)
-
-        return Series(count, index=CategoricalIndex(ix), dtype="int64")
+        index = self._constructor(ix, dtype=self.dtype, fastpath=True)
+        return index, count.astype(np.int64)
 
     def _internal_get_values(self):
         """
@@ -2323,7 +2326,11 @@ def describe(self):
         description: `DataFrame`
             A dataframe with frequency and counts by category.
         """
-        counts = self.value_counts(dropna=False)
+        from pandas import Series
+
+        index, values = self._value_counts(dropna=False)
+        counts = Series(values, index=index)
+
         freqs = counts / float(counts.sum())
 
         from pandas.core.reshape.concat import concat

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -679,33 +679,34 @@ def repeat(self, repeats, *args, **kwargs):
         values = self._data.repeat(repeats)
         return type(self)(values.view("i8"), dtype=self.dtype)
 
-    def value_counts(self, dropna=False):
+    def _value_counts(self, dropna: bool = False):
         """
-        Return a Series containing counts of unique values.
+        Return an array of unique values and an array of their counts.
 
         Parameters
         ----------
-        dropna : bool, default True
-            Don't include counts of NaT values.
+        dropna : bool, default False
 
         Returns
         -------
-        Series
+        ExtensionArray
+        ndarray[int64]
         """
-        from pandas import Series, Index
-
         if dropna:
-            values = self[~self.isna()]._data
+            values = self[~self.isna()]
         else:
-            values = self._data
+            values = self
 
-        cls = type(self)
+        arg = values._values_for_factorize()[0]
 
-        result = value_counts(values, sort=False, dropna=dropna)
-        index = Index(
-            cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name
-        )
-        return Series(result.values, index=index, name=result.name)
+        result = value_counts(arg, sort=False, dropna=False)
+
+        freq = self.freq if is_period_dtype(self) else None
+        idx = result.index
+        new_index = type(self)(idx, dtype=self.dtype, freq=freq)  # type: ignore
+        counts = result.values
+
+        return new_index, counts
 
     def map(self, mapper):
         # TODO(GH-23179): Add ExtensionArray.map

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -578,11 +578,9 @@ def _ndarray_values(self) -> np.ndarray:
         """
         return self._data
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Returns a Series containing counts of each category.
-
-        Every category will have an entry, even those with a count of 0.
+        Return a tuple describing the counts for each value.
 
         Parameters
         ----------
@@ -591,15 +589,15 @@ def value_counts(self, dropna=True):
 
         Returns
         -------
-        counts : Series
+        index : Index
+        values : ndarray[int64]
 
         See Also
         --------
         Series.value_counts
-
         """
 
-        from pandas import Index, Series
+        from pandas import Index
 
         # compute counts on the data with no nans
         data = self._data[~self._mask]
@@ -624,8 +622,7 @@ def value_counts(self, dropna=True):
                 ),
                 dtype=object,
             )
-
-        return Series(array, index=index)
+        return index, array
 
     def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
         # TODO: https://github.com/pandas-dev/pandas/issues/30037

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -855,25 +855,12 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
 
         return self._shallow_copy(left_take, right_take)
 
-    def value_counts(self, dropna=True):
-        """
-        Returns a Series containing counts of each interval.
-
-        Parameters
-        ----------
-        dropna : bool, default True
-            Don't include counts of NaN.
-
-        Returns
-        -------
-        counts : Series
-
-        See Also
-        --------
-        Series.value_counts
-        """
+    def _value_counts(self, dropna=True):
         # TODO: implement this is a non-naive way!
-        return value_counts(np.asarray(self), dropna=dropna)
+
+        arg = self._values_for_factorize()[0]
+        result = value_counts(arg, dropna=dropna)
+        return result.index, result.values
 
     # Formatting
 

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -34,7 +34,7 @@
     is_string_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCSparseArray
+from pandas.core.dtypes.generic import ABCSeries, ABCSparseArray
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
 import pandas.core.algorithms as algos
@@ -696,20 +696,19 @@ def factorize(self, na_sentinel=-1):
         uniques = SparseArray(uniques, dtype=self.dtype)
         return codes, uniques
 
-    def value_counts(self, dropna=True):
+    def _value_counts(self, dropna=True):
         """
-        Returns a Series containing counts of unique values.
+        Return an array of unique values and an array of their counts.
 
         Parameters
         ----------
-        dropna : boolean, default True
-            Don't include counts of NaN, even if NaN is in sp_values.
+        dropna : bool, default True
 
         Returns
         -------
-        counts : Series
+        ndarray
+        ndarray[int64]
         """
-        from pandas import Index, Series
 
         keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
         fcounts = self.sp_index.ngaps
@@ -728,10 +727,7 @@ def value_counts(self, dropna=True):
                     keys = np.insert(keys, 0, self.fill_value)
                     counts = np.insert(counts, 0, fcounts)
 
-        if not isinstance(keys, ABCIndexClass):
-            keys = Index(keys)
-        result = Series(counts, index=keys)
-        return result
+        return keys, counts
 
     # --------
     # Indexing

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -250,10 +250,11 @@ def astype(self, dtype, copy=True):
     def _reduce(self, name, skipna=True, **kwargs):
         raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
 
-    def value_counts(self, dropna=False):
+    def _value_counts(self, dropna=False):
         from pandas import value_counts
 
-        return value_counts(self._ndarray, dropna=dropna)
+        result = value_counts(self._ndarray, dropna=dropna)
+        return result.index, result.values
 
     # Overrride parent because we have different return types.
     @classmethod

diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py
@@ -214,13 +214,16 @@ def test_value_counts_preserves_tz(self):
         dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
         arr = DatetimeArray(dti).repeat([4, 3])
 
-        result = arr.value_counts()
+        index, values = arr._value_counts()
+        result = pd.Series(values, index=index)
 
         # Note: not tm.assert_index_equal, since `freq`s do not match
         assert result.index.equals(dti)
 
         arr[-2] = pd.NaT
-        result = arr.value_counts()
+        index, values = arr._value_counts()
+        result = pd.Series(values, index=index)
+
         expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
         tm.assert_series_equal(result, expected)