Skip to content

REF: EA value_counts -> _value_counts #30673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,8 +708,9 @@ def value_counts(
if is_extension_array_dtype(values):

# handle Categorical and sparse,
result = Series(values)._values.value_counts(dropna=dropna)
result.name = name
arr = extract_array(values)
index, counts = arr._value_counts(dropna=dropna)
result = Series(counts, index=index, name=name)
counts = result.values

else:
Expand Down
16 changes: 6 additions & 10 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,11 +528,9 @@ def astype(self, dtype, copy=True):
data = self._coerce_to_ndarray(na_value=na_value)
return astype_nansafe(data, dtype, copy=False)

def value_counts(self, dropna=True):
def _value_counts(self, dropna=True):
"""
Returns a Series containing counts of each category.

Every category will have an entry, even those with a count of 0.
Return a tuple describing the counts for each value.

Parameters
----------
Expand All @@ -541,15 +539,14 @@ def value_counts(self, dropna=True):

Returns
-------
counts : Series
index : BooleanArray
values : ndarray[int64]

See Also
--------
Series.value_counts

"""

from pandas import Index, Series
from pandas import Index

# compute counts on the data with no nans
data = self._data[~self._mask]
Expand All @@ -571,8 +568,7 @@ def value_counts(self, dropna=True):
index = Index(
np.concatenate([index, np.array([np.nan], dtype=object)]), dtype=object
)

return Series(array, index=index)
return index, array

def _values_for_argsort(self) -> np.ndarray:
"""
Expand Down
29 changes: 18 additions & 11 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1423,9 +1423,9 @@ def dropna(self):

return result

def value_counts(self, dropna=True):
def _value_counts(self, dropna=True):
"""
Return a Series containing counts of each category.
Return a tuple describing the counts of each category.

Every category will have an entry, even those with a count of 0.

Expand All @@ -1436,17 +1436,21 @@ def value_counts(self, dropna=True):

Returns
-------
counts : Series
index : Categorical
values : ndarray[int64]

See Also
--------
Series.value_counts
"""
from pandas import Series, CategoricalIndex

code, cat = self._codes, self.categories
ncat, mask = len(cat), 0 <= code
ix, clean = np.arange(ncat), mask.all()
code = self._codes
mask = 0 <= code
clean = mask.all()

cat = self.categories
ncat = len(cat)
ix = np.arange(ncat)

if dropna or clean:
obs = code if clean else code[mask]
Expand All @@ -1455,9 +1459,8 @@ def value_counts(self, dropna=True):
count = np.bincount(np.where(mask, code, ncat))
ix = np.append(ix, -1)

ix = self._constructor(ix, dtype=self.dtype, fastpath=True)

return Series(count, index=CategoricalIndex(ix), dtype="int64")
index = self._constructor(ix, dtype=self.dtype, fastpath=True)
return index, count.astype(np.int64)

def _internal_get_values(self):
"""
Expand Down Expand Up @@ -2323,7 +2326,11 @@ def describe(self):
description: `DataFrame`
A dataframe with frequency and counts by category.
"""
counts = self.value_counts(dropna=False)
from pandas import Series

index, values = self._value_counts(dropna=False)
counts = Series(values, index=index)

freqs = counts / float(counts.sum())

from pandas.core.reshape.concat import concat
Expand Down
31 changes: 16 additions & 15 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,33 +679,34 @@ def repeat(self, repeats, *args, **kwargs):
values = self._data.repeat(repeats)
return type(self)(values.view("i8"), dtype=self.dtype)

def value_counts(self, dropna=False):
def _value_counts(self, dropna: bool = False):
"""
Return a Series containing counts of unique values.
Return an array of unique values and an array of their counts.

Parameters
----------
dropna : bool, default True
Don't include counts of NaT values.
dropna : bool, default False

Returns
-------
Series
ExtensionArray
ndarray[int64]
"""
from pandas import Series, Index

if dropna:
values = self[~self.isna()]._data
values = self[~self.isna()]
else:
values = self._data
values = self

cls = type(self)
arg = values._values_for_factorize()[0]

result = value_counts(values, sort=False, dropna=dropna)
index = Index(
cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name
)
return Series(result.values, index=index, name=result.name)
result = value_counts(arg, sort=False, dropna=False)

freq = self.freq if is_period_dtype(self) else None
idx = result.index
new_index = type(self)(idx, dtype=self.dtype, freq=freq) # type: ignore
counts = result.values

return new_index, counts

def map(self, mapper):
# TODO(GH-23179): Add ExtensionArray.map
Expand Down
15 changes: 6 additions & 9 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,11 +578,9 @@ def _ndarray_values(self) -> np.ndarray:
"""
return self._data

def value_counts(self, dropna=True):
def _value_counts(self, dropna=True):
"""
Returns a Series containing counts of each category.

Every category will have an entry, even those with a count of 0.
Return a tuple describing the counts for each value.

Parameters
----------
Expand All @@ -591,15 +589,15 @@ def value_counts(self, dropna=True):

Returns
-------
counts : Series
index : IntegerArray
values : ndarray[int64]

See Also
--------
Series.value_counts

"""

from pandas import Index, Series
from pandas import Index

# compute counts on the data with no nans
data = self._data[~self._mask]
Expand All @@ -624,8 +622,7 @@ def value_counts(self, dropna=True):
),
dtype=object,
)

return Series(array, index=index)
return index, array

def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
# TODO: https://github.com/pandas-dev/pandas/issues/30037
Expand Down
23 changes: 5 additions & 18 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,25 +855,12 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):

return self._shallow_copy(left_take, right_take)

def value_counts(self, dropna=True):
"""
Returns a Series containing counts of each interval.

Parameters
----------
dropna : bool, default True
Don't include counts of NaN.

Returns
-------
counts : Series

See Also
--------
Series.value_counts
"""
def _value_counts(self, dropna=True):
# TODO: implement this in a non-naive way!
return value_counts(np.asarray(self), dropna=dropna)

arg = self._values_for_factorize()[0]
result = value_counts(arg, dropna=dropna)
return result.index, result.values

# Formatting

Expand Down
18 changes: 7 additions & 11 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCSparseArray
from pandas.core.dtypes.generic import ABCSeries, ABCSparseArray
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

import pandas.core.algorithms as algos
Expand Down Expand Up @@ -696,20 +696,19 @@ def factorize(self, na_sentinel=-1):
uniques = SparseArray(uniques, dtype=self.dtype)
return codes, uniques

def value_counts(self, dropna=True):
def _value_counts(self, dropna=True):
"""
Returns a Series containing counts of unique values.
Return an array of unique values and an array of their counts.

Parameters
----------
dropna : boolean, default True
Don't include counts of NaN, even if NaN is in sp_values.
dropna : bool, default True

Returns
-------
counts : Series
ndarray
ndarray[int64]
"""
from pandas import Index, Series

keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna)
fcounts = self.sp_index.ngaps
Expand All @@ -728,10 +727,7 @@ def value_counts(self, dropna=True):
keys = np.insert(keys, 0, self.fill_value)
counts = np.insert(counts, 0, fcounts)

if not isinstance(keys, ABCIndexClass):
keys = Index(keys)
result = Series(counts, index=keys)
return result
return keys, counts

# --------
# Indexing
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,11 @@ def astype(self, dtype, copy=True):
def _reduce(self, name, skipna=True, **kwargs):
raise TypeError(f"Cannot perform reduction '{name}' with string dtype")

def value_counts(self, dropna=False):
def _value_counts(self, dropna=False):
from pandas import value_counts

return value_counts(self._ndarray, dropna=dropna)
result = value_counts(self._ndarray, dropna=dropna)
return result.index, result.values

# Override parent because we have different return types.
@classmethod
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,16 @@ def test_value_counts_preserves_tz(self):
dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central")
arr = DatetimeArray(dti).repeat([4, 3])

result = arr.value_counts()
index, values = arr._value_counts()
result = pd.Series(values, index=index)

# Note: not tm.assert_index_equal, since `freq`s do not match
assert result.index.equals(dti)

arr[-2] = pd.NaT
result = arr.value_counts()
index, values = arr._value_counts()
result = pd.Series(values, index=index)

expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
tm.assert_series_equal(result, expected)

Expand Down