diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 039a86da0541e..18fc16b5f38d3 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -224,6 +224,7 @@ Datetimelike - Bug in :func:`to_datetime` with sequences of ``np.str_`` objects incorrectly raising (:issue:`32264`) - Bug in :class:`Timestamp` construction when passing datetime components as positional arguments and ``tzinfo`` as a keyword argument incorrectly raising (:issue:`31929`) - Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) +- Bug in :meth:`SeriesGroupBy.value_counts` where the index of the result was missing the name of the counted column when that column had categorical dtype (:issue:`44324`) - Timedelta diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 949f369849323..9d0305d0d1208 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -602,23 +602,23 @@ def value_counts( ids, _, _ = self.grouper.group_info val = self.obj._values - def apply_series_value_counts(): - return self.apply( + names = self.grouper.names + [self.obj.name] + + if is_categorical_dtype(val.dtype) or ( + bins is not None and not np.iterable(bins) + ): + # scalar bins cannot be done at top level + # in a backward compatible way + # GH38672 relates to categorical dtype + ser = self.apply( Series.value_counts, normalize=normalize, sort=sort, ascending=ascending, bins=bins, ) - - if bins is not None: - if not np.iterable(bins): - # scalar bins cannot be done at top level - # in a backward compatible way - return apply_series_value_counts() - elif is_categorical_dtype(val.dtype): - # GH38672 - return apply_series_value_counts() + ser.index.names = names + return ser # groupby removes null keys from groupings mask = ids != -1 @@ -683,7 +683,6 @@ def apply_series_value_counts(): levels = [ping.group_index for ping in 
self.grouper.groupings] + [ lev # type: ignore[list-item] ] - names = self.grouper.names + [self.obj.name] if dropna: mask = codes[-1] != -1 diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index a33e4efbe3b6d..0cc7fa11d096a 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -271,7 +271,7 @@ def test_sorting_with_different_categoricals(): index = ["low", "med", "high", "low", "med", "high"] index = Categorical(index, categories=["low", "med", "high"], ordered=True) index = [["A", "A", "A", "B", "B", "B"], CategoricalIndex(index)] - index = MultiIndex.from_arrays(index, names=["group", None]) + index = MultiIndex.from_arrays(index, names=["group", "dose"]) expected = Series([2] * 6, index=index, name="dose") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 54f672cb69800..d38ba84cd1397 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -22,6 +22,26 @@ import pandas._testing as tm +def tests_value_counts_index_names_category_column(): + # GH44324 Missing name of index category column + df = DataFrame( + { + "gender": ["female"], + "country": ["US"], + } + ) + df["gender"] = df["gender"].astype("category") + result = df.groupby("country")["gender"].value_counts() + + # Construct expected, very specific multiindex + df_mi_expected = DataFrame([["US", "female"]], columns=["country", "gender"]) + df_mi_expected["gender"] = df_mi_expected["gender"].astype("category") + mi_expected = MultiIndex.from_frame(df_mi_expected) + expected = Series([1], index=mi_expected, name="gender") + + tm.assert_series_equal(result, expected) + + # our starting frame def seed_df(seed_nans, n, m): np.random.seed(1234)