diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index 838b7e5fc1a8b..099d818236bd2 100644 --- a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -28,7 +28,7 @@ Fixed regressions - Fixed regression for ``SettingWithCopyWarning`` displaying incorrect stacklevel (:issue:`42570`) - Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`) - Fixed regression in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`) - +- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 88d1baae86467..a6be85bf2be2a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -758,7 +758,7 @@ def apply_series_value_counts(): # new values are where sorted labels change lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1)) inc = np.r_[True, lchanges] - if not len(lchanges): + if not len(val): inc = lchanges inc[idx] = True # group boundaries are also new values out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 8bb07b7163f2e..54f672cb69800 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -122,23 +122,27 @@ def test_series_groupby_value_counts_with_grouper(): tm.assert_series_equal(result, expected) -def test_series_groupby_value_counts_empty(): +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +def test_series_groupby_value_counts_empty(columns): # GH39172 - df = DataFrame(columns=["A", "B"]) - dfg = df.groupby("A") + df = DataFrame(columns=columns) + dfg = df.groupby(columns[:-1]) - result = dfg["B"].value_counts() - expected = Series([], name="B", dtype=result.dtype) - expected.index = MultiIndex.from_arrays([[]] * 2, names=["A", "B"]) + result = dfg[columns[-1]].value_counts() + expected = Series([], name=columns[-1], dtype=result.dtype) + expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns) tm.assert_series_equal(result, expected) - df = DataFrame(columns=["A", "B", "C"]) - dfg = df.groupby(["A", "B"]) - result = dfg["C"].value_counts() - expected = Series([], name="C", dtype=result.dtype) - expected.index = MultiIndex.from_arrays([[]] * 3, names=["A", "B", "C"]) +@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]]) +def test_series_groupby_value_counts_one_row(columns): + # GH42618 + df = DataFrame(data=[range(len(columns))], columns=columns) + dfg = df.groupby(columns[:-1]) + + result = dfg[columns[-1]].value_counts() + expected = df.value_counts().rename(columns[-1]) tm.assert_series_equal(result, expected)