Skip to content

Commit e5b2692

Browse files
neelmraman authored and CGe0516 committed
BUG: Fix bug in SeriesGroupBy.value_counts when DataFrame has one row (pandas-dev#42618) (pandas-dev#42640)
1 parent 733fc8b commit e5b2692

File tree

3 files changed

+17
-13
lines changed

3 files changed

+17
-13
lines changed

doc/source/whatsnew/v1.3.1.rst

+1 -1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ Fixed regressions
2828
- Fixed regression for ``SettingWithCopyWarning`` displaying incorrect stacklevel (:issue:`42570`)
2929
- Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`)
3030
- Fixed regression in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
31-
31+
- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`)
3232

3333
.. ---------------------------------------------------------------------------
3434

pandas/core/groupby/generic.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -758,7 +758,7 @@ def apply_series_value_counts():
758758
# new values are where sorted labels change
759759
lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
760760
inc = np.r_[True, lchanges]
761-
if not len(lchanges):
761+
if not len(val):
762762
inc = lchanges
763763
inc[idx] = True # group boundaries are also new values
764764
out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts

pandas/tests/groupby/test_value_counts.py

+15 -11
Original file line number | Diff line number | Diff line change
@@ -122,23 +122,27 @@ def test_series_groupby_value_counts_with_grouper():
122122
tm.assert_series_equal(result, expected)
123123

124124

125-
def test_series_groupby_value_counts_empty():
125+
@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
126+
def test_series_groupby_value_counts_empty(columns):
126127
# GH39172
127-
df = DataFrame(columns=["A", "B"])
128-
dfg = df.groupby("A")
128+
df = DataFrame(columns=columns)
129+
dfg = df.groupby(columns[:-1])
129130

130-
result = dfg["B"].value_counts()
131-
expected = Series([], name="B", dtype=result.dtype)
132-
expected.index = MultiIndex.from_arrays([[]] * 2, names=["A", "B"])
131+
result = dfg[columns[-1]].value_counts()
132+
expected = Series([], name=columns[-1], dtype=result.dtype)
133+
expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns)
133134

134135
tm.assert_series_equal(result, expected)
135136

136-
df = DataFrame(columns=["A", "B", "C"])
137-
dfg = df.groupby(["A", "B"])
138137

139-
result = dfg["C"].value_counts()
140-
expected = Series([], name="C", dtype=result.dtype)
141-
expected.index = MultiIndex.from_arrays([[]] * 3, names=["A", "B", "C"])
138+
@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
139+
def test_series_groupby_value_counts_one_row(columns):
140+
# GH42618
141+
df = DataFrame(data=[range(len(columns))], columns=columns)
142+
dfg = df.groupby(columns[:-1])
143+
144+
result = dfg[columns[-1]].value_counts()
145+
expected = df.value_counts().rename(columns[-1])
142146

143147
tm.assert_series_equal(result, expected)
144148

0 commit comments

Comments
 (0)