Skip to content

Commit fc8ea62

Browse files
author
Lucas
committed
Restore original single grouper test + add notable fix sub-section
1 parent 6b388ec commit fc8ea62

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

doc/source/whatsnew/v1.5.0.rst

+40
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,46 @@ upon serialization. (Related issue :issue:`12997`)
263263
# Roundtripping now works
264264
pd.read_json(a.to_json(date_format='iso'), typ="series").index == a.index
265265
266+
267+
DataFrameGroupBy.value_counts with non-grouping categorical columns and ``observed=True``
268+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
269+
270+
Previously, calling :meth:`.DataFrameGroupBy.value_counts` with ``observed=True`` would incorrectly drop non-observed categories of non-grouping categorical columns. These categories are now retained in the result with a count of zero (:issue:`46357`).
271+
272+
.. code-block:: ipython
273+
274+
In [6]: df = pd.DataFrame(["a", "b", "c"], dtype="category").iloc[0:2]
275+
In [7]: df
276+
Out[7]:
277+
0
278+
0 a
279+
1 b
280+
281+
*Old Behavior*
282+
283+
.. code-block:: ipython
284+
285+
In [8]: df.groupby(level=0, observed=True).value_counts()
286+
Out[8]:
287+
0 a 1
288+
1 b 1
289+
dtype: int64
290+
291+
292+
*New Behavior*
293+
294+
.. code-block:: ipython
295+
296+
In [9]: df.groupby(level=0, observed=True).value_counts()
297+
Out[9]:
298+
0 a 1
299+
1 a 0
300+
b 1
301+
0 b 0
302+
c 0
303+
1 c 0
304+
dtype: int64
305+
266306
.. ---------------------------------------------------------------------------
267307
.. _whatsnew_150.api_breaking:
268308

pandas/tests/groupby/test_frame_value_counts.py

+60
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,66 @@ def test_data_frame_value_counts_dropna(
308308
tm.assert_series_equal(result_frame_groupby, expected)
309309

310310

311+
@pytest.mark.parametrize("as_index", [False, True])
312+
@pytest.mark.parametrize("observed", [False, True])
313+
@pytest.mark.parametrize(
314+
"normalize, expected_data",
315+
[
316+
(False, np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64)),
317+
(
318+
True,
319+
np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]),
320+
),
321+
],
322+
)
323+
def test_categorical_single_grouper_with_only_observed_categories(
324+
education_df, as_index, observed, normalize, expected_data
325+
):
326+
327+
# Test single categorical grouper with only observed grouping categories
328+
# when non-groupers are also categorical
329+
330+
gp = education_df.astype("category").groupby(
331+
"country", as_index=as_index, observed=observed
332+
)
333+
result = gp.value_counts(normalize=normalize)
334+
335+
expected_index = MultiIndex.from_tuples(
336+
[
337+
("FR", "male", "low"),
338+
("FR", "female", "high"),
339+
("FR", "male", "medium"),
340+
("FR", "female", "low"),
341+
("FR", "female", "medium"),
342+
("FR", "male", "high"),
343+
("US", "female", "high"),
344+
("US", "male", "low"),
345+
("US", "female", "low"),
346+
("US", "female", "medium"),
347+
("US", "male", "high"),
348+
("US", "male", "medium"),
349+
],
350+
names=["country", "gender", "education"],
351+
)
352+
353+
expected_series = Series(
354+
data=expected_data,
355+
index=expected_index,
356+
)
357+
for i in range(3):
358+
expected_series.index = expected_series.index.set_levels(
359+
CategoricalIndex(expected_series.index.levels[i]), level=i
360+
)
361+
362+
if as_index:
363+
tm.assert_series_equal(result, expected_series)
364+
else:
365+
expected = expected_series.reset_index(
366+
name="proportion" if normalize else "count"
367+
)
368+
tm.assert_frame_equal(result, expected)
369+
370+
311371
def assert_categorical_single_grouper(
312372
education_df, as_index, observed, expected_index, normalize, expected_data
313373
):

0 commit comments

Comments
 (0)