From 4095bceda3a5c8c34a22e24d2b2eb8b010c61d90 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Tue, 29 Aug 2023 06:50:44 +0000 Subject: [PATCH 1/3] added test for missing category --- pandas/tests/frame/methods/test_value_counts.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index c05a929360478..f6cb1cec47eb5 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -189,3 +189,17 @@ def test_value_counts_categorical_future_warning(): name="count", ) tm.assert_series_equal(result, expected) + + +def test_value_counts_with_missing_category(): + # GH-54836 + raw_cat = pd.Categorical([1, 2, 4], categories=[1, 2, 3, 4]) + df = pd.DataFrame({"a": [1, 2, 3]}, dtype="category") + df["b"] = raw_cat + result = df.value_counts() + expected = pd.Series( + 1, + index=pd.MultiIndex.from_frame(df), + name="count", + ) + tm.assert_series_equal(result, expected) From 5cd21fcbd0cc06c70114325cd4989425b4b7f98c Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Tue, 29 Aug 2023 11:22:59 +0000 Subject: [PATCH 2/3] updated test --- pandas/tests/frame/methods/test_value_counts.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index f6cb1cec47eb5..ed160278c6869 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -193,13 +193,15 @@ def test_value_counts_categorical_future_warning(): def test_value_counts_with_missing_category(): # GH-54836 - raw_cat = pd.Categorical([1, 2, 4], categories=[1, 2, 3, 4]) - df = pd.DataFrame({"a": [1, 2, 3]}, dtype="category") - df["b"] = raw_cat + df = pd.DataFrame({"a": pd.Categorical([1, 2, 4], categories=[1, 2, 3, 4])}) result = df.value_counts() expected = pd.Series( 1, - index=pd.MultiIndex.from_frame(df), + index=pd.MultiIndex.from_arrays( + [pd.CategoricalIndex([1, 2, 4], categories=[1, 2, 3, 4], name="a")] + ), name="count", ) + # result should include the missing category + expected[3] = 0 tm.assert_series_equal(result, expected) From 209e807d78fb77566710adc3e1bd30b25105b533 Mon Sep 17 00:00:00 2001 From: RajatS Mukherjee Date: Wed, 30 Aug 2023 20:19:27 +0000 Subject: [PATCH 3/3] simplified result value --- pandas/tests/frame/methods/test_value_counts.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index ed160278c6869..f30db91f82b60 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -196,12 +196,10 @@ def test_value_counts_with_missing_category(): df = pd.DataFrame({"a": pd.Categorical([1, 2, 4], categories=[1, 2, 3, 4])}) result = df.value_counts() expected = pd.Series( - 1, + [1, 1, 1, 0], index=pd.MultiIndex.from_arrays( - [pd.CategoricalIndex([1, 2, 4], categories=[1, 2, 3, 4], name="a")] + [pd.CategoricalIndex([1, 2, 4, 3], categories=[1, 2, 3, 4], name="a")] ), name="count", ) - # result should include the missing category - expected[3] = 0 tm.assert_series_equal(result, expected)