BUG: Fix ValueError when grouping by read-only category with sort=False (pandas-dev#33410)

erik-hasse · erik-hasse · commit 4d3a29e8cdda · 2020-04-09T21:51:02.000-04:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -517,6 +517,7 @@ Groupby/resample/rolling
 - Bug in :meth:`SeriesGroupBy.first`, :meth:`SeriesGroupBy.last`, :meth:`SeriesGroupBy.min`, and :meth:`SeriesGroupBy.max` returning floats when applied to nullable Booleans (:issue:`33071`)
 - Bug in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`)
 - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`)
+- Bug in :meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -206,7 +206,7 @@ def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'):
 {{if dtype == 'object'}}
 def ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values):
 {{else}}
-def ismember_{{dtype}}({{c_type}}[:] arr, {{c_type}}[:] values):
+def ismember_{{dtype}}(const {{c_type}}[:] arr, {{c_type}}[:] values):
 {{endif}}
     """
     Return boolean of values in arr on an
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -1380,3 +1380,21 @@ def test_groupby_agg_non_numeric():
 
     result = df.groupby([1, 2, 1]).nunique()
     tm.assert_frame_equal(result, expected)
+
+
+def test_read_only_category_no_sort():
+    # GH33410: grouping by read-only categories with sort=False raised ValueError
+    cats = np.array([1, 2])
+    cats.flags.writeable = False
+    df = DataFrame(
+        {
+            "a": [1, 3, 5, 7],
+            "b": Categorical([1, 1, 2, 2], categories=Index(cats))
+        }
+    )
+    expected = DataFrame(
+        data={"a": [2, 6]},
+        index=CategoricalIndex([1, 2], name="b")
+    )
+    result = df.groupby("b", sort=False).mean()
+    tm.assert_frame_equal(result, expected)