diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 180411afb117d..4259393f682e5 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -21,6 +21,7 @@ Fixed regressions - Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`) - Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`) - Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) +- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) - Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`) - Fixed regression in objTOJSON.c fix return-type warning (:issue:`31463`) - Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 7d57c67e70b58..6671375f628e7 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -670,7 +670,9 @@ cdef class StringHashTable(HashTable): val = values[i] if isinstance(val, str): - v = get_c_string(val) + # GH#31499 if we have a np.str_, get_c_string won't recognize + # it as a str, even though isinstance does. + v = get_c_string(val) else: v = get_c_string(self.na_string_sentinel) vecs[i] = v @@ -703,7 +705,9 @@ cdef class StringHashTable(HashTable): val = values[i] if isinstance(val, str): - v = get_c_string(val) + # GH#31499 if we have a np.str_, get_c_string won't recognize + # it as a str, even though isinstance does.
+ v = get_c_string(val) else: v = get_c_string(self.na_string_sentinel) vecs[i] = v diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index cfba3da354d44..70e1421c8dcf4 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -408,6 +408,11 @@ def test_constructor_str_unknown(self): with pytest.raises(ValueError, match="Unknown dtype"): Categorical([1, 2], dtype="foo") + def test_constructor_np_strs(self): + # GH#31499 HashTable.map_locations needs to work on np.str_ objects + cat = pd.Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) + assert all(isinstance(x, np.str_) for x in cat.categories) + def test_constructor_from_categorical_with_dtype(self): dtype = CategoricalDtype(["a", "b", "c"], ordered=True) values = Categorical(["a", "b", "d"])