From d096fd562fc4c9d49a373f0e24e6ca422c20e766 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 19:38:46 -0800 Subject: [PATCH 1/5] REGR: Categorical with np.str_ categories --- pandas/_libs/hashtable_class_helper.pxi.in | 8 ++++++-- pandas/tests/arrays/categorical/test_constructors.py | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 7d57c67e70b58..6671375f628e7 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -670,7 +670,9 @@ cdef class StringHashTable(HashTable): val = values[i] if isinstance(val, str): - v = get_c_string(val) + # GH#31499 if we have a np.str_ get_c_string won't recognize + # it as a str, even though isinstance does. + v = get_c_string(<str>val) else: v = get_c_string(self.na_string_sentinel) vecs[i] = v @@ -703,7 +705,9 @@ cdef class StringHashTable(HashTable): val = values[i] if isinstance(val, str): - v = get_c_string(val) + # GH#31499 if we have a np.str_ get_c_string won't recognize + # it as a str, even though isinstance does. 
+ v = get_c_string(<str>val) else: v = get_c_string(self.na_string_sentinel) vecs[i] = v diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index cfba3da354d44..70e1421c8dcf4 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -408,6 +408,11 @@ def test_constructor_str_unknown(self): with pytest.raises(ValueError, match="Unknown dtype"): Categorical([1, 2], dtype="foo") + def test_constructor_np_strs(self): + # GH#31499 HashTable.map_locations needs to work on np.str_ objects + cat = pd.Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) + assert all(isinstance(x, np.str_) for x in cat.categories) + def test_constructor_from_categorical_with_dtype(self): dtype = CategoricalDtype(["a", "b", "c"], ordered=True) values = Categorical(["a", "b", "d"]) From d106d29319556380e0aaa55dd77eca851c1ee3a0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 19:40:21 -0800 Subject: [PATCH 2/5] whatsnew --- doc/source/whatsnew/v1.0.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index ff8433c7cafd9..745a0a66e4b65 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -19,7 +19,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - +- Bug in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) - - From b8b8e7c12e8c1a6e8e5e8cf5aa82b17983ce3c09 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 1 Feb 2020 08:11:39 -0800 Subject: [PATCH 3/5] Update doc/source/whatsnew/v1.0.1.rst Co-Authored-By: Tom Augspurger --- doc/source/whatsnew/v1.0.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 745a0a66e4b65..ae64d24a7ed3c 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -19,7 
+19,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- Bug in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) +- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) - - From 70553d823297ca68e9c68ec37e1bb049afd8b2f1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 3 Feb 2020 08:42:04 -0800 Subject: [PATCH 4/5] rebase fixup --- doc/source/whatsnew/v1.0.1.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 06b6efd1155c7..28b3aef4c5ea5 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -34,7 +34,6 @@ Deprecations ~~~~~~~~~~~~ - Support for negative integer for :attr:`pd.options.display.max_colwidth` is deprecated in favor of using ``None`` (:issue:`31532`) ->>>>>>> a2721fd602e43128314d4efd056dae56a89197bf .. --------------------------------------------------------------------------- From e78ad5feb02e007bddb9959ca6cabe9df33e0f7a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 4 Feb 2020 08:54:02 +0100 Subject: [PATCH 5/5] move whatsnew --- doc/source/whatsnew/v1.0.1.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index a963875e0ca3c..4259393f682e5 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -21,6 +21,7 @@ Fixed regressions - Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. 
a list or numpy array) (:issue:`31441`) - Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`) - Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) +- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) - Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`) - Fixed regression in objTOJSON.c fix return-type warning (:issue:`31463`) - Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`) @@ -51,10 +52,6 @@ Bug fixes - Bug in dtypes being lost in ``DataFrame.__invert__`` (``~`` operator) with mixed dtypes (:issue:`31183`) and for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) -**Categorical** -- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) - - **Plotting** - Plotting tz-aware timeseries no longer gives UserWarning (:issue:`31205`)