From 42aa2313d7d65894a89385ea93920d6f1f2f43ee Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 22 Sep 2020 11:57:41 -0700
Subject: [PATCH] REF: de-duplicate Categorical validators

---
 pandas/core/arrays/categorical.py                 | 10 ++--------
 pandas/tests/indexes/categorical/test_category.py | 13 ++++++++-----
 pandas/tests/indexing/test_categorical.py         |  5 +++--
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ef69d6565cfeb..e984f2c26b916 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1177,13 +1177,7 @@ def _validate_where_value(self, value):
         return self._validate_listlike(value)
 
     def _validate_insert_value(self, value) -> int:
-        code = self.categories.get_indexer([value])
-        if (code == -1) and not (is_scalar(value) and isna(value)):
-            raise TypeError(
-                "cannot insert an item into a CategoricalIndex "
-                "that is not already an existing category"
-            )
-        return code[0]
+        return self._validate_fill_value(value)
 
     def _validate_searchsorted_value(self, value):
         # searchsorted is very performance sensitive. By converting codes
@@ -1213,7 +1207,7 @@ def _validate_fill_value(self, fill_value):
         ValueError
         """
 
-        if isna(fill_value):
+        if is_valid_nat_for_dtype(fill_value, self.categories.dtype):
             fill_value = -1
         elif fill_value in self.categories:
             fill_value = self._unbox_scalar(fill_value)
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
index a3a06338a0277..81b31e3ea180c 100644
--- a/pandas/tests/indexes/categorical/test_category.py
+++ b/pandas/tests/indexes/categorical/test_category.py
@@ -171,11 +171,8 @@ def test_insert(self):
         tm.assert_index_equal(result, expected, exact=True)
 
         # invalid
-        msg = (
-            "cannot insert an item into a CategoricalIndex that is not "
-            "already an existing category"
-        )
-        with pytest.raises(TypeError, match=msg):
+        msg = "'fill_value=d' is not present in this Categorical's categories"
+        with pytest.raises(ValueError, match=msg):
             ci.insert(0, "d")
 
         # GH 18295 (test missing)
@@ -184,6 +181,12 @@ def test_insert(self):
             result = CategoricalIndex(list("aabcb")).insert(1, na)
             tm.assert_index_equal(result, expected)
 
+    def test_insert_na_mismatched_dtype(self):
+        ci = pd.CategoricalIndex([0, 1, 1])
+        msg = "'fill_value=NaT' is not present in this Categorical's categories"
+        with pytest.raises(ValueError, match=msg):
+            ci.insert(0, pd.NaT)
+
     def test_delete(self):
 
         ci = self.create_index()
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 98edb56260b01..9f3ee81fac2eb 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -76,9 +76,10 @@ def test_loc_scalar(self):
             "cannot insert an item into a CategoricalIndex that is not "
             "already an existing category"
         )
-        with pytest.raises(TypeError, match=msg):
+        msg = "'fill_value=d' is not present in this Categorical's categories"
+        with pytest.raises(ValueError, match=msg):
             df.loc["d", "A"] = 10
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(ValueError, match=msg):
             df.loc["d", "C"] = 10
 
         with pytest.raises(KeyError, match="^1$"):