Skip to content

Commit 8d87f3b

Browse files
jreback authored and TomAugspurger committed
move NaN deprecation warning to _validate_categories, cleanup a bit
1 parent e757e8a commit 8d87f3b

File tree

3 files changed

+59
-31
lines changed

3 files changed

+59
-31
lines changed

pandas/core/base.py

+1
Original file line number | Diff line number | Diff line change
@@ -392,6 +392,7 @@ def argmin(self, axis=None):
392392
"""
393393
return nanops.nanargmin(self.values)
394394

395+
@cache_readonly
395396
def hasnans(self):
396397
""" return if I have any nans; enables various perf speedups """
397398
return com.isnull(self).any()

pandas/core/categorical.py

+46-23
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
207207
if fastpath:
208208
# fast path
209209
self._codes = _coerce_indexer_dtype(values, categories)
210-
self.categories = categories
210+
self._categories = self._validate_categories(categories, fastpath=isinstance(categories, ABCIndexClass))
211211
self._ordered = ordered
212212
return
213213

@@ -274,6 +274,8 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
274274
### FIXME ####
275275
raise NotImplementedError("> 1 ndim Categorical are not supported at this time")
276276

277+
categories = self._validate_categories(categories)
278+
277279
else:
278280
# there were two ways if categories are present
279281
# - the old one, where each value is a int pointer to the levels array -> not anymore
@@ -282,7 +284,6 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
282284

283285
# make sure that we always have the same type here, no matter what we get passed in
284286
categories = self._validate_categories(categories)
285-
286287
codes = _get_codes_for_values(values, categories)
287288

288289
# TODO: check for old style usage. These warnings should be removes after 0.18/ in 2016
@@ -295,7 +296,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
295296
"'Categorical.from_codes(codes, categories)'?", RuntimeWarning, stacklevel=2)
296297

297298
self.set_ordered(ordered or False, inplace=True)
298-
self.categories = categories
299+
self._categories = categories
299300
self._codes = _coerce_indexer_dtype(codes, categories)
300301

301302
def copy(self):
@@ -421,9 +422,15 @@ def _get_labels(self):
421422
_categories = None
422423

423424
@classmethod
424-
def _validate_categories(cls, categories):
425+
def _validate_categories(cls, categories, fastpath=False):
425426
"""
426427
Validates that we have good categories
428+
429+
Parameters
430+
----------
431+
fastpath : boolean (default: False)
432+
Don't perform validation of the categories for uniqueness or nulls
433+
427434
"""
428435
if not isinstance(categories, ABCIndexClass):
429436
dtype = None
@@ -439,22 +446,40 @@ def _validate_categories(cls, categories):
439446

440447
from pandas import Index
441448
categories = Index(categories, dtype=dtype)
442-
if not categories.is_unique:
443-
raise ValueError('Categorical categories must be unique')
449+
450+
if not fastpath:
451+
452+
# check properties of the categories
453+
# we don't allow NaNs in the categories themselves
454+
455+
if categories.hasnans:
456+
# NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
457+
msg = ('\nSetting NaNs in `categories` is deprecated and '
458+
'will be removed in a future version of pandas.')
459+
warn(msg, FutureWarning, stacklevel=5)
460+
461+
# categories must be unique
462+
463+
if not categories.is_unique:
464+
raise ValueError('Categorical categories must be unique')
465+
444466
return categories
445467

446-
def _set_categories(self, categories, validate=True):
447-
""" Sets new categories """
448-
if validate:
449-
categories = self._validate_categories(categories)
450-
if not self._categories is None and len(categories) != len(self._categories):
451-
raise ValueError("new categories need to have the same number of items than the old "
452-
"categories!")
453-
if np.any(isnull(categories)):
454-
# NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
455-
msg = ('\nSetting NaNs in `categories` is deprecated and '
456-
'will be removed in a future version of pandas.')
457-
warn(msg, FutureWarning, stacklevel=9)
468+
def _set_categories(self, categories, fastpath=False):
469+
""" Sets new categories
470+
471+
Parameters
472+
----------
473+
fastpath : boolean (default: False)
474+
Don't perform validation of the categories for uniqueness or nulls
475+
476+
"""
477+
478+
categories = self._validate_categories(categories, fastpath=fastpath)
479+
if not fastpath and not self._categories is None and len(categories) != len(self._categories):
480+
raise ValueError("new categories need to have the same number of items than the old "
481+
"categories!")
482+
458483
self._categories = categories
459484

460485
def _get_categories(self):
@@ -587,11 +612,10 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
587612
if not cat._categories is None and len(new_categories) < len(cat._categories):
588613
# remove all _codes which are larger and set to -1/NaN
589614
self._codes[self._codes >= len(new_categories)] = -1
590-
cat._set_categories(new_categories, validate=False)
591615
else:
592616
values = cat.__array__()
593617
cat._codes = _get_codes_for_values(values, new_categories)
594-
cat._set_categories(new_categories, validate=False)
618+
cat._categories = new_categories
595619

596620
if ordered is None:
597621
ordered = self.ordered
@@ -712,9 +736,8 @@ def add_categories(self, new_categories, inplace=False):
712736
msg = "new categories must not include old categories: %s" % str(already_included)
713737
raise ValueError(msg)
714738
new_categories = list(self._categories) + list(new_categories)
715-
new_categories = self._validate_categories(new_categories)
716739
cat = self if inplace else self.copy()
717-
cat._set_categories(new_categories, validate=False)
740+
cat._categories = self._validate_categories(new_categories)
718741
cat._codes = _coerce_indexer_dtype(cat._codes, new_categories)
719742
if not inplace:
720743
return cat
@@ -797,7 +820,7 @@ def remove_unused_categories(self, inplace=False):
797820
from pandas.core.index import _ensure_index
798821
new_categories = _ensure_index(new_categories)
799822
cat._codes = _get_codes_for_values(cat.__array__(), new_categories)
800-
cat._set_categories(new_categories, validate=False)
823+
cat._categories = new_categories
801824
if not inplace:
802825
return cat
803826

pandas/tests/test_categorical.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,8 @@ def f():
129129
Categorical(["a","b"], ["a","b","b"])
130130
self.assertRaises(ValueError, f)
131131
def f():
132-
Categorical([1,2], [1,2,np.nan, np.nan])
132+
with tm.assert_produces_warning(FutureWarning):
133+
Categorical([1,2], [1,2,np.nan, np.nan])
133134
self.assertRaises(ValueError, f)
134135

135136
# The default should be unordered
@@ -879,15 +880,18 @@ def test_nan_handling(self):
879880
base = Categorical([], with_null)
880881
expected = Categorical([], without)
881882

882-
with tm.assert_produces_warning(FutureWarning):
883-
for nullval in null_values:
884-
result = base.remove_categories(nullval)
885-
self.assert_categorical_equal(result, expected)
883+
for nullval in null_values:
884+
result = base.remove_categories(nullval)
885+
self.assert_categorical_equal(result, expected)
886886

887887
# Different null values are indistinguishable
888888
for i, j in [(0, 1), (0, 2), (1, 2)]:
889889
nulls = [null_values[i], null_values[j]]
890-
self.assertRaises(ValueError, lambda: Categorical([], categories=nulls))
890+
891+
def f():
892+
with tm.assert_produces_warning(FutureWarning):
893+
Categorical([], categories=nulls)
894+
self.assertRaises(ValueError, f)
891895

892896

893897
def test_isnull(self):
@@ -3488,8 +3492,8 @@ def f():
34883492
c[0] = np.nan
34893493
df = pd.DataFrame({"cats":c, "vals":[1,2,3]})
34903494
df_exp = pd.DataFrame({"cats": Categorical(["a","b","a"]), "vals": [1,2,3]})
3491-
with tm.assert_produces_warning(FutureWarning):
3492-
res = df.fillna("a")
3495+
3496+
res = df.fillna("a")
34933497
tm.assert_frame_equal(res, df_exp)
34943498

34953499

0 commit comments

Comments
 (0)