Skip to content

Commit aebe2a9

Browse files
toobaz authored and jreback committed
Remove old warnings (plus some useless code) (#18022)
1 parent 96a5274 commit aebe2a9

File tree

3 files changed

+23
-54
lines changed

3 files changed

+23
-54
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Deprecations
6060
Removal of prior version deprecations/changes
6161
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6262

63-
-
63+
- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`)
6464
-
6565
-
6666

pandas/core/categorical.py

+18-47
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
is_timedelta64_dtype,
2626
is_categorical,
2727
is_categorical_dtype,
28-
is_integer_dtype,
2928
is_list_like, is_sequence,
3029
is_scalar,
3130
is_dict_like)
@@ -261,6 +260,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
261260
# c.) infer from values
262261

263262
if dtype is not None:
263+
# The dtype argument takes precedence over values.dtype (if any)
264264
if isinstance(dtype, compat.string_types):
265265
if dtype == 'category':
266266
dtype = CategoricalDtype(categories, ordered)
@@ -275,9 +275,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
275275
ordered = dtype.ordered
276276

277277
elif is_categorical(values):
278+
# If no "dtype" was passed, use the one from "values", but honor
279+
# the "ordered" and "categories" arguments
278280
dtype = values.dtype._from_categorical_dtype(values.dtype,
279281
categories, ordered)
280282
else:
283+
# If dtype=None and values is not categorical, create a new dtype
281284
dtype = CategoricalDtype(categories, ordered)
282285

283286
# At this point, dtype is always a CategoricalDtype
@@ -294,28 +297,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
294297

295298
# sanitize input
296299
if is_categorical_dtype(values):
300+
if dtype.categories is None:
301+
dtype = CategoricalDtype(values.categories, dtype.ordered)
297302

298-
# we are either a Series or a CategoricalIndex
299-
if isinstance(values, (ABCSeries, ABCCategoricalIndex)):
300-
values = values._values
301-
302-
if ordered is None:
303-
ordered = values.ordered
304-
if categories is None:
305-
categories = values.categories
306-
values = values.get_values()
307-
308-
elif isinstance(values, (ABCIndexClass, ABCSeries)):
309-
# we'll do inference later
310-
pass
311-
312-
else:
313-
314-
# on numpy < 1.6 datetimelike get inferred to all i8 by
315-
# _sanitize_array which is fine, but since factorize does this
316-
# correctly no need here this is an issue because _sanitize_array
317-
# also coerces np.nan to a string under certain versions of numpy
318-
# as well
303+
elif not isinstance(values, (ABCIndexClass, ABCSeries)):
304+
# _sanitize_array coerces np.nan to a string under certain versions
305+
# of numpy
319306
values = maybe_infer_to_datetimelike(values, convert_dates=True)
320307
if not isinstance(values, np.ndarray):
321308
values = _convert_to_list_like(values)
@@ -335,7 +322,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
335322
codes, categories = factorize(values, sort=True)
336323
except TypeError:
337324
codes, categories = factorize(values, sort=False)
338-
if ordered:
325+
if dtype.ordered:
339326
# raise, as we don't have a sortable data structure and so
340327
# the user should give us one by specifying categories
341328
raise TypeError("'values' is not ordered, please "
@@ -347,34 +334,18 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
347334
raise NotImplementedError("> 1 ndim Categorical are not "
348335
"supported at this time")
349336

350-
if dtype.categories is None:
351-
# we're inferring from values
352-
dtype = CategoricalDtype(categories, ordered)
337+
# we're inferring from values
338+
dtype = CategoricalDtype(categories, dtype.ordered)
353339

354-
else:
355-
# there were two ways if categories are present
356-
# - the old one, where each value is a int pointer to the levels
357-
# array -> not anymore possible, but code outside of pandas could
358-
# call us like that, so make some checks
359-
# - the new one, where each value is also in the categories array
360-
# (or np.nan)
340+
elif is_categorical_dtype(values):
341+
old_codes = (values.cat.codes if isinstance(values, ABCSeries)
342+
else values.codes)
343+
codes = _recode_for_categories(old_codes, values.dtype.categories,
344+
dtype.categories)
361345

346+
else:
362347
codes = _get_codes_for_values(values, dtype.categories)
363348

364-
# TODO: check for old style usage. These warnings should be removes
365-
# after 0.18/ in 2016
366-
if (is_integer_dtype(values) and
367-
not is_integer_dtype(dtype.categories)):
368-
warn("Values and categories have different dtypes. Did you "
369-
"mean to use\n'Categorical.from_codes(codes, "
370-
"categories)'?", RuntimeWarning, stacklevel=2)
371-
372-
if (len(values) and is_integer_dtype(values) and
373-
(codes == -1).all()):
374-
warn("None of the categories were found in values. Did you "
375-
"mean to use\n'Categorical.from_codes(codes, "
376-
"categories)'?", RuntimeWarning, stacklevel=2)
377-
378349
if null_mask.any():
379350
# Reinsert -1 placeholders for previously removed missing values
380351
full_codes = - np.ones(null_mask.shape, dtype=codes.dtype)

pandas/tests/test_categorical.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -306,20 +306,18 @@ def f():
306306
assert len(cat.codes) == 1
307307
assert cat.codes[0] == 0
308308

309-
# Catch old style constructor useage: two arrays, codes + categories
310-
# We can only catch two cases:
309+
# two arrays
311310
# - when the first is an integer dtype and the second is not
312311
# - when the resulting codes are all -1/NaN
313-
with tm.assert_produces_warning(RuntimeWarning):
312+
with tm.assert_produces_warning(None):
314313
c_old = Categorical([0, 1, 2, 0, 1, 2],
315314
categories=["a", "b", "c"]) # noqa
316315

317-
with tm.assert_produces_warning(RuntimeWarning):
316+
with tm.assert_produces_warning(None):
318317
c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa
319318
categories=[3, 4, 5])
320319

321-
# the next one are from the old docs, but unfortunately these don't
322-
# trigger :-(
320+
# the next ones are from the old docs
323321
with tm.assert_produces_warning(None):
324322
c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa
325323
cat = Categorical([1, 2], categories=[1, 2, 3])

0 commit comments

Comments
 (0)