Skip to content

Commit 39e17f2

Browse files
committed
API: deprecate setting of .ordered directly (GH9347, GH9190)
add set_ordered method for setting ordered; the default for Categorical is now to NOT order unless explicitly specified
1 parent d876a9f commit 39e17f2

File tree

8 files changed

+160
-70
lines changed

8 files changed

+160
-70
lines changed

pandas/core/categorical.py

Lines changed: 57 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from pandas.core.common import (CategoricalDtype, ABCSeries, isnull, notnull,
1818
is_categorical_dtype, is_integer_dtype, is_object_dtype,
1919
_possibly_infer_to_datetimelike, get_dtype_kinds,
20-
is_list_like, is_sequence, is_null_slice,
20+
is_list_like, is_sequence, is_null_slice, is_bool,
2121
_ensure_platform_int, _ensure_object, _ensure_int64,
2222
_coerce_indexer_dtype, _values_from_object, take_1d)
2323
from pandas.util.terminal import get_terminal_size
@@ -141,7 +141,7 @@ class Categorical(PandasObject):
141141
to be the unique values of values.
142142
ordered : boolean, optional
143143
Whether or not this categorical is treated as an ordered categorical. If not given,
144-
the resulting categorical will be ordered if values can be sorted.
144+
the resulting categorical will not be ordered.
145145
name : str, optional
146146
Name for the Categorical variable. If name is None, will attempt
147147
to infer from values.
@@ -184,7 +184,6 @@ class Categorical(PandasObject):
184184
dtype = CategoricalDtype()
185185
"""The dtype (always "category")"""
186186

187-
ordered = None
188187
"""Whether or not this Categorical is ordered.
189188
190189
Only ordered `Categoricals` can be sorted (according to the order
@@ -201,18 +200,17 @@ class Categorical(PandasObject):
201200
# For comparisons, so that numpy uses our implementation if the compare ops, which raise
202201
__array_priority__ = 1000
203202
_typ = 'categorical'
204-
ordered = False
205203
name = None
206204

207-
def __init__(self, values, categories=None, ordered=None, name=None, fastpath=False,
205+
def __init__(self, values, categories=None, ordered=False, name=None, fastpath=False,
208206
levels=None):
209207

210208
if fastpath:
211209
# fast path
212210
self._codes = _coerce_indexer_dtype(values, categories)
213211
self.name = name
214212
self.categories = categories
215-
self.ordered = ordered
213+
self._ordered = ordered
216214
return
217215

218216
if name is None:
@@ -237,8 +235,6 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
237235
cat = values.values
238236
if categories is None:
239237
categories = cat.categories
240-
if ordered is None:
241-
ordered = cat.ordered
242238
values = values.__array__()
243239

244240
elif isinstance(values, Index):
@@ -263,18 +259,12 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
263259

264260
if categories is None:
265261
try:
266-
codes, categories = factorize(values, sort=True)
267-
# If the underlying data structure was sortable, and the user doesn't want to
268-
# "forget" this order, the categorical also is sorted/ordered
269-
if ordered is None:
270-
ordered = True
262+
codes, categories = factorize(values, sort=ordered)
271263
except TypeError:
272-
codes, categories = factorize(values, sort=False)
273-
if ordered:
274-
# raise, as we don't have a sortable data structure and so the user should
275-
# give us one by specifying categories
276-
raise TypeError("'values' is not ordered, please explicitly specify the "
277-
"categories order by passing in a categories argument.")
264+
# raise, as we don't have a sortable data structure and so the user should
265+
# give us one by specifying categories
266+
raise TypeError("'values' is not factorizable, please pass "
267+
"categories order by passing in a categories argument.")
278268
except ValueError:
279269

280270
### FIXME ####
@@ -300,12 +290,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
300290
warn("None of the categories were found in values. Did you mean to use\n"
301291
"'Categorical.from_codes(codes, categories)'?", RuntimeWarning)
302292

303-
# if we got categories, we can assume that the order is intended
304-
# if ordered is unspecified
305-
if ordered is None:
306-
ordered = True
307-
308-
self.ordered = False if ordered is None else ordered
293+
self._ordered = ordered
309294
self.categories = categories
310295
self.name = name
311296
self._codes = _coerce_indexer_dtype(codes, categories)
@@ -460,6 +445,37 @@ def _get_levels(self):
460445
# TODO: Remove after deprecation period in 2017/ after 0.18
461446
levels = property(fget=_get_levels, fset=_set_levels)
462447

448+
_ordered = None
449+
450+
def _set_ordered(self, value):
451+
""" Sets the ordered attribute to the boolean value """
452+
warn("Setting 'ordered' directly is deprecated, use 'set_ordered'", FutureWarning)
453+
self.set_ordered(value, inplace=True)
454+
455+
def set_ordered(self, value, inplace=False):
456+
"""
457+
Sets the ordered attribute to the boolean value
458+
459+
Parameters
460+
----------
461+
value : boolean to set whether this categorical is ordered (True) or not (False)
462+
inplace : boolean (default: False)
463+
Whether or not to set the ordered attribute inplace or return a copy of this categorical
464+
with ordered set to the value
465+
"""
466+
if not is_bool(value):
467+
raise TypeError("ordered must be a boolean value")
468+
cat = self if inplace else self.copy()
469+
cat._ordered = value
470+
if not inplace:
471+
return cat
472+
473+
def _get_ordered(self):
474+
""" Gets the ordered attribute """
475+
return self._ordered
476+
477+
ordered = property(fget=_get_ordered, fset=_set_ordered)
478+
463479
def set_categories(self, new_categories, ordered=None, rename=False, inplace=False):
464480
""" Sets the categories to the specified new_categories.
465481
@@ -486,7 +502,7 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
486502
----------
487503
new_categories : Index-like
488504
The categories in new order.
489-
ordered : boolean, optional
505+
ordered : boolean, optional (default: None)
490506
Whether or not the categorical is treated as an ordered categorical. If not given,
491507
do not change the ordered information.
492508
rename : boolean (default: False)
@@ -520,8 +536,9 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
520536
cat._codes = _get_codes_for_values(values, new_categories)
521537
cat._categories = new_categories
522538

523-
if not ordered is None:
524-
cat.ordered = ordered
539+
if ordered is None:
540+
ordered = self.ordered
541+
cat.set_ordered(ordered, inplace=True)
525542

526543
if not inplace:
527544
return cat
@@ -765,6 +782,15 @@ def __setstate__(self, state):
765782
state['_categories'] = \
766783
self._validate_categories(state.pop('_levels'))
767784

785+
# 0.16.0 ordered change
786+
if '_ordered' not in state:
787+
788+
# >= 0.15.0, < 0.16.0
789+
if 'ordered' in state:
790+
state['_ordered'] = state.pop('ordered')
791+
else:
792+
state['_ordered'] = False
793+
768794
for k, v in compat.iteritems(state):
769795
setattr(self, k, v)
770796

@@ -1498,6 +1524,7 @@ class CategoricalAccessor(PandasDelegate):
14981524
>>> s.cat.remove_categories(['d'])
14991525
>>> s.cat.remove_unused_categories()
15001526
>>> s.cat.set_categories(list('abcde'))
1527+
>>> s.cat.set_ordered(True)
15011528
15021529
"""
15031530

@@ -1533,7 +1560,8 @@ def _delegate_method(self, name, *args, **kwargs):
15331560
"add_categories",
15341561
"remove_categories",
15351562
"remove_unused_categories",
1536-
"set_categories"],
1563+
"set_categories",
1564+
"set_ordered"],
15371565
typ='method')
15381566

15391567
##### utility routines #####

pandas/core/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3628,7 +3628,7 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
36283628
name = None if names is None else names[0]
36293629
return Index(arrays[0], name=name)
36303630

3631-
cats = [Categorical.from_array(arr) for arr in arrays]
3631+
cats = [Categorical.from_array(arr, ordered=True) for arr in arrays]
36323632
levels = [c.categories for c in cats]
36333633
labels = [c.codes for c in cats]
36343634
if names is None:
@@ -3721,7 +3721,7 @@ def from_product(cls, iterables, sortorder=None, names=None):
37213721
from pandas.core.categorical import Categorical
37223722
from pandas.tools.util import cartesian_product
37233723

3724-
categoricals = [Categorical.from_array(it) for it in iterables]
3724+
categoricals = [Categorical.from_array(it, ordered=True) for it in iterables]
37253725
labels = cartesian_product([c.codes for c in categoricals])
37263726

37273727
return MultiIndex(levels=[c.categories for c in categoricals],

pandas/core/panel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ def panel_index(time, panels, names=['time', 'panel']):
9595
(1962, 'C')], dtype=object)
9696
"""
9797
time, panels = _ensure_like_indices(time, panels)
98-
time_factor = Categorical.from_array(time)
99-
panel_factor = Categorical.from_array(panels)
98+
time_factor = Categorical.from_array(time, ordered=True)
99+
panel_factor = Categorical.from_array(panels, ordered=True)
100100

101101
labels = [time_factor.codes, panel_factor.codes]
102102
levels = [time_factor.categories, panel_factor.categories]

pandas/core/reshape.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ def get_result(self):
157157
# may need to coerce categoricals here
158158
if self.is_categorical is not None:
159159
values = [ Categorical.from_array(values[:,i],
160-
categories=self.is_categorical.categories)
160+
categories=self.is_categorical.categories,
161+
ordered=True)
161162
for i in range(values.shape[-1]) ]
162163

163164
return DataFrame(values, index=index, columns=columns)
@@ -1049,7 +1050,7 @@ def check_len(item, name):
10491050

10501051
def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False):
10511052
# Series avoids inconsistent NaN handling
1052-
cat = Categorical.from_array(Series(data))
1053+
cat = Categorical.from_array(Series(data), ordered=True)
10531054
levels = cat.categories
10541055

10551056
# if all NaN
@@ -1117,7 +1118,7 @@ def make_axis_dummies(frame, axis='minor', transform=None):
11171118
labels = frame.index.labels[num]
11181119
if transform is not None:
11191120
mapped_items = items.map(transform)
1120-
cat = Categorical.from_array(mapped_items.take(labels))
1121+
cat = Categorical.from_array(mapped_items.take(labels), ordered=True)
11211122
labels = cat.codes
11221123
items = cat.categories
11231124

pandas/io/pytables.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3637,7 +3637,7 @@ def read(self, where=None, columns=None, **kwargs):
36373637
if not self.read_axes(where=where, **kwargs):
36383638
return None
36393639

3640-
factors = [Categorical.from_array(a.values) for a in self.index_axes]
3640+
factors = [Categorical.from_array(a.values, ordered=True) for a in self.index_axes]
36413641
levels = [f.categories for f in factors]
36423642
N = [len(f.categories) for f in factors]
36433643
labels = [f.codes for f in factors]
392 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)