17
17
18
18
str_type = str
19
19
20
# GH26403: unique marker object used as the default value of ``ordered`` in
# the CategoricalDtype constructor. Because it is compared by identity, the
# constructor can tell "``ordered`` was omitted" apart from "``ordered=None``
# was passed explicitly".
ordered_sentinel = object()  # type: object

# TODO(GH26403): Replace with Optional[bool] or bool
# ``object`` is part of the union so that ``ordered_sentinel`` itself is an
# accepted value for parameters annotated with this alias.
OrderedType = Union[None, bool, object]
26
+
20
27
21
28
def register_extension_dtype (cls : Type [ExtensionDtype ],
22
29
) -> Type [ExtensionDtype ]:
@@ -214,7 +221,9 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
214
221
_metadata = ('categories' , 'ordered' )
215
222
_cache = {} # type: Dict[str_type, PandasExtensionDtype]
216
223
217
def __init__(self,
             categories=None,
             ordered: OrderedType = ordered_sentinel):
    """
    Initialise the dtype by delegating to ``_finalize``.

    Parameters
    ----------
    categories : optional
        Forwarded unchanged to ``_finalize``.
    ordered : OrderedType, default ``ordered_sentinel``
        Forwarded unchanged to ``_finalize``. The default is the
        module-level sentinel rather than None so that an omitted
        argument can be told apart from an explicit ``ordered=None``.
    """
    # fastpath is always disabled when going through the public constructor
    self._finalize(categories, ordered, fastpath=False)
219
228
220
229
@classmethod
@@ -230,7 +239,7 @@ def _from_fastpath(cls,
230
239
def _from_categorical_dtype (cls ,
231
240
dtype : 'CategoricalDtype' ,
232
241
categories = None ,
233
- ordered : Optional [ bool ] = None ,
242
+ ordered : OrderedType = None ,
234
243
) -> 'CategoricalDtype' :
235
244
if categories is ordered is None :
236
245
return dtype
@@ -330,19 +339,20 @@ def _from_values_or_dtype(cls,
330
339
331
340
def _finalize(self,
              categories,
              ordered: OrderedType,
              fastpath: bool = False,
              ) -> None:
    """
    Validate the inputs and store them on the instance.

    Parameters
    ----------
    categories : optional
        When not None, replaced by the result of
        ``self.validate_categories(categories, fastpath=fastpath)``
        before being stored.
    ordered : OrderedType
        None, a value accepted by ``self.validate_ordered``, or
        ``ordered_sentinel`` (meaning the caller did not pass ``ordered``).
    fastpath : bool, default False
        Forwarded to ``validate_categories``.

    Notes
    -----
    Sets ``_categories``, ``_ordered`` and ``_ordered_from_sentinel``.
    The sentinel is never stored in ``_ordered``; it is normalised to
    None and remembered separately in ``_ordered_from_sentinel``.
    """
    # Identity check: True only when ``ordered`` was left at its default.
    from_sentinel = ordered is ordered_sentinel

    # Neither None nor the sentinel is validated.
    if ordered is not None and not from_sentinel:
        self.validate_ordered(ordered)

    if categories is not None:
        categories = self.validate_categories(categories,
                                              fastpath=fastpath)

    self._categories = categories
    self._ordered = None if from_sentinel else ordered
    self._ordered_from_sentinel = from_sentinel
346
356
347
357
def __setstate__ (self , state : Dict [str_type , Any ]) -> None :
348
358
# for pickle compat. __get_state__ is defined in the
@@ -355,12 +365,12 @@ def __hash__(self) -> int:
355
365
# _hash_categories returns a uint64, so use the negative
356
366
# space for when we have unknown categories to avoid a conflict
357
367
if self .categories is None :
358
- if self .ordered :
368
+ if self ._ordered :
359
369
return - 1
360
370
else :
361
371
return - 2
362
372
# We *do* want to include the real self.ordered here
363
- return int (self ._hash_categories (self .categories , self .ordered ))
373
+ return int (self ._hash_categories (self .categories , self ._ordered ))
364
374
365
375
def __eq__ (self , other : Any ) -> bool :
366
376
"""
@@ -379,7 +389,7 @@ def __eq__(self, other: Any) -> bool:
379
389
return other == self .name
380
390
elif other is self :
381
391
return True
382
- elif not (hasattr (other , 'ordered ' ) and hasattr (other , 'categories' )):
392
+ elif not (hasattr (other , '_ordered ' ) and hasattr (other , 'categories' )):
383
393
return False
384
394
elif self .categories is None or other .categories is None :
385
395
# We're forced into a suboptimal corner thanks to math and
@@ -388,10 +398,10 @@ def __eq__(self, other: Any) -> bool:
388
398
# CDT(., .) = CDT(None, False) and *all*
389
399
# CDT(., .) = CDT(None, True).
390
400
return True
391
- elif self .ordered or other .ordered :
401
+ elif self ._ordered or other ._ordered :
392
402
# At least one has ordered=True; equal if both have ordered=True
393
403
# and the same values for categories in the same order.
394
- return ((self .ordered == other .ordered ) and
404
+ return ((self ._ordered == other ._ordered ) and
395
405
self .categories .equals (other .categories ))
396
406
else :
397
407
# Neither has ordered=True; equal if both have the same categories,
@@ -406,10 +416,10 @@ def __repr__(self):
406
416
data = "None, "
407
417
else :
408
418
data = self .categories ._format_data (name = self .__class__ .__name__ )
409
- return tpl .format (data , self .ordered )
419
+ return tpl .format (data , self ._ordered )
410
420
411
421
@staticmethod
412
- def _hash_categories (categories , ordered : Optional [ bool ] = True ) -> int :
422
+ def _hash_categories (categories , ordered : OrderedType = True ) -> int :
413
423
from pandas .core .util .hashing import (
414
424
hash_array , _combine_hash_arrays , hash_tuples
415
425
)
@@ -459,7 +469,7 @@ def construct_array_type(cls):
459
469
return Categorical
460
470
461
471
@staticmethod
462
- def validate_ordered (ordered : bool ) -> None :
472
+ def validate_ordered (ordered : OrderedType ) -> None :
463
473
"""
464
474
Validates that we have a valid ordered parameter. If
465
475
it is not a boolean, a TypeError will be raised.
@@ -534,17 +544,25 @@ def update_dtype(self, dtype: 'CategoricalDtype') -> 'CategoricalDtype':
534
544
msg = ('a CategoricalDtype must be passed to perform an update, '
535
545
'got {dtype!r}' ).format (dtype = dtype )
536
546
raise ValueError (msg )
537
- elif dtype .categories is not None and dtype .ordered is self .ordered :
538
- return dtype
539
547
540
548
# dtype is CDT: keep current categories/ordered if None
541
549
new_categories = dtype .categories
542
550
if new_categories is None :
543
551
new_categories = self .categories
544
552
545
- new_ordered = dtype .ordered
553
+ new_ordered = dtype ._ordered
554
+ new_ordered_from_sentinel = dtype ._ordered_from_sentinel
546
555
if new_ordered is None :
547
- new_ordered = self .ordered
556
+ # maintain existing ordered if new dtype has ordered=None
557
+ new_ordered = self ._ordered
558
+ if self ._ordered and new_ordered_from_sentinel :
559
+ # only warn if we'd actually change the existing behavior
560
+ msg = ("Constructing a CategoricalDtype without specifying "
561
+ "`ordered` will default to `ordered=False` in a future "
562
+ "version, which will cause the resulting categorical's "
563
+ "`ordered` attribute to change to False; `ordered=True`"
564
+ " must be explicitly passed in order to be retained" )
565
+ warnings .warn (msg , FutureWarning , stacklevel = 3 )
548
566
549
567
return CategoricalDtype (new_categories , new_ordered )
550
568
@@ -556,10 +574,18 @@ def categories(self):
556
574
return self ._categories
557
575
558
576
@property
def ordered(self) -> OrderedType:
    """
    Whether the categories have an ordered relationship.

    Returns
    -------
    OrderedType
        The stored ``_ordered`` value.

    Warns
    -----
    FutureWarning
        If ``_ordered`` is None and ``_ordered_from_sentinel`` is set,
        i.e. None was not explicitly passed to the constructor.
    """
    # TODO: remove if block when ordered=None as default is deprecated
    implicit_none = self._ordered_from_sentinel and self._ordered is None
    if implicit_none:
        # warn when accessing ordered if ordered=None and None was not
        # explicitly passed to the constructor
        msg = ("Constructing a CategoricalDtype without specifying "
               "`ordered` will default to `ordered=False` in a future "
               "version; `ordered=None` must be explicitly passed.")
        # stacklevel=2 attributes the warning to the code reading
        # ``.ordered`` rather than to this property.
        warnings.warn(msg, FutureWarning, stacklevel=2)
    return self._ordered
564
590
565
591
@property
0 commit comments