1
1
""" define extension dtypes """
2
2
import re
3
3
from typing import Any , Dict , List , MutableMapping , Optional , Tuple , Type , Union , cast
4
- import warnings
5
4
6
5
import numpy as np
7
6
import pytz
18
17
19
18
str_type = str
20
19
21
- # GH26403: sentinel value used for the default value of ordered in the
22
- # CategoricalDtype constructor to detect when ordered=None is explicitly passed
23
- ordered_sentinel : object = object ()
24
-
25
20
26
21
def register_extension_dtype (cls : Type [ExtensionDtype ]) -> Type [ExtensionDtype ]:
27
22
"""
@@ -179,7 +174,11 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
179
174
----------
180
175
categories : sequence, optional
181
176
Must be unique, and must not contain any nulls.
182
- ordered : bool, default False
177
+ ordered : bool or None, default False
178
+ Whether or not this categorical is treated as an ordered categorical.
179
+ None can be used to maintain the ordered value of existing categoricals when
180
+ used in operations that combine categoricals, e.g. astype, and will resolve to
181
+ False if there is no existing ordered to maintain.
183
182
184
183
Attributes
185
184
----------
@@ -218,14 +217,10 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
218
217
kind : str_type = "O"
219
218
str = "|O08"
220
219
base = np .dtype ("O" )
221
- _metadata = ("categories" , "ordered" , "_ordered_from_sentinel" )
220
+ _metadata = ("categories" , "ordered" )
222
221
_cache : Dict [str_type , PandasExtensionDtype ] = {}
223
222
224
- def __init__ (
225
- self , categories = None , ordered : Union [Ordered , object ] = ordered_sentinel
226
- ):
227
- # TODO(GH26403): Set type of ordered to Ordered
228
- ordered = cast (Ordered , ordered )
223
+ def __init__ (self , categories = None , ordered : Ordered = False ):
229
224
self ._finalize (categories , ordered , fastpath = False )
230
225
231
226
@classmethod
@@ -338,36 +333,63 @@ def _from_values_or_dtype(
338
333
339
334
return dtype
340
335
336
+ @classmethod
337
+ def construct_from_string (cls , string : str_type ) -> "CategoricalDtype" :
338
+ """
339
+ Construct a CategoricalDtype from a string.
340
+
341
+ Parameters
342
+ ----------
343
+ string : str
344
+ Must be the string "category" in order to be successfully constructed.
345
+
346
+ Returns
347
+ -------
348
+ CategoricalDtype
349
+ Instance of the dtype.
350
+
351
+ Raises
352
+ ------
353
+ TypeError
354
+ If a CategoricalDtype cannot be constructed from the input.
355
+ """
356
+ if not isinstance (string , str ):
357
+ raise TypeError (f"Expects a string, got { type (string )} " )
358
+ if string != cls .name :
359
+ raise TypeError (f"Cannot construct a 'CategoricalDtype' from '{ string } '" )
360
+
361
+ # need ordered=None to ensure that operations specifying dtype="category" don't
362
+ # override the ordered value for existing categoricals
363
+ return cls (ordered = None )
364
+
341
365
def _finalize (self , categories , ordered : Ordered , fastpath : bool = False ) -> None :
342
366
343
- if ordered is not None and ordered is not ordered_sentinel :
367
+ if ordered is not None :
344
368
self .validate_ordered (ordered )
345
369
346
370
if categories is not None :
347
371
categories = self .validate_categories (categories , fastpath = fastpath )
348
372
349
373
self ._categories = categories
350
- self ._ordered = ordered if ordered is not ordered_sentinel else None
351
- self ._ordered_from_sentinel = ordered is ordered_sentinel
374
+ self ._ordered = ordered
352
375
353
376
def __setstate__ (self , state : MutableMapping [str_type , Any ]) -> None :
354
377
# for pickle compat. __getstate__ is defined in the
355
378
# PandasExtensionDtype superclass and uses the public properties to
356
379
# pickle -> need to set the settable private ones here (see GH26067)
357
380
self ._categories = state .pop ("categories" , None )
358
381
self ._ordered = state .pop ("ordered" , False )
359
- self ._ordered_from_sentinel = state .pop ("_ordered_from_sentinel" , False )
360
382
361
383
def __hash__ (self ) -> int :
362
384
# _hash_categories returns a uint64, so use the negative
363
385
# space for when we have unknown categories to avoid a conflict
364
386
if self .categories is None :
365
- if self ._ordered :
387
+ if self .ordered :
366
388
return - 1
367
389
else :
368
390
return - 2
369
391
# We *do* want to include the real self.ordered here
370
- return int (self ._hash_categories (self .categories , self ._ordered ))
392
+ return int (self ._hash_categories (self .categories , self .ordered ))
371
393
372
394
def __eq__ (self , other : Any ) -> bool :
373
395
"""
@@ -386,7 +408,7 @@ def __eq__(self, other: Any) -> bool:
386
408
return other == self .name
387
409
elif other is self :
388
410
return True
389
- elif not (hasattr (other , "_ordered " ) and hasattr (other , "categories" )):
411
+ elif not (hasattr (other , "ordered " ) and hasattr (other , "categories" )):
390
412
return False
391
413
elif self .categories is None or other .categories is None :
392
414
# We're forced into a suboptimal corner thanks to math and
@@ -395,10 +417,10 @@ def __eq__(self, other: Any) -> bool:
395
417
# CDT(., .) = CDT(None, False) and *all*
396
418
# CDT(., .) = CDT(None, True).
397
419
return True
398
- elif self ._ordered or other ._ordered :
420
+ elif self .ordered or other .ordered :
399
421
# At least one has ordered=True; equal if both have ordered=True
400
422
# and the same values for categories in the same order.
401
- return (self ._ordered == other ._ordered ) and self .categories .equals (
423
+ return (self .ordered == other .ordered ) and self .categories .equals (
402
424
other .categories
403
425
)
404
426
else :
@@ -420,7 +442,7 @@ def __repr__(self) -> str_type:
420
442
data = "None, "
421
443
else :
422
444
data = self .categories ._format_data (name = type (self ).__name__ )
423
- return tpl .format (data = data , ordered = self ._ordered )
445
+ return tpl .format (data = data , ordered = self .ordered )
424
446
425
447
@staticmethod
426
448
def _hash_categories (categories , ordered : Ordered = True ) -> int :
@@ -557,26 +579,11 @@ def update_dtype(
557
579
# from here on, dtype is a CategoricalDtype
558
580
dtype = cast (CategoricalDtype , dtype )
559
581
560
- # dtype is CDT: keep current categories/ordered if None
561
- new_categories = dtype .categories
562
- if new_categories is None :
563
- new_categories = self .categories
564
-
565
- new_ordered = dtype ._ordered
566
- new_ordered_from_sentinel = dtype ._ordered_from_sentinel
567
- if new_ordered is None :
568
- # maintain existing ordered if new dtype has ordered=None
569
- new_ordered = self ._ordered
570
- if self ._ordered and new_ordered_from_sentinel :
571
- # only warn if we'd actually change the existing behavior
572
- msg = (
573
- "Constructing a CategoricalDtype without specifying "
574
- "`ordered` will default to `ordered=False` in a future "
575
- "version, which will cause the resulting categorical's "
576
- "`ordered` attribute to change to False; `ordered=True` "
577
- "must be explicitly passed in order to be retained"
578
- )
579
- warnings .warn (msg , FutureWarning , stacklevel = 3 )
582
+ # update categories/ordered unless they've been explicitly passed as None
583
+ new_categories = (
584
+ dtype .categories if dtype .categories is not None else self .categories
585
+ )
586
+ new_ordered = dtype .ordered if dtype .ordered is not None else self .ordered
580
587
581
588
return CategoricalDtype (new_categories , new_ordered )
582
589
@@ -592,16 +599,6 @@ def ordered(self) -> Ordered:
592
599
"""
593
600
Whether the categories have an ordered relationship.
594
601
"""
595
- # TODO: remove if block when ordered=None as default is deprecated
596
- if self ._ordered_from_sentinel and self ._ordered is None :
597
- # warn when accessing ordered if ordered=None and None was not
598
- # explicitly passed to the constructor
599
- msg = (
600
- "Constructing a CategoricalDtype without specifying "
601
- "`ordered` will default to `ordered=False` in a future "
602
- "version; `ordered=None` must be explicitly passed."
603
- )
604
- warnings .warn (msg , FutureWarning , stacklevel = 2 )
605
602
return self ._ordered
606
603
607
604
@property
0 commit comments