1
1
""" define extension dtypes """
2
2
import re
3
- from typing import Any , Dict , Optional , Tuple , Type
3
+ import typing
4
+ from typing import Any , Dict , List , Optional , Tuple , Type
4
5
import warnings
5
6
6
7
import numpy as np
15
16
from .base import ExtensionDtype , _DtypeOpsMixin
16
17
from .inference import is_list_like
17
18
18
- str_type = str
19
19
20
-
21
- def register_extension_dtype (cls ):
20
+ def register_extension_dtype (cls : 'ExtensionDtype' ) -> 'ExtensionDtype' :
22
21
"""
23
22
Register an ExtensionDtype with pandas as a class decorator.
24
23
@@ -60,20 +59,20 @@ class Registry:
60
59
These are tried in order.
61
60
"""
62
61
def __init__ (self ):
63
- self .dtypes = []
62
+ self .dtypes = [] # type: List[ExtensionDtype]
64
63
65
- def register (self , dtype ) :
64
+ def register (self , dtype : 'ExtensionDtype' ) -> None :
66
65
"""
67
66
Parameters
68
67
----------
69
68
dtype : ExtensionDtype
70
69
"""
71
- if not issubclass (dtype , ( PandasExtensionDtype , ExtensionDtype ) ):
70
+ if not issubclass (dtype , ExtensionDtype ):
72
71
raise ValueError ("can only register pandas extension dtypes" )
73
72
74
73
self .dtypes .append (dtype )
75
74
76
- def find (self , dtype ) :
75
+ def find (self , dtype : 'ExtensionDtype' ) -> Optional [ ExtensionDtype ] :
77
76
"""
78
77
Parameters
79
78
----------
@@ -117,25 +116,25 @@ class PandasExtensionDtype(_DtypeOpsMixin):
117
116
# and ExtensionDtype's @properties in the subclasses below. The kind and
118
117
# type variables in those subclasses are explicitly typed below.
119
118
subdtype = None
120
- str = None # type: Optional[str_type ]
119
+ str = None # type: Optional[str ]
121
120
num = 100
122
121
shape = tuple () # type: Tuple[int, ...]
123
122
itemsize = 8
124
123
base = None
125
124
isbuiltin = 0
126
125
isnative = 0
127
- _cache = {} # type: Dict[str_type , 'PandasExtensionDtype']
126
+ _cache = {} # type: Dict[str , 'PandasExtensionDtype']
128
127
129
- def __unicode__ (self ):
128
+ def __unicode__ (self ) -> str :
130
129
return self .name
131
130
132
- def __str__ (self ):
131
+ def __str__ (self ) -> str :
133
132
"""
134
133
Return a string representation for a particular object.
135
134
"""
136
135
return self .__unicode__ ()
137
136
138
- def __bytes__ (self ):
137
+ def __bytes__ (self ) -> bytes :
139
138
"""
140
139
Return a string representation for a particular object.
141
140
"""
@@ -144,22 +143,22 @@ def __bytes__(self):
144
143
encoding = get_option ("display.encoding" )
145
144
return self .__unicode__ ().encode (encoding , 'replace' )
146
145
147
- def __repr__ (self ):
146
+ def __repr__ (self ) -> str :
148
147
"""
149
148
Return a string representation for a particular object.
150
149
"""
151
150
return str (self )
152
151
153
- def __hash__ (self ):
152
+ def __hash__ (self ) -> int :
154
153
raise NotImplementedError ("sub-classes should implement an __hash__ "
155
154
"method" )
156
155
157
- def __getstate__ (self ):
156
+ def __getstate__ (self ) -> Dict [ str , Any ] :
158
157
# pickle support; we don't want to pickle the cache
159
158
return {k : getattr (self , k , None ) for k in self ._metadata }
160
159
161
160
@classmethod
162
- def reset_cache (cls ):
161
+ def reset_cache (cls ) -> None :
163
162
""" clear the cache """
164
163
cls ._cache = {}
165
164
@@ -217,23 +216,27 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
217
216
# TODO: Document public vs. private API
218
217
name = 'category'
219
218
type = CategoricalDtypeType # type: Type[CategoricalDtypeType]
220
- kind = 'O' # type: str_type
219
+ kind = 'O' # type: str
221
220
str = '|O08'
222
221
base = np .dtype ('O' )
223
222
_metadata = ('categories' , 'ordered' )
224
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
223
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
225
224
226
- def __init__ (self , categories = None , ordered = None ):
225
+ def __init__ (self , categories = None , ordered : bool = None ):
227
226
self ._finalize (categories , ordered , fastpath = False )
228
227
229
228
@classmethod
230
- def _from_fastpath (cls , categories = None , ordered = None ):
229
+ def _from_fastpath (cls , categories = None , ordered : bool = None ):
231
230
self = cls .__new__ (cls )
232
231
self ._finalize (categories , ordered , fastpath = True )
233
232
return self
234
233
235
234
@classmethod
236
- def _from_categorical_dtype (cls , dtype , categories = None , ordered = None ):
235
+ def _from_categorical_dtype (cls ,
236
+ dtype : 'CategoricalDtype' ,
237
+ categories = None ,
238
+ ordered : bool = None ,
239
+ ) -> 'CategoricalDtype' :
237
240
if categories is ordered is None :
238
241
return dtype
239
242
if categories is None :
@@ -243,8 +246,11 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
243
246
return cls (categories , ordered )
244
247
245
248
@classmethod
246
- def _from_values_or_dtype (cls , values = None , categories = None , ordered = None ,
247
- dtype = None ):
249
+ def _from_values_or_dtype (cls ,
250
+ values = None ,
251
+ categories = None ,
252
+ ordered : bool = None ,
253
+ dtype : 'CategoricalDtype' = None ):
248
254
"""
249
255
Construct dtype from the input parameters used in :class:`Categorical`.
250
256
@@ -326,7 +332,11 @@ def _from_values_or_dtype(cls, values=None, categories=None, ordered=None,
326
332
327
333
return dtype
328
334
329
- def _finalize (self , categories , ordered , fastpath = False ):
335
+ def _finalize (self ,
336
+ categories ,
337
+ ordered : Optional [bool ],
338
+ fastpath : bool = False ,
339
+ ) -> None :
330
340
331
341
if ordered is not None :
332
342
self .validate_ordered (ordered )
@@ -338,14 +348,14 @@ def _finalize(self, categories, ordered, fastpath=False):
338
348
self ._categories = categories
339
349
self ._ordered = ordered
340
350
341
- def __setstate__ (self , state ) :
351
+ def __setstate__ (self , state : 'Dict[str, Any]' ) -> None :
342
352
# for pickle compat. __getstate__ is defined in the
343
353
# PandasExtensionDtype superclass and uses the public properties to
344
354
# pickle -> need to set the settable private ones here (see GH26067)
345
355
self ._categories = state .pop ('categories' , None )
346
356
self ._ordered = state .pop ('ordered' , False )
347
357
348
- def __hash__ (self ):
358
+ def __hash__ (self ) -> int :
349
359
# _hash_categories returns a uint64, so use the negative
350
360
# space for when we have unknown categories to avoid a conflict
351
361
if self .categories is None :
@@ -356,7 +366,7 @@ def __hash__(self):
356
366
# We *do* want to include the real self.ordered here
357
367
return int (self ._hash_categories (self .categories , self .ordered ))
358
368
359
- def __eq__ (self , other ) :
369
+ def __eq__ (self , other : Any ) -> bool :
360
370
"""
361
371
Rules for CDT equality:
362
372
1) Any CDT is equal to the string 'category'
@@ -403,7 +413,7 @@ def __repr__(self):
403
413
return tpl .format (data , self .ordered )
404
414
405
415
@staticmethod
406
- def _hash_categories (categories , ordered = True ):
416
+ def _hash_categories (categories , ordered : bool = True ) -> int :
407
417
from pandas .core .util .hashing import (
408
418
hash_array , _combine_hash_arrays , hash_tuples
409
419
)
@@ -453,7 +463,7 @@ def construct_array_type(cls):
453
463
return Categorical
454
464
455
465
@classmethod
456
- def construct_from_string (cls , string ) :
466
+ def construct_from_string (cls , string : str ) -> 'CategoricalDtype' :
457
467
"""
458
468
Attempt to construct this type from a string; raise a TypeError if
459
469
it's not possible """
@@ -466,7 +476,7 @@ def construct_from_string(cls, string):
466
476
pass
467
477
468
478
@staticmethod
469
- def validate_ordered (ordered ) :
479
+ def validate_ordered (ordered : bool ) -> None :
470
480
"""
471
481
Validates that we have a valid ordered parameter. If
472
482
it is not a boolean, a TypeError will be raised.
@@ -486,7 +496,7 @@ def validate_ordered(ordered):
486
496
raise TypeError ("'ordered' must either be 'True' or 'False'" )
487
497
488
498
@staticmethod
489
- def validate_categories (categories , fastpath = False ):
499
+ def validate_categories (categories , fastpath : bool = False ):
490
500
"""
491
501
Validates that we have good categories
492
502
@@ -521,7 +531,7 @@ def validate_categories(categories, fastpath=False):
521
531
522
532
return categories
523
533
524
- def update_dtype (self , dtype ) :
534
+ def update_dtype (self , dtype : 'CategoricalDtype' ) -> 'CategoricalDtype' :
525
535
"""
526
536
Returns a CategoricalDtype with categories and ordered taken from dtype
527
537
if specified, otherwise falling back to self if unspecified
@@ -560,17 +570,18 @@ def categories(self):
560
570
"""
561
571
An ``Index`` containing the unique categories allowed.
562
572
"""
563
- return self ._categories
573
+ from pandas import Index
574
+ return typing .cast (Index , self ._categories )
564
575
565
576
@property
566
- def ordered (self ):
577
+ def ordered (self ) -> bool :
567
578
"""
568
579
Whether the categories have an ordered relationship.
569
580
"""
570
581
return self ._ordered
571
582
572
583
@property
573
- def _is_boolean (self ):
584
+ def _is_boolean (self ) -> bool :
574
585
from pandas .core .dtypes .common import is_bool_dtype
575
586
576
587
return is_bool_dtype (self .categories )
@@ -614,14 +625,14 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):
614
625
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
615
626
"""
616
627
type = Timestamp # type: Type[Timestamp]
617
- kind = 'M' # type: str_type
628
+ kind = 'M' # type: str
618
629
str = '|M8[ns]'
619
630
num = 101
620
631
base = np .dtype ('M8[ns]' )
621
632
na_value = NaT
622
633
_metadata = ('unit' , 'tz' )
623
634
_match = re .compile (r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]" )
624
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
635
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
625
636
626
637
def __init__ (self , unit = "ns" , tz = None ):
627
638
if isinstance (unit , DatetimeTZDtype ):
@@ -765,13 +776,13 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
765
776
period[M]
766
777
"""
767
778
type = Period # type: Type[Period]
768
- kind = 'O' # type: str_type
779
+ kind = 'O' # type: str
769
780
str = '|O08'
770
781
base = np .dtype ('O' )
771
782
num = 102
772
783
_metadata = ('freq' ,)
773
784
_match = re .compile (r"(P|p)eriod\[(?P<freq>.+)\]" )
774
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
785
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
775
786
776
787
def __new__ (cls , freq = None ):
777
788
"""
@@ -919,13 +930,13 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype):
919
930
interval[int64]
920
931
"""
921
932
name = 'interval'
922
- kind = None # type: Optional[str_type ]
933
+ kind = None # type: Optional[str ]
923
934
str = '|O08'
924
935
base = np .dtype ('O' )
925
936
num = 103
926
937
_metadata = ('subtype' ,)
927
938
_match = re .compile (r"(I|i)nterval\[(?P<subtype>.+)\]" )
928
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
939
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
929
940
930
941
def __new__ (cls , subtype = None ):
931
942
from pandas .core .dtypes .common import (
0 commit comments