1
1
""" define extension dtypes """
2
2
import re
3
- from typing import Any , Dict , Optional , Tuple , Type
3
+ import typing
4
+ from typing import Any , Dict , List , Optional , Tuple , Type
4
5
import warnings
5
6
6
7
import numpy as np
15
16
from .base import ExtensionDtype , _DtypeOpsMixin
16
17
from .inference import is_list_like
17
18
18
- str_type = str
19
19
20
-
21
- def register_extension_dtype (cls ):
20
+ def register_extension_dtype (cls : 'ExtensionDtype' ) -> 'ExtensionDtype' :
22
21
"""
23
22
Register an ExtensionType with pandas as class decorator.
24
23
@@ -60,20 +59,20 @@ class Registry:
60
59
These are tried in order.
61
60
"""
62
61
def __init__(self) -> None:
    """Create an empty registry."""
    # ``register`` appends dtype *classes* (it validates them with
    # ``issubclass``), so the element type is Type[ExtensionDtype].
    self.dtypes = []  # type: List[Type[ExtensionDtype]]
64
63
65
def register(self, dtype: 'Type[ExtensionDtype]') -> None:
    """
    Append an extension dtype class to the registry.

    Parameters
    ----------
    dtype : ExtensionDtype class
        The class itself (not an instance); it is checked with
        ``issubclass``.

    Raises
    ------
    ValueError
        If ``dtype`` is not a subclass of ExtensionDtype.
    """
    if not issubclass(dtype, ExtensionDtype):
        raise ValueError("can only register pandas extension dtypes")

    self.dtypes.append(dtype)
75
74
76
- def find (self , dtype ) :
75
+ def find (self , dtype : 'ExtensionDtype' ) -> Optional [ ExtensionDtype ] :
77
76
"""
78
77
Parameters
79
78
----------
@@ -117,25 +116,25 @@ class PandasExtensionDtype(_DtypeOpsMixin):
117
116
# and ExtensionDtype's @properties in the subclasses below. The kind and
118
117
# type variables in those subclasses are explicitly typed below.
119
118
subdtype = None
120
- str = None # type: Optional[str_type ]
119
+ str = None # type: Optional[str ]
121
120
num = 100
122
121
shape = tuple () # type: Tuple[int, ...]
123
122
itemsize = 8
124
123
base = None
125
124
isbuiltin = 0
126
125
isnative = 0
127
- _cache = {} # type: Dict[str_type , 'PandasExtensionDtype']
126
+ _cache = {} # type: Dict[str , 'PandasExtensionDtype']
128
127
129
def __unicode__(self) -> typing.Text:
    """
    Return ``self.name`` as the text representation.
    """
    # typing.Text (an alias of the builtin str) is used for the annotation
    # because the enclosing class body assigns ``str = None``; a bare
    # ``str`` annotation would be evaluated in that scope and resolve to
    # the class attribute rather than the builtin.
    return self.name
131
130
132
def __str__(self) -> typing.Text:
    """
    Return a string representation for a particular object.

    Delegates to ``__unicode__``.
    """
    # typing.Text (alias of builtin str) sidesteps the class-level
    # ``str = None`` attribute that shadows ``str`` in annotation scope.
    return self.__unicode__()
137
136
138
- def __bytes__ (self ):
137
+ def __bytes__ (self ) -> bytes :
139
138
"""
140
139
Return a string representation for a particular object.
141
140
"""
@@ -144,22 +143,22 @@ def __bytes__(self):
144
143
encoding = get_option ("display.encoding" )
145
144
return self .__unicode__ ().encode (encoding , 'replace' )
146
145
147
def __repr__(self) -> typing.Text:
    """
    Return a string representation for a particular object.

    Identical to ``str(self)``.
    """
    # Inside a method body ``str`` is the builtin (class attributes are not
    # an enclosing scope); only the annotation needs typing.Text, because
    # annotations are evaluated in the class body where ``str`` is rebound.
    return str(self)
152
151
153
def __hash__(self) -> int:
    """
    Placeholder hash; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally; sub-classes are expected to provide their own
        ``__hash__``.
    """
    raise NotImplementedError("sub-classes should implement an __hash__ "
                              "method")
156
155
157
def __getstate__(self) -> Dict[typing.Text, Any]:
    """
    Build the state dict used for pickling.

    Returns
    -------
    dict
        One entry per name in ``self._metadata``; a name with no matching
        attribute maps to None.
    """
    # pickle support; we don't want to pickle the cache
    # (typing.Text instead of ``str`` in the annotation: the class body
    # rebinds ``str``, which would otherwise leak into Dict[...])
    return {k: getattr(self, k, None) for k in self._metadata}
160
159
161
160
@classmethod
def reset_cache(cls) -> None:
    """
    Clear the cache by rebinding ``cls._cache`` to a new empty dict.
    """
    # Rebinds rather than calling .clear(): the previous dict object is
    # left untouched.
    cls._cache = {}
165
164
@@ -217,23 +216,30 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
217
216
# TODO: Document public vs. private API
218
217
name = 'category'
219
218
type = CategoricalDtypeType # type: Type[CategoricalDtypeType]
220
- kind = 'O' # type: str_type
219
+ kind = 'O' # type: str
221
220
str = '|O08'
222
221
base = np .dtype ('O' )
223
222
_metadata = ('categories' , 'ordered' )
224
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
223
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
225
224
226
def __init__(self, categories=None, ordered: Optional[bool] = None):
    """
    Initialise the dtype through the validating (non-fastpath) route.

    Parameters
    ----------
    categories : optional
        Forwarded unchanged to ``_finalize``.
    ordered : bool or None, default None
        Forwarded unchanged to ``_finalize``.
    """
    # ``ordered`` defaults to None, so the annotation must be Optional.
    self._finalize(categories, ordered, fastpath=False)
228
227
229
228
@classmethod
def _from_fastpath(cls,
                   categories=None,
                   ordered: Optional[bool] = None
                   ) -> 'CategoricalDtype':
    """
    Alternate constructor that finalizes with ``fastpath=True``.

    The instance is created with ``cls.__new__`` so ``__init__`` is not
    run; ``_finalize`` is then called directly.

    Parameters
    ----------
    categories : optional
    ordered : bool or None, default None

    Returns
    -------
    CategoricalDtype
    """
    instance = cls.__new__(cls)
    instance._finalize(categories, ordered, fastpath=True)
    return instance
234
236
235
237
@classmethod
236
- def _from_categorical_dtype (cls , dtype , categories = None , ordered = None ):
238
+ def _from_categorical_dtype (cls ,
239
+ dtype : 'CategoricalDtype' ,
240
+ categories = None ,
241
+ ordered : bool = None ,
242
+ ) -> 'CategoricalDtype' :
237
243
if categories is ordered is None :
238
244
return dtype
239
245
if categories is None :
@@ -243,8 +249,12 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
243
249
return cls (categories , ordered )
244
250
245
251
@classmethod
246
- def _from_values_or_dtype (cls , values = None , categories = None , ordered = None ,
247
- dtype = None ):
252
+ def _from_values_or_dtype (cls ,
253
+ values = None ,
254
+ categories = None ,
255
+ ordered : bool = None ,
256
+ dtype : 'CategoricalDtype' = None ,
257
+ ) -> 'CategoricalDtype' :
248
258
"""
249
259
Construct dtype from the input parameters used in :class:`Categorical`.
250
260
@@ -326,7 +336,11 @@ def _from_values_or_dtype(cls, values=None, categories=None, ordered=None,
326
336
327
337
return dtype
328
338
329
- def _finalize (self , categories , ordered , fastpath = False ):
339
+ def _finalize (self ,
340
+ categories ,
341
+ ordered : Optional [bool ],
342
+ fastpath : bool = False ,
343
+ ) -> None :
330
344
331
345
if ordered is not None :
332
346
self .validate_ordered (ordered )
@@ -338,14 +352,14 @@ def _finalize(self, categories, ordered, fastpath=False):
338
352
self ._categories = categories
339
353
self ._ordered = ordered
340
354
341
def __setstate__(self, state: 'Dict[str, Any]') -> None:
    """
    Restore pickled state onto the private attributes.

    Parameters
    ----------
    state : dict
        May carry 'categories' and 'ordered'; both keys are popped from
        it. Missing keys fall back to None and False respectively.
    """
    # for pickle compat. __getstate__ is defined in the
    # PandasExtensionDtype superclass and uses the public properties to
    # pickle -> need to set the settable private ones here (see GH26067)
    # NOTE(review): the annotation is kept as a string so it is not
    # evaluated at definition time; this class assigns ``str = '|O08'``,
    # which presumably is why it was quoted -- confirm.
    self._categories = state.pop('categories', None)
    self._ordered = state.pop('ordered', False)
347
361
348
- def __hash__ (self ):
362
+ def __hash__ (self ) -> int :
349
363
# _hash_categories returns a uint64, so use the negative
350
364
# space for when we have unknown categories to avoid a conflict
351
365
if self .categories is None :
@@ -356,7 +370,7 @@ def __hash__(self):
356
370
# We *do* want to include the real self.ordered here
357
371
return int (self ._hash_categories (self .categories , self .ordered ))
358
372
359
- def __eq__ (self , other ) :
373
+ def __eq__ (self , other : Any ) -> bool :
360
374
"""
361
375
Rules for CDT equality:
362
376
1) Any CDT is equal to the string 'category'
@@ -403,7 +417,7 @@ def __repr__(self):
403
417
return tpl .format (data , self .ordered )
404
418
405
419
@staticmethod
406
- def _hash_categories (categories , ordered = True ):
420
+ def _hash_categories (categories , ordered : bool = True ) -> int :
407
421
from pandas .core .util .hashing import (
408
422
hash_array , _combine_hash_arrays , hash_tuples
409
423
)
@@ -453,7 +467,7 @@ def construct_array_type(cls):
453
467
return Categorical
454
468
455
469
@classmethod
456
- def construct_from_string (cls , string ) :
470
+ def construct_from_string (cls , string : str ) -> 'CategoricalDtype' :
457
471
"""
458
472
attempt to construct this type from a string, raise a TypeError if
459
473
it's not possible """
@@ -466,7 +480,7 @@ def construct_from_string(cls, string):
466
480
pass
467
481
468
482
@staticmethod
469
- def validate_ordered (ordered ) :
483
+ def validate_ordered (ordered : bool ) -> None :
470
484
"""
471
485
Validates that we have a valid ordered parameter. If
472
486
it is not a boolean, a TypeError will be raised.
@@ -486,7 +500,7 @@ def validate_ordered(ordered):
486
500
raise TypeError ("'ordered' must either be 'True' or 'False'" )
487
501
488
502
@staticmethod
489
- def validate_categories (categories , fastpath = False ):
503
+ def validate_categories (categories , fastpath : bool = False ):
490
504
"""
491
505
Validates that we have good categories
492
506
@@ -519,9 +533,9 @@ def validate_categories(categories, fastpath=False):
519
533
if isinstance (categories , ABCCategoricalIndex ):
520
534
categories = categories .categories
521
535
522
- return categories
536
+ return typing . cast ( Index , categories )
523
537
524
- def update_dtype (self , dtype ) :
538
+ def update_dtype (self , dtype : 'CategoricalDtype' ) -> 'CategoricalDtype' :
525
539
"""
526
540
Returns a CategoricalDtype with categories and ordered taken from dtype
527
541
if specified, otherwise falling back to self if unspecified
@@ -560,17 +574,18 @@ def categories(self):
560
574
"""
561
575
An ``Index`` containing the unique categories allowed.
562
576
"""
563
- return self ._categories
577
+ from pandas import Index
578
+ return typing .cast (Index , self ._categories )
564
579
565
580
@property
def ordered(self) -> Optional[bool]:
    """
    Whether the categories have an ordered relationship.

    Returns the stored ``_ordered`` value unchanged; it can be None when
    no ordering was specified at construction.
    """
    return self._ordered
571
586
572
587
@property
def _is_boolean(self) -> bool:
    """
    Whether ``is_bool_dtype`` considers ``self.categories`` boolean.
    """
    # Imported inside the method, as in the original, rather than at
    # module level.
    from pandas.core.dtypes.common import is_bool_dtype

    return is_bool_dtype(self.categories)
@@ -614,14 +629,14 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):
614
629
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
615
630
"""
616
631
type = Timestamp # type: Type[Timestamp]
617
- kind = 'M' # type: str_type
632
+ kind = 'M' # type: str
618
633
str = '|M8[ns]'
619
634
num = 101
620
635
base = np .dtype ('M8[ns]' )
621
636
na_value = NaT
622
637
_metadata = ('unit' , 'tz' )
623
638
_match = re .compile (r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]" )
624
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
639
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
625
640
626
641
def __init__ (self , unit = "ns" , tz = None ):
627
642
if isinstance (unit , DatetimeTZDtype ):
@@ -765,13 +780,13 @@ class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
765
780
period[M]
766
781
"""
767
782
type = Period # type: Type[Period]
768
- kind = 'O' # type: str_type
783
+ kind = 'O' # type: str
769
784
str = '|O08'
770
785
base = np .dtype ('O' )
771
786
num = 102
772
787
_metadata = ('freq' ,)
773
788
_match = re .compile (r"(P|p)eriod\[(?P<freq>.+)\]" )
774
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
789
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
775
790
776
791
def __new__ (cls , freq = None ):
777
792
"""
@@ -919,13 +934,13 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype):
919
934
interval[int64]
920
935
"""
921
936
name = 'interval'
922
- kind = None # type: Optional[str_type ]
937
+ kind = None # type: Optional[str ]
923
938
str = '|O08'
924
939
base = np .dtype ('O' )
925
940
num = 103
926
941
_metadata = ('subtype' ,)
927
942
_match = re .compile (r"(I|i)nterval\[(?P<subtype>.+)\]" )
928
- _cache = {} # type: Dict[str_type , PandasExtensionDtype]
943
+ _cache = {} # type: Dict[str , PandasExtensionDtype]
929
944
930
945
def __new__ (cls , subtype = None ):
931
946
from pandas .core .dtypes .common import (
0 commit comments