|
8 | 8 | from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
|
9 | 9 |
|
10 | 10 |
|
11 |
| -class _DtypeOpsMixin: |
12 |
| - # Not all of pandas' extension dtypes are compatibile with |
13 |
| - # the new ExtensionArray interface. This means PandasExtensionDtype |
14 |
| - # can't subclass ExtensionDtype yet, as is_extension_array_dtype would |
15 |
| - # incorrectly say that these types are extension types. |
16 |
| - # |
17 |
| - # In the interim, we put methods that are shared between the two base |
18 |
| - # classes ExtensionDtype and PandasExtensionDtype here. Both those base |
19 |
| - # classes will inherit from this Mixin. Once everything is compatible, this |
20 |
| - # class's methods can be moved to ExtensionDtype and removed. |
21 |
| - |
22 |
| - # na_value is the default NA value to use for this type. This is used in |
23 |
| - # e.g. ExtensionArray.take. This should be the user-facing "boxed" version |
24 |
| - # of the NA value, not the physical NA vaalue for storage. |
25 |
| - # e.g. for JSONArray, this is an empty dictionary. |
26 |
| - na_value = np.nan |
27 |
| - _metadata = () # type: Tuple[str, ...] |
28 |
| - |
29 |
| - def __eq__(self, other): |
30 |
| - """Check whether 'other' is equal to self. |
31 |
| -
|
32 |
| - By default, 'other' is considered equal if either |
33 |
| -
|
34 |
| - * it's a string matching 'self.name'. |
35 |
| - * it's an instance of this type and all of the |
36 |
| - the attributes in ``self._metadata`` are equal between |
37 |
| - `self` and `other`. |
38 |
| -
|
39 |
| - Parameters |
40 |
| - ---------- |
41 |
| - other : Any |
42 |
| -
|
43 |
| - Returns |
44 |
| - ------- |
45 |
| - bool |
46 |
| - """ |
47 |
| - if isinstance(other, str): |
48 |
| - try: |
49 |
| - other = self.construct_from_string(other) |
50 |
| - except TypeError: |
51 |
| - return False |
52 |
| - if isinstance(other, type(self)): |
53 |
| - return all( |
54 |
| - getattr(self, attr) == getattr(other, attr) |
55 |
| - for attr in self._metadata |
56 |
| - ) |
57 |
| - return False |
58 |
| - |
59 |
| - def __hash__(self): |
60 |
| - return hash(tuple(getattr(self, attr) for attr in self._metadata)) |
61 |
| - |
62 |
| - def __ne__(self, other): |
63 |
| - return not self.__eq__(other) |
64 |
| - |
65 |
| - @property |
66 |
| - def names(self) -> Optional[List[str]]: |
67 |
| - """Ordered list of field names, or None if there are no fields. |
68 |
| -
|
69 |
| - This is for compatibility with NumPy arrays, and may be removed in the |
70 |
| - future. |
71 |
| - """ |
72 |
| - return None |
73 |
| - |
74 |
| - @classmethod |
75 |
| - def is_dtype(cls, dtype): |
76 |
| - """Check if we match 'dtype'. |
77 |
| -
|
78 |
| - Parameters |
79 |
| - ---------- |
80 |
| - dtype : object |
81 |
| - The object to check. |
82 |
| -
|
83 |
| - Returns |
84 |
| - ------- |
85 |
| - is_dtype : bool |
86 |
| -
|
87 |
| - Notes |
88 |
| - ----- |
89 |
| - The default implementation is True if |
90 |
| -
|
91 |
| - 1. ``cls.construct_from_string(dtype)`` is an instance |
92 |
| - of ``cls``. |
93 |
| - 2. ``dtype`` is an object and is an instance of ``cls`` |
94 |
| - 3. ``dtype`` has a ``dtype`` attribute, and any of the above |
95 |
| - conditions is true for ``dtype.dtype``. |
96 |
| - """ |
97 |
| - dtype = getattr(dtype, 'dtype', dtype) |
98 |
| - |
99 |
| - if isinstance(dtype, (ABCSeries, ABCIndexClass, |
100 |
| - ABCDataFrame, np.dtype)): |
101 |
| - # https://github.com/pandas-dev/pandas/issues/22960 |
102 |
| - # avoid passing data to `construct_from_string`. This could |
103 |
| - # cause a FutureWarning from numpy about failing elementwise |
104 |
| - # comparison from, e.g., comparing DataFrame == 'category'. |
105 |
| - return False |
106 |
| - elif dtype is None: |
107 |
| - return False |
108 |
| - elif isinstance(dtype, cls): |
109 |
| - return True |
110 |
| - try: |
111 |
| - return cls.construct_from_string(dtype) is not None |
112 |
| - except TypeError: |
113 |
| - return False |
114 |
| - |
115 |
| - @property |
116 |
| - def _is_numeric(self) -> bool: |
117 |
| - """ |
118 |
| - Whether columns with this dtype should be considered numeric. |
119 |
| -
|
120 |
| - By default ExtensionDtypes are assumed to be non-numeric. |
121 |
| - They'll be excluded from operations that exclude non-numeric |
122 |
| - columns, like (groupby) reductions, plotting, etc. |
123 |
| - """ |
124 |
| - return False |
125 |
| - |
126 |
| - @property |
127 |
| - def _is_boolean(self) -> bool: |
128 |
| - """ |
129 |
| - Whether this dtype should be considered boolean. |
130 |
| -
|
131 |
| - By default, ExtensionDtypes are assumed to be non-numeric. |
132 |
| - Setting this to True will affect the behavior of several places, |
133 |
| - e.g. |
134 |
| -
|
135 |
| - * is_bool |
136 |
| - * boolean indexing |
137 |
| -
|
138 |
| - Returns |
139 |
| - ------- |
140 |
| - bool |
141 |
| - """ |
142 |
| - return False |
143 |
| - |
144 |
| - |
145 |
| -class ExtensionDtype(_DtypeOpsMixin): |
| 11 | +class ExtensionDtype: |
146 | 12 | """
|
147 | 13 | A custom data type, to be paired with an ExtensionArray.
|
148 | 14 |
|
@@ -202,10 +68,52 @@ class property**.
|
202 | 68 | ``pandas.errors.AbstractMethodError`` and no ``register`` method is
|
203 | 69 | provided for registering virtual subclasses.
|
204 | 70 | """
|
| 71 | + # na_value is the default NA value to use for this type. This is used in |
| 72 | + # e.g. ExtensionArray.take. This should be the user-facing "boxed" version |
| 73 | + # of the NA value, not the physical NA value for storage. |
| 74 | + # e.g. for JSONArray, this is an empty dictionary. |
| 75 | + na_value = np.nan |
| 76 | + _metadata = () # type: Tuple[str, ...] |
205 | 77 |
|
206 | 78 | def __str__(self):
|
207 | 79 | return self.name
|
208 | 80 |
|
| 81 | + def __eq__(self, other): |
| 82 | + """Check whether 'other' is equal to self. |
| 83 | +
|
| 84 | + By default, 'other' is considered equal if either |
| 85 | +
|
| 86 | + * it's a string matching 'self.name'. |
| 87 | + * it's an instance of this type and all of the |
| 88 | + attributes in ``self._metadata`` are equal between |
| 89 | + `self` and `other`. |
| 90 | +
|
| 91 | + Parameters |
| 92 | + ---------- |
| 93 | + other : Any |
| 94 | +
|
| 95 | + Returns |
| 96 | + ------- |
| 97 | + bool |
| 98 | + """ |
| 99 | + if isinstance(other, str): |
| 100 | + try: |
| 101 | + other = self.construct_from_string(other) |
| 102 | + except TypeError: |
| 103 | + return False |
| 104 | + if isinstance(other, type(self)): |
| 105 | + return all( |
| 106 | + getattr(self, attr) == getattr(other, attr) |
| 107 | + for attr in self._metadata |
| 108 | + ) |
| 109 | + return False |
| 110 | + |
| 111 | + def __hash__(self): |
| 112 | + return hash(tuple(getattr(self, attr) for attr in self._metadata)) |
| 113 | + |
| 114 | + def __ne__(self, other): |
| 115 | + return not self.__eq__(other) |
| 116 | + |
209 | 117 | @property
|
210 | 118 | def type(self) -> Type:
|
211 | 119 | """
|
@@ -243,6 +151,15 @@ def name(self) -> str:
|
243 | 151 | """
|
244 | 152 | raise AbstractMethodError(self)
|
245 | 153 |
|
| 154 | + @property |
| 155 | + def names(self) -> Optional[List[str]]: |
| 156 | + """Ordered list of field names, or None if there are no fields. |
| 157 | +
|
| 158 | + This is for compatibility with NumPy arrays, and may be removed in the |
| 159 | + future. |
| 160 | + """ |
| 161 | + return None |
| 162 | + |
246 | 163 | @classmethod
|
247 | 164 | def construct_array_type(cls):
|
248 | 165 | """
|
@@ -286,3 +203,73 @@ def construct_from_string(cls, string):
|
286 | 203 | ... "'{}'".format(cls, string))
|
287 | 204 | """
|
288 | 205 | raise AbstractMethodError(cls)
|
| 206 | + |
| 207 | + @classmethod |
| 208 | + def is_dtype(cls, dtype): |
| 209 | + """Check if we match 'dtype'. |
| 210 | +
|
| 211 | + Parameters |
| 212 | + ---------- |
| 213 | + dtype : object |
| 214 | + The object to check. |
| 215 | +
|
| 216 | + Returns |
| 217 | + ------- |
| 218 | + is_dtype : bool |
| 219 | +
|
| 220 | + Notes |
| 221 | + ----- |
| 222 | + The default implementation is True if |
| 223 | +
|
| 224 | + 1. ``cls.construct_from_string(dtype)`` is an instance |
| 225 | + of ``cls``. |
| 226 | + 2. ``dtype`` is an object and is an instance of ``cls`` |
| 227 | + 3. ``dtype`` has a ``dtype`` attribute, and any of the above |
| 228 | + conditions is true for ``dtype.dtype``. |
| 229 | + """ |
| 230 | + dtype = getattr(dtype, 'dtype', dtype) |
| 231 | + |
| 232 | + if isinstance(dtype, (ABCSeries, ABCIndexClass, |
| 233 | + ABCDataFrame, np.dtype)): |
| 234 | + # https://github.com/pandas-dev/pandas/issues/22960 |
| 235 | + # avoid passing data to `construct_from_string`. This could |
| 236 | + # cause a FutureWarning from numpy about failing elementwise |
| 237 | + # comparison from, e.g., comparing DataFrame == 'category'. |
| 238 | + return False |
| 239 | + elif dtype is None: |
| 240 | + return False |
| 241 | + elif isinstance(dtype, cls): |
| 242 | + return True |
| 243 | + try: |
| 244 | + return cls.construct_from_string(dtype) is not None |
| 245 | + except TypeError: |
| 246 | + return False |
| 247 | + |
| 248 | + @property |
| 249 | + def _is_numeric(self) -> bool: |
| 250 | + """ |
| 251 | + Whether columns with this dtype should be considered numeric. |
| 252 | +
|
| 253 | + By default ExtensionDtypes are assumed to be non-numeric. |
| 254 | + They'll be excluded from operations that exclude non-numeric |
| 255 | + columns, like (groupby) reductions, plotting, etc. |
| 256 | + """ |
| 257 | + return False |
| 258 | + |
| 259 | + @property |
| 260 | + def _is_boolean(self) -> bool: |
| 261 | + """ |
| 262 | + Whether this dtype should be considered boolean. |
| 263 | +
|
| 264 | + By default, ExtensionDtypes are assumed to be non-boolean. |
| 265 | + Setting this to True will affect the behavior of several places, |
| 266 | + e.g. |
| 267 | +
|
| 268 | + * is_bool |
| 269 | + * boolean indexing |
| 270 | +
|
| 271 | + Returns |
| 272 | + ------- |
| 273 | + bool |
| 274 | + """ |
| 275 | + return False |
0 commit comments