|
| 1 | +import numpy as np |
| 2 | + |
| 3 | +from pandas._libs import lib |
| 4 | + |
| 5 | +from pandas.core.dtypes.common import extract_array |
| 6 | +from pandas.core.dtypes.dtypes import ExtensionDtype |
| 7 | +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries |
| 8 | +from pandas.core.dtypes.inference import is_list_like |
| 9 | + |
| 10 | +from pandas import compat |
| 11 | +from pandas.core import nanops |
| 12 | + |
| 13 | +from .base import ExtensionArray, ExtensionOpsMixin |
| 14 | + |
| 15 | + |
class NumPyExtensionDtype(ExtensionDtype):
    """
    An ExtensionDtype that wraps a NumPy dtype.

    Parameters
    ----------
    dtype : numpy.dtype
        The NumPy dtype to wrap. Its ``name`` and ``type`` are exposed
        through the properties of the same names.

    Raises
    ------
    TypeError
        If `dtype` is not a ``numpy.dtype`` instance.
    """
    # Attribute names listed for the ExtensionDtype base class; here only
    # the wrapped NumPy dtype.
    _metadata = ('_dtype',)

    def __init__(self, dtype):
        # An explicit check rather than ``assert`` so that the validation
        # is not stripped when Python runs with ``-O``.
        if not isinstance(dtype, np.dtype):
            raise TypeError(
                "dtype must be a numpy.dtype, got {}".format(
                    type(dtype).__name__)
            )
        self._dtype = dtype
        self._name = dtype.name
        self._type = dtype.type

    @property
    def name(self):
        """The name of the wrapped NumPy dtype, e.g. ``'int64'``."""
        return self._name

    @property
    def type(self):
        """The scalar type of the wrapped NumPy dtype."""
        return self._type

    @property
    def _is_numeric(self):
        """
        Whether the wrapped dtype is numeric.

        True for the boolean, signed/unsigned integer, floating and complex
        kinds; False for every other kind (object, strings, datetimes, ...).
        """
        return self.kind in ('b', 'i', 'u', 'f', 'c')

    @property
    def _is_boolean(self):
        """Whether the wrapped dtype is of boolean kind."""
        return self.kind == 'b'

    @classmethod
    def construct_from_string(cls, string):
        """
        Build a dtype from anything ``numpy.dtype`` can interpret.

        Parameters
        ----------
        string : str
            Passed unchanged to ``numpy.dtype``.

        Returns
        -------
        NumPyExtensionDtype
        """
        return cls(np.dtype(string))

    @classmethod
    def construct_array_type(cls):
        """
        Return the array class associated with this dtype.

        Declared as a classmethod so it can be called on the class itself
        as well as on instances.

        Returns
        -------
        type
            ``NumPyExtensionArray``
        """
        return NumPyExtensionArray

    @property
    def kind(self):
        """The one-character kind code of the wrapped NumPy dtype."""
        return self._dtype.kind

    @property
    def itemsize(self):
        """The element size, in bytes, of the wrapped NumPy dtype."""
        return self._dtype.itemsize
| 56 | + |
| 57 | + |
class NumPyExtensionArray(ExtensionArray, ExtensionOpsMixin):
    """
    An ExtensionArray whose data lives in a one-dimensional NumPy ndarray.

    Parameters
    ----------
    values : numpy.ndarray or NumPyExtensionArray
        One-dimensional data to wrap. An ndarray is stored as-is (no copy
        is made); another NumPyExtensionArray contributes its underlying
        ndarray.

    Raises
    ------
    TypeError
        If `values` is neither an ndarray nor an instance of this class.
    ValueError
        If `values` is not one-dimensional.
    """
    # NOTE(review): presumably set high so that NumPy defers to this class
    # in mixed ndarray / NumPyExtensionArray binary operations -- confirm.
    __array_priority__ = 1000

    def __init__(self, values):
        if isinstance(values, type(self)):
            values = values._ndarray
        # Explicit checks rather than ``assert`` so that the validation is
        # not stripped when Python runs with ``-O``.
        if not isinstance(values, np.ndarray):
            raise TypeError(
                "values must be a numpy.ndarray, got {}".format(
                    type(values).__name__)
            )
        if values.ndim != 1:
            raise ValueError(
                "values must be 1-dimensional, got ndim={}".format(
                    values.ndim)
            )

        self._ndarray = values
        self._dtype = NumPyExtensionDtype(values.dtype)

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """
        Build an array from a sequence of scalars.

        Parameters
        ----------
        scalars : sequence
            Converted with ``numpy.asarray``.
        dtype : optional
            Accepted for interface compatibility but deliberately ignored,
            to avoid dealing with casting issues.
        copy : bool, default False
            If True and ``numpy.asarray`` returned `scalars` itself, copy
            the data before wrapping it.

        Returns
        -------
        NumPyExtensionArray
        """
        result = np.asarray(scalars)
        # Only copy when asarray handed back the caller's own object.
        if copy and result is scalars:
            result = result.copy()
        return cls(result)

    @classmethod
    def _from_factorized(cls, values, original):
        """Wrap the factorized `values`; `original` is not used."""
        return cls(values)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate `to_concat` with ``numpy.concatenate`` and wrap it."""
        return cls(np.concatenate(to_concat))

    @property
    def dtype(self):
        """The NumPyExtensionDtype describing the underlying ndarray."""
        return self._dtype

    def __array__(self, dtype=None):
        """Return the underlying data as an ndarray, optionally cast."""
        return np.asarray(self._ndarray, dtype=dtype)

    def __getitem__(self, item):
        """
        Index the underlying ndarray.

        Scalar results are returned unchanged; any other result is wrapped
        in a new instance of this class.
        """
        if isinstance(item, type(self)):
            item = item._ndarray

        result = self._ndarray[item]
        if not lib.is_scalar(result):
            result = type(self)(result)
        return result

    def __setitem__(self, key, value):
        """
        Assign `value` at `key`, upcasting the stored data when required.

        If ``numpy.result_type(value, data)`` differs from the current
        dtype, the data is first cast to that type; in that case both the
        stored ndarray and ``dtype`` are replaced.
        """
        value = extract_array(value)

        if not lib.is_scalar(key) and is_list_like(key):
            key = np.asarray(key)
            if not len(key):
                # early return to avoid casting unnecessarily.
                return

        if not lib.is_scalar(value):
            value = np.asarray(value)

        values = self._ndarray
        t = np.result_type(value, values)
        if t != self._ndarray.dtype:
            # ``astype`` returns a new array, so the assignment is made on
            # the upcast copy, which then replaces the stored data.
            values = values.astype(t, casting='safe')
            values[key] = value
            self._dtype = NumPyExtensionDtype(t)
            self._ndarray = values
        else:
            self._ndarray[key] = value

    def __len__(self):
        """Number of elements in the array."""
        return len(self._ndarray)

    @property
    def nbytes(self):
        """Number of bytes consumed by the underlying ndarray."""
        return self._ndarray.nbytes

    def isna(self):
        """Return the result of ``pandas.isna`` on the underlying data."""
        from pandas import isna

        return isna(self._ndarray)

    def fillna(self, value=None, method=None, limit=None):
        """
        Return a new array with missing values filled.

        Parameters
        ----------
        value : scalar or array-like, optional
            Fill value. An array-like must have the same length as this
            array; only its entries at the missing positions are used.
        method : optional
            Fill method, validated by ``validate_fillna_kwargs``. ``'pad'``
            selects ``pad_1d``; any other non-None value selects
            ``backfill_1d``.
        limit : int, optional
            Passed through to the pad / backfill routine.

        Returns
        -------
        NumPyExtensionArray
            Always a new object, even when nothing is missing.

        Raises
        ------
        ValueError
            If an array-like `value` has a different length than this
            array.
        """
        from pandas.api.types import is_array_like
        from pandas.util._validators import validate_fillna_kwargs
        from pandas.core.missing import pad_1d, backfill_1d

        # TODO: really need to implement `_values_for_fillna`.
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError("Length of 'value' does not match. Got ({}) "
                                 " expected {}".format(len(value), len(self)))
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == 'pad' else backfill_1d
                new_values = func(self._ndarray, limit=limit,
                                  mask=mask)
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            new_values = self.copy()
        return new_values

    def take(self, indices, allow_fill=False, fill_value=None):
        """
        Take elements by position via ``pandas.core.algorithms.take``.

        `indices`, `allow_fill` and `fill_value` are forwarded unchanged;
        the result is wrapped in a new instance of this class.
        """
        from pandas.core.algorithms import take

        result = take(self._ndarray, indices, allow_fill=allow_fill,
                      fill_value=fill_value)
        return type(self)(result)

    def copy(self, deep=False):
        """
        Return a new array wrapping a copy of the underlying ndarray.

        `deep` is accepted for interface compatibility; the data is copied
        regardless of its value.
        """
        return type(self)(self._ndarray.copy())

    def _values_for_argsort(self):
        """Return the underlying ndarray for sorting."""
        return self._ndarray

    def _values_for_factorize(self):
        """
        Return ``(values, na_value)`` for factorization.

        NOTE(review): the second element is -1, which is also a legitimate
        value for signed data -- confirm that this is intended.
        """
        return self._ndarray, -1

    def unique(self):
        """Return the result of ``pandas.unique`` wrapped in a new array."""
        from pandas import unique

        return type(self)(unique(self._ndarray))

    def _reduce(self, name, skipna=True, **kwargs):
        """
        Dispatch the reduction `name` to the method of the same name.

        Parameters
        ----------
        name : str
            Name of the reduction, e.g. ``'sum'`` or ``'min'``.
        skipna : bool, default True
            Forwarded to the reduction method.
        **kwargs
            Forwarded to the reduction method.

        Returns
        -------
        The value returned by the reduction method.

        Raises
        ------
        TypeError
            If this class has no attribute called `name`.
        """
        meth = getattr(self, name, None)
        if meth is None:
            # Fail with a clear message instead of falling through and
            # trying to call None.
            raise TypeError(
                "cannot perform {name} with type {dtype}".format(
                    name=name, dtype=self.dtype)
            )

        return meth(skipna=skipna, **kwargs)

    # ------------------------------------------------------------------
    # Reductions: thin wrappers that forward the underlying ndarray and
    # the keyword arguments to the matching function in ``nanops``.

    def min(self, skipna=True):
        return nanops.nanmin(self._ndarray, skipna=skipna)

    def max(self, skipna=True):
        return nanops.nanmax(self._ndarray, skipna=skipna)

    def any(self, skipna=True):
        return nanops.nanany(self._ndarray, skipna=skipna)

    def all(self, skipna=True):
        return nanops.nanall(self._ndarray, skipna=skipna)

    def sum(self, skipna=True, min_count=0):
        return nanops.nansum(self._ndarray, skipna=skipna,
                             min_count=min_count)

    def mean(self, skipna=True):
        return nanops.nanmean(self._ndarray, skipna=skipna)

    def median(self, skipna=True):
        return nanops.nanmedian(self._ndarray, skipna=skipna)

    def prod(self, min_count=0, skipna=True):
        return nanops.nanprod(self._ndarray, min_count=min_count,
                              skipna=skipna)

    def std(self, skipna=True, ddof=1):
        return nanops.nanstd(self._ndarray, skipna=skipna, ddof=ddof)

    def var(self, skipna=True, ddof=1):
        return nanops.nanvar(self._ndarray, skipna=skipna, ddof=ddof)

    def kurt(self, skipna=True):
        return nanops.nankurt(self._ndarray, skipna=skipna)

    def skew(self, skipna=True):
        return nanops.nanskew(self._ndarray, skipna=skipna)

    def sem(self, skipna=True):
        return nanops.nansem(self._ndarray, skipna=skipna)

    def __invert__(self):
        """Apply ``~`` to the underlying ndarray and wrap the result."""
        return type(self)(~self._ndarray)

    @classmethod
    def _create_arithmetic_method(cls, op):
        """
        Build the binary dunder method implementing `op` for this class.

        The generated method returns NotImplemented when the other operand
        is an Index or Series, unwraps another instance of this class to
        its ndarray, applies `op` with NumPy floating-point warnings
        silenced, and wraps the result. Operators that return a tuple
        (divmod and its reflected form) have each element wrapped
        separately.
        """
        def arithmetic_method(self, other):
            if isinstance(other, (ABCIndexClass, ABCSeries)):
                return NotImplemented

            elif isinstance(other, cls):
                other = other._ndarray

            with np.errstate(all="ignore"):
                result = op(self._ndarray, other)

            if isinstance(result, tuple):
                # Checking the result rather than ``op is divmod`` also
                # covers the reflected divmod operator.
                return tuple(cls(part) for part in result)

            return cls(result)

        return compat.set_function_name(arithmetic_method,
                                        "__{}__".format(op.__name__),
                                        cls)

    # Comparisons are built by the same factory as the arithmetic methods.
    _create_comparison_method = _create_arithmetic_method
| 268 | + |
| 269 | + |
# Install the operator dunder methods on the class at import time.
# NOTE(review): these hooks are presumably provided by ExtensionOpsMixin
# and built on the ``_create_arithmetic_method`` /
# ``_create_comparison_method`` factories defined on the class -- confirm.
NumPyExtensionArray._add_arithmetic_ops()
NumPyExtensionArray._add_comparison_ops()
0 commit comments