From 2ff4b090717938f76448ad9a25559adab75486eb Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 7 Aug 2018 16:28:22 -0700 Subject: [PATCH 01/10] First pass at implementation (needs refactor) --- pandas/core/arrays/integer.py | 45 +++++++++++++++++++++++++++-------- pandas/core/series.py | 2 +- pandas/util/testing.py | 8 +++++++ 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c126117060c3d..993432d27c845 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -2,6 +2,7 @@ import warnings import copy import numpy as np +from bitarray import bitarray from pandas._libs.lib import infer_dtype from pandas.util._decorators import cache_readonly @@ -163,14 +164,12 @@ def coerce_to_array(values, dtype, mask=None, copy=False): values.dtype)) if mask is None: - mask = isna(values) + mask = bitarray(isna(values).tolist()) else: assert len(mask) == len(values) if not values.ndim == 1: raise TypeError("values must be a 1D list-like") - if not mask.ndim == 1: - raise TypeError("mask must be a 1D list-like") # infer dtype if needed if dtype is None: @@ -186,12 +185,24 @@ def coerce_to_array(values, dtype, mask=None, copy=False): # we copy as need to coerce here if mask.any(): + slice_mask = mask + if isinstance(mask, bitarray): + slice_mask = np.fromstring(mask.unpack(), dtype=bool) + else: + slice_mask = slice_mask.astype(np.bool_, copy=False) + values = values.copy() - values[mask] = 1 + values[slice_mask] = 1 values = safe_cast(values, dtype, copy=False) else: values = safe_cast(values, dtype, copy=False) + # Make sure we always coerce back to bitarray + if not isinstance(mask, bitarray): + _mask = mask + mask = bitarray() + mask.pack(_mask.tostring()) + return values, mask @@ -235,8 +246,12 @@ def __getitem__(self, item): if self._mask[item]: return self.dtype.na_value return self._data[item] + + arr = np.array(self._mask)[item] + mask = bitarray() + mask.pack(arr.tostring()) return type(self)(self._data[item], - mask=self._mask[item], + mask=mask, dtype=self.dtype) def _coerce_to_ndarray(self): @@ -317,7 +332,13 @@ def __setitem__(self, key, value): mask = mask[0] self._data[key] = value - self._mask[key] = mask + + # HACKish but unpack to ndarray then repack to leverage + # numpy slicing of arrays + arr = np.fromstring(self._mask.unpack(), dtype=bool) + arr[key] = mask + self._mask = bitarray() + self._mask.pack(arr.tostring()) def __len__(self): return len(self._data) @@ -343,10 +364,10 @@ def __repr__(self): @property def nbytes(self): - return self._data.nbytes + self._mask.nbytes + return self._data.nbytes + self._mask.buffer_info()[1] def isna(self): - return self._mask + return np.fromstring(self._mask.unpack(), dtype=bool) @property def _na_value(self): @@ -444,7 +465,8 @@ def value_counts(self, dropna=True): # TODO(extension) # appending to an Index *always* infers # w/o passing the dtype - array = np.append(array, [self._mask.sum()]) + array = np.append(array, [ + np.fromstring(self._mask.unpack(), dtype=bool).sum()]) index = Index(np.concatenate( [index.values, np.array([np.nan], dtype=object)]), dtype=object) @@ -513,7 +535,10 @@ def _maybe_mask_result(self, result, mask, other, op_name): # may need to fill infs # and mask wraparound if is_float_dtype(result): - mask |= (result == np.inf) | (result == -np.inf) + new_mask = bitarray() + new_mask.pack( + ((result == np.inf) | (result == -np.inf)).tostring()) + mask |= new_mask # if we have a float operand we are by-definition # a float result diff --git a/pandas/core/series.py b/pandas/core/series.py index b84179875db1f..6da88c6eb7d65 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -988,7 +988,7 @@ def _set_with(self, key, value): else: return self._set_values(key, value) elif key_type == 'boolean': - self._set_values(key.astype(np.bool_), value) + self._set_values(np.array(key).astype(np.bool_), value) else: self._set_labels(key, value) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index afc928ddfbb84..bedcb40bbaeda 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -15,6 +15,7 @@ from functools import wraps from contextlib import contextmanager +from bitarray import bitarray from numpy.random import randn, rand import numpy as np @@ -1172,6 +1173,13 @@ def assert_extension_array_equal(left, right): assert left.dtype == right.dtype left_na = left.isna() right_na = right.isna() + + # HACK - probably need new method to wrap numpy_array_equal + if isinstance(left_na, bitarray): + left_na = np.fromstring(left_na.unpack(), dtype=bool) + if isinstance(right_na, bitarray): + right_na = np.fromstring(right_na.unpack(), dtype=bool) + assert_numpy_array_equal(left_na, right_na) left_valid = left[~left_na].astype(object) From a8e865626a9edaa29aea4fb951e657df1ce0ac48 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 11 Aug 2018 11:36:12 -0700 Subject: [PATCH 02/10] Shared methods to simplify implementation --- pandas/core/arrays/integer.py | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 993432d27c845..47b1b0cf2dca7 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -164,10 +164,14 @@ def coerce_to_array(values, dtype, mask=None, copy=False): values.dtype)) if mask is None: - mask = bitarray(isna(values).tolist()) + mask = isna(values) else: assert len(mask) == len(values) + # Work with bitarrays from here on out + if isinstance(mask, np.ndarray): + mask = _numpy_to_bitarray(mask) + if not values.ndim == 1: raise TypeError("values must be a 1D list-like") @@ -185,27 +189,32 @@ def coerce_to_array(values, dtype, mask=None, copy=False): # we copy as need to coerce here if mask.any(): - slice_mask = mask - if isinstance(mask, bitarray): - slice_mask = np.fromstring(mask.unpack(), dtype=bool) - else: - slice_mask = slice_mask.astype(np.bool_, copy=False) - values = values.copy() - values[slice_mask] = 1 + values[mask] = 1 values = safe_cast(values, dtype, copy=False) else: values = safe_cast(values, dtype, copy=False) - # Make sure we always coerce back to bitarray - if not isinstance(mask, bitarray): - _mask = mask - mask = bitarray() - mask.pack(_mask.tostring()) - return values, mask +def _numpy_to_bitarray(arr): + """ + Efficiently convert a NumPy array to a bitarray object. + """ + barr = bitarray() + barr.pack(arr.astype(bool, copy=False).tostring()) + + return barr + + +def _bitarray_to_numpy(arr): + """ + Efficiently convert a bitarray object to a NumPy array. + """ + return np.fromstring(arr.unpack(), dtype=bool) + + class IntegerArray(ExtensionArray, ExtensionOpsMixin): """ We represent an IntegerArray with 2 numpy arrays @@ -247,11 +256,8 @@ def __getitem__(self, item): return self.dtype.na_value return self._data[item] - arr = np.array(self._mask)[item] - mask = bitarray() - mask.pack(arr.tostring()) return type(self)(self._data[item], - mask=mask, + mask=_bitarray_to_numpy(self._mask)[item], dtype=self.dtype) def _coerce_to_ndarray(self): @@ -332,13 +338,10 @@ def __setitem__(self, key, value): mask = mask[0] self._data[key] = value - - # HACKish but unpack to ndarray then repack to leverage - # numpy slicing of arrays - arr = np.fromstring(self._mask.unpack(), dtype=bool) + # Coerce to numpy array to leverage advanced indexing, then corece back + arr = _bitarray_to_numpy(self._mask) arr[key] = mask - self._mask = bitarray() - self._mask.pack(arr.tostring()) + self._mask = _numpy_to_bitarray(arr) def __len__(self): return len(self._data) @@ -367,7 +370,7 @@ def nbytes(self): return self._data.nbytes + self._mask.buffer_info()[1] def isna(self): - return np.fromstring(self._mask.unpack(), dtype=bool) + return _bitarray_to_numpy(self._mask) @property def _na_value(self): @@ -465,8 +468,7 @@ def value_counts(self, dropna=True): # TODO(extension) # appending to an Index *always* infers # w/o passing the dtype - array = np.append(array, [ - np.fromstring(self._mask.unpack(), dtype=bool).sum()]) + array = np.append(array, [_bitarray_to_numpy(self._mask).sum()]) index = Index(np.concatenate( [index.values, np.array([np.nan], dtype=object)]), dtype=object) @@ -535,10 +537,8 @@ def _maybe_mask_result(self, result, mask, other, op_name): # may need to fill infs # and mask wraparound if is_float_dtype(result): - new_mask = bitarray() - new_mask.pack( - ((result == np.inf) | (result == -np.inf)).tostring()) - mask |= new_mask + arr = _numpy_to_bitarray((result == np.inf) | (result == -np.inf)) + mask |= arr # if we have a float operand we are by-definition # a float result From cae87e9df4d4c2a53b461c5487e786b1ab2a024f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 11 Aug 2018 11:44:26 -0700 Subject: [PATCH 03/10] Added try...except for non-ndarray Series _set_with --- pandas/core/series.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6da88c6eb7d65..03b768bd5bc47 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -971,7 +971,8 @@ def _set_with(self, key, value): except Exception: pass - if not isinstance(key, (list, Series, np.ndarray, Series)): + if not isinstance(key, ( + list, Series, np.ndarray, Series)): try: key = list(key) except Exception: @@ -988,7 +989,10 @@ def _set_with(self, key, value): else: return self._set_values(key, value) elif key_type == 'boolean': - self._set_values(np.array(key).astype(np.bool_), value) + try: + self._set_values(key.astype(np.bool_), value) + except AttributeError: + self._set_values(key, value) else: self._set_labels(key, value) From 65893ae93f696d75574edbcd038238be0c5605df Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 11 Aug 2018 11:49:17 -0700 Subject: [PATCH 04/10] Minor fixups --- pandas/core/arrays/integer.py | 2 +- pandas/core/series.py | 3 +-- pandas/util/testing.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 47b1b0cf2dca7..e9c7c83f68b59 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -338,7 +338,7 @@ def __setitem__(self, key, value): mask = mask[0] self._data[key] = value - # Coerce to numpy array to leverage advanced indexing, then corece back + # Coerce to numpy array to leverage advanced indexing, then coerce back arr = _bitarray_to_numpy(self._mask) arr[key] = mask self._mask = _numpy_to_bitarray(arr) diff --git a/pandas/core/series.py b/pandas/core/series.py index 03b768bd5bc47..9cb475e913816 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -971,8 +971,7 @@ def _set_with(self, key, value): except Exception: pass - if not isinstance(key, ( - list, Series, np.ndarray, Series)): + if not isinstance(key, (list, Series, np.ndarray, Series)): try: key = list(key) except Exception: diff --git a/pandas/util/testing.py b/pandas/util/testing.py index bedcb40bbaeda..9f1b37d24b09e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1174,12 +1174,12 @@ def assert_extension_array_equal(left, right): left_na = left.isna() right_na = right.isna() - # HACK - probably need new method to wrap numpy_array_equal + # TODO - maybe generate dedicated method for bitarray comparison? if isinstance(left_na, bitarray): left_na = np.fromstring(left_na.unpack(), dtype=bool) if isinstance(right_na, bitarray): right_na = np.fromstring(right_na.unpack(), dtype=bool) - + assert_numpy_array_equal(left_na, right_na) left_valid = left[~left_na].astype(object) From e085674acce6ed0d0045b96e41a7ff7bd4e24846 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 1 Jan 2019 20:49:16 -0500 Subject: [PATCH 05/10] Reverted changes; created new module for mask --- pandas/core/arrays/_mask.py | 47 +++++++++++++++++++++++++++++++++++ pandas/core/arrays/integer.py | 22 ++++++---------- pandas/core/series.py | 5 +--- 3 files changed, 55 insertions(+), 19 deletions(-) create mode 100644 pandas/core/arrays/_mask.py diff --git a/pandas/core/arrays/_mask.py b/pandas/core/arrays/_mask.py new file mode 100644 index 0000000000000..a4a959202ebdf --- /dev/null +++ b/pandas/core/arrays/_mask.py @@ -0,0 +1,47 @@ +import numpy as np + + +class NAMask(): + """Generic class which can be used to represent missing data. + + Will use bitarray if available; otherwise will use numpy.""" + + def __init__(self, mask): + """ + Parameters + ---------- + mask : numpy array + Mask of missing values. + """ + + self._has_bitarray = False + try: + import bitarray + globals()['bitarray'] = bitarray + self._has_bitarray = True + self._data = self._numpy_to_bitarray(mask) + except (ImportError, ModuleNotFoundError): + self._data = mask.astype(bool, copy=False) + + def _numpy_to_bitarray(self, arr): + bit_arr = bitarray() + bit_arr.pack(arr.astype(bool, copy=False)) + + def _bitarray_to_numpy(self, arr): + return np.fromstring(arr.unpack(), dtype=bool) + + def __getitem__(self): + pass + + def __setitem__(self): + pass + + @property + def nbytes(self): + if self._has_bitarray: + return self._data.buffer_info()[1] + + return self._data.nbytes + + def sum(self): + pass diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c05b1f5bd1e1a..6785a6671b039 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -3,7 +3,6 @@ import warnings import numpy as np -from bitarray import bitarray from pandas._libs import lib from pandas.compat import range, set_function_name, string_types @@ -189,12 +188,10 @@ def coerce_to_array(values, dtype, mask=None, copy=False): else: assert len(mask) == len(values) - # Work with bitarrays from here on out - if isinstance(mask, np.ndarray): - mask = _numpy_to_bitarray(mask) - if not values.ndim == 1: raise TypeError("values must be a 1D list-like") + if not mask.ndim == 1: + raise TypeError("mask must be a 1D list-like") # infer dtype if needed if dtype is None: @@ -299,8 +296,7 @@ def __getitem__(self, item): return self._data[item] return type(self)(self._data[item], - mask=_bitarray_to_numpy(self._mask)[item], - dtype=self.dtype) + mask=_bitarray_to_numpy(self._mask)[item]) def _coerce_to_ndarray(self): """ @@ -372,10 +368,7 @@ def __setitem__(self, key, value): mask = mask[0] self._data[key] = value - # Coerce to numpy array to leverage advanced indexing, then coerce back - arr = _bitarray_to_numpy(self._mask) - arr[key] = mask - self._mask = _numpy_to_bitarray(arr) + self._mask[key] = mask def __len__(self): return len(self._data) @@ -385,7 +378,7 @@ def nbytes(self): return self._data.nbytes + self._mask.buffer_info()[1] def isna(self): - return _bitarray_to_numpy(self._mask) + return self._mask @property def _na_value(self): @@ -482,7 +475,7 @@ def value_counts(self, dropna=True): # TODO(extension) # appending to an Index *always* infers # w/o passing the dtype - array = np.append(array, [_bitarray_to_numpy(self._mask).sum()]) + array = np.append(array, [self._mask.sum()]) index = Index(np.concatenate( [index.values, np.array([np.nan], dtype=object)]), dtype=object) @@ -585,8 +578,7 @@ def _maybe_mask_result(self, result, mask, other, op_name): # may need to fill infs # and mask wraparound if is_float_dtype(result): - arr = _numpy_to_bitarray((result == np.inf) | (result == -np.inf)) - mask |= arr + mask |= (result == np.inf) | (result == -np.inf) # if we have a float operand we are by-definition # a float result diff --git a/pandas/core/series.py b/pandas/core/series.py index 39e30a72172a7..50619d667bf0b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1020,10 +1020,7 @@ def _set_with(self, key, value): else: return self._set_values(key, value) elif key_type == 'boolean': - try: - self._set_values(key.astype(np.bool_), value) - except AttributeError: - self._set_values(key, value) + self._set_values(key, value) else: self._set_labels(key, value) From 36256a7185b9d5ec05fe990f80c6a98911aba4a5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 1 Jan 2019 20:56:17 -0500 Subject: [PATCH 06/10] Reverted test from master --- pandas/util/testing.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ebcbda0301006..3c902ce7dc0d8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1195,20 +1195,9 @@ def assert_extension_array_equal(left, right, check_dtype=True, The remaining all-valid values are cast to object dtype and checked. """ assert isinstance(left, ExtensionArray), 'left is not an ExtensionArray' - assert left.dtype == right.dtype, 'right is not an ExtensionArray' + assert isinstance(right, ExtensionArray), 'right is not an ExtensionArray' if check_dtype: assert_attr_equal('dtype', left, right, obj='ExtensionArray') - - left_na = left.isna() - right_na = right.isna() - - # TODO - maybe generate dedicated method for bitarray comparison? - if isinstance(left_na, bitarray): - left_na = np.fromstring(left_na.unpack(), dtype=bool) - if isinstance(right_na, bitarray): - right_na = np.fromstring(right_na.unpack(), dtype=bool) - - assert_numpy_array_equal(left_na, right_na) left_na = np.asarray(left.isna()) right_na = np.asarray(right.isna()) From 384287e71e7003fabf912b6edb4a6930fa7d7b68 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 1 Jan 2019 21:56:36 -0500 Subject: [PATCH 07/10] Revert unnecessary changes from master --- pandas/core/arrays/_mask.py | 19 ++++++++++++++----- pandas/core/arrays/integer.py | 25 ++++--------------------- pandas/core/series.py | 2 +- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/pandas/core/arrays/_mask.py b/pandas/core/arrays/_mask.py index a4a959202ebdf..d8233ba08e5c8 100644 --- a/pandas/core/arrays/_mask.py +++ b/pandas/core/arrays/_mask.py @@ -30,11 +30,17 @@ def _numpy_to_bitarray(self, arr): def _bitarray_to_numpy(self, arr): return np.fromstring(arr.unpack(), dtype=bool) - def __getitem__(self): - pass + def __getitem__(self, item): + if self._has_bitarray: + raise NotImplementedError + + return self._data[item] + + def __setitem__(self, key, value): + if self._has_bitarray: + raise NotImplementedError - def __setitem__(self): - pass + self._data[key] = value @property def nbytes(self): @@ -44,4 +50,7 @@ def nbytes(self): return self._data.nbytes def sum(self): - pass + if self._has_bitarray: + raise NotImplementedError + + return self._data.sum() diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 6785a6671b039..79ea7eba8cf3b 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -19,6 +19,7 @@ from pandas.core import nanops from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays._mask import NAMask class _IntegerDtype(ExtensionDtype): @@ -213,23 +214,6 @@ def coerce_to_array(values, dtype, mask=None, copy=False): return values, mask -def _numpy_to_bitarray(arr): - """ - Efficiently convert a NumPy array to a bitarray object. - """ - barr = bitarray() - barr.pack(arr.astype(bool, copy=False).tostring()) - - return barr - - -def _bitarray_to_numpy(arr): - """ - Efficiently convert a bitarray object to a NumPy array. - """ - return np.fromstring(arr.unpack(), dtype=bool) - - class IntegerArray(ExtensionArray, ExtensionOpsMixin): """ Array of integer (optional missing) values. @@ -272,7 +256,7 @@ def __init__(self, values, mask, copy=False): mask = mask.copy() self._data = values - self._mask = mask + self._mask = NAMask(mask) @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): @@ -295,8 +279,7 @@ def __getitem__(self, item): return self.dtype.na_value return self._data[item] - return type(self)(self._data[item], - mask=_bitarray_to_numpy(self._mask)[item]) + return type(self)(self._data[item], self._mask[item]) def _coerce_to_ndarray(self): """ @@ -375,7 +358,7 @@ def __len__(self): @property def nbytes(self): - return self._data.nbytes + self._mask.buffer_info()[1] + return self._data.nbytes + self._mask.nbytes def isna(self): return self._mask diff --git a/pandas/core/series.py b/pandas/core/series.py index 50619d667bf0b..3637081e09f8c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1020,7 +1020,7 @@ def _set_with(self, key, value): else: return self._set_values(key, value) elif key_type == 'boolean': - self._set_values(key, value) + self._set_values(key.astype(np.bool_), value) else: self._set_labels(key, value) From 2b56c99fa0d0efa605a77a09385ef0f9fca62957 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 1 Jan 2019 22:27:52 -0500 Subject: [PATCH 08/10] Added iter to mask --- pandas/core/arrays/_mask.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/_mask.py b/pandas/core/arrays/_mask.py index d8233ba08e5c8..2defd28607e71 100644 --- a/pandas/core/arrays/_mask.py +++ b/pandas/core/arrays/_mask.py @@ -13,7 +13,7 @@ def __init__(self, mask): mask : numpy array Mask of missing values. """ - + self._has_bitarray = False try: import bitarray @@ -42,6 +42,10 @@ def __setitem__(self, key, value): self._data[key] = value + def __iter__(self): + for i in range(len(self._data)): + yield self._data[i] + @property def nbytes(self): if self._has_bitarray: @@ -49,6 +53,18 @@ def nbytes(self): return self._data.nbytes + def astype(self, dtype, copy=False): + if self._has_bitarray: + raise NotImplementedError + + return self._data.astype(dtype, copy=copy) + + def copy(self): + if self._has_bitarray: + raise NotImplementedError + + return self._data.copy() + def sum(self): if self._has_bitarray: raise NotImplementedError From 29abf0195d42e4c6ef11dd6825c6cae431713b46 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 3 Jan 2019 19:48:08 -0800 Subject: [PATCH 09/10] Added more dunders --- pandas/core/arrays/_mask.py | 24 ++++++++++++++++++++++++ pandas/core/arrays/integer.py | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/_mask.py b/pandas/core/arrays/_mask.py index 2defd28607e71..81ea524989f2c 100644 --- a/pandas/core/arrays/_mask.py +++ b/pandas/core/arrays/_mask.py @@ -42,10 +42,34 @@ def __setitem__(self, key, value): self._data[key] = value + def __array__(self): + if self._has_bitarray: + raise NotImplementedError + + return self._data + def __iter__(self): for i in range(len(self._data)): yield self._data[i] + def __invert__(self): + if self._has_bitarray: + raise NotImplementedError + + return ~self._data + + def __or__(self, other): + if self._has_bitarray: + raise NotImplementedError + + return self._data.__or__(other) + + def __ior__(self, other): + if self._has_bitarray: + raise NotImplementedError + + return self._data | other + @property def nbytes(self): if self._has_bitarray: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index f06b549afbbab..1daf96ab45489 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -248,7 +248,8 @@ def __init__(self, values, mask, copy=False): and is_integer_dtype(values.dtype)): raise TypeError("values should be integer numpy array. Use " "the 'integer_array' function instead") - if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)): + if not (isinstance(mask, NAMask) or ( + isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype))): raise TypeError("mask should be boolean numpy array. Use " "the 'integer_array' function instead") From 76162f50d415c8d2a8e6f38cd7731e3692d60a40 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 3 Jan 2019 20:14:15 -0800 Subject: [PATCH 10/10] More properties / methods --- pandas/core/arrays/_mask.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/_mask.py b/pandas/core/arrays/_mask.py index 81ea524989f2c..7b908f519be95 100644 --- a/pandas/core/arrays/_mask.py +++ b/pandas/core/arrays/_mask.py @@ -56,7 +56,7 @@ def __invert__(self): if self._has_bitarray: raise NotImplementedError - return ~self._data + return type(self)(~self._data) def __or__(self, other): if self._has_bitarray: @@ -77,17 +77,24 @@ def nbytes(self): return self._data.nbytes - def astype(self, dtype, copy=False): + @property + def size(self): if self._has_bitarray: raise NotImplementedError - return self._data.astype(dtype, copy=copy) + return self._data.size - def copy(self): + def astype(self, dtype, copy=False): if self._has_bitarray: raise NotImplementedError - return self._data.copy() + return self._data.astype(dtype, copy=copy) + + def any(self): + return self._data.any() + + def copy(self): + return type(self)(self._data.copy()) def sum(self): if self._has_bitarray: