Skip to content

Commit 1c49fb0

Browse files
committed
ENH: create NumpyBoolArray and ArrowBoolArray subclasses for MaskArray
1 parent e6d8dd8 commit 1c49fb0

19 files changed

+484
-323
lines changed

pandas/core/arrays/_mask.py

-103
This file was deleted.

pandas/core/arrays/integer.py

+8-24
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,13 @@
1414
is_bool_dtype, is_float, is_float_dtype, is_integer, is_integer_dtype,
1515
is_list_like, is_object_dtype, is_scalar)
1616
from pandas.core.dtypes.dtypes import register_extension_dtype
17-
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
17+
from pandas.core.dtypes.generic import ABCIndexClass, ABCMaskArray, ABCSeries
1818
from pandas.core.dtypes.missing import isna, notna
1919

2020
from pandas.core import nanops
2121
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
22-
<<<<<<< HEAD
22+
from pandas.core.arrays.mask import get_mask_array_type
2323
from pandas.core.tools.numeric import to_numeric
24-
=======
25-
from pandas.core.arrays._mask import NAMask
26-
>>>>>>> 384287e71... Revert unnecessary changes from master
2724

2825

2926
class _IntegerDtype(ExtensionDtype):
@@ -291,8 +288,8 @@ def __init__(self, values, mask, copy=False):
291288
and is_integer_dtype(values.dtype)):
292289
raise TypeError("values should be integer numpy array. Use "
293290
"the 'integer_array' function instead")
294-
if not (isinstance(mask, NAMask) or (
295-
isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype))):
291+
if not (isinstance(mask, (np.ndarray, ABCMaskArray)) and
292+
is_bool_dtype(mask.dtype)):
296293
raise TypeError("mask should be boolean numpy array. Use "
297294
"the 'integer_array' function instead")
298295

@@ -301,7 +298,7 @@ def __init__(self, values, mask, copy=False):
301298
mask = mask.copy()
302299

303300
self._data = values
304-
self._mask = NAMask(mask)
301+
self._mask = get_mask_array_type()._from_sequence(mask, copy=False)
305302

306303
@classmethod
307304
def _from_sequence(cls, scalars, dtype=None, copy=False):
@@ -328,22 +325,7 @@ def __getitem__(self, item):
328325
if self._mask[item]:
329326
return self.dtype.na_value
330327
return self._data[item]
331-
<<<<<<< HEAD
332328
return type(self)(self._data[item], self._mask[item])
333-
=======
334-
335-
<<<<<<< HEAD
336-
return type(self)(self._data[item],
337-
<<<<<<< HEAD
338-
mask=_bitarray_to_numpy(self._mask)[item],
339-
dtype=self.dtype)
340-
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
341-
=======
342-
mask=_bitarray_to_numpy(self._mask)[item])
343-
>>>>>>> e085674ac... Reverted changes; created new module for mask
344-
=======
345-
return type(self)(self._data[item], self._mask[item])
346-
>>>>>>> 384287e71... Revert unnecessary changes from master
347329

348330
def _coerce_to_ndarray(self):
349331
"""
@@ -352,7 +334,8 @@ def _coerce_to_ndarray(self):
352334

353335
# TODO(jreback) make this better
354336
data = self._data.astype(object)
355-
data[self._mask] = self._na_value
337+
mask = np.array(self._mask, copy=False)
338+
data[mask] = self._na_value
356339
return data
357340

358341
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
@@ -581,6 +564,7 @@ def cmp_method(self, other):
581564
else:
582565
mask = self._mask | mask
583566

567+
mask = np.array(mask, copy=False)
584568
result[mask] = op_name == 'ne'
585569
return result
586570

pandas/core/arrays/mask/__init__.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
_MaskArrayType = None
2+
3+
4+
def get_mask_array_type():
    """Return the array class used to store boolean masks.

    The class is chosen on the first call and cached in the module-level
    ``_MaskArrayType``; later calls return the cached class. The choice is
    made lazily because the implementations (e.g. pyarrow) depend on pandas
    being importable.

    ``ArrowBoolArray`` is preferred; if importing it raises ``ImportError``
    the function falls back to ``NumpyBoolArray``.
    """
    global _MaskArrayType

    if _MaskArrayType is None:
        try:
            from pandas.core.arrays.mask._pyarrow import ArrowBoolArray
        except ImportError:
            # pyarrow-backed implementation unavailable: use the numpy one
            from pandas.core.arrays.mask._numpy import NumpyBoolArray

            _MaskArrayType = NumpyBoolArray
        else:
            _MaskArrayType = ArrowBoolArray

    return _MaskArrayType
28+
29+
30+
__all__ = ['get_mask_array_type']

pandas/core/arrays/mask/_base.py

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""A boolean mask interface.
2+
3+
This module provides an interface to a numpy / pyarrow boolean mask.
4+
This is limited as not all of the implementations can hold NA, so
5+
for consistency this is an internal.
6+
"""
7+
8+
import copy
9+
10+
import numpy as np
11+
12+
from pandas import compat
13+
from pandas.api.extensions import ExtensionDtype
14+
from pandas.core.arrays.base import ExtensionArray
15+
from pandas.core.missing import isna
16+
17+
18+
class BoolDtype(ExtensionDtype):
    """Extension dtype for the boolean values held by a mask array.

    An instance compares equal to itself, to the string ``'bool'``, to a
    numpy dtype that itself equals ``'bool'``, and to any other hashable
    object whose hash matches this dtype's hash.
    """

    type = np.bool_
    kind = 'b'
    name = 'bool'

    @classmethod
    def construct_from_string(cls, string):
        """Return a new instance if ``string`` equals the dtype name.

        Raises
        ------
        TypeError
            If ``string`` is anything other than ``cls.name``.
        """
        if string == cls.name:
            return cls()
        raise TypeError("Cannot construct a '{}' from "
                        "'{}'".format(cls, string))

    @property
    def _is_boolean(self):
        # Exposed as a property so that attribute access yields ``True``
        # itself rather than an (always truthy) bound method.
        return True

    def __hash__(self):
        # Hash of the string form, so instances hash alike.
        return hash(str(self))

    def __eq__(self, other):
        if other is self:
            return True
        if isinstance(other, np.dtype):
            # compare == to np.dtype('bool')
            return other == 'bool'
        if isinstance(other, compat.string_types):
            return other == self.name
        try:
            return hash(self) == hash(other)
        except TypeError:
            # Unhashable objects (lists, dicts, ...) can never be equal;
            # an equality test should not raise for them.
            return False
48+
49+
50+
class BoolArray(ExtensionArray):
    """Common base class for both the pyarrow and numpy mask arrays.

    This class reads ``self._data`` and ``self._dtype`` and calls
    ``from_scalars`` / ``__getitem__`` without defining them; concrete
    subclasses are expected to supply those (NOTE(review): confirm against
    the numpy / pyarrow implementations).

    Conversions use ``np.asarray`` rather than ``np.array(..., copy=False)``:
    under NumPy 2 the latter raises ``ValueError`` whenever a copy cannot be
    avoided, whereas ``np.asarray`` copies only when required.
    """
    _typ = "maskarray"

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # ``dtype`` and ``copy`` are accepted but not used here.
        return cls.from_scalars(scalars)

    @property
    def dtype(self):
        return self._dtype

    @property
    def size(self):
        """Number of elements, same as ``len(self)``."""
        return len(self)

    def __eq__(self, other):
        """Element-wise equality computed on the numpy views of both sides."""
        return np.asarray(self) == np.asarray(other)

    def __len__(self):
        return len(self._data)

    def isna(self):
        """Return a new array of the same type flagging missing entries."""
        nas = isna(np.asarray(self._data))
        return type(self).from_scalars(nas)

    def __invert__(self):
        return type(self).from_scalars(~np.asarray(self._data))

    def __or__(self, other):
        return type(self).from_scalars(
            np.asarray(self) | np.asarray(other))

    def __ior__(self, other):
        # Not truly in-place: a new array is built and returned.
        return type(self).from_scalars(
            np.asarray(self) | np.asarray(other))

    def __and__(self, other):
        return type(self).from_scalars(
            np.asarray(self) & np.asarray(other))

    def __iand__(self, other):
        # Not truly in-place: a new array is built and returned.
        return type(self).from_scalars(
            np.asarray(self) & np.asarray(other))

    def view(self, dtype=None):
        """Return the data as a numpy array, reinterpreted as ``dtype``
        when one is given."""
        arr = np.asarray(self._data)
        if dtype is not None:
            arr = arr.view(dtype=dtype)
        return arr

    def sum(self, axis=None):
        # ``axis`` is accepted for signature compatibility but ignored.
        return np.asarray(self).sum()

    def copy(self, deep=False):
        """Return a new array of the same type wrapping a shallow or deep
        copy of the underlying data."""
        # ``copy`` below is the module-level ``copy`` module.
        if deep:
            return type(self)(copy.deepcopy(self._data))
        return type(self)(copy.copy(self._data))

    def any(self, axis=0, out=None):
        # ``axis`` and ``out`` are accepted but ignored.
        return np.asarray(self._data).any()

    def all(self, axis=0, out=None):
        # ``axis`` and ``out`` are accepted but ignored.
        return np.asarray(self._data).all()

    def _reduce(self, method, skipna=True, **kwargs):
        """Apply the reduction named ``method`` and return its result.

        When ``skipna`` is true, entries flagged by ``isna()`` are dropped
        first. ``kwargs`` are forwarded to the reduction method.

        Raises
        ------
        TypeError
            If the array has no method called ``method``.
        """
        arr = self[~self.isna()] if skipna else self

        try:
            op = getattr(arr, method)
        except AttributeError:
            raise TypeError("'{}' does not support the reduction "
                            "'{}'".format(type(self).__name__, method))
        return op(**kwargs)

0 commit comments

Comments
 (0)