Skip to content

Commit e6d8dd8

Browse files
WillAyd authored and jreback committed
First pass at implementation (needs refactor)
Shared methods to simplify implementation; Added try...except for non-ndarray Series _set_with; Minor fixups; Reverted changes; created new module for mask; Reverted test from master; Revert unnecessary changes from master; Added iter to mask; Added more dunders; More properties / methods
1 parent 5557e36 commit e6d8dd8

File tree

4 files changed

+161
-2
lines changed

4 files changed

+161
-2
lines changed

pandas/core/arrays/_mask.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import numpy as np
2+
3+
4+
class NAMask(object):
    """Generic class which can be used to represent missing data.

    Will use bitarray if available; otherwise will use numpy.

    Attributes
    ----------
    _has_bitarray : bool
        True when the optional ``bitarray`` package could be imported and
        ``_data`` is therefore a ``bitarray``; False when ``_data`` is a
        boolean numpy array.
    _data : bitarray or numpy.ndarray
        The underlying mask storage.

    Notes
    -----
    Most operations are only implemented for the numpy-backed storage and
    raise ``NotImplementedError`` when the mask is backed by a bitarray.
    """

    def __init__(self, mask):
        """
        Parameters
        ----------
        mask : numpy array
            Mask of missing values.
        """
        try:
            # Availability probe only; the class itself is imported where
            # it is used so no module-level global has to be patched.
            import bitarray  # noqa: F401
        except ImportError:
            # ModuleNotFoundError is a subclass of ImportError, so a single
            # clause covers both and also works on older interpreters.
            self._has_bitarray = False
            self._data = mask.astype(bool, copy=False)
        else:
            self._has_bitarray = True
            self._data = self._numpy_to_bitarray(mask)

    def _numpy_to_bitarray(self, arr):
        """Pack a boolean-like array into a new ``bitarray``.

        Parameters
        ----------
        arr : array-like
            Values to pack; every non-zero element becomes a set bit.

        Returns
        -------
        bitarray
        """
        # Import the class, not the module: calling the module object
        # raises TypeError.
        from bitarray import bitarray

        bit_arr = bitarray()
        # pack() consumes one byte per bit, so hand it a contiguous
        # one-byte-per-element buffer.
        bit_arr.pack(np.ascontiguousarray(arr, dtype=bool).tobytes())
        return bit_arr

    def _bitarray_to_numpy(self, arr):
        """Unpack a ``bitarray`` into a writable boolean numpy array.

        Parameters
        ----------
        arr : bitarray

        Returns
        -------
        numpy.ndarray
            One boolean element per bit of ``arr``.
        """
        # Request explicit 0/1 bytes so the buffer is a valid bool view
        # regardless of the library's default "one" byte.
        raw = arr.unpack(zero=b'\x00', one=b'\x01')
        # frombuffer yields a read-only view of the bytes; copy so callers
        # get a writable array like the deprecated np.fromstring returned.
        return np.frombuffer(raw, dtype=bool).copy()

    def __getitem__(self, item):
        """Return the element(s) of the mask selected by ``item``."""
        if self._has_bitarray:
            raise NotImplementedError

        return self._data[item]

    def __setitem__(self, key, value):
        """Assign ``value`` to the position(s) of the mask given by ``key``."""
        if self._has_bitarray:
            raise NotImplementedError

        self._data[key] = value

    def __array__(self, dtype=None, copy=None):
        """Expose the mask to numpy.

        Parameters
        ----------
        dtype : dtype, optional
            Requested dtype; when it differs from the stored dtype a
            converted copy is returned.
        copy : bool, optional
            ``True`` forces a copy; ``False`` forbids one.

        Returns
        -------
        numpy.ndarray

        Raises
        ------
        NotImplementedError
            If the mask is backed by a bitarray.
        ValueError
            If ``copy=False`` but a dtype conversion would need a copy.
        """
        if self._has_bitarray:
            raise NotImplementedError

        data = self._data
        if dtype is not None and np.dtype(dtype) != data.dtype:
            if copy is False:
                raise ValueError("a copy is required to convert the mask "
                                 "to the requested dtype")
            return data.astype(dtype)
        return data.copy() if copy else data

    def __iter__(self):
        """Iterate over the elements of the underlying storage."""
        return iter(self._data)

    def __invert__(self):
        """Return a new mask holding the element-wise negation."""
        if self._has_bitarray:
            raise NotImplementedError

        return type(self)(~self._data)

    def __or__(self, other):
        """Return the element-wise OR of the mask with ``other``.

        The result is whatever ``ndarray | other`` yields (a numpy array
        for array-like ``other``), not a ``NAMask``.
        """
        if self._has_bitarray:
            raise NotImplementedError

        # Use the operator rather than calling the dunder directly so that
        # reflected operations are honoured and NotImplemented never leaks.
        return self._data | other

    def __ior__(self, other):
        """OR ``other`` into this mask in place and return the mask itself.

        Returning ``self`` keeps ``mask |= other`` from silently rebinding
        the name to a bare numpy array.
        """
        if self._has_bitarray:
            raise NotImplementedError

        self._data |= np.asarray(other, dtype=bool)
        return self

    @property
    def nbytes(self):
        """Number of bytes used by the underlying storage."""
        if self._has_bitarray:
            # buffer_info() reports the byte size in its second field.
            return self._data.buffer_info()[1]

        return self._data.nbytes

    @property
    def size(self):
        """Number of elements in the mask (numpy storage only)."""
        if self._has_bitarray:
            raise NotImplementedError

        return self._data.size

    def astype(self, dtype, copy=False):
        """Return the mask as a numpy array cast to ``dtype``.

        Parameters
        ----------
        dtype : dtype
        copy : bool, default False
            Passed through to ``ndarray.astype``.
        """
        if self._has_bitarray:
            raise NotImplementedError

        return self._data.astype(dtype, copy=copy)

    def any(self):
        """Return whether any element of the mask is set."""
        return self._data.any()

    def copy(self):
        """Return a new mask with an independent copy of the storage."""
        # Build the instance directly: going through __init__ would try to
        # re-convert storage that is already in its final representation,
        # which fails when that storage is a bitarray.
        result = type(self).__new__(type(self))
        result._has_bitarray = self._has_bitarray
        result._data = self._data.copy()
        return result

    def sum(self):
        """Return the number of set elements (numpy storage only)."""
        if self._has_bitarray:
            raise NotImplementedError

        return self._data.sum()

pandas/core/arrays/integer.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919

2020
from pandas.core import nanops
2121
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
22+
<<<<<<< HEAD
2223
from pandas.core.tools.numeric import to_numeric
24+
=======
25+
from pandas.core.arrays._mask import NAMask
26+
>>>>>>> 384287e71... Revert unnecessary changes from master
2327

2428

2529
class _IntegerDtype(ExtensionDtype):
@@ -287,7 +291,8 @@ def __init__(self, values, mask, copy=False):
287291
and is_integer_dtype(values.dtype)):
288292
raise TypeError("values should be integer numpy array. Use "
289293
"the 'integer_array' function instead")
290-
if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
294+
if not (isinstance(mask, NAMask) or (
295+
isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype))):
291296
raise TypeError("mask should be boolean numpy array. Use "
292297
"the 'integer_array' function instead")
293298

@@ -296,7 +301,7 @@ def __init__(self, values, mask, copy=False):
296301
mask = mask.copy()
297302

298303
self._data = values
299-
self._mask = mask
304+
self._mask = NAMask(mask)
300305

301306
@classmethod
302307
def _from_sequence(cls, scalars, dtype=None, copy=False):
@@ -323,7 +328,22 @@ def __getitem__(self, item):
323328
if self._mask[item]:
324329
return self.dtype.na_value
325330
return self._data[item]
331+
<<<<<<< HEAD
326332
return type(self)(self._data[item], self._mask[item])
333+
=======
334+
335+
<<<<<<< HEAD
336+
return type(self)(self._data[item],
337+
<<<<<<< HEAD
338+
mask=_bitarray_to_numpy(self._mask)[item],
339+
dtype=self.dtype)
340+
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
341+
=======
342+
mask=_bitarray_to_numpy(self._mask)[item])
343+
>>>>>>> e085674ac... Reverted changes; created new module for mask
344+
=======
345+
return type(self)(self._data[item], self._mask[item])
346+
>>>>>>> 384287e71... Revert unnecessary changes from master
327347

328348
def _coerce_to_ndarray(self):
329349
"""

pandas/core/series.py

+9
Original file line numberDiff line numberDiff line change
@@ -1067,9 +1067,18 @@ def _set_with(self, key, value):
10671067
except Exception:
10681068
pass
10691069

1070+
<<<<<<< HEAD
1071+
<<<<<<< HEAD
10701072
if is_scalar(key):
10711073
key = [key]
10721074
elif not isinstance(key, (list, Series, np.ndarray)):
1075+
=======
1076+
if not isinstance(key, (
1077+
list, Series, np.ndarray, Series)):
1078+
>>>>>>> cae87e9df... Added try...except for non-ndarray Series _set_with
1079+
=======
1080+
if not isinstance(key, (list, Series, np.ndarray, Series)):
1081+
>>>>>>> 65893ae93... Minor fixups
10731082
try:
10741083
key = list(key)
10751084
except Exception:

pandas/util/testing.py

+27
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@
1515
import traceback
1616
import warnings
1717

18+
<<<<<<< HEAD
19+
=======
20+
from datetime import datetime
21+
from functools import wraps
22+
from contextlib import contextmanager
23+
24+
from bitarray import bitarray
25+
from numpy.random import randn, rand
26+
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
1827
import numpy as np
1928
from numpy.random import rand, randn
2029

@@ -1195,15 +1204,33 @@ def assert_extension_array_equal(left, right, check_dtype=True,
11951204
A mask of missing values is computed for each and checked to match.
11961205
The remaining all-valid values are cast to object dtype and checked.
11971206
"""
1207+
<<<<<<< HEAD
11981208
assert isinstance(left, ExtensionArray), 'left is not an ExtensionArray'
11991209
assert isinstance(right, ExtensionArray), 'right is not an ExtensionArray'
12001210
if check_dtype:
12011211
assert_attr_equal('dtype', left, right, obj='ExtensionArray')
1212+
<<<<<<< HEAD
1213+
=======
1214+
assert isinstance(left, ExtensionArray)
1215+
assert left.dtype == right.dtype
1216+
left_na = left.isna()
1217+
right_na = right.isna()
1218+
1219+
# TODO - maybe generate dedicated method for bitarray comparison?
1220+
if isinstance(left_na, bitarray):
1221+
left_na = np.fromstring(left_na.unpack(), dtype=bool)
1222+
if isinstance(right_na, bitarray):
1223+
right_na = np.fromstring(right_na.unpack(), dtype=bool)
1224+
1225+
assert_numpy_array_equal(left_na, right_na)
1226+
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
12021227

12031228
if hasattr(left, "asi8") and type(right) == type(left):
12041229
# Avoid slow object-dtype comparisons
12051230
assert_numpy_array_equal(left.asi8, right.asi8)
12061231
return
1232+
=======
1233+
>>>>>>> 36256a718... Reverted test from master
12071234

12081235
left_na = np.asarray(left.isna())
12091236
right_na = np.asarray(right.isna())

0 commit comments

Comments
 (0)