Skip to content

Commit 7b4b60f

Browse files
WillAyd authored and jreback committed
First pass at implementation (needs refactor)
Shared methods to simplify implementation
Added try...except for non-ndarray Series _set_with
Minor fixups
Reverted changes; created new module for mask
Reverted test from master
Revert unnecessary changes from master
Added iter to mask
Added more dunders
More properties / methods
1 parent 61edc76 commit 7b4b60f

File tree

4 files changed

+161
-2
lines changed

4 files changed

+161
-2
lines changed

pandas/core/arrays/_mask.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import numpy as np
2+
3+
4+
class NAMask():
5+
"""Generic class which can be used to represent missing data.
6+
7+
Will use bitarray if available; otherwise will use numpy."""
8+
9+
def __init__(self, mask):
10+
"""
11+
Parameters
12+
----------
13+
mask : numpy array
14+
Mask of missing values.
15+
"""
16+
17+
self._has_bitarray = False
18+
try:
19+
import bitarray
20+
globals()['bitarray'] = bitarray
21+
self._has_bitarray = True
22+
self._data = self._numpy_to_bitarray(mask)
23+
except (ImportError, ModuleNotFoundError):
24+
self._data = mask.astype(bool, copy=False)
25+
26+
def _numpy_to_bitarray(self, arr):
27+
bit_arr = bitarray()
28+
bit_arr.pack(arr.astype(bool, copy=False))
29+
30+
def _bitarray_to_numpy(self, arr):
31+
return np.fromstring(arr.unpack(), dtype=bool)
32+
33+
def __getitem__(self, item):
34+
if self._has_bitarray:
35+
raise NotImplementedError
36+
37+
return self._data[item]
38+
39+
def __setitem__(self, key, value):
40+
if self._has_bitarray:
41+
raise NotImplementedError
42+
43+
self._data[key] = value
44+
45+
def __array__(self):
46+
if self._has_bitarray:
47+
raise NotImplementedError
48+
49+
return self._data
50+
51+
def __iter__(self):
52+
for i in range(len(self._data)):
53+
yield self._data[i]
54+
55+
def __invert__(self):
56+
if self._has_bitarray:
57+
raise NotImplementedError
58+
59+
return type(self)(~self._data)
60+
61+
def __or__(self, other):
62+
if self._has_bitarray:
63+
raise NotImplementedError
64+
65+
return self._data.__or__(other)
66+
67+
def __ior__(self, other):
68+
if self._has_bitarray:
69+
raise NotImplementedError
70+
71+
return self._data | other
72+
73+
@property
74+
def nbytes(self):
75+
if self._has_bitarray:
76+
return self._data.buffer_info()[1]
77+
78+
return self._data.nbytes
79+
80+
@property
81+
def size(self):
82+
if self._has_bitarray:
83+
raise NotImplementedError
84+
85+
return self._data.size
86+
87+
def astype(self, dtype, copy=False):
88+
if self._has_bitarray:
89+
raise NotImplementedError
90+
91+
return self._data.astype(dtype, copy=copy)
92+
93+
def any(self):
94+
return self._data.any()
95+
96+
def copy(self):
97+
return type(self)(self._data.copy())
98+
99+
def sum(self):
100+
if self._has_bitarray:
101+
raise NotImplementedError
102+
103+
return self._data.sum()

pandas/core/arrays/integer.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919

2020
from pandas.core import nanops
2121
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
22+
<<<<<<< HEAD
2223
from pandas.core.tools.numeric import to_numeric
24+
=======
25+
from pandas.core.arrays._mask import NAMask
26+
>>>>>>> 384287e71... Revert unnecessary changes from master
2327

2428

2529
class _IntegerDtype(ExtensionDtype):
@@ -287,7 +291,8 @@ def __init__(self, values, mask, copy=False):
287291
and is_integer_dtype(values.dtype)):
288292
raise TypeError("values should be integer numpy array. Use "
289293
"the 'integer_array' function instead")
290-
if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
294+
if not (isinstance(mask, NAMask) or (
295+
isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype))):
291296
raise TypeError("mask should be boolean numpy array. Use "
292297
"the 'integer_array' function instead")
293298

@@ -296,7 +301,7 @@ def __init__(self, values, mask, copy=False):
296301
mask = mask.copy()
297302

298303
self._data = values
299-
self._mask = mask
304+
self._mask = NAMask(mask)
300305

301306
@classmethod
302307
def _from_sequence(cls, scalars, dtype=None, copy=False):
@@ -323,7 +328,22 @@ def __getitem__(self, item):
323328
if self._mask[item]:
324329
return self.dtype.na_value
325330
return self._data[item]
331+
<<<<<<< HEAD
326332
return type(self)(self._data[item], self._mask[item])
333+
=======
334+
335+
<<<<<<< HEAD
336+
return type(self)(self._data[item],
337+
<<<<<<< HEAD
338+
mask=_bitarray_to_numpy(self._mask)[item],
339+
dtype=self.dtype)
340+
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
341+
=======
342+
mask=_bitarray_to_numpy(self._mask)[item])
343+
>>>>>>> e085674ac... Reverted changes; created new module for mask
344+
=======
345+
return type(self)(self._data[item], self._mask[item])
346+
>>>>>>> 384287e71... Revert unnecessary changes from master
327347

328348
def _coerce_to_ndarray(self):
329349
"""

pandas/core/series.py

+9
Original file line numberDiff line numberDiff line change
@@ -1062,9 +1062,18 @@ def _set_with(self, key, value):
10621062
except Exception:
10631063
pass
10641064

1065+
<<<<<<< HEAD
1066+
<<<<<<< HEAD
10651067
if is_scalar(key):
10661068
key = [key]
10671069
elif not isinstance(key, (list, Series, np.ndarray)):
1070+
=======
1071+
if not isinstance(key, (
1072+
list, Series, np.ndarray, Series)):
1073+
>>>>>>> cae87e9df... Added try...except for non-ndarray Series _set_with
1074+
=======
1075+
if not isinstance(key, (list, Series, np.ndarray, Series)):
1076+
>>>>>>> 65893ae93... Minor fixups
10681077
try:
10691078
key = list(key)
10701079
except Exception:

pandas/util/testing.py

+27
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@
1515
import warnings
1616
import zipfile
1717

18+
<<<<<<< HEAD
19+
=======
20+
from datetime import datetime
21+
from functools import wraps
22+
from contextlib import contextmanager
23+
24+
from bitarray import bitarray
25+
from numpy.random import randn, rand
26+
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
1827
import numpy as np
1928
from numpy.random import rand, randn
2029

@@ -938,15 +947,33 @@ def assert_extension_array_equal(left, right, check_dtype=True,
938947
A mask of missing values is computed for each and checked to match.
939948
The remaining all-valid values are cast to object dtype and checked.
940949
"""
950+
<<<<<<< HEAD
941951
assert isinstance(left, ExtensionArray), 'left is not an ExtensionArray'
942952
assert isinstance(right, ExtensionArray), 'right is not an ExtensionArray'
943953
if check_dtype:
944954
assert_attr_equal('dtype', left, right, obj='ExtensionArray')
955+
<<<<<<< HEAD
956+
=======
957+
assert isinstance(left, ExtensionArray)
958+
assert left.dtype == right.dtype
959+
left_na = left.isna()
960+
right_na = right.isna()
961+
962+
# TODO - maybe generate dedicated method for bitarray comparison?
963+
if isinstance(left_na, bitarray):
964+
left_na = np.fromstring(left_na.unpack(), dtype=bool)
965+
if isinstance(right_na, bitarray):
966+
right_na = np.fromstring(right_na.unpack(), dtype=bool)
967+
968+
assert_numpy_array_equal(left_na, right_na)
969+
>>>>>>> 2ff4b0907... First pass at implementation (needs refactor)
945970

946971
if hasattr(left, "asi8") and type(right) == type(left):
947972
# Avoid slow object-dtype comparisons
948973
assert_numpy_array_equal(left.asi8, right.asi8)
949974
return
975+
=======
976+
>>>>>>> 36256a718... Reverted test from master
950977

951978
left_na = np.asarray(left.isna())
952979
right_na = np.asarray(right.isna())

0 commit comments

Comments
 (0)