Skip to content

Commit 1f87ddd

Browse files
committed
TST: Arrow-backed BoolArray
1 parent 73dd6ec commit 1f87ddd

File tree

3 files changed

+147
-0
lines changed

3 files changed

+147
-0
lines changed

pandas/tests/extension/arrow/__init__.py

Whitespace-only changes.

pandas/tests/extension/arrow/bool.py

+99
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import copy
2+
import itertools
3+
4+
import numpy as np
5+
import pyarrow as pa
6+
import pandas as pd
7+
from pandas.api.extensions import (
8+
ExtensionDtype, ExtensionArray
9+
)
10+
11+
12+
# @register_extension_dtype
13+
class ArrowBoolDtype(ExtensionDtype):
    """Extension dtype for boolean data backed by pyarrow."""

    type = np.bool_
    kind = 'b'
    name = 'arrow_bool'
    # The missing-value marker for this dtype is pyarrow's NULL object.
    na_value = pa.NULL

    @classmethod
    def construct_from_string(cls, string):
        """Return an instance when ``string`` equals the dtype's ``name``.

        Raises
        ------
        TypeError
            If ``string`` is anything other than ``cls.name``.
        """
        if string != cls.name:
            raise TypeError("Cannot construct a '{}' from "
                            "'{}'".format(cls, string))
        return cls()

    @classmethod
    def construct_array_type(cls):
        """Return the array class associated with this dtype."""
        return ArrowBoolArray
31+
32+
33+
class ArrowBoolArray(ExtensionArray):
    """ExtensionArray of booleans stored in a ``pyarrow.ChunkedArray``."""

    def __init__(self, values):
        """Wrap an existing boolean ``pyarrow.ChunkedArray``.

        Raises
        ------
        ValueError
            If ``values`` is not a ``pyarrow.ChunkedArray``.
        """
        if not isinstance(values, pa.ChunkedArray):
            raise ValueError(
                "values must be a pyarrow.ChunkedArray, got "
                "'{}'".format(type(values).__name__))

        assert values.type == pa.bool_()
        self._data = values
        self._dtype = ArrowBoolDtype()

    def __repr__(self):
        return "ArrowBoolArray({})".format(repr(self._data))

    @classmethod
    def from_scalars(cls, values):
        """Build an array from a sequence of scalars via ``np.asarray``."""
        arr = pa.chunked_array([pa.array(np.asarray(values))])
        return cls(arr)

    @classmethod
    def from_array(cls, arr):
        """Build an array from a single ``pyarrow.Array`` (one chunk)."""
        assert isinstance(arr, pa.Array)
        return cls(pa.chunked_array([arr]))

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """Construct from scalars; ``dtype`` and ``copy`` are not used."""
        return cls.from_scalars(scalars)

    def __getitem__(self, item):
        # NOTE(review): this returns whatever indexing the ``to_pandas()``
        # result yields, so non-scalar ``item`` does not come back as an
        # ArrowBoolArray -- confirm whether callers need that.
        return self._data.to_pandas()[item]

    def __len__(self):
        return len(self._data)

    @property
    def dtype(self):
        """The ``ArrowBoolDtype`` instance created in ``__init__``."""
        return self._dtype

    @property
    def nbytes(self):
        """Sum of ``size`` over every non-None buffer of every chunk."""
        return sum(x.size for chunk in self._data.chunks
                   for x in chunk.buffers()
                   if x is not None)

    def isna(self):
        """Return ``pd.isna`` applied to the data converted by ``to_pandas``."""
        return pd.isna(self._data.to_pandas())

    def take(self, indices, allow_fill=False, fill_value=None):
        """Take elements by position using ``pandas.core.algorithms.take``.

        When ``allow_fill`` is true and no ``fill_value`` is given, the
        dtype's ``na_value`` is used as the fill value.
        """
        from pandas.core.algorithms import take
        data = self._data.to_pandas()

        if allow_fill and fill_value is None:
            fill_value = self.dtype.na_value

        result = take(data, indices, fill_value=fill_value,
                      allow_fill=allow_fill)
        return self._from_sequence(result, dtype=self.dtype)

    def copy(self, deep=False):
        """Return a new ArrowBoolArray wrapping a copy of the data.

        The copied ChunkedArray is wrapped again so the result has the same
        type as ``self`` rather than being a bare pyarrow object.
        """
        # ``copy`` below is the module-level ``copy`` import; the method name
        # lives in the class namespace and is not visible from in here.
        if deep:
            data = copy.deepcopy(self._data)
        else:
            data = copy.copy(self._data)
        return type(self)(data)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate arrays by chaining their chunks into one ChunkedArray.

        Must be a classmethod: the body constructs the result with
        ``cls(arr)``.
        """
        chunks = list(itertools.chain.from_iterable(x._data.chunks
                                                    for x in to_concat))
        arr = pa.chunked_array(chunks)
        return cls(arr)
+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import numpy as np
2+
import pytest
3+
import pandas as pd
4+
import pandas.util.testing as tm
5+
from pandas.tests.extension import base
6+
7+
pytest.importorskip('pyarrow')
8+
9+
from .bool import ArrowBoolDtype, ArrowBoolArray
10+
11+
12+
@pytest.fixture
def dtype():
    """Provide a fresh ArrowBoolDtype instance."""
    arrow_dtype = ArrowBoolDtype()
    return arrow_dtype
15+
16+
17+
@pytest.fixture
def data():
    """Provide an ArrowBoolArray built from 100 random boolean values."""
    values = np.random.randint(0, 2, size=100, dtype=bool)
    return ArrowBoolArray.from_scalars(values)
21+
22+
23+
class BaseArrowTests(object):
    """Empty common base mixed into every Arrow test class in this module."""
25+
26+
27+
class TestDtype(BaseArrowTests, base.BaseDtypeTests):
    """Run the shared dtype tests, skipping one inherited case."""

    def test_array_type_with_arg(self, data, dtype):
        # Skipped with the issue reference as the reason.
        pytest.skip("GH-22666")
30+
31+
32+
class TestInterface(BaseArrowTests, base.BaseInterfaceTests):
    """Run the shared interface tests, skipping ``test_repr`` for now."""

    def test_repr(self, data):
        # pytest.skip raises its own exception, so a leading ``raise`` is
        # redundant and never reached.
        pytest.skip("TODO")
35+
36+
37+
class TestConstructors(BaseArrowTests, base.BaseConstructorsTests):
    """Run the shared constructor tests, skipping one inherited case."""

    def test_from_dtype(self, data):
        # Skipped with the issue reference as the reason.
        pytest.skip("GH-22666")
40+
41+
42+
def test_is_bool_dtype(data):
    """Check the array is seen as boolean and can mask a Series."""
    assert pd.api.types.is_bool_dtype(data)
    assert pd.core.common.is_bool_indexer(data)

    ser = pd.Series(range(len(data)))
    # Masking with the extension array must match masking with its
    # NumPy conversion.
    expected = ser[np.asarray(data)]
    result = ser[data]
    tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)