From 1f87ddd781b0f1483dd89ee6d4f8613216e9c90d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 11 Sep 2018 14:12:20 -0500 Subject: [PATCH 1/7] TST: Arrow-backed BoolArray --- pandas/tests/extension/arrow/__init__.py | 0 pandas/tests/extension/arrow/bool.py | 99 +++++++++++++++++++++++ pandas/tests/extension/arrow/test_bool.py | 48 +++++++++++ 3 files changed, 147 insertions(+) create mode 100644 pandas/tests/extension/arrow/__init__.py create mode 100644 pandas/tests/extension/arrow/bool.py create mode 100644 pandas/tests/extension/arrow/test_bool.py diff --git a/pandas/tests/extension/arrow/__init__.py b/pandas/tests/extension/arrow/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py new file mode 100644 index 0000000000000..5fc30536f9670 --- /dev/null +++ b/pandas/tests/extension/arrow/bool.py @@ -0,0 +1,99 @@ +import copy +import itertools + +import numpy as np +import pyarrow as pa +import pandas as pd +from pandas.api.extensions import ( + ExtensionDtype, ExtensionArray +) + + +# @register_extension_dtype +class ArrowBoolDtype(ExtensionDtype): + + type = np.bool_ + kind = 'b' + name = 'arrow_bool' + na_value = pa.NULL + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + @classmethod + def construct_array_type(cls): + return ArrowBoolArray + + +class ArrowBoolArray(ExtensionArray): + def __init__(self, values): + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.bool_() + self._data = values + self._dtype = ArrowBoolDtype() + + def __repr__(self): + return "ArrowBoolArray({})".format(repr(self._data)) + + @classmethod + def from_scalars(cls, values): + arr = pa.chunked_array([pa.array(np.asarray(values))]) + return cls(arr) + + @classmethod + def from_array(cls, arr): + 
assert isinstance(arr, pa.Array) + return cls(pa.chunked_array([arr])) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls.from_scalars(scalars) + + def __getitem__(self, item): + return self._data.to_pandas()[item] + + def __len__(self): + return len(self._data) + + @property + def dtype(self): + return self._dtype + + @property + def nbytes(self): + return sum(x.size for chunk in self._data.chunks + for x in chunk.buffers() + if x is not None) + + def isna(self): + return pd.isna(self._data.to_pandas()) + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + data = self._data.to_pandas() + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indices, fill_value=fill_value, + allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + + def copy(self, deep=False): + if deep: + return copy.deepcopy(self._data) + else: + return copy.copy(self._data) + + def _concat_same_type(cls, to_concat): + chunks = list(itertools.chain.from_iterable(x._data.chunks + for x in to_concat)) + arr = pa.chunked_array(chunks) + return cls(arr) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py new file mode 100644 index 0000000000000..defa0b0adcf14 --- /dev/null +++ b/pandas/tests/extension/arrow/test_bool.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest +import pandas as pd +import pandas.util.testing as tm +from pandas.tests.extension import base + +pytest.importorskip('pyarrow') + +from .bool import ArrowBoolDtype, ArrowBoolArray + + +@pytest.fixture +def dtype(): + return ArrowBoolDtype() + + +@pytest.fixture +def data(): + return ArrowBoolArray.from_scalars(np.random.randint(0, 2, size=100, + dtype=bool)) + + +class BaseArrowTests(object): + pass + + +class TestDtype(BaseArrowTests, base.BaseDtypeTests): + def test_array_type_with_arg(self, data, dtype): + 
pytest.skip("GH-22666") + + +class TestInterface(BaseArrowTests, base.BaseInterfaceTests): + def test_repr(self, data): + raise pytest.skip("TODO") + + +class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): + def test_from_dtype(self, data): + pytest.skip("GH-22666") + + +def test_is_bool_dtype(data): + assert pd.api.types.is_bool_dtype(data) + assert pd.core.common.is_bool_indexer(data) + s = pd.Series(range(len(data))) + result = s[data] + expected = s[np.asarray(data)] + tm.assert_series_equal(result, expected) From 47da6d39dedc00a47e1e12b8f9c96189e8be3d9f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 11 Sep 2018 14:12:38 -0500 Subject: [PATCH 2/7] BUG: EA-backed boolean indexers Closes https://github.com/pandas-dev/pandas/issues/22665 Closes https://github.com/pandas-dev/pandas/issues/22326 --- doc/source/whatsnew/v0.24.0.txt | 2 ++ pandas/core/common.py | 9 ++++++--- pandas/core/dtypes/common.py | 12 ++++++++++++ pandas/tests/arrays/categorical/test_indexing.py | 12 +++++++++++- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3660c1e843f6c..8dbaf2dd64b7e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -484,6 +484,7 @@ ExtensionType Changes - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`) - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) +- An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`) - Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`). 
- The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) @@ -608,6 +609,7 @@ Categorical ^^^^^^^^^^^ - Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. +- Bug when indexing with a boolean-valued ``Categorical``. Now categoricals are treated as a boolean mask (:issue:`22665`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index a3fba762509f1..5d52724fd9619 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -15,7 +15,9 @@ from pandas import compat from pandas.compat import iteritems, PY36, OrderedDict from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCIndexClass -from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.common import ( + is_integer, is_bool_dtype, is_extension_array_dtype, is_array_like +) from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -100,7 +102,8 @@ def maybe_box_datetimelike(value): def is_bool_indexer(key): - if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)): + if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or + (is_array_like(key) and is_extension_array_dtype(key.dtype))): if key.dtype == np.object_: key = np.asarray(values_from_object(key)) @@ -110,7 +113,7 @@ def is_bool_indexer(key): 'NA / NaN values') return False return True - elif key.dtype == np.bool_: + elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): try: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b8cbb41501dd1..9ab7ecb122008 100644 --- 
a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1608,6 +1608,8 @@ def is_bool_dtype(arr_or_dtype): False >>> is_bool_dtype(np.array([True, False])) True + >>> is_bool_dtype(pd.Categorical([True, False])) + True """ if arr_or_dtype is None: @@ -1618,6 +1620,13 @@ def is_bool_dtype(arr_or_dtype): # this isn't even a dtype return False + if isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex)): + arr_or_dtype = arr_or_dtype.dtype + + if isinstance(arr_or_dtype, CategoricalDtype): + arr_or_dtype = arr_or_dtype.categories + # now we use the special definition for Index + if isinstance(arr_or_dtype, ABCIndexClass): # TODO(jreback) @@ -1626,6 +1635,9 @@ def is_bool_dtype(arr_or_dtype): # guess this return (arr_or_dtype.is_object and arr_or_dtype.inferred_type == 'boolean') + elif is_extension_array_dtype(arr_or_dtype): + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return dtype.kind == 'b' return issubclass(tipo, np.bool_) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index b54ac2835bee3..5044b522a96f1 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -5,7 +5,8 @@ import numpy as np import pandas.util.testing as tm -from pandas import Categorical, Index, CategoricalIndex, PeriodIndex +from pandas import Categorical, Index, CategoricalIndex, PeriodIndex, Series +from pandas.core.common import is_bool_indexer from pandas.tests.arrays.categorical.common import TestCategorical @@ -121,3 +122,12 @@ def test_get_indexer_non_unique(self, idx_values, key_values, key_class): tm.assert_numpy_array_equal(expected, result) tm.assert_numpy_array_equal(exp_miss, res_miss) + + +def test_mask_with_boolean(): + s = Series(range(3)) + idx = CategoricalIndex([True, False, True]) + assert is_bool_indexer(idx) + result = s[idx] + expected = s[idx.astype('object')] + tm.assert_series_equal(result, expected) From 
9d4eab6efac58c0764d000b3e94b200595e76ba0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 11 Sep 2018 15:49:50 -0500 Subject: [PATCH 3/7] lint and skip --- pandas/tests/arrays/categorical/test_indexing.py | 4 ++-- pandas/tests/extension/arrow/test_bool.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 5044b522a96f1..e172d09ffd067 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm from pandas import Categorical, Index, CategoricalIndex, PeriodIndex, Series -from pandas.core.common import is_bool_indexer +import pandas.core.common as com from pandas.tests.arrays.categorical.common import TestCategorical @@ -127,7 +127,7 @@ def test_get_indexer_non_unique(self, idx_values, key_values, key_class): def test_mask_with_boolean(): s = Series(range(3)) idx = CategoricalIndex([True, False, True]) - assert is_bool_indexer(idx) + assert com.is_bool_indexer(idx) result = s[idx] expected = s[idx.astype('object')] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index defa0b0adcf14..e1afedcade3ff 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -4,7 +4,7 @@ import pandas.util.testing as tm from pandas.tests.extension import base -pytest.importorskip('pyarrow') +pytest.importorskip('pyarrow', minversion="0.10.0") from .bool import ArrowBoolDtype, ArrowBoolArray From 35f05751a8ff209ff66aced7c8818b5bd827f57f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 12 Sep 2018 13:34:19 -0500 Subject: [PATCH 4/7] Handle NAs --- pandas/core/common.py | 31 +++++++++++++++++-- .../tests/arrays/categorical/test_indexing.py | 19 ++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff 
--git a/pandas/core/common.py b/pandas/core/common.py index 5d52724fd9619..e160c5ff5885e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -102,6 +102,29 @@ def maybe_box_datetimelike(value): def is_bool_indexer(key): + # type: (Any) -> bool + """ + Check whether `key` is a valid boolean indexer. + + Parameters + ---------- + key : Any + Only list-likes may be considered boolean indexers. + All other types are not considered a boolean indexer. + For array-like input, boolean ndarrays or ExtensionArrays + with a boolean type are considered boolean indexers. + + Returns + ------- + bool + + Raises + ------ + ValueError + When the array is an object-dtype ndarray or ExtensionArray + and contains missing values. + """ + na_msg = 'cannot index with vector containing NA / NaN values' if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (is_array_like(key) and is_extension_array_dtype(key.dtype))): if key.dtype == np.object_: @@ -109,11 +132,15 @@ def is_bool_indexer(key): if not lib.is_bool_array(key): if isna(key).any(): - raise ValueError('cannot index with vector containing ' - 'NA / NaN values') + raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): + # an ndarray with bool-dtype by definition has no missing values. 
+ # So we only need to check for NAs in ExtensionArrays + if is_extension_array_dtype(key.dtype): + if np.any(key.isna()): + raise ValueError(na_msg) return True elif isinstance(key, list): try: diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index e172d09ffd067..8dfa9fa9ad416 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -124,10 +124,25 @@ def test_get_indexer_non_unique(self, idx_values, key_values, key_class): tm.assert_numpy_array_equal(exp_miss, res_miss) -def test_mask_with_boolean(): +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean(index): s = Series(range(3)) - idx = CategoricalIndex([True, False, True]) + idx = Categorical([True, False, True]) + if index: + idx = CategoricalIndex(idx) + assert com.is_bool_indexer(idx) result = s[idx] expected = s[idx.astype('object')] tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean_raises(index): + s = Series(range(3)) + idx = Categorical([True, False, None]) + if index: + idx = CategoricalIndex(idx) + + with tm.assert_raises_regex(ValueError, 'NA / NaN'): + s[idx] \ No newline at end of file From 20b2add434ee26f233b2bfcb29810065eef8fe7e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 13 Sep 2018 06:38:40 -0500 Subject: [PATCH 5/7] Document --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/common.py | 2 +- pandas/core/dtypes/base.py | 3 +++ pandas/core/dtypes/common.py | 5 +++++ pandas/tests/arrays/categorical/test_indexing.py | 2 +- 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 070f1c2614c44..994feedd90494 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -610,7 +610,7 @@ Categorical ^^^^^^^^^^^ - Bug in :meth:`Categorical.from_codes` where 
``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. -- Bug when indexing with a boolean-valued ``Categorical``. Now categoricals are treated as a boolean mask (:issue:`22665`) +- Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index ca9f3d20c052e..9271ef8bfb7b5 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -112,7 +112,7 @@ def is_bool_indexer(key): Only list-likes may be considered boolean indexers. All other types are not considered a boolean indexer. For array-like input, boolean ndarrays or ExtensionArrays - with a boolean type are considered boolean indexers. + with a boolean kind are considered boolean indexers. Returns ------- diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 7dcdf878231f1..c68800b7ab9b4 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -169,6 +169,9 @@ def kind(self): the extension type cannot be represented as a built-in NumPy type. + This affect whether the ExtensionArray can be used as a boolean + mask. ExtensionArrays with ``kind == 'b'`` can be boolean masks. + See Also -------- numpy.dtype.kind diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 9ab7ecb122008..5e61df81debe9 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1592,6 +1592,11 @@ def is_bool_dtype(arr_or_dtype): ------- boolean : Whether or not the array or dtype is of a boolean dtype. + Notes + ----- + An ExtensionArray is considered boolean when the ``.kind`` of the + dtype is ``'b'``. 
+ Examples -------- >>> is_bool_dtype(str) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 8dfa9fa9ad416..d23da1565a952 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -145,4 +145,4 @@ def test_mask_with_boolean_raises(index): idx = CategoricalIndex(idx) with tm.assert_raises_regex(ValueError, 'NA / NaN'): - s[idx] \ No newline at end of file + s[idx] From b9c45bd8984b581954da51f43d8ddc1322a369e2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 11:55:52 -0500 Subject: [PATCH 6/7] kind -> attribute --- pandas/core/common.py | 2 +- pandas/core/dtypes/base.py | 23 ++++++++++++++++++++--- pandas/core/dtypes/common.py | 6 +++--- pandas/core/dtypes/dtypes.py | 6 ++++++ pandas/tests/dtypes/test_dtypes.py | 14 +++++++++++++- pandas/tests/extension/arrow/bool.py | 3 +++ 6 files changed, 46 insertions(+), 8 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 9271ef8bfb7b5..e5ac2b366d8c8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -112,7 +112,7 @@ def is_bool_indexer(key): Only list-likes may be considered boolean indexers. All other types are not considered a boolean indexer. For array-like input, boolean ndarrays or ExtensionArrays - with a boolean kind are considered boolean indexers. + with ``_is_boolean`` set are considered boolean indexers. Returns ------- diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c68800b7ab9b4..a552251ebbafa 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -106,6 +106,25 @@ def _is_numeric(self): """ return False + @property + def _is_boolean(self): + # type: () -> bool + """ + Whether this dtype should be considered boolean. + + By default, ExtensionDtypes are assumed to be non-boolean. + Setting this to True will affect the behavior of several places, + e.g. 
+ + * is_bool_dtype + * boolean indexing + + Returns + ------- + bool + """ + return False + class ExtensionDtype(_DtypeOpsMixin): """A custom data type, to be paired with an ExtensionArray. @@ -125,6 +144,7 @@ class ExtensionDtype(_DtypeOpsMixin): pandas operations * _is_numeric + * _is_boolean Optionally one can override construct_array_type for construction with the name of this dtype via the Registry. See @@ -169,9 +189,6 @@ def kind(self): the extension type cannot be represented as a built-in NumPy type. - This affect whether the ExtensionArray can be used as a boolean - mask. ExtensionArrays with ``kind == 'b'`` can be boolean masks. - See Also -------- numpy.dtype.kind diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5e61df81debe9..253cbdeaf8e92 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1594,8 +1594,8 @@ def is_bool_dtype(arr_or_dtype): Notes ----- - An ExtensionArray is considered boolean when the ``.kind`` of the - dtype is ``'b'``. + An ExtensionArray is considered boolean when the ``_is_boolean`` + attribute is set to True. 
Examples -------- @@ -1642,7 +1642,7 @@ def is_bool_dtype(arr_or_dtype): arr_or_dtype.inferred_type == 'boolean') elif is_extension_array_dtype(arr_or_dtype): dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) - return dtype.kind == 'b' + return dtype._is_boolean return issubclass(tipo, np.bool_) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 4fd77e41a1c67..d879ded4f0f09 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -462,6 +462,12 @@ def ordered(self): """Whether the categories have an ordered relationship""" return self._ordered + @property + def _is_boolean(self): + from pandas.core.dtypes.common import is_bool_dtype + + return is_bool_dtype(self.categories) + class DatetimeTZDtypeType(type): """ diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 55c841ba1fc46..e3d14497a38f9 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -17,7 +17,7 @@ is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, is_datetime64_any_dtype, is_string_dtype, - _coerce_to_dtype) + _coerce_to_dtype, is_bool_dtype) import pandas.util.testing as tm @@ -126,6 +126,18 @@ def test_tuple_categories(self): result = CategoricalDtype(categories) assert all(result.categories == categories) + @pytest.mark.parametrize("categories, expected", [ + ([True, False], True), + ([True, False, None], True), + ([True, False, "a", "b'"], False), + ([0, 1], False), + ]) + def test_is_boolean(self, categories, expected): + cat = Categorical(categories) + assert cat.dtype._is_boolean is expected + assert is_bool_dtype(cat) is expected + assert is_bool_dtype(cat.dtype) is expected + class TestDatetimeTZDtype(Base): diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index 5fc30536f9670..fe98a81cdc36c 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -29,6 +29,9 
@@ def construct_from_string(cls, string): def construct_array_type(cls): return ArrowBoolArray + def _is_boolean(self): + return True + class ArrowBoolArray(ExtensionArray): def __init__(self, values): From 4d095092f5fc03e984bc3d58c24a89259541b1e9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Sep 2018 06:18:01 -0500 Subject: [PATCH 7/7] update --- pandas/tests/extension/arrow/bool.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/arrow/bool.py b/pandas/tests/extension/arrow/bool.py index fe98a81cdc36c..a9da25cdd2755 100644 --- a/pandas/tests/extension/arrow/bool.py +++ b/pandas/tests/extension/arrow/bool.py @@ -1,3 +1,10 @@ +"""Rudimentary Apache Arrow-backed ExtensionArray. + +At the moment, just a boolean array / type is implemented. +Eventually, we'll want to parametrize the type and support +multiple dtypes. Not all methods are implemented yet, and the +current implementation is not efficient. +""" import copy import itertools @@ -5,11 +12,11 @@ import pyarrow as pa import pandas as pd from pandas.api.extensions import ( - ExtensionDtype, ExtensionArray + ExtensionDtype, ExtensionArray, take, register_extension_dtype ) -# @register_extension_dtype +@register_extension_dtype class ArrowBoolDtype(ExtensionDtype): type = np.bool_ @@ -79,7 +86,6 @@ def isna(self): return pd.isna(self._data.to_pandas()) def take(self, indices, allow_fill=False, fill_value=None): - from pandas.core.algorithms import take data = self._data.to_pandas() if allow_fill and fill_value is None: