diff --git a/ci/deps/actions-310-minimum_versions.yaml b/ci/deps/actions-310-minimum_versions.yaml index c7c72828db481..20e551cce689b 100644 --- a/ci/deps/actions-310-minimum_versions.yaml +++ b/ci/deps/actions-310-minimum_versions.yaml @@ -60,6 +60,7 @@ dependencies: - zstandard=0.19.0 - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql==0.10.0 - adbc-driver-sqlite==0.8.0 - tzdata==2022.7 diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 74cab4e0970dc..6cd695bfbc8b5 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -58,6 +58,7 @@ dependencies: - zstandard>=0.19.0 - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql>=0.10.0 - adbc-driver-sqlite>=0.8.0 - tzdata>=2022.7 diff --git a/ci/deps/actions-311-downstream_compat.yaml b/ci/deps/actions-311-downstream_compat.yaml index 092ca18d61259..cb5856528db1d 100644 --- a/ci/deps/actions-311-downstream_compat.yaml +++ b/ci/deps/actions-311-downstream_compat.yaml @@ -72,6 +72,7 @@ dependencies: - pyyaml - py - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql>=0.10.0 - adbc-driver-sqlite>=0.8.0 - tzdata>=2022.7 diff --git a/ci/deps/actions-311-numpydev.yaml b/ci/deps/actions-311-numpydev.yaml index 325a6d45d74fd..a722651a54305 100644 --- a/ci/deps/actions-311-numpydev.yaml +++ b/ci/deps/actions-311-numpydev.yaml @@ -21,6 +21,7 @@ dependencies: - pip - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - "--extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" - "--pre" - "numpy" diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml index 22e4907e5a6e5..f89616da85b14 100644 --- a/ci/deps/actions-311-pyarrownightly.yaml +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -22,6 +22,7 @@ dependencies: - pip - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - "tzdata>=2022.7" - "--extra-index-url https://pypi.fury.io/arrow-nightlies/" - "--prefer-binary" diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index b6f515dceaea9..b22456064ffba 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -58,5 +58,6 @@ dependencies: - zstandard>=0.19.0 - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql>=0.10.0 - adbc-driver-sqlite>=0.8.0 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index bc66f8a5382c9..1efe766b74a0b 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -58,6 +58,7 @@ dependencies: - zstandard>=0.19.0 - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql>=0.10.0 - adbc-driver-sqlite>=0.8.0 - tzdata>=2022.7 diff --git a/ci/deps/actions-pypy-39.yaml b/ci/deps/actions-pypy-39.yaml index 90933b24b88db..034d1aa58c7e4 100644 --- a/ci/deps/actions-pypy-39.yaml +++ b/ci/deps/actions-pypy-39.yaml @@ -23,4 +23,5 @@ dependencies: - numpy - python-dateutil - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - tzdata>=2022.7 diff --git a/ci/deps/circle-311-arm64.yaml b/ci/deps/circle-311-arm64.yaml index 3f09e27d0fe4b..837c4591c0363 100644 --- a/ci/deps/circle-311-arm64.yaml +++ b/ci/deps/circle-311-arm64.yaml @@ -57,5 +57,6 @@ dependencies: - xlsxwriter>=3.0.5 - zstandard>=0.19.0 - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql>=0.8.0 - adbc-driver-sqlite>=0.8.0 diff --git a/environment.yml b/environment.yml index 9bf6cf2a92347..9c2129a18efc1 100644 --- a/environment.yml +++ b/environment.yml @@ -116,6 +116,7 @@ dependencies: - pygments # Code highlighting - pip: + - git+https://github.com/WillAyd/pandas-bitmask.git - adbc-driver-postgresql>=0.10.0 - adbc-driver-sqlite>=0.8.0 - typing_extensions; python_version<"3.11" diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 1506a76aa94a6..4d7d610658e7c 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -140,7 +140,7 @@ cdef class IndexEngine: cdef readonly: ndarray values - ndarray mask + object mask HashTable mapping bint over_size_threshold diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 56f8adda93251..fdcbf2ad49fad 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -16,6 +16,7 @@ import warnings import numpy as np +from pandas_mask import PandasMaskArray from pandas._libs import ( algos, @@ -1173,6 +1174,8 @@ def take( ... ) array([ 10, 10, -10]) """ + if isinstance(arr, PandasMaskArray): # TODO: implement take directly on mask + arr = np.array(arr) if not isinstance( arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries, ABCNumpyExtensionArray), diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index f3a0cc0dccdb3..8cc22708fe77a 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -10,6 +10,7 @@ import warnings import numpy as np +from pandas_mask import PandasMaskArray from pandas._libs import ( lib, @@ -112,20 +113,23 @@ class BaseMaskedArray(OpsMixin, ExtensionArray): # our underlying data and mask are each ndarrays _data: np.ndarray - _mask: npt.NDArray[np.bool_] + _mask: PandasMaskArray @classmethod def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self: result = BaseMaskedArray.__new__(cls) result._data = values - result._mask = mask + result._mask = PandasMaskArray(mask) return result def __init__( self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False ) -> None: # values is supposed to already be validated in the subclass - if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_): + if not ( + (isinstance(mask, np.ndarray) and mask.dtype == np.bool_) + or isinstance(mask, PandasMaskArray) + ): raise TypeError( "mask should be boolean numpy array. Use " "the 'pd.array' function instead" @@ -678,7 +682,7 @@ def __arrow_array__(self, type=None): """ import pyarrow as pa - return pa.array(self._data, mask=self._mask, type=type) + return pa.array(self._data, mask=np.asarray(self._mask), type=type) @property def _hasna(self) -> bool: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4c7fe604e452d..cfade8502e699 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -324,7 +324,7 @@ def _cython_op_ndim_compat( # expand to 2d, dispatch, then squeeze if appropriate values2d = values[None, :] if mask is not None: - mask = mask[None, :] + mask = np.array(mask)[None, :] if result_mask is not None: result_mask = result_mask[None, :] res = self._call_cython_op( diff --git a/requirements-dev.txt b/requirements-dev.txt index 69568cf661241..f1450465e7cb5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -84,6 +84,7 @@ feedparser pyyaml requests pygments +git+https://github.com/WillAyd/pandas-bitmask.git adbc-driver-postgresql>=0.10.0 adbc-driver-sqlite>=0.8.0 typing_extensions; python_version<"3.11"