|
16 | 16 |
|
17 | 17 | import numpy as np
|
18 | 18 |
|
19 |
| -from pandas._libs import lib |
| 19 | +from pandas._libs import ( |
| 20 | + NaT, |
| 21 | + lib, |
| 22 | +) |
20 | 23 | from pandas._typing import (
|
21 | 24 | ArrayLike,
|
22 | 25 | DtypeObj,
|
|
33 | 36 | is_dtype_equal,
|
34 | 37 | is_extension_array_dtype,
|
35 | 38 | is_numeric_dtype,
|
| 39 | + is_object_dtype, |
| 40 | + is_timedelta64_ns_dtype, |
36 | 41 | )
|
37 | 42 | from pandas.core.dtypes.dtypes import (
|
38 | 43 | ExtensionDtype,
|
|
50 | 55 | import pandas.core.algorithms as algos
|
51 | 56 | from pandas.core.arrays import ExtensionArray
|
52 | 57 | from pandas.core.arrays.sparse import SparseDtype
|
53 |
| -from pandas.core.construction import extract_array |
| 58 | +from pandas.core.construction import ( |
| 59 | + ensure_wrapped_if_datetimelike, |
| 60 | + extract_array, |
| 61 | + sanitize_array, |
| 62 | +) |
54 | 63 | from pandas.core.indexers import maybe_convert_indices
|
55 | 64 | from pandas.core.indexes.api import (
|
56 | 65 | Index,
|
@@ -201,18 +210,48 @@ def _verify_integrity(self) -> None:
|
def reduce(
    self: T, func: Callable, ignore_failures: bool = False
) -> Tuple[T, np.ndarray]:
    """
    Apply reduction function column-wise, returning a single-row ArrayManager.

    Parameters
    ----------
    func : reduction function
        Called as ``func(arr, axis=0)`` on every array in ``self.arrays``.
    ignore_failures : bool, default False
        Whether to drop columns where func raises TypeError.

    Returns
    -------
    ArrayManager
    np.ndarray
        Indexer of column indices that are retained.

    Raises
    ------
    TypeError
        Propagated from ``func`` when ``ignore_failures`` is False.
    """
    result_arrays: List[np.ndarray] = []
    result_indices: List[int] = []
    for i, arr in enumerate(self.arrays):
        try:
            res = func(arr, axis=0)
        except TypeError:
            if not ignore_failures:
                raise
            # Column is dropped: nothing is appended for position ``i``.
            continue

        # TODO NaT doesn't preserve dtype, so we need to ensure to create
        # a timedelta result array if original was timedelta
        # what if datetime results in timedelta? (eg std)
        if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
            result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
        else:
            result_arrays.append(sanitize_array([res], None))
        result_indices.append(i)

    index = Index._simple_new(np.array([None], dtype=object))  # placeholder
    if ignore_failures:
        # Pin the dtype: an empty list would otherwise become a float64
        # array, which is not a valid positional indexer.
        indexer = np.array(result_indices, dtype=np.intp)
        columns = self.items[result_indices]
    else:
        indexer = np.arange(self.shape[0])
        columns = self.items

    new_mgr = type(self)(result_arrays, [index, columns])
    return new_mgr, indexer
|
217 | 256 |
|
218 | 257 | def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
|
@@ -489,14 +528,17 @@ def _get_data_subset(self, predicate: Callable) -> ArrayManager:
|
489 | 528 |
|
def get_bool_data(self, copy: bool = False) -> ArrayManager:
    """
    Select the columns that hold boolean data.

    A column is kept when its dtype is bool, or when it is object-dtype
    and ``lib.is_bool_array`` reports it as all-bool.

    Parameters
    ----------
    copy : bool, default False
        Whether to copy the blocks (not referenced in this method body).
    """

    def _holds_bools(arr) -> bool:
        # Cheap dtype check first; only object arrays need a value scan.
        if is_bool_dtype(arr.dtype):
            return True
        return is_object_dtype(arr.dtype) and lib.is_bool_array(arr)

    return self._get_data_subset(_holds_bools)
500 | 542 |
|
501 | 543 | def get_numeric_data(self, copy: bool = False) -> ArrayManager:
|
502 | 544 | """
|
@@ -693,6 +735,10 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
|
693 | 735 | assert value.shape[1] == 1
|
694 | 736 | value = value[0, :]
|
695 | 737 |
|
| 738 | + # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item |
| 739 | + # but we should avoid that and pass directly the proper array |
| 740 | + value = ensure_wrapped_if_datetimelike(value) |
| 741 | + |
696 | 742 | assert isinstance(value, (np.ndarray, ExtensionArray))
|
697 | 743 | assert value.ndim == 1
|
698 | 744 | assert len(value) == len(self._axes[0])
|
|
0 commit comments