Skip to content

Commit 36ff425

Browse files
[ArrayManager] Test DataFrame reductions + implement ignore_failures (#39719)
1 parent 3ccbbfd commit 36ff425

File tree

3 files changed

+66
-14
lines changed

3 files changed

+66
-14
lines changed

.github/workflows/ci.yml

+3
Original file line number | Diff line number | Diff line change
@@ -153,6 +153,9 @@ jobs:
153153
run: |
154154
source activate pandas-dev
155155
pytest pandas/tests/frame/methods --array-manager
156+
pytest pandas/tests/frame/test_reductions.py --array-manager
157+
pytest pandas/tests/reductions/ --array-manager
158+
pytest pandas/tests/generic/test_generic.py --array-manager
156159
pytest pandas/tests/arithmetic/ --array-manager
157160
pytest pandas/tests/reshape/merge --array-manager
158161

pandas/core/internals/array_manager.py

+60-14
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@
1616

1717
import numpy as np
1818

19-
from pandas._libs import lib
19+
from pandas._libs import (
20+
NaT,
21+
lib,
22+
)
2023
from pandas._typing import (
2124
ArrayLike,
2225
DtypeObj,
@@ -33,6 +36,8 @@
3336
is_dtype_equal,
3437
is_extension_array_dtype,
3538
is_numeric_dtype,
39+
is_object_dtype,
40+
is_timedelta64_ns_dtype,
3641
)
3742
from pandas.core.dtypes.dtypes import (
3843
ExtensionDtype,
@@ -50,7 +55,11 @@
5055
import pandas.core.algorithms as algos
5156
from pandas.core.arrays import ExtensionArray
5257
from pandas.core.arrays.sparse import SparseDtype
53-
from pandas.core.construction import extract_array
58+
from pandas.core.construction import (
59+
ensure_wrapped_if_datetimelike,
60+
extract_array,
61+
sanitize_array,
62+
)
5463
from pandas.core.indexers import maybe_convert_indices
5564
from pandas.core.indexes.api import (
5665
Index,
@@ -201,18 +210,48 @@ def _verify_integrity(self) -> None:
201210
def reduce(
202211
self: T, func: Callable, ignore_failures: bool = False
203212
) -> Tuple[T, np.ndarray]:
204-
# TODO this still fails because `func` assumes to work on 2D arrays
205-
# TODO implement ignore_failures
206-
assert self.ndim == 2
213+
"""
214+
Apply reduction function column-wise, returning a single-row ArrayManager.
207215
208-
res_arrays = []
209-
for arr in self.arrays:
210-
res = func(arr, axis=0)
211-
res_arrays.append(np.array([res]))
216+
Parameters
217+
----------
218+
func : reduction function
219+
ignore_failures : bool, default False
220+
Whether to drop columns where func raises TypeError.
212221
213-
index = Index([None]) # placeholder
214-
new_mgr = type(self)(res_arrays, [index, self.items])
215-
indexer = np.arange(self.shape[0])
222+
Returns
223+
-------
224+
ArrayManager
225+
np.ndarray
226+
Indexer of column indices that are retained.
227+
"""
228+
result_arrays: List[np.ndarray] = []
229+
result_indices: List[int] = []
230+
for i, arr in enumerate(self.arrays):
231+
try:
232+
res = func(arr, axis=0)
233+
except TypeError:
234+
if not ignore_failures:
235+
raise
236+
else:
237+
# TODO NaT doesn't preserve dtype, so we need to ensure to create
238+
# a timedelta result array if original was timedelta
239+
# what if datetime results in timedelta? (eg std)
240+
if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
241+
result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
242+
else:
243+
result_arrays.append(sanitize_array([res], None))
244+
result_indices.append(i)
245+
246+
index = Index._simple_new(np.array([None], dtype=object)) # placeholder
247+
if ignore_failures:
248+
indexer = np.array(result_indices)
249+
columns = self.items[result_indices]
250+
else:
251+
indexer = np.arange(self.shape[0])
252+
columns = self.items
253+
254+
new_mgr = type(self)(result_arrays, [index, columns])
216255
return new_mgr, indexer
217256

218257
def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
@@ -489,14 +528,17 @@ def _get_data_subset(self, predicate: Callable) -> ArrayManager:
489528

490529
def get_bool_data(self, copy: bool = False) -> ArrayManager:
491530
"""
492-
Select columns that are bool-dtype.
531+
Select columns that are bool-dtype and object-dtype columns that are all-bool.
493532
494533
Parameters
495534
----------
496535
copy : bool, default False
497536
Whether to copy the blocks
498537
"""
499-
return self._get_data_subset(lambda arr: is_bool_dtype(arr.dtype))
538+
return self._get_data_subset(
539+
lambda arr: is_bool_dtype(arr.dtype)
540+
or (is_object_dtype(arr.dtype) and lib.is_bool_array(arr))
541+
)
500542

501543
def get_numeric_data(self, copy: bool = False) -> ArrayManager:
502544
"""
@@ -693,6 +735,10 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
693735
assert value.shape[1] == 1
694736
value = value[0, :]
695737

738+
# TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
739+
# but we should avoid that and pass directly the proper array
740+
value = ensure_wrapped_if_datetimelike(value)
741+
696742
assert isinstance(value, (np.ndarray, ExtensionArray))
697743
assert value.ndim == 1
698744
assert len(value) == len(self._axes[0])

pandas/tests/reductions/test_reductions.py

+3
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
import numpy as np
77
import pytest
88

9+
import pandas.util._test_decorators as td
10+
911
import pandas as pd
1012
from pandas import (
1113
Categorical,
@@ -291,6 +293,7 @@ def test_numpy_minmax_timedelta64(self):
291293
with pytest.raises(ValueError, match=errmsg):
292294
np.argmax(td, out=0)
293295

296+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
294297
def test_timedelta_ops(self):
295298
# GH#4984
296299
# make sure ops return Timedelta

0 commit comments

Comments
 (0)