Skip to content

Commit 075ed8b

Browse files
jbrockmendel, mathurk1, tkmz-n
authored
REF: handle axis=None case inside DataFrame.any/all to simplify _reduce (#35899)
* REF: remove unnecessary try/except * TST: add test for agg on ordered categorical cols (#35630) * TST: resample does not yield empty groups (#10603) (#35799) * revert accidental rebase * REF: handle axis=None cases inside DataFrame.all/any * annotate * dummy commit to force Travis Co-authored-by: Karthik Mathur <[email protected]> Co-authored-by: tkmz-n <[email protected]>
1 parent 0bc407a commit 075ed8b

File tree

2 files changed

+30
-39
lines changed

2 files changed

+30
-39
lines changed

pandas/core/frame.py

+22-39
Original file line number | Diff line number | Diff line change
@@ -8617,22 +8617,19 @@ def _reduce(
86178617
cols = self.columns[~dtype_is_dt]
86188618
self = self[cols]
86198619

8620-
if axis is None and filter_type == "bool":
8621-
labels = None
8622-
constructor = None
8623-
else:
8624-
# TODO: Make other agg func handle axis=None properly
8625-
axis = self._get_axis_number(axis)
8626-
labels = self._get_agg_axis(axis)
8627-
constructor = self._constructor
8620+
# TODO: Make other agg func handle axis=None properly
8621+
axis = self._get_axis_number(axis)
8622+
labels = self._get_agg_axis(axis)
8623+
constructor = self._constructor
8624+
assert axis in [0, 1]
86288625

86298626
def func(values):
86308627
if is_extension_array_dtype(values.dtype):
86318628
return extract_array(values)._reduce(name, skipna=skipna, **kwds)
86328629
else:
86338630
return op(values, axis=axis, skipna=skipna, **kwds)
86348631

8635-
def _get_data(axis_matters):
8632+
def _get_data(axis_matters: bool) -> "DataFrame":
86368633
if filter_type is None:
86378634
data = self._get_numeric_data()
86388635
elif filter_type == "bool":
@@ -8649,7 +8646,7 @@ def _get_data(axis_matters):
86498646
raise NotImplementedError(msg)
86508647
return data
86518648

8652-
if numeric_only is not None and axis in [0, 1]:
8649+
if numeric_only is not None:
86538650
df = self
86548651
if numeric_only is True:
86558652
df = _get_data(axis_matters=True)
@@ -8675,6 +8672,8 @@ def blk_func(values):
86758672
out[:] = coerce_to_dtypes(out.values, df.dtypes)
86768673
return out
86778674

8675+
assert numeric_only is None
8676+
86788677
if not self._is_homogeneous_type or self._mgr.any_extension_types:
86798678
# try to avoid self.values call
86808679

@@ -8702,40 +8701,24 @@ def blk_func(values):
87028701
result = result.iloc[0].rename(None)
87038702
return result
87048703

8705-
if numeric_only is None:
8706-
data = self
8707-
values = data.values
8708-
8709-
try:
8710-
result = func(values)
8711-
8712-
except TypeError:
8713-
# e.g. in nanops trying to convert strs to float
8704+
data = self
8705+
values = data.values
87148706

8715-
# TODO: why doesnt axis matter here?
8716-
data = _get_data(axis_matters=False)
8717-
labels = data._get_agg_axis(axis)
8707+
try:
8708+
result = func(values)
87188709

8719-
values = data.values
8720-
with np.errstate(all="ignore"):
8721-
result = func(values)
8710+
except TypeError:
8711+
# e.g. in nanops trying to convert strs to float
87228712

8723-
else:
8724-
if numeric_only:
8725-
data = _get_data(axis_matters=True)
8726-
labels = data._get_agg_axis(axis)
8713+
# TODO: why doesnt axis matter here?
8714+
data = _get_data(axis_matters=False)
8715+
labels = data._get_agg_axis(axis)
87278716

8728-
values = data.values
8729-
else:
8730-
data = self
8731-
values = data.values
8732-
result = func(values)
8717+
values = data.values
8718+
with np.errstate(all="ignore"):
8719+
result = func(values)
87338720

8734-
if filter_type == "bool" and is_object_dtype(values) and axis is None:
8735-
# work around https://github.com/numpy/numpy/issues/10489
8736-
# TODO: can we de-duplicate parts of this with the next blocK?
8737-
result = np.bool_(result)
8738-
elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
8721+
if is_object_dtype(result.dtype):
87398722
try:
87408723
if filter_type is None:
87418724
result = result.astype(np.float64)

pandas/core/generic.py

+8
Original file line number | Diff line number | Diff line change
@@ -11499,6 +11499,14 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs
1149911499
"Option bool_only is not implemented with option level."
1150011500
)
1150111501
return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)
11502+
11503+
if self.ndim > 1 and axis is None:
11504+
# Reduce along one dimension then the other, to simplify DataFrame._reduce
11505+
res = logical_func(
11506+
self, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
11507+
)
11508+
return logical_func(res, skipna=skipna, **kwargs)
11509+
1150211510
return self._reduce(
1150311511
func,
1150411512
name=name,

0 commit comments

Comments
 (0)