Skip to content

REF: put all post-processing at end of DataFrame._reduce #32671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 14, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7808,6 +7808,8 @@ def _reduce(
self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
):

assert filter_type is None or filter_type == "bool", filter_type
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it make sense to add type annotation to filter_type as well? and maybe start a docstring.

also the NotImplementedError in _get_data is now unreachable?

also out_dtype = "bool" if filter_type == "bool" else None on L7864 is redundant?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filter_type is only passed for "any" and "all", id lean towards getting rid of the kwarg altogether

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also the NotImplementedError in _get_data is now unreachable?

that has always been the case, but im happy to get rid of that branch altogether


dtype_is_dt = self.dtypes.apply(
lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
)
Expand Down Expand Up @@ -7835,7 +7837,7 @@ def f(x):
return op(x, axis=axis, skipna=skipna, **kwds)

def _get_data(axis_matters):
if filter_type is None or filter_type == "numeric":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it be clearer to have "numeric" as the default value of filter_type instead of None. Again maybe a docstring would help.

if filter_type is None:
data = self._get_numeric_data()
elif filter_type == "bool":
if axis_matters:
Expand Down Expand Up @@ -7882,15 +7884,11 @@ def blk_func(values):
return out

if numeric_only is None:
values = self.values
data = self
values = data.values
try:
result = f(values)

if filter_type == "bool" and is_object_dtype(values) and axis is None:
# work around https://github.com/numpy/numpy/issues/10489
# TODO: combine with hasattr(result, 'dtype') further down
# hard since we don't have `values` down there.
result = np.bool_(result)
except TypeError:
# e.g. in nanops trying to convert strs to float

Expand All @@ -7916,30 +7914,36 @@ def blk_func(values):

# TODO: why doesnt axis matter here?
data = _get_data(axis_matters=False)
with np.errstate(all="ignore"):
result = f(data.values)
labels = data._get_agg_axis(axis)

values = data.values
with np.errstate(all="ignore"):
result = f(values)
else:
if numeric_only:
data = _get_data(axis_matters=True)
labels = data._get_agg_axis(axis)

values = data.values
labels = data._get_agg_axis(axis)
else:
values = self.values
data = self
values = data.values
result = f(values)

if hasattr(result, "dtype") and is_object_dtype(result.dtype):
if filter_type == "bool" and is_object_dtype(values) and axis is None:
# work around https://github.com/numpy/numpy/issues/10489
# TODO: can we de-duplicate parts of this with the next blocK?
result = np.bool_(result)
elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
try:
if filter_type is None or filter_type == "numeric":
if filter_type is None:
result = result.astype(np.float64)
elif filter_type == "bool" and notna(result).all():
result = result.astype(np.bool_)
except (ValueError, TypeError):

# try to coerce to the original dtypes item by item if we can
if axis == 0:
result = coerce_to_dtypes(result, self.dtypes)
result = coerce_to_dtypes(result, data.dtypes)

if constructor is not None:
result = self._constructor_sliced(result, index=labels)
Expand Down