Skip to content

CLN: remove _try_coerce_result altogether #27683

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Aug 5, 2019
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_categorical_dtype,
is_complex_dtype,
is_datetime64_any_dtype,
is_datetime64tz_dtype,
is_integer_dtype,
is_numeric_dtype,
is_sparse,
Expand Down Expand Up @@ -452,6 +453,16 @@ def wrapper(*args, **kwargs):
def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs):
assert kind in ["transform", "aggregate"]

if is_datetime64tz_dtype(values):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you shouldn't do this here and rather handle at a higher level as datetime64 is caught below for some operations.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

later in this function it checks for datetime64 to exclude add/prod/cumsum/cumprod, but there is nowhere else in this module where we handle datetime64tz AFAICT

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my point is that this is not the right place to handle this. if you want to transform the input then let it pass thru or raise a NotImplementedError then that is fine, but the way you are writing it is is very diffferent.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all aggregate and transform calls go through here, so this is the only place AFAICT that we can do this without having to duplicate it for every calling function. If there is a better place, can you be more specific?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing my point. I have no problem handling this in this function. What you wrote returns early, bypassing all of the logic that handles all of the other dtypes. So it needs to be integrated there OR handled at a higher level on a NotImplementedError raise.

This function becomes even harder to understand with this addition. In theory you could also split this function up more logically. There are a number of solutions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

edited to do what I think you're suggesting.

the method could use a good refactor, agreed. separate pass.

# TODO: possible need to reshape? kludge can be avoided when
# 2D EA is allowed.
naive = values.view("M8[ns]")
result, names = self._cython_operation(
kind, naive, how=how, axis=axis, min_count=min_count, **kwargs
)
result = type(values)(result.astype(np.int64), dtype=values.dtype)
return result, names

# can we do this operation with our cython functions
# if not raise NotImplementedError

Expand Down
53 changes: 1 addition & 52 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,6 @@ def fillna(self, value, limit=None, inplace=False, downcast=None):
if self._can_hold_element(value):
# equivalent: self._try_coerce_args(value) would not raise
blocks = self.putmask(mask, value, inplace=inplace)
blocks = [
b.make_block(values=self._try_coerce_result(b.values)) for b in blocks
]
return self._maybe_downcast(blocks, downcast)

# we can't process the value, but nothing to do
Expand Down Expand Up @@ -734,12 +731,7 @@ def _try_coerce_args(self, other):

return other

def _try_coerce_result(self, result):
""" reverse of try_coerce_args """
return result

def _try_coerce_and_cast_result(self, result, dtype=None):
result = self._try_coerce_result(result)
result = self._try_cast_result(result, dtype=dtype)
return result

Expand Down Expand Up @@ -1406,7 +1398,7 @@ def func(cond, values, other):

try:
fastres = expressions.where(cond, values, other)
return self._try_coerce_result(fastres)
return fastres
except Exception as detail:
if errors == "raise":
raise TypeError(
Expand Down Expand Up @@ -1694,7 +1686,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
mask = _safe_reshape(mask, new_values.shape)

new_values[mask] = new
new_values = self._try_coerce_result(new_values)
return [self.make_block(values=new_values)]

def _try_cast_result(self, result, dtype=None):
Expand Down Expand Up @@ -1872,20 +1863,6 @@ def _slice(self, slicer):

return self.values[slicer]

def _try_cast_result(self, result, dtype=None):
"""
if we have an operation that operates on for example floats
we want to try to cast back to our EA here if possible

result could be a 2-D numpy array, e.g. the result of
a numeric operation; but it must be shape (1, X) because
we by-definition operate on the ExtensionBlocks one-by-one

result could also be an EA Array itself, in which case it
is already a 1-D array
"""
return result

def formatting_values(self):
# Deprecating the ability to override _formatting_values.
# Do the warning here, it's only user in pandas, since we
Expand Down Expand Up @@ -2443,20 +2420,6 @@ def _try_coerce_args(self, other):

return other

def _try_coerce_result(self, result):
""" reverse of try_coerce_args """
if isinstance(result, np.ndarray):
if result.ndim == 2:
# kludge for 2D blocks with 1D EAs
result = result[0, :]
if result.dtype == np.float64:
# needed for post-groupby.median
result = self._holder._from_sequence(
result.astype(np.int64), freq=None, dtype=self.values.dtype
)

return result

def diff(self, n, axis=0):
"""1st discrete difference

Expand Down Expand Up @@ -2617,10 +2580,6 @@ def _try_coerce_args(self, other):

return other

def _try_coerce_result(self, result):
""" reverse of try_coerce_args / try_operate """
return result

def should_store(self, value):
return issubclass(
value.dtype.type, np.timedelta64
Expand Down Expand Up @@ -3029,16 +2988,6 @@ def array_dtype(self):
"""
return np.object_

def _try_coerce_result(self, result):
""" reverse of try_coerce_args """

# GH12564: CategoricalBlock is 1-dim only
# while returned results could be any dim
if (not is_categorical_dtype(result)) and isinstance(result, np.ndarray):
result = _block_shape(result, ndim=self.ndim)

return result

def to_dense(self):
# Categorical.get_values returns a DatetimeIndex for datetime
# categories, so we can't simply use `np.asarray(self.values)` like
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,7 @@ def fast_xs(self, loc):
# Such assignment may incorrectly coerce NaT to None
# result[blk.mgr_locs] = blk._slice((slice(None), loc))
for i, rl in enumerate(blk.mgr_locs):
result[rl] = blk._try_coerce_result(blk.iget((i, loc)))
result[rl] = blk.iget((i, loc))

if is_extension_array_dtype(dtype):
result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)
Expand Down