Skip to content

REF: standardize usage in DataFrame vs SparseDataFrame ops #28027

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Sep 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
8e1ad04
implement _construct_result
jbrockmendel Aug 20, 2019
bdc0d72
Change dispatch_to_series return, restore alignment
jbrockmendel Aug 20, 2019
7a4b8c2
remove comment
jbrockmendel Aug 20, 2019
3505e6a
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 20, 2019
63c5f80
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 20, 2019
bf0d2bf
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 20, 2019
c235b6a
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 23, 2019
6cf996d
pin columns
jbrockmendel Aug 23, 2019
d867034
patch _default_fill_value
jbrockmendel Aug 23, 2019
ec3b4f5
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 23, 2019
94b5dbd
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 23, 2019
ca76be4
pin default_fill_value after align
jbrockmendel Aug 23, 2019
2620ef4
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Aug 27, 2019
4c836d3
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Sep 2, 2019
45f5419
improve docstring
jbrockmendel Sep 2, 2019
ceeb1d2
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Sep 3, 2019
ad53e94
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Sep 12, 2019
cffa624
Merge branch 'master' of https://github.com/pandas-dev/pandas into bl…
jbrockmendel Sep 12, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 36 additions & 13 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5306,7 +5306,6 @@ def reorder_levels(self, order, axis=0):

def _combine_frame(self, other, func, fill_value=None, level=None):
this, other = self.align(other, join="outer", level=level, copy=False)
new_index, new_columns = this.index, this.columns

if fill_value is None:
# since _arith_op may be called in a loop, avoid function call
Expand All @@ -5324,38 +5323,62 @@ def _arith_op(left, right):

if ops.should_series_dispatch(this, other, func):
# iterate over columns
return ops.dispatch_to_series(this, other, _arith_op)
new_data = ops.dispatch_to_series(this, other, _arith_op)
else:
with np.errstate(all="ignore"):
result = _arith_op(this.values, other.values)
result = dispatch_fill_zeros(func, this.values, other.values, result)
return self._constructor(
result, index=new_index, columns=new_columns, copy=False
)
res_values = _arith_op(this.values, other.values)
new_data = dispatch_fill_zeros(func, this.values, other.values, res_values)
return this._construct_result(other, new_data, _arith_op)

def _combine_match_index(self, other, func, level=None):
left, right = self.align(other, join="outer", axis=0, level=level, copy=False)
# at this point we have `left.index.equals(right.index)`

if left._is_mixed_type or right._is_mixed_type:
# operate column-wise; avoid costly object-casting in `.values`
return ops.dispatch_to_series(left, right, func)
new_data = ops.dispatch_to_series(left, right, func)
else:
# fastpath --> operate directly on values
with np.errstate(all="ignore"):
new_data = func(left.values.T, right.values).T
return self._constructor(
new_data, index=left.index, columns=self.columns, copy=False
)
return left._construct_result(other, new_data, func)

def _combine_match_columns(self, other: Series, func, level=None):
left, right = self.align(other, join="outer", axis=1, level=level, copy=False)
# at this point we have `left.columns.equals(right.index)`
return ops.dispatch_to_series(left, right, func, axis="columns")
new_data = ops.dispatch_to_series(left, right, func, axis="columns")
return left._construct_result(right, new_data, func)

def _combine_const(self, other, func):
# scalar other or np.ndim(other) == 0
return ops.dispatch_to_series(self, other, func)
new_data = ops.dispatch_to_series(self, other, func)
return self._construct_result(other, new_data, func)

def _construct_result(self, other, result, func):
"""
Wrap the result of an arithmetic, comparison, or logical operation.

Parameters
----------
other : object
result : DataFrame
func : binary operator

Returns
-------
DataFrame

Notes
-----
`func` is included for compat with SparseDataFrame signature, is not
needed here.
"""
out = self._constructor(result, index=self.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
out.columns = self.columns
return out
# TODO: finalize? we do for SparseDataFrame
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

finalize doesn't do really anything for frames, prob can't hurt though

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

especially in the frame+frame case it isn't obvious that is correct; I'd prefer to revisit

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle we should finalize I think, but since it was not done right now, fine to leave it for later?


def combine(self, other, func, fill_value=None, overwrite=True):
"""
Expand Down
13 changes: 5 additions & 8 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,12 +509,7 @@ def column_op(a, b):
raise NotImplementedError(right)

new_data = expressions.evaluate(column_op, str_rep, left, right)

result = left._constructor(new_data, index=left.index, copy=False)
# Pin columns instead of passing to constructor for compat with
# non-unique columns case
result.columns = left.columns
return result
return new_data


def dispatch_to_extension_op(
Expand Down Expand Up @@ -1055,7 +1050,8 @@ def f(self, other, axis=default_axis, level=None):
# Another DataFrame
if not self._indexed_same(other):
self, other = self.align(other, "outer", level=level, copy=False)
return dispatch_to_series(self, other, na_op, str_rep)
new_data = dispatch_to_series(self, other, na_op, str_rep)
return self._construct_result(other, new_data, na_op)

elif isinstance(other, ABCSeries):
return _combine_series_frame(
Expand Down Expand Up @@ -1085,7 +1081,8 @@ def f(self, other):
raise ValueError(
"Can only compare identically-labeled DataFrame objects"
)
return dispatch_to_series(self, other, func, str_rep)
new_data = dispatch_to_series(self, other, func, str_rep)
return self._construct_result(other, new_data, func)

elif isinstance(other, ABCSeries):
return _combine_series_frame(
Expand Down
73 changes: 34 additions & 39 deletions pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,19 +534,13 @@ def _set_value(self, index, col, value, takeable=False):
# Arithmetic-related methods

def _combine_frame(self, other, func, fill_value=None, level=None):
if level is not None:
raise NotImplementedError("'level' argument is not supported")

this, other = self.align(other, join="outer", level=level, copy=False)
new_index, new_columns = this.index, this.columns

if self.empty and other.empty:
return self._constructor(index=new_index).__finalize__(self)
this._default_fill_value = self._default_fill_value

new_data = {}
if fill_value is not None:
# TODO: be a bit more intelligent here
for col in new_columns:
for col in this.columns:
if col in this and col in other:
dleft = this[col].to_dense()
dright = other[col].to_dense()
Expand All @@ -555,66 +549,62 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
new_data[col] = result
else:

for col in new_columns:
for col in this.columns:
if col in this and col in other:
new_data[col] = func(this[col], other[col])

new_fill_value = self._get_op_result_fill_value(other, func)

return self._constructor(
data=new_data,
index=new_index,
columns=new_columns,
default_fill_value=new_fill_value,
).__finalize__(self)
return this._construct_result(other, new_data, func)

def _combine_match_index(self, other, func, level=None):

if level is not None:
raise NotImplementedError("'level' argument is not supported")

this, other = self.align(other, join="outer", axis=0, level=level, copy=False)
this._default_fill_value = self._default_fill_value

new_data = {}
for col in this.columns:
new_data[col] = func(this[col], other)

fill_value = self._get_op_result_fill_value(other, func)

return self._constructor(
new_data,
index=this.index,
columns=self.columns,
default_fill_value=fill_value,
).__finalize__(self)
return this._construct_result(other, new_data, func)

def _combine_match_columns(self, other, func, level=None):
# patched version of DataFrame._combine_match_columns to account for
# NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series,
# where 3.0 is numpy.float64 and series is a SparseSeries. Still
# possible for this to happen, which is bothersome

if level is not None:
raise NotImplementedError("'level' argument is not supported")

left, right = self.align(other, join="outer", axis=1, level=level, copy=False)
assert left.columns.equals(right.index)
left._default_fill_value = self._default_fill_value

new_data = {}

for col in left.columns:
new_data[col] = func(left[col], right[col])

return self._constructor(
new_data,
index=left.index,
columns=left.columns,
default_fill_value=self.default_fill_value,
).__finalize__(self)
# TODO: using this changed some behavior, see GH#28025
return left._construct_result(other, new_data, func)

def _combine_const(self, other, func):
return self._apply_columns(lambda x: func(x, other))

def _construct_result(self, other, result, func):
"""
Wrap the result of an arithmetic, comparison, or logical operation.

Parameters
----------
other : object
result : SparseDataFrame
func : binary operator

Returns
-------
SparseDataFrame
"""
fill_value = self._get_op_result_fill_value(other, func)

out = self._constructor(result, index=self.index, default_fill_value=fill_value)
out.columns = self.columns
return out.__finalize__(self)

def _get_op_result_fill_value(self, other, func):
own_default = self.default_fill_value

Expand Down Expand Up @@ -643,6 +633,11 @@ def _get_op_result_fill_value(self, other, func):
else:
fill_value = func(np.float64(own_default), np.float64(other.fill_value))
fill_value = item_from_zerodim(fill_value)

elif isinstance(other, Series):
# reached via _combine_match_columns
fill_value = self.default_fill_value

else:
raise NotImplementedError(type(other))

Expand Down