Skip to content

Commit 1b16a5e

Browse files
jbrockmendelproost
authored andcommitted
REF: standardize usage in DataFrame vs SparseDataFrame ops (pandas-dev#28027)
1 parent a5c41ae commit 1b16a5e

File tree

3 files changed

+75
-60
lines changed

3 files changed

+75
-60
lines changed

pandas/core/frame.py

+36-13
Original file line numberDiff line numberDiff line change
@@ -5328,7 +5328,6 @@ def reorder_levels(self, order, axis=0):
53285328

53295329
def _combine_frame(self, other, func, fill_value=None, level=None):
53305330
this, other = self.align(other, join="outer", level=level, copy=False)
5331-
new_index, new_columns = this.index, this.columns
53325331

53335332
if fill_value is None:
53345333
# since _arith_op may be called in a loop, avoid function call
@@ -5346,38 +5345,62 @@ def _arith_op(left, right):
53465345

53475346
if ops.should_series_dispatch(this, other, func):
53485347
# iterate over columns
5349-
return ops.dispatch_to_series(this, other, _arith_op)
5348+
new_data = ops.dispatch_to_series(this, other, _arith_op)
53505349
else:
53515350
with np.errstate(all="ignore"):
5352-
result = _arith_op(this.values, other.values)
5353-
result = dispatch_fill_zeros(func, this.values, other.values, result)
5354-
return self._constructor(
5355-
result, index=new_index, columns=new_columns, copy=False
5356-
)
5351+
res_values = _arith_op(this.values, other.values)
5352+
new_data = dispatch_fill_zeros(func, this.values, other.values, res_values)
5353+
return this._construct_result(other, new_data, _arith_op)
53575354

53585355
def _combine_match_index(self, other, func, level=None):
53595356
left, right = self.align(other, join="outer", axis=0, level=level, copy=False)
53605357
# at this point we have `left.index.equals(right.index)`
53615358

53625359
if left._is_mixed_type or right._is_mixed_type:
53635360
# operate column-wise; avoid costly object-casting in `.values`
5364-
return ops.dispatch_to_series(left, right, func)
5361+
new_data = ops.dispatch_to_series(left, right, func)
53655362
else:
53665363
# fastpath --> operate directly on values
53675364
with np.errstate(all="ignore"):
53685365
new_data = func(left.values.T, right.values).T
5369-
return self._constructor(
5370-
new_data, index=left.index, columns=self.columns, copy=False
5371-
)
5366+
return left._construct_result(other, new_data, func)
53725367

53735368
def _combine_match_columns(self, other: Series, func, level=None):
53745369
left, right = self.align(other, join="outer", axis=1, level=level, copy=False)
53755370
# at this point we have `left.columns.equals(right.index)`
5376-
return ops.dispatch_to_series(left, right, func, axis="columns")
5371+
new_data = ops.dispatch_to_series(left, right, func, axis="columns")
5372+
return left._construct_result(right, new_data, func)
53775373

53785374
def _combine_const(self, other, func):
53795375
# scalar other or np.ndim(other) == 0
5380-
return ops.dispatch_to_series(self, other, func)
5376+
new_data = ops.dispatch_to_series(self, other, func)
5377+
return self._construct_result(other, new_data, func)
5378+
5379+
def _construct_result(self, other, result, func):
5380+
"""
5381+
Wrap the result of an arithmetic, comparison, or logical operation.
5382+
5383+
Parameters
5384+
----------
5385+
other : object
5386+
result : DataFrame
5387+
func : binary operator
5388+
5389+
Returns
5390+
-------
5391+
DataFrame
5392+
5393+
Notes
5394+
-----
5395+
`func` is included for compat with SparseDataFrame signature, is not
5396+
needed here.
5397+
"""
5398+
out = self._constructor(result, index=self.index, copy=False)
5399+
# Pin columns instead of passing to constructor for compat with
5400+
# non-unique columns case
5401+
out.columns = self.columns
5402+
return out
5403+
# TODO: finalize? we do for SparseDataFrame
53815404

53825405
def combine(self, other, func, fill_value=None, overwrite=True):
53835406
"""

pandas/core/ops/__init__.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -513,12 +513,7 @@ def column_op(a, b):
513513
raise NotImplementedError(right)
514514

515515
new_data = expressions.evaluate(column_op, str_rep, left, right)
516-
517-
result = left._constructor(new_data, index=left.index, copy=False)
518-
# Pin columns instead of passing to constructor for compat with
519-
# non-unique columns case
520-
result.columns = left.columns
521-
return result
516+
return new_data
522517

523518

524519
def dispatch_to_extension_op(
@@ -1057,7 +1052,8 @@ def f(self, other, axis=default_axis, level=None):
10571052
# Another DataFrame
10581053
if not self._indexed_same(other):
10591054
self, other = self.align(other, "outer", level=level, copy=False)
1060-
return dispatch_to_series(self, other, na_op, str_rep)
1055+
new_data = dispatch_to_series(self, other, na_op, str_rep)
1056+
return self._construct_result(other, new_data, na_op)
10611057

10621058
elif isinstance(other, ABCSeries):
10631059
return _combine_series_frame(
@@ -1087,7 +1083,8 @@ def f(self, other):
10871083
raise ValueError(
10881084
"Can only compare identically-labeled DataFrame objects"
10891085
)
1090-
return dispatch_to_series(self, other, func, str_rep)
1086+
new_data = dispatch_to_series(self, other, func, str_rep)
1087+
return self._construct_result(other, new_data, func)
10911088

10921089
elif isinstance(other, ABCSeries):
10931090
return _combine_series_frame(

pandas/core/sparse/frame.py

+34-39
Original file line numberDiff line numberDiff line change
@@ -534,19 +534,13 @@ def _set_value(self, index, col, value, takeable=False):
534534
# Arithmetic-related methods
535535

536536
def _combine_frame(self, other, func, fill_value=None, level=None):
537-
if level is not None:
538-
raise NotImplementedError("'level' argument is not supported")
539-
540537
this, other = self.align(other, join="outer", level=level, copy=False)
541-
new_index, new_columns = this.index, this.columns
542-
543-
if self.empty and other.empty:
544-
return self._constructor(index=new_index).__finalize__(self)
538+
this._default_fill_value = self._default_fill_value
545539

546540
new_data = {}
547541
if fill_value is not None:
548542
# TODO: be a bit more intelligent here
549-
for col in new_columns:
543+
for col in this.columns:
550544
if col in this and col in other:
551545
dleft = this[col].to_dense()
552546
dright = other[col].to_dense()
@@ -555,66 +549,62 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
555549
new_data[col] = result
556550
else:
557551

558-
for col in new_columns:
552+
for col in this.columns:
559553
if col in this and col in other:
560554
new_data[col] = func(this[col], other[col])
561555

562-
new_fill_value = self._get_op_result_fill_value(other, func)
563-
564-
return self._constructor(
565-
data=new_data,
566-
index=new_index,
567-
columns=new_columns,
568-
default_fill_value=new_fill_value,
569-
).__finalize__(self)
556+
return this._construct_result(other, new_data, func)
570557

571558
def _combine_match_index(self, other, func, level=None):
572-
573-
if level is not None:
574-
raise NotImplementedError("'level' argument is not supported")
575-
576559
this, other = self.align(other, join="outer", axis=0, level=level, copy=False)
560+
this._default_fill_value = self._default_fill_value
577561

578562
new_data = {}
579563
for col in this.columns:
580564
new_data[col] = func(this[col], other)
581565

582-
fill_value = self._get_op_result_fill_value(other, func)
583-
584-
return self._constructor(
585-
new_data,
586-
index=this.index,
587-
columns=self.columns,
588-
default_fill_value=fill_value,
589-
).__finalize__(self)
566+
return this._construct_result(other, new_data, func)
590567

591568
def _combine_match_columns(self, other, func, level=None):
592569
# patched version of DataFrame._combine_match_columns to account for
593570
# NumPy circumventing __rsub__ with float64 types, e.g.: 3.0 - series,
594571
# where 3.0 is numpy.float64 and series is a SparseSeries. Still
595572
# possible for this to happen, which is bothersome
596573

597-
if level is not None:
598-
raise NotImplementedError("'level' argument is not supported")
599-
600574
left, right = self.align(other, join="outer", axis=1, level=level, copy=False)
601575
assert left.columns.equals(right.index)
576+
left._default_fill_value = self._default_fill_value
602577

603578
new_data = {}
604-
605579
for col in left.columns:
606580
new_data[col] = func(left[col], right[col])
607581

608-
return self._constructor(
609-
new_data,
610-
index=left.index,
611-
columns=left.columns,
612-
default_fill_value=self.default_fill_value,
613-
).__finalize__(self)
582+
# TODO: using this changed some behavior, see GH#28025
583+
return left._construct_result(other, new_data, func)
614584

615585
def _combine_const(self, other, func):
616586
return self._apply_columns(lambda x: func(x, other))
617587

588+
def _construct_result(self, other, result, func):
589+
"""
590+
Wrap the result of an arithmetic, comparison, or logical operation.
591+
592+
Parameters
593+
----------
594+
other : object
595+
result : SparseDataFrame
596+
func : binary operator
597+
598+
Returns
599+
-------
600+
SparseDataFrame
601+
"""
602+
fill_value = self._get_op_result_fill_value(other, func)
603+
604+
out = self._constructor(result, index=self.index, default_fill_value=fill_value)
605+
out.columns = self.columns
606+
return out.__finalize__(self)
607+
618608
def _get_op_result_fill_value(self, other, func):
619609
own_default = self.default_fill_value
620610

@@ -643,6 +633,11 @@ def _get_op_result_fill_value(self, other, func):
643633
else:
644634
fill_value = func(np.float64(own_default), np.float64(other.fill_value))
645635
fill_value = item_from_zerodim(fill_value)
636+
637+
elif isinstance(other, Series):
638+
# reached via _combine_match_columns
639+
fill_value = self.default_fill_value
640+
646641
else:
647642
raise NotImplementedError(type(other))
648643

0 commit comments

Comments
 (0)