Skip to content

Commit c0f761d

Browse files
jbrockmendel authored and harisbal committed
dispatch frame methods to series versions instead of re-implementing masking etc (pandas-dev#19611)
1 parent 563367f commit c0f761d

File tree

4 files changed: +138 -108 lines changed

pandas/core/frame.py

+17-61
Original file line number | Diff line number | Diff line change
@@ -3944,71 +3944,39 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
39443944
new_index, new_columns = this.index, this.columns
39453945

39463946
def _arith_op(left, right):
3947+
# for the mixed_type case where we iterate over columns,
3948+
# _arith_op(left, right) is equivalent to
3949+
# left._binop(right, func, fill_value=fill_value)
39473950
left, right = ops.fill_binop(left, right, fill_value)
39483951
return func(left, right)
39493952

39503953
if this._is_mixed_type or other._is_mixed_type:
3951-
3952-
# unique
3954+
# iterate over columns
39533955
if this.columns.is_unique:
3954-
3955-
def f(col):
3956-
r = _arith_op(this[col].values, other[col].values)
3957-
return self._constructor_sliced(r, index=new_index,
3958-
dtype=r.dtype)
3959-
3960-
result = {col: f(col) for col in this}
3961-
3962-
# non-unique
3956+
# unique columns
3957+
result = {col: _arith_op(this[col], other[col])
3958+
for col in this}
3959+
result = self._constructor(result, index=new_index,
3960+
columns=new_columns, copy=False)
39633961
else:
3964-
3965-
def f(i):
3966-
r = _arith_op(this.iloc[:, i].values,
3967-
other.iloc[:, i].values)
3968-
return self._constructor_sliced(r, index=new_index,
3969-
dtype=r.dtype)
3970-
3971-
result = {i: f(i) for i, col in enumerate(this.columns)}
3962+
# non-unique columns
3963+
result = {i: _arith_op(this.iloc[:, i], other.iloc[:, i])
3964+
for i, col in enumerate(this.columns)}
39723965
result = self._constructor(result, index=new_index, copy=False)
39733966
result.columns = new_columns
3974-
return result
3967+
return result
39753968

39763969
else:
39773970
result = _arith_op(this.values, other.values)
39783971

39793972
return self._constructor(result, index=new_index, columns=new_columns,
39803973
copy=False)
39813974

3982-
def _combine_series(self, other, func, fill_value=None, axis=None,
3983-
level=None, try_cast=True):
3984-
if fill_value is not None:
3985-
raise NotImplementedError("fill_value {fill} not supported."
3986-
.format(fill=fill_value))
3987-
3988-
if axis is not None:
3989-
axis = self._get_axis_name(axis)
3990-
if axis == 'index':
3991-
return self._combine_match_index(other, func, level=level)
3992-
else:
3993-
return self._combine_match_columns(other, func, level=level,
3994-
try_cast=try_cast)
3995-
else:
3996-
if not len(other):
3997-
return self * np.nan
3998-
3999-
if not len(self):
4000-
# Ambiguous case, use _series so works with DataFrame
4001-
return self._constructor(data=self._series, index=self.index,
4002-
columns=self.columns)
4003-
4004-
# default axis is columns
4005-
return self._combine_match_columns(other, func, level=level,
4006-
try_cast=try_cast)
4007-
40083975
def _combine_match_index(self, other, func, level=None):
40093976
left, right = self.align(other, join='outer', axis=0, level=level,
40103977
copy=False)
4011-
return self._constructor(func(left.values.T, right.values).T,
3978+
new_data = func(left.values.T, right.values).T
3979+
return self._constructor(new_data,
40123980
index=left.index, columns=self.columns,
40133981
copy=False)
40143982

@@ -4027,7 +3995,8 @@ def _combine_const(self, other, func, errors='raise', try_cast=True):
40273995
try_cast=try_cast)
40283996
return self._constructor(new_data)
40293997

4030-
def _compare_frame_evaluate(self, other, func, str_rep, try_cast=True):
3998+
def _compare_frame(self, other, func, str_rep, try_cast=True):
3999+
# compare_frame assumes self._indexed_same(other)
40314000

40324001
import pandas.core.computation.expressions as expressions
40334002
# unique
@@ -4052,19 +4021,6 @@ def _compare(a, b):
40524021
result.columns = self.columns
40534022
return result
40544023

4055-
def _compare_frame(self, other, func, str_rep, try_cast=True):
4056-
if not self._indexed_same(other):
4057-
raise ValueError('Can only compare identically-labeled '
4058-
'DataFrame objects')
4059-
return self._compare_frame_evaluate(other, func, str_rep,
4060-
try_cast=try_cast)
4061-
4062-
def _flex_compare_frame(self, other, func, str_rep, level, try_cast=True):
4063-
if not self._indexed_same(other):
4064-
self, other = self.align(other, 'outer', level=level, copy=False)
4065-
return self._compare_frame_evaluate(other, func, str_rep,
4066-
try_cast=try_cast)
4067-
40684024
def combine(self, other, func, fill_value=None, overwrite=True):
40694025
"""
40704026
Add two DataFrame objects and do not propagate NaN values, so if for a

pandas/core/indexes/base.py

+15-34
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@
5555
import pandas.core.algorithms as algos
5656
import pandas.core.sorting as sorting
5757
from pandas.io.formats.printing import pprint_thing
58-
from pandas.core.ops import _comp_method_OBJECT_ARRAY
58+
from pandas.core.ops import _comp_method_OBJECT_ARRAY, make_invalid_op
5959
from pandas.core.config import get_option
6060
from pandas.core.strings import StringMethods
6161

@@ -82,26 +82,6 @@ def _try_get_item(x):
8282
return x
8383

8484

85-
def _make_invalid_op(name):
86-
"""
87-
Return a binary method that always raises a TypeError.
88-
89-
Parameters
90-
----------
91-
name : str
92-
93-
Returns
94-
-------
95-
invalid_op : function
96-
"""
97-
def invalid_op(self, other=None):
98-
raise TypeError("cannot perform {name} with this index type: "
99-
"{typ}".format(name=name, typ=type(self)))
100-
101-
invalid_op.__name__ = name
102-
return invalid_op
103-
104-
10585
class InvalidIndexError(Exception):
10686
pass
10787

@@ -3994,22 +3974,23 @@ def _evaluate_compare(self, other):
39943974
@classmethod
39953975
def _add_numeric_methods_add_sub_disabled(cls):
39963976
""" add in the numeric add/sub methods to disable """
3997-
cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa
3998-
cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa
3977+
cls.__add__ = cls.__radd__ = __iadd__ = make_invalid_op('__add__') # noqa
3978+
cls.__sub__ = __isub__ = make_invalid_op('__sub__') # noqa
39993979

40003980
@classmethod
40013981
def _add_numeric_methods_disabled(cls):
40023982
""" add in numeric methods to disable other than add/sub """
4003-
cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__')
4004-
cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__')
4005-
cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__')
4006-
cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__')
3983+
cls.__pow__ = make_invalid_op('__pow__')
3984+
cls.__rpow__ = make_invalid_op('__rpow__')
3985+
cls.__mul__ = cls.__rmul__ = make_invalid_op('__mul__')
3986+
cls.__floordiv__ = cls.__rfloordiv__ = make_invalid_op('__floordiv__')
3987+
cls.__truediv__ = cls.__rtruediv__ = make_invalid_op('__truediv__')
40073988
if not compat.PY3:
4008-
cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__')
4009-
cls.__neg__ = _make_invalid_op('__neg__')
4010-
cls.__pos__ = _make_invalid_op('__pos__')
4011-
cls.__abs__ = _make_invalid_op('__abs__')
4012-
cls.__inv__ = _make_invalid_op('__inv__')
3989+
cls.__div__ = cls.__rdiv__ = make_invalid_op('__div__')
3990+
cls.__neg__ = make_invalid_op('__neg__')
3991+
cls.__pos__ = make_invalid_op('__pos__')
3992+
cls.__abs__ = make_invalid_op('__abs__')
3993+
cls.__inv__ = make_invalid_op('__inv__')
40133994

40143995
def _maybe_update_attributes(self, attrs):
40153996
""" Update Index attributes (e.g. freq) depending on op """
@@ -4207,8 +4188,8 @@ def logical_func(self, *args, **kwargs):
42074188
@classmethod
42084189
def _add_logical_methods_disabled(cls):
42094190
""" add in logical methods to disable """
4210-
cls.all = _make_invalid_op('all')
4211-
cls.any = _make_invalid_op('any')
4191+
cls.all = make_invalid_op('all')
4192+
cls.any = make_invalid_op('any')
42124193

42134194

42144195
Index._add_numeric_methods_disabled()

pandas/core/ops.py

+89-13
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,26 @@ def rxor(left, right):
9696

9797
# -----------------------------------------------------------------------------
9898

99+
def make_invalid_op(name):
100+
"""
101+
Return a binary method that always raises a TypeError.
102+
103+
Parameters
104+
----------
105+
name : str
106+
107+
Returns
108+
-------
109+
invalid_op : function
110+
"""
111+
def invalid_op(self, other=None):
112+
raise TypeError("cannot perform {name} with this index type: "
113+
"{typ}".format(name=name, typ=type(self).__name__))
114+
115+
invalid_op.__name__ = name
116+
return invalid_op
117+
118+
99119
def _gen_eval_kwargs(name):
100120
"""
101121
Find the keyword arguments to pass to numexpr for the given operation.
@@ -1047,8 +1067,8 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
10471067
elif isinstance(other, (np.ndarray, list, tuple)):
10481068
if len(other) != len(self):
10491069
raise ValueError('Lengths must be equal')
1050-
return self._binop(self._constructor(other, self.index), op,
1051-
level=level, fill_value=fill_value)
1070+
other = self._constructor(other, self.index)
1071+
return self._binop(other, op, level=level, fill_value=fill_value)
10521072
else:
10531073
if fill_value is not None:
10541074
self = self.fillna(fill_value)
@@ -1071,6 +1091,51 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
10711091
# -----------------------------------------------------------------------------
10721092
# DataFrame
10731093

1094+
def _combine_series_frame(self, other, func, fill_value=None, axis=None,
1095+
level=None, try_cast=True):
1096+
"""
1097+
Apply binary operator `func` to self, other using alignment and fill
1098+
conventions determined by the fill_value, axis, level, and try_cast kwargs.
1099+
1100+
Parameters
1101+
----------
1102+
self : DataFrame
1103+
other : Series
1104+
func : binary operator
1105+
fill_value : object, default None
1106+
axis : {0, 1, 'columns', 'index', None}, default None
1107+
level : int or None, default None
1108+
try_cast : bool, default True
1109+
1110+
Returns
1111+
-------
1112+
result : DataFrame
1113+
"""
1114+
if fill_value is not None:
1115+
raise NotImplementedError("fill_value {fill} not supported."
1116+
.format(fill=fill_value))
1117+
1118+
if axis is not None:
1119+
axis = self._get_axis_number(axis)
1120+
if axis == 0:
1121+
return self._combine_match_index(other, func, level=level)
1122+
else:
1123+
return self._combine_match_columns(other, func, level=level,
1124+
try_cast=try_cast)
1125+
else:
1126+
if not len(other):
1127+
return self * np.nan
1128+
1129+
if not len(self):
1130+
# Ambiguous case, use _series so works with DataFrame
1131+
return self._constructor(data=self._series, index=self.index,
1132+
columns=self.columns)
1133+
1134+
# default axis is columns
1135+
return self._combine_match_columns(other, func, level=level,
1136+
try_cast=try_cast)
1137+
1138+
10741139
def _align_method_FRAME(left, right, axis):
10751140
""" convert rhs to meet lhs dims if input is list, tuple or np.ndarray """
10761141

@@ -1179,8 +1244,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
11791244
if isinstance(other, ABCDataFrame): # Another DataFrame
11801245
return self._combine_frame(other, na_op, fill_value, level)
11811246
elif isinstance(other, ABCSeries):
1182-
return self._combine_series(other, na_op, fill_value, axis, level,
1183-
try_cast=True)
1247+
return _combine_series_frame(self, other, na_op,
1248+
fill_value=fill_value, axis=axis,
1249+
level=level, try_cast=True)
11841250
else:
11851251
if fill_value is not None:
11861252
self = self.fillna(fill_value)
@@ -1209,13 +1275,17 @@ def f(self, other, axis=default_axis, level=None):
12091275

12101276
other = _align_method_FRAME(self, other, axis)
12111277

1212-
if isinstance(other, ABCDataFrame): # Another DataFrame
1213-
return self._flex_compare_frame(other, na_op, str_rep, level,
1214-
try_cast=False)
1278+
if isinstance(other, ABCDataFrame):
1279+
# Another DataFrame
1280+
if not self._indexed_same(other):
1281+
self, other = self.align(other, 'outer',
1282+
level=level, copy=False)
1283+
return self._compare_frame(other, na_op, str_rep, try_cast=False)
12151284

12161285
elif isinstance(other, ABCSeries):
1217-
return self._combine_series(other, na_op, None, axis, level,
1218-
try_cast=False)
1286+
return _combine_series_frame(self, other, na_op,
1287+
fill_value=None, axis=axis,
1288+
level=level, try_cast=False)
12191289
else:
12201290
return self._combine_const(other, na_op, try_cast=False)
12211291

@@ -1227,11 +1297,17 @@ def f(self, other, axis=default_axis, level=None):
12271297
def _comp_method_FRAME(func, name, str_rep):
12281298
@Appender('Wrapper for comparison method {name}'.format(name=name))
12291299
def f(self, other):
1230-
if isinstance(other, ABCDataFrame): # Another DataFrame
1231-
return self._compare_frame(other, func, str_rep)
1300+
if isinstance(other, ABCDataFrame):
1301+
# Another DataFrame
1302+
if not self._indexed_same(other):
1303+
raise ValueError('Can only compare identically-labeled '
1304+
'DataFrame objects')
1305+
return self._compare_frame(other, func, str_rep, try_cast=True)
1306+
12321307
elif isinstance(other, ABCSeries):
1233-
return self._combine_series(other, func,
1234-
axis=None, try_cast=False)
1308+
return _combine_series_frame(self, other, func,
1309+
fill_value=None, axis=None,
1310+
level=None, try_cast=False)
12351311
else:
12361312

12371313
# straight boolean comparisons we want to allow all columns

pandas/tests/frame/test_arithmetic.py

+17
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,23 @@ def test_tz_aware_scalar_comparison(self, timestamps):
7272
# -------------------------------------------------------------------
7373
# Arithmetic
7474

75+
class TestFrameFlexArithmetic(object):
76+
def test_df_add_flex_filled_mixed_dtypes(self):
77+
# GH#19611
78+
dti = pd.date_range('2016-01-01', periods=3)
79+
ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]')
80+
df = pd.DataFrame({'A': dti, 'B': ser})
81+
other = pd.DataFrame({'A': ser, 'B': ser})
82+
fill = pd.Timedelta(days=1).to_timedelta64()
83+
result = df.add(other, fill_value=fill)
84+
85+
expected = pd.DataFrame(
86+
{'A': pd.Series(['2016-01-02', '2016-01-03', '2016-01-05'],
87+
dtype='datetime64[ns]'),
88+
'B': ser * 2})
89+
tm.assert_frame_equal(result, expected)
90+
91+
7592
class TestFrameMulDiv(object):
7693
"""Tests for DataFrame multiplication and division"""
7794
# ------------------------------------------------------------------

0 commit comments

Comments
 (0)