Skip to content

Commit 7526823

Browse files
committed
BUG: DataFrame mixed-type arithmetic column-wise, fix DataFrame.diff upcasting->object bug close #1896
1 parent 8743be5 commit 7526823

File tree

3 files changed

+34
-18
lines changed

3 files changed

+34
-18
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ pandas 0.9.0
5959

6060
**Bug fixes**
6161

62+
- Perform arithmetic column-by-column in mixed-type DataFrame to avoid type
63+
upcasting issues. Caused downstream DataFrame.diff bug (#1896)
6264
- Fix matplotlib auto-color assignment when no custom spectrum passed. Also
6365
respect passed color keyword argument (#1711)
6466
- Fix resampling logical error with closed='left' (#1726)

pandas/core/frame.py

+23-16
Original file line numberDiff line numberDiff line change
@@ -3139,23 +3139,30 @@ def _combine_frame(self, other, func, fill_value=None, level=None):
31393139
this, other = self.align(other, join='outer', level=level, copy=False)
31403140
new_index, new_columns = this.index, this.columns
31413141

3142-
this_vals = this.values
3143-
other_vals = other.values
3142+
def _arith_op(left, right):
3143+
if fill_value is not None:
3144+
left_mask = isnull(left)
3145+
right_mask = isnull(right)
3146+
left = left.copy()
3147+
right = right.copy()
3148+
3149+
# one but not both
3150+
mask = left_mask ^ right_mask
3151+
left[left_mask & mask] = fill_value
3152+
right[right_mask & mask] = fill_value
3153+
3154+
return func(left, right)
3155+
3156+
if this._is_mixed_type or other._is_mixed_type:
3157+
# XXX no good for duplicate columns
3158+
result = {}
3159+
for col in this:
3160+
result[col] = func(this[col].values, other[col].values)
3161+
else:
3162+
result = _arith_op(this.values, other.values)
31443163

3145-
if fill_value is not None:
3146-
this_mask = isnull(this_vals)
3147-
other_mask = isnull(other_vals)
3148-
this_vals = this_vals.copy()
3149-
other_vals = other_vals.copy()
3150-
3151-
# one but not both
3152-
mask = this_mask ^ other_mask
3153-
this_vals[this_mask & mask] = fill_value
3154-
other_vals[other_mask & mask] = fill_value
3155-
3156-
result = func(this_vals, other_vals)
3157-
return self._constructor(result, index=new_index, columns=new_columns,
3158-
copy=False)
3164+
return self._constructor(result, index=new_index,
3165+
columns=new_columns, copy=False)
31593166

31603167
def _indexed_same(self, other):
31613168
same_index = self.index.equals(other.index)

pandas/tests/test_frame.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -2710,7 +2710,7 @@ def test_operators(self):
27102710

27112711
def test_operators_none_as_na(self):
27122712
df = DataFrame({"col1": [2,5.0,123,None],
2713-
"col2": [1,2,3,4]})
2713+
"col2": [1,2,3,4]}, dtype=object)
27142714

27152715
ops = [operator.add, operator.sub, operator.mul, operator.truediv]
27162716

@@ -3553,7 +3553,7 @@ def test_to_excel_float_format(self):
35533553
assert_frame_equal(rs, xp)
35543554
os.remove(filename)
35553555

3556-
def test_000to_excel_unicode_filename(self):
3556+
def test_to_excel_unicode_filename(self):
35573557
try:
35583558
import xlwt
35593559
import openpyxl
@@ -4906,6 +4906,13 @@ def test_diff(self):
49064906
assert_series_equal(the_diff['A'],
49074907
self.tsframe['A'] - self.tsframe['A'].shift(1))
49084908

4909+
def test_diff_mixed_dtype(self):
4910+
df = DataFrame(np.random.randn(5, 3))
4911+
df['A'] = np.array([1, 2, 3, 4, 5], dtype=object)
4912+
4913+
result = df.diff()
4914+
self.assert_(result[0].dtype == np.float64)
4915+
49094916
def test_pct_change(self):
49104917
rs = self.tsframe.pct_change(fill_method=None)
49114918
assert_frame_equal(rs, self.tsframe / self.tsframe.shift(1) - 1)

0 commit comments

Comments
 (0)