Skip to content

Commit 3a7b7c6

Browse files
jbrockmendel authored and tm9k1 committed
Make DataFrame arithmetic ops with 2D arrays behave like numpy analogues (pandas-dev#23000)
1 parent 7130b6b commit 3a7b7c6

File tree

3 files changed

+148
-3
lines changed

3 files changed

+148
-3
lines changed

doc/source/whatsnew/v0.24.0.txt

+33
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,39 @@ Previous Behavior:
488488
0 NaT
489489

490490

491+
.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:
492+
493+
DataFrame Arithmetic Operations Broadcasting Changes
494+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
495+
:class:`DataFrame` arithmetic operations when operating with 2-dimensional
496+
``np.ndarray`` objects now broadcast in the same way as NumPy arrays
497+
broadcast. (:issue:`23000`)
498+
499+
*Previous Behavior*:
500+
501+
.. code-block:: ipython
502+
503+
In [3]: arr = np.arange(6).reshape(3, 2)
504+
In [4]: df = pd.DataFrame(arr)
505+
In [5]: df + arr[[0], :] # 1 row, 2 columns
506+
...
507+
ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)
508+
In [6]: df + arr[:, [1]] # 1 column, 3 rows
509+
...
510+
ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (3, 1)
511+
512+
*Current Behavior*:
513+
514+
.. ipython:: python
515+
arr = np.arange(6).reshape(3, 2)
516+
df = pd.DataFrame(arr)
517+
df
518+
519+
.. ipython:: python
520+
df + arr[[0], :] # 1 row, 2 columns
521+
df + arr[:, [1]] # 1 column, 3 rows
522+
523+
491524
.. _whatsnew_0240.api.extension:
492525

493526
ExtensionType Changes

pandas/core/ops.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -1799,14 +1799,32 @@ def to_series(right):
17991799
right = to_series(right)
18001800

18011801
elif right.ndim == 2:
1802-
if left.shape != right.shape:
1802+
if right.shape == left.shape:
1803+
right = left._constructor(right, index=left.index,
1804+
columns=left.columns)
1805+
1806+
elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
1807+
# Broadcast across columns
1808+
try:
1809+
right = np.broadcast_to(right, left.shape)
1810+
except AttributeError:
1811+
# numpy < 1.10.0
1812+
right = np.tile(right, (1, left.shape[1]))
1813+
1814+
right = left._constructor(right,
1815+
index=left.index,
1816+
columns=left.columns)
1817+
1818+
elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
1819+
# Broadcast along rows
1820+
right = to_series(right[0, :])
1821+
1822+
else:
18031823
raise ValueError("Unable to coerce to DataFrame, shape "
18041824
"must be {req_shape}: given {given_shape}"
18051825
.format(req_shape=left.shape,
18061826
given_shape=right.shape))
18071827

1808-
right = left._constructor(right, index=left.index,
1809-
columns=left.columns)
18101828
elif right.ndim > 2:
18111829
raise ValueError('Unable to coerce to Series/DataFrame, dim '
18121830
'must be <= 2: {dim}'.format(dim=right.shape))

pandas/tests/frame/test_arithmetic.py

+94
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname):
9999
# Arithmetic
100100

101101
class TestFrameFlexArithmetic(object):
102+
102103
def test_df_add_td64_columnwise(self):
103104
# GH#22534 Check that column-wise addition broadcasts correctly
104105
dti = pd.date_range('2016-01-01', periods=10)
@@ -252,6 +253,99 @@ def test_arith_flex_zero_len_raises(self):
252253

253254

254255
class TestFrameArithmetic(object):
256+
def test_df_add_2d_array_rowlike_broadcasts(self):
257+
# GH#23000
258+
arr = np.arange(6).reshape(3, 2)
259+
df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
260+
261+
rowlike = arr[[1], :] # shape --> (1, ncols)
262+
assert rowlike.shape == (1, df.shape[1])
263+
264+
expected = pd.DataFrame([[2, 4],
265+
[4, 6],
266+
[6, 8]],
267+
columns=df.columns, index=df.index,
268+
# specify dtype explicitly to avoid failing
269+
# on 32bit builds
270+
dtype=arr.dtype)
271+
result = df + rowlike
272+
tm.assert_frame_equal(result, expected)
273+
result = rowlike + df
274+
tm.assert_frame_equal(result, expected)
275+
276+
def test_df_add_2d_array_collike_broadcasts(self):
277+
# GH#23000
278+
arr = np.arange(6).reshape(3, 2)
279+
df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
280+
281+
collike = arr[:, [1]] # shape --> (nrows, 1)
282+
assert collike.shape == (df.shape[0], 1)
283+
284+
expected = pd.DataFrame([[1, 2],
285+
[5, 6],
286+
[9, 10]],
287+
columns=df.columns, index=df.index,
288+
# specify dtype explicitly to avoid failing
289+
# on 32bit builds
290+
dtype=arr.dtype)
291+
result = df + collike
292+
tm.assert_frame_equal(result, expected)
293+
result = collike + df
294+
tm.assert_frame_equal(result, expected)
295+
296+
def test_df_arith_2d_array_rowlike_broadcasts(self,
297+
all_arithmetic_operators):
298+
# GH#23000
299+
opname = all_arithmetic_operators
300+
301+
arr = np.arange(6).reshape(3, 2)
302+
df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
303+
304+
rowlike = arr[[1], :] # shape --> (1, ncols)
305+
assert rowlike.shape == (1, df.shape[1])
306+
307+
exvals = [getattr(df.loc['A'], opname)(rowlike.squeeze()),
308+
getattr(df.loc['B'], opname)(rowlike.squeeze()),
309+
getattr(df.loc['C'], opname)(rowlike.squeeze())]
310+
311+
expected = pd.DataFrame(exvals, columns=df.columns, index=df.index)
312+
313+
if opname in ['__rmod__', '__rfloordiv__']:
314+
# exvals will have dtypes [f8, i8, i8] so expected will be
315+
# all-f8, but the DataFrame operation will return mixed dtypes
316+
# use exvals[-1].dtype instead of "i8" for compat with 32-bit
317+
# systems/pythons
318+
expected[False] = expected[False].astype(exvals[-1].dtype)
319+
320+
result = getattr(df, opname)(rowlike)
321+
tm.assert_frame_equal(result, expected)
322+
323+
def test_df_arith_2d_array_collike_broadcasts(self,
324+
all_arithmetic_operators):
325+
# GH#23000
326+
opname = all_arithmetic_operators
327+
328+
arr = np.arange(6).reshape(3, 2)
329+
df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C'])
330+
331+
collike = arr[:, [1]] # shape --> (nrows, 1)
332+
assert collike.shape == (df.shape[0], 1)
333+
334+
exvals = {True: getattr(df[True], opname)(collike.squeeze()),
335+
False: getattr(df[False], opname)(collike.squeeze())}
336+
337+
dtype = None
338+
if opname in ['__rmod__', '__rfloordiv__']:
339+
# Series ops may return mixed int/float dtypes in cases where
340+
# DataFrame op will return all-float. So we upcast `expected`
341+
dtype = np.common_type(*[x.values for x in exvals.values()])
342+
343+
expected = pd.DataFrame(exvals, columns=df.columns, index=df.index,
344+
dtype=dtype)
345+
346+
result = getattr(df, opname)(collike)
347+
tm.assert_frame_equal(result, expected)
348+
255349
def test_df_bool_mul_int(self):
256350
# GH#22047, GH#22163 multiplication by 1 should result in int dtype,
257351
# not object dtype

0 commit comments

Comments
 (0)