From 2206df42d53d490a94653e92e06a944d59afa493 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Oct 2018 15:03:51 -0700 Subject: [PATCH 01/13] Make DataFrame arithmetic ops with 2D arrays behave like numpy analogues --- pandas/core/ops.py | 26 +++++++++++++++++++--- pandas/tests/frame/test_arithmetic.py | 32 +++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index dc99faaf68f51..00aeec3a61629 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1798,15 +1798,35 @@ def to_series(right): if right.ndim == 1: right = to_series(right) + elif right.ndim == 2: - if left.shape != right.shape: + if right.shape == left.shape: + right = left._constructor(right, index=left.index, + columns=left.columns) + + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + try: + right = np.broadcast_to(right, left.shape) + except AttributeError: + # numpy < 1.10.0 + right = np.tile(right, (1, left.shape[1])) + + right = left._constructor(right, + index=left.index, + columns=left.columns) + # TODO: Double-check this doesn't make copies + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: raise ValueError("Unable to coerce to DataFrame, shape " "must be {req_shape}: given {given_shape}" .format(req_shape=left.shape, given_shape=right.shape)) - right = left._constructor(right, index=left.index, - columns=left.columns) elif right.ndim > 2: raise ValueError('Unable to coerce to Series/DataFrame, dim ' 'must be <= 2: {dim}'.format(dim=right.shape)) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 2eb11c3a2e2f7..bb0af6b330311 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -99,6 +99,38 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): # Arithmetic class 
TestFrameFlexArithmetic(object): + # TODO: tests for other arithmetic ops + def test_df_add_2d_array_rowlike_broadcasts(self): + # GH# + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + rowlike = arr[[1], :] # shape --> (1, ncols) + expected = pd.DataFrame([[2, 4], + [4, 6], + [6, 8]], + columns=df.columns, index=df.index) + result = df + rowlike + tm.assert_frame_equal(result, expected) + result = rowlike + df + tm.assert_frame_equal(result, expected) + + # TODO: tests for other arithmetic ops + def test_df_add_2d_array_collike_broadcasts(self): + # GH# + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + collike = arr[[1], :] # shape --> (nrows, 1) + expected = pd.DataFrame([[0, 1], + [4, 7], + [8, 9]], + columns=df.columns, index=df.index) + result = df + collike + tm.assert_frame_equal(result, expected) + result = collike + df + tm.assert_frame_equal(result, expected) + def test_df_add_td64_columnwise(self): # GH#22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) From 1616e41a9db51f33c700fffd4fc56297fbeb73bc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Oct 2018 16:51:10 -0700 Subject: [PATCH 02/13] fix incorrectly-written test --- pandas/tests/frame/test_arithmetic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index bb0af6b330311..445f38956575d 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -122,9 +122,9 @@ def test_df_add_2d_array_collike_broadcasts(self): df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) collike = arr[[1], :] # shape --> (nrows, 1) - expected = pd.DataFrame([[0, 1], - [4, 7], - [8, 9]], + expected = pd.DataFrame([[2, 4], + [4, 6], + [6, 8]], columns=df.columns, index=df.index) result = df + 
collike tm.assert_frame_equal(result, expected) From a0e27da67af41133985bfc34b871d900198c81d8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Oct 2018 18:01:43 -0700 Subject: [PATCH 03/13] Specify dtype to avoid failing on 32bit builds --- pandas/tests/frame/test_arithmetic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 445f38956575d..13527d219421d 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -125,7 +125,10 @@ def test_df_add_2d_array_collike_broadcasts(self): expected = pd.DataFrame([[2, 4], [4, 6], [6, 8]], - columns=df.columns, index=df.index) + columns=df.columns, index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype) result = df + collike tm.assert_frame_equal(result, expected) result = collike + df From 69fa4bbbb8d3d120ee32127dd6b4ae0efe7e3cb0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Oct 2018 18:02:21 -0700 Subject: [PATCH 04/13] Specify dtype to avoid failing on 32bit builds --- pandas/tests/frame/test_arithmetic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 13527d219421d..df10f7516bc55 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -109,7 +109,10 @@ def test_df_add_2d_array_rowlike_broadcasts(self): expected = pd.DataFrame([[2, 4], [4, 6], [6, 8]], - columns=df.columns, index=df.index) + columns=df.columns, index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype) result = df + rowlike tm.assert_frame_equal(result, expected) result = rowlike + df From f67be3bca596b20d25739b6c5dea1f883f9e2b17 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Oct 2018 20:28:35 -0700 Subject: [PATCH 05/13] flake8 whitespace fixup --- 
pandas/core/ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 00aeec3a61629..d46a04fa124b7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1798,7 +1798,6 @@ def to_series(right): if right.ndim == 1: right = to_series(right) - elif right.ndim == 2: if right.shape == left.shape: right = left._constructor(right, index=left.index, From 78d4517d2be8a7d8d2457afc4295252c688c3d5d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Oct 2018 09:24:04 -0700 Subject: [PATCH 06/13] add shape assertions --- pandas/tests/frame/test_arithmetic.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index df10f7516bc55..3a07f56aa10ce 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -98,7 +98,7 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): # ------------------------------------------------------------------- # Arithmetic -class TestFrameFlexArithmetic(object): +class TestFrameArithmetic(object): # TODO: tests for other arithmetic ops def test_df_add_2d_array_rowlike_broadcasts(self): # GH# @@ -106,6 +106,8 @@ def test_df_add_2d_array_rowlike_broadcasts(self): df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + expected = pd.DataFrame([[2, 4], [4, 6], [6, 8]], @@ -124,7 +126,9 @@ def test_df_add_2d_array_collike_broadcasts(self): arr = np.arange(6).reshape(3, 2) df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) - collike = arr[[1], :] # shape --> (nrows, 1) + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + expected = pd.DataFrame([[2, 4], [4, 6], [6, 8]], @@ -137,6 +141,9 @@ def test_df_add_2d_array_collike_broadcasts(self): result = collike + df tm.assert_frame_equal(result, 
expected) + +class TestFrameFlexArithmetic(object): + def test_df_add_td64_columnwise(self): # GH#22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) From 21ae42daa197447c018a3d7f321f52cb3bd06600 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Oct 2018 09:24:34 -0700 Subject: [PATCH 07/13] add shape assertions --- pandas/tests/frame/test_arithmetic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3a07f56aa10ce..7ebcaf3120d95 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -129,9 +129,9 @@ def test_df_add_2d_array_collike_broadcasts(self): collike = arr[:, [1]] # shape --> (nrows, 1) assert collike.shape == (df.shape[0], 1) - expected = pd.DataFrame([[2, 4], - [4, 6], - [6, 8]], + expected = pd.DataFrame([[1, 2], + [5, 6], + [9, 10]], columns=df.columns, index=df.index, # specify dtype explicitly to avoid failing # on 32bit builds From 34edbb4ab634d2ab13705f0a5e1ba352382dc8a5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Oct 2018 10:15:56 -0700 Subject: [PATCH 08/13] fix screwup --- pandas/tests/frame/test_arithmetic.py | 86 +++++++++++++-------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 7ebcaf3120d95..acd2f2ecc7fcf 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -98,50 +98,6 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): # ------------------------------------------------------------------- # Arithmetic -class TestFrameArithmetic(object): - # TODO: tests for other arithmetic ops - def test_df_add_2d_array_rowlike_broadcasts(self): - # GH# - arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) - - rowlike = 
arr[[1], :] # shape --> (1, ncols) - assert rowlike.shape == (1, df.shape[1]) - - expected = pd.DataFrame([[2, 4], - [4, 6], - [6, 8]], - columns=df.columns, index=df.index, - # specify dtype explicitly to avoid failing - # on 32bit builds - dtype=arr.dtype) - result = df + rowlike - tm.assert_frame_equal(result, expected) - result = rowlike + df - tm.assert_frame_equal(result, expected) - - # TODO: tests for other arithmetic ops - def test_df_add_2d_array_collike_broadcasts(self): - # GH# - arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) - - collike = arr[:, [1]] # shape --> (nrows, 1) - assert collike.shape == (df.shape[0], 1) - - expected = pd.DataFrame([[1, 2], - [5, 6], - [9, 10]], - columns=df.columns, index=df.index, - # specify dtype explicitly to avoid failing - # on 32bit builds - dtype=arr.dtype) - result = df + collike - tm.assert_frame_equal(result, expected) - result = collike + df - tm.assert_frame_equal(result, expected) - - class TestFrameFlexArithmetic(object): def test_df_add_td64_columnwise(self): @@ -297,6 +253,48 @@ def test_arith_flex_zero_len_raises(self): class TestFrameArithmetic(object): + # TODO: tests for other arithmetic ops + def test_df_add_2d_array_rowlike_broadcasts(self): + # GH# + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + expected = pd.DataFrame([[2, 4], + [4, 6], + [6, 8]], + columns=df.columns, index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype) + result = df + rowlike + tm.assert_frame_equal(result, expected) + result = rowlike + df + tm.assert_frame_equal(result, expected) + + # TODO: tests for other arithmetic ops + def test_df_add_2d_array_collike_broadcasts(self): + # GH# + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 
'B', 'C']) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + expected = pd.DataFrame([[1, 2], + [5, 6], + [9, 10]], + columns=df.columns, index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype) + result = df + collike + tm.assert_frame_equal(result, expected) + result = collike + df + tm.assert_frame_equal(result, expected) + def test_df_bool_mul_int(self): # GH#22047, GH#22163 multiplication by 1 should result in int dtype, # not object dtype From b6211450a299cf3119bfc4558149f977c8c759fa Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Oct 2018 10:49:34 -0700 Subject: [PATCH 09/13] Whatsnew entry --- doc/source/whatsnew/v0.24.0.txt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f246ebad3aa2c..cf52c5a37a454 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -487,6 +487,36 @@ Previous Behavior: 0 NaT +.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting: + +DataFrame Arithmetic Operations Broadcasting Changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:class:`DataFrame` arithmetic operations when operating with 2-dimensional +``np.ndarray`` objects now broadcast in the same way as ``np.ndarray`` +objects broadcast. (:issue:`23000`) + +Previous Behavior: + +.. code-block:: ipython + + In [3]: arr = np.arange(6).reshape(3, 2) + In [4]: df = pd.DataFrame(arr) + In [5]: df + arr[[0], :] # 1 row, 2 columns + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + In [6]: df + arr[:, [1]] # 1 column, 3 rows + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (3, 1) + +*Current Behavior*: + +.. 
ipython:: python + In [3]: arr = np.arange(6).reshape(3, 2) + In [4]: df = pd.DataFrame(arr) + In [5]: df + arr[[0], :] # 1 row, 2 columns + In [6]: df + arr[:, [1]] # 1 column, 3 rows + + .. _whatsnew_0240.api.extension: ExtensionType Changes From 23a96e5d74de78db21c7c84366b72002ec9d9c95 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 08:45:19 -0700 Subject: [PATCH 10/13] suggested edits to whatsnew --- doc/source/whatsnew/v0.24.0.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index edbcb61b851ad..c2a859ab00fb1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -512,10 +512,12 @@ Previous Behavior: *Current Behavior*: .. ipython:: python - In [3]: arr = np.arange(6).reshape(3, 2) - In [4]: df = pd.DataFrame(arr) - In [5]: df + arr[[0], :] # 1 row, 2 columns - In [6]: df + arr[:, [1]] # 1 column, 3 rows + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df + + df + arr[[0], :] # 1 row, 2 columns + df + arr[:, [1]] # 1 column, 3 rows .. _whatsnew_0240.api.extension: From 6fca5f5b4e378a826a82c957e3febb19ed110396 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 09:00:54 -0700 Subject: [PATCH 11/13] another ipython:: python line --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c2a859ab00fb1..66f3d4bd1c4f3 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -516,6 +516,7 @@ Previous Behavior: df = pd.DataFrame(arr) df +.. 
ipython:: python df + arr[[0], :] # 1 row, 2 columns df + arr[:, [1]] # 1 column, 3 rows From 855e49ba1973a64b942d91eb4c44b41926586e05 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 09:41:01 -0700 Subject: [PATCH 12/13] Add tests for all arithmetic ops --- pandas/core/ops.py | 1 - pandas/tests/frame/test_arithmetic.py | 57 +++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index d46a04fa124b7..20559bca9caed 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1814,7 +1814,6 @@ def to_series(right): right = left._constructor(right, index=left.index, columns=left.columns) - # TODO: Double-check this doesn't make copies elif right.shape[1] == left.shape[1] and right.shape[0] == 1: # Broadcast along rows diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index acd2f2ecc7fcf..b47f9b004c366 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -253,9 +253,8 @@ def test_arith_flex_zero_len_raises(self): class TestFrameArithmetic(object): - # TODO: tests for other arithmetic ops def test_df_add_2d_array_rowlike_broadcasts(self): - # GH# + # GH#23000 arr = np.arange(6).reshape(3, 2) df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) @@ -274,9 +273,8 @@ def test_df_add_2d_array_rowlike_broadcasts(self): result = rowlike + df tm.assert_frame_equal(result, expected) - # TODO: tests for other arithmetic ops def test_df_add_2d_array_collike_broadcasts(self): - # GH# + # GH#23000 arr = np.arange(6).reshape(3, 2) df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) @@ -295,6 +293,57 @@ def test_df_add_2d_array_collike_broadcasts(self): result = collike + df tm.assert_frame_equal(result, expected) + def test_df_arith_2d_array_rowlike_broadcasts(self, + all_arithmetic_operators): + # GH#23000 + opname = all_arithmetic_operators + + arr = np.arange(6).reshape(3, 
2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + exvals = [getattr(df.loc['A'], opname)(rowlike.squeeze()), + getattr(df.loc['B'], opname)(rowlike.squeeze()), + getattr(df.loc['C'], opname)(rowlike.squeeze())] + + expected = pd.DataFrame(exvals, columns=df.columns, index=df.index) + + if opname in ['__rmod__', '__rfloordiv__']: + # exvals will have dtypes [f8, i8, i8] so expected will be + # all-f8, but the DataFrame operation will return mixed dtypes + expected[False] = expected[False].astype('i8') + + result = getattr(df, opname)(rowlike) + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_collike_broadcasts(self, + all_arithmetic_operators): + # GH#23000 + opname = all_arithmetic_operators + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=['A', 'B', 'C']) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + exvals = {True: getattr(df[True], opname)(collike.squeeze()), + False: getattr(df[False], opname)(collike.squeeze())} + + dtype = None + if opname in ['__rmod__', '__rfloordiv__']: + # Series ops may return mixed int/float dtypes in cases where + # DataFrame op will return all-float. 
So we upcast `expected` + dtype = np.common_type(*[x.values for x in exvals.values()]) + + expected = pd.DataFrame(exvals, columns=df.columns, index=df.index, + dtype=dtype) + + result = getattr(df, opname)(collike) + tm.assert_frame_equal(result, expected) + def test_df_bool_mul_int(self): # GH#22047, GH#22163 multiplication by 1 should result in int dtype, # not object dtype From 01ab2b505b40f00e7ef1a8f950b3f5234c8ea7a2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Oct 2018 09:43:41 -0700 Subject: [PATCH 13/13] 32bit compat --- pandas/tests/frame/test_arithmetic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index b47f9b004c366..b97c5e4f7d7c2 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -313,7 +313,9 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, if opname in ['__rmod__', '__rfloordiv__']: # exvals will have dtypes [f8, i8, i8] so expected will be # all-f8, but the DataFrame operation will return mixed dtypes - expected[False] = expected[False].astype('i8') + # use exvals[-1].dtype instead of "i8" for compat with 32-bit + # systems/pythons + expected[False] = expected[False].astype(exvals[-1].dtype) result = getattr(df, opname)(rowlike) tm.assert_frame_equal(result, expected)