From 4489214940f03c017805b76c2150e40af3f487ad Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 11 Feb 2021 13:27:38 +0100 Subject: [PATCH 1/5] [ArrayManager] TST: arithmetic test --- .github/workflows/ci.yml | 1 + pandas/_testing/__init__.py | 4 ++- pandas/tests/arithmetic/test_datetime64.py | 40 ++++++++------------- pandas/tests/arithmetic/test_numeric.py | 8 +++-- pandas/tests/arithmetic/test_timedelta64.py | 15 ++++++-- 5 files changed, 37 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b551e7ded0178..a7a171e2fb4b0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -157,3 +157,4 @@ jobs: run: | source activate pandas-dev pytest pandas/tests/frame/methods --array-manager + pytest pandas/tests/arithmetic/ --array-manager diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0b2be53131af6..0e42e1ff22cee 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -207,8 +207,10 @@ def box_expected(expected, box_cls, transpose=True): if transpose: # for vector operations, we need a DataFrame to be a single-row, # not a single-column, in order to operate against non-DataFrame - # vectors of the same length. + # vectors of the same length. 
But convert to two rows to avoid + # single-row special cases in datetime arithmetic expected = expected.T + expected = pd.concat([expected] * 2, ignore_index=True) elif box_cls is PeriodArray: # the PeriodArray constructor is not as flexible as period_array expected = period_array(expected) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b2d88b3556388..538b23507f74f 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -318,40 +318,40 @@ def test_dt64arr_timestamp_equality(self, box_with_array): box_with_array if box_with_array not in [pd.Index, pd.array] else np.ndarray ) - ser = Series([Timestamp("2000-01-29 01:59:00"), "NaT"]) + ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), "NaT"]) ser = tm.box_expected(ser, box_with_array) result = ser != ser - expected = tm.box_expected([False, True], xbox) + expected = tm.box_expected([False, False, True], xbox) tm.assert_equal(result, expected) warn = FutureWarning if box_with_array is pd.DataFrame else None with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser != ser[0] - expected = tm.box_expected([False, True], xbox) + expected = tm.box_expected([False, True, True], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser != ser[1] - expected = tm.box_expected([True, True], xbox) + result = ser != ser[2] + expected = tm.box_expected([True, True, True], xbox) tm.assert_equal(result, expected) result = ser == ser - expected = tm.box_expected([True, False], xbox) + expected = tm.box_expected([True, True, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser == ser[0] - expected = tm.box_expected([True, False], xbox) + expected = 
tm.box_expected([True, False, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser == ser[1] - expected = tm.box_expected([False, False], xbox) + result = ser == ser[2] + expected = tm.box_expected([False, False, False], xbox) tm.assert_equal(result, expected) @@ -1010,10 +1010,7 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(dti, box_with_array) expected = tm.box_expected(expected, box_with_array) - warn = None - if box_with_array is not pd.DataFrame or tz_naive_fixture is None: - warn = PerformanceWarning - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = obj - obj.astype(object) tm.assert_equal(result, expected) @@ -1276,7 +1273,7 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): ] ) vec = tm.box_expected(vec, box_with_array) - vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec # DateOffset relativedelta fastpath relative_kwargs = [ @@ -1401,7 +1398,7 @@ def test_dt64arr_add_sub_DateOffsets( ] ) vec = tm.box_expected(vec, box_with_array) - vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec offset_cls = getattr(pd.offsets, cls_name) @@ -1515,10 +1512,7 @@ def test_dt64arr_add_sub_offset_array( if box_other: other = tm.box_expected(other, box_with_array) - warn = PerformanceWarning - if box_with_array is pd.DataFrame and tz is not None: - warn = None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = op(dtarr, other) tm.assert_equal(res, expected) @@ -2459,18 +2453,14 @@ def test_dti_addsub_object_arraylike( expected = DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) expected = 
tm.box_expected(expected, xbox) - warn = PerformanceWarning - if box_with_array is pd.DataFrame and tz is not None: - warn = None - - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = dtarr + other tm.assert_equal(result, expected) expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox) - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = dtarr - other tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index f4f258b559939..a1d96e602b8a8 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -532,13 +532,15 @@ def test_df_div_zero_series_does_not_commute(self): # ------------------------------------------------------------------ # Mod By Zero - def test_df_mod_zero_df(self): + def test_df_mod_zero_df(self, using_array_manager): # GH#3590, modulo as ints df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) # this is technically wrong, as the integer portion is coerced to float - # ### - first = Series([0, 0, 0, 0], dtype="float64") + first = Series([0, 0, 0, 0]) + if not using_array_manager: + # BlockManager doesn't preserve dtype per column if possible + first = first.astype("float64") second = Series([np.nan, np.nan, np.nan, 0]) expected = pd.DataFrame({"first": first, "second": second}) result = df % df diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 740ec3be4a1c6..f2848968aff7b 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1748,7 +1748,9 @@ def test_tdarr_div_length_mismatch(self, box_with_array): # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ - def 
test_td64arr_floordiv_td64arr_with_nat(self, box_with_array): + def test_td64arr_floordiv_td64arr_with_nat( + self, box_with_array, using_array_manager + ): # GH#35529 box = box_with_array xbox = np.ndarray if box is pd.array else box @@ -1761,6 +1763,8 @@ def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array): expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) + if box is DataFrame and using_array_manager: + expected[[0, 1]] = expected[[0, 1]].astype("int64") result = left // right @@ -2040,7 +2044,9 @@ def test_td64arr_rmul_numeric_array(self, box_with_array, vector, any_real_dtype [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], ids=lambda x: type(x).__name__, ) - def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype): + def test_td64arr_div_numeric_array( + self, box_with_array, vector, any_real_dtype, using_array_manager + ): # GH#4521 # divide/multiply by integers xbox = get_upcast_box(box_with_array, vector) @@ -2071,6 +2077,11 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype) result = tdser / vector.astype(object) if box_with_array is pd.DataFrame: expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] + if using_array_manager: + # https://github.com/pandas-dev/pandas/issues/39750 + # third column with all-NaT as result doesn't get preserved + # as timedelta64 dtype + result[2] = pd.array(["NaT", "NaT"], dtype="timedelta64[ns]") else: expected = [tdser[n] / vector[n] for n in range(len(tdser))] expected = pd.Index(expected) # do dtype inference From f6671a4d1d857c5beea55dc7eb02dbed4755aaa0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 11 Feb 2021 20:20:45 +0100 Subject: [PATCH 2/5] adapt expected instead --- pandas/tests/arithmetic/test_datetime64.py | 18 +++++++++--------- pandas/tests/arithmetic/test_timedelta64.py | 12 +++++++----- 2 files changed, 16 insertions(+), 14 deletions(-) 
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 538b23507f74f..fecfd40837116 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -318,40 +318,40 @@ def test_dt64arr_timestamp_equality(self, box_with_array): box_with_array if box_with_array not in [pd.Index, pd.array] else np.ndarray ) - ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), "NaT"]) + ser = Series([Timestamp("2000-01-29 01:59:00"), "NaT"]) ser = tm.box_expected(ser, box_with_array) result = ser != ser - expected = tm.box_expected([False, False, True], xbox) + expected = tm.box_expected([False, True], xbox) tm.assert_equal(result, expected) warn = FutureWarning if box_with_array is pd.DataFrame else None with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser != ser[0] - expected = tm.box_expected([False, True, True], xbox) + expected = tm.box_expected([False, True], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser != ser[2] - expected = tm.box_expected([True, True, True], xbox) + result = ser != ser[1] + expected = tm.box_expected([True, True], xbox) tm.assert_equal(result, expected) result = ser == ser - expected = tm.box_expected([True, True, False], xbox) + expected = tm.box_expected([True, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser == ser[0] - expected = tm.box_expected([True, False, False], xbox) + expected = tm.box_expected([True, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser == ser[2] - expected = tm.box_expected([False, False, False], xbox) + result = ser == ser[1] + expected = 
tm.box_expected([False, False], xbox) tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f2848968aff7b..0db51bd483f90 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -2077,15 +2077,17 @@ def test_td64arr_div_numeric_array( result = tdser / vector.astype(object) if box_with_array is pd.DataFrame: expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] - if using_array_manager: - # https://github.com/pandas-dev/pandas/issues/39750 - # third column with all-NaT as result doesn't get preserved - # as timedelta64 dtype - result[2] = pd.array(["NaT", "NaT"], dtype="timedelta64[ns]") else: expected = [tdser[n] / vector[n] for n in range(len(tdser))] expected = pd.Index(expected) # do dtype inference expected = tm.box_expected(expected, xbox) + + if using_array_manager and box_with_array is pd.DataFrame: + # https://github.com/pandas-dev/pandas/issues/39750 + # third column with all-NaT as result doesn't get preserved + # as timedelta64 dtype + expected[2] = Series([pd.NaT, pd.NaT], dtype=object) + tm.assert_equal(result, expected) with pytest.raises(TypeError, match=pattern): From f30d9177cda573caee8dc1b0e8dd001fbe080070 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 12:08:21 +0100 Subject: [PATCH 3/5] update comment --- pandas/tests/arithmetic/test_timedelta64.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 0db51bd483f90..4b1115877c040 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -2083,9 +2083,11 @@ def test_td64arr_div_numeric_array( expected = tm.box_expected(expected, xbox) if using_array_manager and box_with_array is pd.DataFrame: - # https://github.com/pandas-dev/pandas/issues/39750 - # 
third column with all-NaT as result doesn't get preserved - # as timedelta64 dtype + # TODO the behaviour is buggy here (third column with all-NaT + # as result doesn't get preserved as timedelta64 dtype). + # Reported at https://github.com/pandas-dev/pandas/issues/39750 + # Changing the expected instead of xfailing to continue to test + # the correct behaviour for the other columns expected[2] = Series([pd.NaT, pd.NaT], dtype=object) tm.assert_equal(result, expected) From 8fa1e5964acc94366b15eab2aa0bf89198a7b73d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 12:12:52 +0100 Subject: [PATCH 4/5] fixup changes to test_dt64arr_timestamp_equality (accidentally reverted) --- pandas/tests/arithmetic/test_datetime64.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index fecfd40837116..538b23507f74f 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -318,40 +318,40 @@ def test_dt64arr_timestamp_equality(self, box_with_array): box_with_array if box_with_array not in [pd.Index, pd.array] else np.ndarray ) - ser = Series([Timestamp("2000-01-29 01:59:00"), "NaT"]) + ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), "NaT"]) ser = tm.box_expected(ser, box_with_array) result = ser != ser - expected = tm.box_expected([False, True], xbox) + expected = tm.box_expected([False, False, True], xbox) tm.assert_equal(result, expected) warn = FutureWarning if box_with_array is pd.DataFrame else None with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser != ser[0] - expected = tm.box_expected([False, True], xbox) + expected = tm.box_expected([False, True, True], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser 
!= ser[1] - expected = tm.box_expected([True, True], xbox) + result = ser != ser[2] + expected = tm.box_expected([True, True, True], xbox) tm.assert_equal(result, expected) result = ser == ser - expected = tm.box_expected([True, False], xbox) + expected = tm.box_expected([True, True, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser == ser[0] - expected = tm.box_expected([True, False], xbox) + expected = tm.box_expected([True, False, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser == ser[1] - expected = tm.box_expected([False, False], xbox) + result = ser == ser[2] + expected = tm.box_expected([False, False, False], xbox) tm.assert_equal(result, expected) From fbffcadf902af751cffcf13c51a784e992125acf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 15 Feb 2021 20:13:49 +0100 Subject: [PATCH 5/5] add INFO(ArrayManager) comments --- pandas/tests/arithmetic/test_numeric.py | 4 +++- pandas/tests/arithmetic/test_timedelta64.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index a1d96e602b8a8..9c01a6a4a524c 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -539,7 +539,9 @@ def test_df_mod_zero_df(self, using_array_manager): # this is technically wrong, as the integer portion is coerced to float first = Series([0, 0, 0, 0]) if not using_array_manager: - # BlockManager doesn't preserve dtype per column if possible + # INFO(ArrayManager) BlockManager doesn't preserve dtype per column + # while ArrayManager performs op column-wise and thus preserves + # dtype if possible first = first.astype("float64") second = Series([np.nan, np.nan, np.nan, 0]) expected = pd.DataFrame({"first": first, 
"second": second}) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 4b1115877c040..57b8980e568d8 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1764,6 +1764,9 @@ def test_td64arr_floordiv_td64arr_with_nat( expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) if box is DataFrame and using_array_manager: + # INFO(ArrayManager) floordiv returns integer, and ArrayManager + # performs ops column-wise and thus preserves int64 dtype for + # columns without missing values expected[[0, 1]] = expected[[0, 1]].astype("int64") result = left // right