From c32a44178d090d2d853016fcd07e524ad7a3ced2 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 3 Jul 2020 15:33:41 +0100 Subject: [PATCH 1/5] Revert "REF: move mixed-dtype frame_apply check outside of _reduce try/except (#32950)" This reverts commit b8385083b2e6d98638fe7e9ea25ad5065fb97c26. --- pandas/core/frame.py | 49 ++++++++++++---------------- pandas/tests/frame/test_analytics.py | 4 +-- pandas/tests/frame/test_missing.py | 8 ++--- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6993e9ed851a..e51078a393556 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8539,43 +8539,35 @@ def blk_func(values): out[:] = coerce_to_dtypes(out.values, df.dtypes) return out - if not self._is_homogeneous_type: - # try to avoid self.values call - - if filter_type is None and axis == 0 and len(self) > 0: - # operate column-wise - - # numeric_only must be None here, as other cases caught above - # require len(self) > 0 bc frame_apply messes up empty prod/sum - - # this can end up with a non-reduction - # but not always. if the types are mixed - # with datelike then need to make sure a series - - # we only end up here if we have not specified - # numeric_only and yet we have tried a - # column-by-column reduction, where we have mixed type. - # So let's just do what we can - from pandas.core.apply import frame_apply - - opa = frame_apply( - self, func=f, result_type="expand", ignore_failures=True - ) - result = opa.get_result() - if result.ndim == self.ndim: - result = result.iloc[0].rename(None) - return result - if numeric_only is None: data = self values = data.values - try: result = f(values) except TypeError: # e.g. in nanops trying to convert strs to float + # try by-column first + if filter_type is None and axis == 0: + # this can end up with a non-reduction + # but not always. if the types are mixed + # with datelike then need to make sure a series + + # we only end up here if we have not specified + # numeric_only and yet we have tried a + # column-by-column reduction, where we have mixed type. + # So let's just do what we can + from pandas.core.apply import frame_apply + + opa = frame_apply( + self, func=f, result_type="expand", ignore_failures=True + ) + result = opa.get_result() + if result.ndim == self.ndim: + result = result.iloc[0] + return result + # TODO: why doesnt axis matter here? data = _get_data(axis_matters=False) labels = data._get_agg_axis(axis) @@ -8583,7 +8575,6 @@ def blk_func(values): values = data.values with np.errstate(all="ignore"): result = f(values) - else: if numeric_only: data = _get_data(axis_matters=True) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index db8bb5ca3c437..e9af883a717b5 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -351,9 +351,7 @@ def kurt(x): "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum ) assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True) - assert_stat_op_calc( - "product", np.prod, float_frame_with_na, skipna_alternative=np.nanprod - ) + assert_stat_op_calc("product", np.prod, float_frame_with_na) assert_stat_op_calc("mad", mad, float_frame_with_na) assert_stat_op_calc("var", var, float_frame_with_na) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 7cb7115276f71..e4de749c5f5c5 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -372,12 +372,8 @@ def test_fillna_categorical_nan(self): cat = Categorical([np.nan, 2, np.nan]) val = Categorical([np.nan, np.nan, np.nan]) df = DataFrame({"cats": cat, "vals": val}) - - # GH#32950 df.median() is poorly behaved because there is no - # Categorical.median - median = Series({"cats": 2.0, "vals": np.nan}) - - res = df.fillna(median) + with tm.assert_produces_warning(RuntimeWarning): + res = df.fillna(df.median()) v_exp = [np.nan, np.nan, np.nan] df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") tm.assert_frame_equal(res, df_exp) From 64b55d87f51dbf77b41ab7e9151ac9fbe61d0bcc Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 3 Jul 2020 15:49:14 +0100 Subject: [PATCH 2/5] add test --- pandas/tests/arrays/integer/test_function.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 44c3077228e80..3b486117f93df 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -133,6 +133,15 @@ def test_integer_array_numpy_sum(values, expected): assert result == expected +def test_mixed_frame_with_integer_sum(): + # https://github.com/pandas-dev/pandas/issues/34520 + df = pd.DataFrame([["a", 1]], columns=list("ab")) + df.astype({"b": "Int64"}) + result = df.sum() + expected = pd.Series(["a", 1], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + # TODO(jreback) - these need testing / are broken # shift From 851029574e0c5d7083ad8153bc7da428986032d0 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 3 Jul 2020 20:39:27 +0100 Subject: [PATCH 3/5] workaround Numpy issue --- pandas/_libs/tslibs/timedeltas.pyx | 4 +++- pandas/_libs/tslibs/timestamps.pyx | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 2862e62e3d522..01cb22117587d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -547,7 +547,9 @@ def _binary_op_method_timedeltalike(op, name): try: other = Timedelta(other) - except ValueError: + except (ValueError, SystemError): + # catch SystemError to workaround NumPy issue + # https://github.com/numpy/numpy/issues/15502 # failed to parse as timedelta return NotImplemented diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e104b722ea119..a33732427337d 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -203,10 +203,10 @@ def integer_op_not_supported(obj): # GH#30886 using an fstring raises SystemError int_addsub_msg = ( - f"Addition/subtraction of integers and integer-arrays with {cls} is " + "Addition/subtraction of integers and integer-arrays with {cls} is " "no longer supported. Instead of adding/subtracting `n`, " "use `n * obj.freq`" - ) + ).format(cls=cls) return TypeError(int_addsub_msg) From cf9c3fb89b278b1ccf13343d706d4b53a8f05dc4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 3 Jul 2020 20:44:10 +0100 Subject: [PATCH 4/5] undo changes to test_fillna_categorical_nan --- pandas/tests/frame/test_missing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index e4de749c5f5c5..7cb7115276f71 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -372,8 +372,12 @@ def test_fillna_categorical_nan(self): cat = Categorical([np.nan, 2, np.nan]) val = Categorical([np.nan, np.nan, np.nan]) df = DataFrame({"cats": cat, "vals": val}) - with tm.assert_produces_warning(RuntimeWarning): - res = df.fillna(df.median()) + + # GH#32950 df.median() is poorly behaved because there is no + # Categorical.median + median = Series({"cats": 2.0, "vals": np.nan}) + + res = df.fillna(median) v_exp = [np.nan, np.nan, np.nan] df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") tm.assert_frame_equal(res, df_exp) From 6b283dfece4291e79fd51c8ed7158bfab9d9c7d6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 10 Jul 2020 09:54:29 +0100 Subject: [PATCH 5/5] fixup test --- pandas/tests/arrays/integer/test_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 3b486117f93df..c584fb9f704b8 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -136,7 +136,7 @@ def test_integer_array_numpy_sum(values, expected): def test_mixed_frame_with_integer_sum(): # https://github.com/pandas-dev/pandas/issues/34520 df = pd.DataFrame([["a", 1]], columns=list("ab")) - df.astype({"b": "Int64"}) + df = df.astype({"b": "Int64"}) result = df.sum() expected = pd.Series(["a", 1], index=["a", "b"]) tm.assert_series_equal(result, expected)