From 6baee91883622cd8a4f0f6da24d37bd9917e532c Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 5 Sep 2020 15:18:46 -0400 Subject: [PATCH 1/2] CLN: Separate transform tests --- pandas/tests/frame/apply/test_frame_apply.py | 49 +------------ .../tests/frame/apply/test_frame_transform.py | 72 +++++++++++++++++++ pandas/tests/frame/common.py | 26 +++++++ .../tests/series/apply/test_series_apply.py | 31 +------- .../series/apply/test_series_transform.py | 59 +++++++++++++++ 5 files changed, 159 insertions(+), 78 deletions(-) create mode 100644 pandas/tests/frame/apply/test_frame_transform.py create mode 100644 pandas/tests/series/apply/test_series_transform.py diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 5a1e448beb40f..bc09501583e2c 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -1,7 +1,6 @@ from collections import OrderedDict from datetime import datetime from itertools import chain -import operator import warnings import numpy as np @@ -14,6 +13,7 @@ import pandas._testing as tm from pandas.core.apply import frame_apply from pandas.core.base import SpecificationError +from pandas.tests.frame.common import zip_frames @pytest.fixture @@ -1058,25 +1058,6 @@ def test_consistency_for_boxed(self, box, int_frame_const_col): tm.assert_frame_equal(result, expected) -def zip_frames(frames, axis=1): - """ - take a list of frames, zip them together under the - assumption that these all have the first frames' index/columns. - - Returns - ------- - new_frame : DataFrame - """ - if axis == 1: - columns = frames[0].columns - zipped = [f.loc[:, c] for c in columns for f in frames] - return pd.concat(zipped, axis=1) - else: - index = frames[0].index - zipped = [f.loc[i, :] for i in index for f in frames] - return pd.DataFrame(zipped) - - class TestDataFrameAggregate: def test_agg_transform(self, axis, float_frame): other_axis = 1 if axis in {0, "index"} else 0 @@ -1087,16 +1068,10 @@ def test_agg_transform(self, axis, float_frame): f_sqrt = np.sqrt(float_frame) # ufunc - result = float_frame.transform(np.sqrt, axis=axis) expected = f_sqrt.copy() - tm.assert_frame_equal(result, expected) - result = float_frame.apply(np.sqrt, axis=axis) tm.assert_frame_equal(result, expected) - result = float_frame.transform(np.sqrt, axis=axis) - tm.assert_frame_equal(result, expected) - # list-like result = float_frame.apply([np.sqrt], axis=axis) expected = f_sqrt.copy() @@ -1110,9 +1085,6 @@ def test_agg_transform(self, axis, float_frame): ) tm.assert_frame_equal(result, expected) - result = float_frame.transform([np.sqrt], axis=axis) - tm.assert_frame_equal(result, expected) - # multiple items in list # these are in the order as if we are applying both # functions per series and then concatting @@ -1128,38 +1100,19 @@ def test_agg_transform(self, axis, float_frame): ) tm.assert_frame_equal(result, expected) - result = float_frame.transform([np.abs, "sqrt"], axis=axis) - tm.assert_frame_equal(result, expected) - def test_transform_and_agg_err(self, axis, float_frame): # cannot both transform and agg - msg = "transforms cannot produce aggregated results" - with pytest.raises(ValueError, match=msg): - float_frame.transform(["max", "min"], axis=axis) - msg = "cannot combine transform and aggregation operations" with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): float_frame.agg(["max", "sqrt"], axis=axis) - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - float_frame.transform(["max", "sqrt"], axis=axis) - df = pd.DataFrame({"A": range(5), "B": 5}) def f(): with np.errstate(all="ignore"): df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis) - @pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) - def test_transform_method_name(self, method): - # GH 19760 - df = pd.DataFrame({"A": [-1, 2]}) - result = df.transform(method) - expected = operator.methodcaller(method)(df) - tm.assert_frame_equal(result, expected) - def test_demo(self): # demonstration tests df = pd.DataFrame({"A": range(5), "B": 5}) diff --git a/pandas/tests/frame/apply/test_frame_transform.py b/pandas/tests/frame/apply/test_frame_transform.py new file mode 100644 index 0000000000000..3a345215482ed --- /dev/null +++ b/pandas/tests/frame/apply/test_frame_transform.py @@ -0,0 +1,72 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.frame.common import zip_frames + + +def test_agg_transform(axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + + with np.errstate(all="ignore"): + + f_abs = np.abs(float_frame) + f_sqrt = np.sqrt(float_frame) + + # ufunc + result = float_frame.transform(np.sqrt, axis=axis) + expected = f_sqrt.copy() + tm.assert_frame_equal(result, expected) + + result = float_frame.transform(np.sqrt, axis=axis) + tm.assert_frame_equal(result, expected) + + # list-like + expected = f_sqrt.copy() + if axis in {0, "index"}: + expected.columns = pd.MultiIndex.from_product( + [float_frame.columns, ["sqrt"]] + ) + else: + expected.index = pd.MultiIndex.from_product([float_frame.index, ["sqrt"]]) + result = float_frame.transform([np.sqrt], axis=axis) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = pd.MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = pd.MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) + result = float_frame.transform([np.abs, "sqrt"], axis=axis) + tm.assert_frame_equal(result, expected) + + +def test_transform_and_agg_err(axis, float_frame): + # cannot both transform and agg + msg = "transforms cannot produce aggregated results" + with pytest.raises(ValueError, match=msg): + float_frame.transform(["max", "min"], axis=axis) + + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + float_frame.transform(["max", "sqrt"], axis=axis) + + +@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) +def test_transform_method_name(method): + # GH 19760 + df = pd.DataFrame({"A": [-1, 2]}) + result = df.transform(method) + expected = operator.methodcaller(method)(df) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 463a140972ab5..51953313b3ca2 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,3 +1,10 @@ +from typing import List + +from pandas._typing import Axis + +from pandas import DataFrame, concat + + def _check_mixed_float(df, dtype=None): # float16 are most likely to be upcasted to float32 dtypes = dict(A="float32", B="float32", C="float16", D="float64") @@ -29,3 +36,22 @@ def _check_mixed_int(df, dtype=None): assert df.dtypes["C"] == dtypes["C"] if dtypes.get("D"): assert df.dtypes["D"] == dtypes["D"] + + +def zip_frames(frames: List[DataFrame], axis: int = 1) -> DataFrame: + """ + take a list of frames, zip them together under the + assumption that these all have the first frames' index/columns. + + Returns + ------- + new_frame : DataFrame + """ + if axis == 1: + columns = frames[0].columns + zipped = [f.loc[:, c] for c in columns for f in frames] + return concat(zipped, axis=1) + else: + index = frames[0].index + zipped = [f.loc[i, :] for i in index for f in frames] + return DataFrame(zipped) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 308398642895c..b948317f32062 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -209,25 +209,16 @@ def test_transform(self, string_series): f_abs = np.abs(string_series) # ufunc - result = string_series.transform(np.sqrt) expected = f_sqrt.copy() - tm.assert_series_equal(result, expected) - result = string_series.apply(np.sqrt) tm.assert_series_equal(result, expected) # list-like - result = string_series.transform([np.sqrt]) + result = string_series.apply([np.sqrt]) expected = f_sqrt.to_frame().copy() expected.columns = ["sqrt"] tm.assert_frame_equal(result, expected) - result = string_series.transform([np.sqrt]) - tm.assert_frame_equal(result, expected) - - result = string_series.transform(["sqrt"]) - tm.assert_frame_equal(result, expected) - # multiple items in list # these are in the order as if we are applying both functions per # series and then concatting @@ -236,10 +227,6 @@ def test_transform(self, string_series): result = string_series.apply([np.sqrt, np.abs]) tm.assert_frame_equal(result, expected) - result = string_series.transform(["sqrt", "abs"]) - expected.columns = ["sqrt", "abs"] - tm.assert_frame_equal(result, expected) - # dict, provide renaming expected = pd.concat([f_sqrt, f_abs], axis=1) expected.columns = ["foo", "bar"] @@ -250,19 +237,11 @@ def test_transform(self, string_series): def test_transform_and_agg_error(self, string_series): # we are trying to transform with an aggregator - msg = "transforms cannot produce aggregated results" - with pytest.raises(ValueError, match=msg): - string_series.transform(["min", "max"]) - msg = "cannot combine transform and aggregation" with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): string_series.agg(["sqrt", "max"]) - with pytest.raises(ValueError, match=msg): - with np.errstate(all="ignore"): - string_series.transform(["sqrt", "max"]) - msg = "cannot perform both aggregation and transformation" with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): @@ -463,14 +442,6 @@ def test_agg_cython_table_raises(self, series, func, expected): # e.g. Series('a b'.split()).cumprod() will raise series.agg(func) - def test_transform_none_to_type(self): - # GH34377 - df = pd.DataFrame({"a": [None]}) - - msg = "DataFrame constructor called with incompatible data and dtype" - with pytest.raises(TypeError, match=msg): - df.transform({"a": int}) - class TestSeriesMap: def test_map(self, datetime_series): diff --git a/pandas/tests/series/apply/test_series_transform.py b/pandas/tests/series/apply/test_series_transform.py new file mode 100644 index 0000000000000..8bc3d2dc4d0db --- /dev/null +++ b/pandas/tests/series/apply/test_series_transform.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_transform(string_series): + # transforming functions + + with np.errstate(all="ignore"): + f_sqrt = np.sqrt(string_series) + f_abs = np.abs(string_series) + + # ufunc + result = string_series.transform(np.sqrt) + expected = f_sqrt.copy() + tm.assert_series_equal(result, expected) + + # list-like + result = string_series.transform([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ["sqrt"] + tm.assert_frame_equal(result, expected) + + result = string_series.transform([np.sqrt]) + tm.assert_frame_equal(result, expected) + + result = string_series.transform(["sqrt"]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = pd.concat([f_sqrt, f_abs], axis=1) + result = string_series.transform(["sqrt", "abs"]) + expected.columns = ["sqrt", "abs"] + tm.assert_frame_equal(result, expected) + + +def test_transform_and_agg_error(string_series): + # we are trying to transform with an aggregator + msg = "transforms cannot produce aggregated results" + with pytest.raises(ValueError, match=msg): + string_series.transform(["min", "max"]) + + msg = "cannot combine transform and aggregation operations" + with pytest.raises(ValueError, match=msg): + with np.errstate(all="ignore"): + string_series.transform(["sqrt", "max"]) + + +def test_transform_none_to_type(): + # GH34377 + df = pd.DataFrame({"a": [None]}) + + msg = "DataFrame constructor called with incompatible data and dtype" + with pytest.raises(TypeError, match=msg): + df.transform({"a": int}) From 53c2ecf093dbdaee303bc51e27b2391dd038a2cc Mon Sep 17 00:00:00 2001 From: Richard Date: Sat, 5 Sep 2020 16:06:29 -0400 Subject: [PATCH 2/2] Removed unused import --- pandas/tests/frame/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 51953313b3ca2..73e60ff389038 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,7 +1,5 @@ from typing import List -from pandas._typing import Axis - from pandas import DataFrame, concat