Move the cumulative-operation tests (cumsum, cumprod, cummin, cummax) out of
the frame and series test_analytics modules into new test_cumulative modules.

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index cef389a6c4167..0653c9dc5f91b 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -1495,112 +1495,6 @@ def test_sum_bools(self):
         bools = isna(df)
         assert bools.sum(axis=1)[0] == 10
 
-    # ---------------------------------------------------------------------
-    # Cumulative Reductions - cumsum, cummax, ...
-
-    def test_cumsum_corner(self):
-        dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5))
-        # ?(wesm)
-        result = dm.cumsum()  # noqa
-
-    def test_cumsum(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
-
-        # axis = 0
-        cumsum = datetime_frame.cumsum()
-        expected = datetime_frame.apply(Series.cumsum)
-        tm.assert_frame_equal(cumsum, expected)
-
-        # axis = 1
-        cumsum = datetime_frame.cumsum(axis=1)
-        expected = datetime_frame.apply(Series.cumsum, axis=1)
-        tm.assert_frame_equal(cumsum, expected)
-
-        # works
-        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
-        result = df.cumsum()  # noqa
-
-        # fix issue
-        cumsum_xs = datetime_frame.cumsum(axis=1)
-        assert np.shape(cumsum_xs) == np.shape(datetime_frame)
-
-    def test_cumprod(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
-
-        # axis = 0
-        cumprod = datetime_frame.cumprod()
-        expected = datetime_frame.apply(Series.cumprod)
-        tm.assert_frame_equal(cumprod, expected)
-
-        # axis = 1
-        cumprod = datetime_frame.cumprod(axis=1)
-        expected = datetime_frame.apply(Series.cumprod, axis=1)
-        tm.assert_frame_equal(cumprod, expected)
-
-        # fix issue
-        cumprod_xs = datetime_frame.cumprod(axis=1)
-        assert np.shape(cumprod_xs) == np.shape(datetime_frame)
-
-        # ints
-        df = datetime_frame.fillna(0).astype(int)
-        df.cumprod(0)
-        df.cumprod(1)
-
-        # ints32
-        df = datetime_frame.fillna(0).astype(np.int32)
-        df.cumprod(0)
-        df.cumprod(1)
-
-    def test_cummin(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
-
-        # axis = 0
-        cummin = datetime_frame.cummin()
-        expected = datetime_frame.apply(Series.cummin)
-        tm.assert_frame_equal(cummin, expected)
-
-        # axis = 1
-        cummin = datetime_frame.cummin(axis=1)
-        expected = datetime_frame.apply(Series.cummin, axis=1)
-        tm.assert_frame_equal(cummin, expected)
-
-        # it works
-        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
-        result = df.cummin()  # noqa
-
-        # fix issue
-        cummin_xs = datetime_frame.cummin(axis=1)
-        assert np.shape(cummin_xs) == np.shape(datetime_frame)
-
-    def test_cummax(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
-
-        # axis = 0
-        cummax = datetime_frame.cummax()
-        expected = datetime_frame.apply(Series.cummax)
-        tm.assert_frame_equal(cummax, expected)
-
-        # axis = 1
-        cummax = datetime_frame.cummax(axis=1)
-        expected = datetime_frame.apply(Series.cummax, axis=1)
-        tm.assert_frame_equal(cummax, expected)
-
-        # it works
-        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
-        result = df.cummax()  # noqa
-
-        # fix issue
-        cummax_xs = datetime_frame.cummax(axis=1)
-        assert np.shape(cummax_xs) == np.shape(datetime_frame)
-
     # ---------------------------------------------------------------------
     # Miscellanea
 
diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py
new file mode 100644
index 0000000000000..ad2cbff888b2e
--- /dev/null
+++ b/pandas/tests/frame/test_cumulative.py
@@ -0,0 +1,128 @@
+"""
+Tests for DataFrame cumulative operations.
+
+See also
+--------
+tests.series.test_cumulative
+"""
+
+import numpy as np
+
+from pandas import DataFrame, Series
+import pandas.util.testing as tm
+
+
+class TestDataFrameCumulativeOps:
+    """Tests for DataFrame.cumsum, cumprod, cummin and cummax."""
+
+    # ---------------------------------------------------------------------
+    # Cumulative Operations - cumsum, cummax, ...
+
+    def test_cumsum_corner(self):
+        """cumsum on an all-integer frame matches the column-wise result."""
+        dm = DataFrame(np.arange(20).reshape(4, 5), index=range(4), columns=range(5))
+        result = dm.cumsum()
+        expected = dm.apply(Series.cumsum)
+        tm.assert_frame_equal(result, expected)
+
+    def test_cumsum(self, datetime_frame):
+        """cumsum agrees with Series.cumsum applied along either axis."""
+        datetime_frame.loc[5:10, 0] = np.nan
+        datetime_frame.loc[10:15, 1] = np.nan
+        datetime_frame.loc[15:, 2] = np.nan
+
+        # axis = 0
+        cumsum = datetime_frame.cumsum()
+        expected = datetime_frame.apply(Series.cumsum)
+        tm.assert_frame_equal(cumsum, expected)
+
+        # axis = 1
+        cumsum = datetime_frame.cumsum(axis=1)
+        expected = datetime_frame.apply(Series.cumsum, axis=1)
+        tm.assert_frame_equal(cumsum, expected)
+
+        # smoke test: integer data must not raise
+        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
+        df.cumsum()
+
+        # the row-wise result keeps the shape of the input
+        cumsum_xs = datetime_frame.cumsum(axis=1)
+        assert np.shape(cumsum_xs) == np.shape(datetime_frame)
+
+    def test_cumprod(self, datetime_frame):
+        """cumprod agrees with Series.cumprod applied along either axis."""
+        datetime_frame.loc[5:10, 0] = np.nan
+        datetime_frame.loc[10:15, 1] = np.nan
+        datetime_frame.loc[15:, 2] = np.nan
+
+        # axis = 0
+        cumprod = datetime_frame.cumprod()
+        expected = datetime_frame.apply(Series.cumprod)
+        tm.assert_frame_equal(cumprod, expected)
+
+        # axis = 1
+        cumprod = datetime_frame.cumprod(axis=1)
+        expected = datetime_frame.apply(Series.cumprod, axis=1)
+        tm.assert_frame_equal(cumprod, expected)
+
+        # the row-wise result keeps the shape of the input
+        cumprod_xs = datetime_frame.cumprod(axis=1)
+        assert np.shape(cumprod_xs) == np.shape(datetime_frame)
+
+        # smoke test: int data must not raise on either axis
+        df = datetime_frame.fillna(0).astype(int)
+        df.cumprod(0)
+        df.cumprod(1)
+
+        # smoke test: int32 data must not raise on either axis
+        df = datetime_frame.fillna(0).astype(np.int32)
+        df.cumprod(0)
+        df.cumprod(1)
+
+    def test_cummin(self, datetime_frame):
+        """cummin agrees with Series.cummin applied along either axis."""
+        datetime_frame.loc[5:10, 0] = np.nan
+        datetime_frame.loc[10:15, 1] = np.nan
+        datetime_frame.loc[15:, 2] = np.nan
+
+        # axis = 0
+        cummin = datetime_frame.cummin()
+        expected = datetime_frame.apply(Series.cummin)
+        tm.assert_frame_equal(cummin, expected)
+
+        # axis = 1
+        cummin = datetime_frame.cummin(axis=1)
+        expected = datetime_frame.apply(Series.cummin, axis=1)
+        tm.assert_frame_equal(cummin, expected)
+
+        # smoke test: integer data must not raise
+        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
+        df.cummin()
+
+        # the row-wise result keeps the shape of the input
+        cummin_xs = datetime_frame.cummin(axis=1)
+        assert np.shape(cummin_xs) == np.shape(datetime_frame)
+
+    def test_cummax(self, datetime_frame):
+        """cummax agrees with Series.cummax applied along either axis."""
+        datetime_frame.loc[5:10, 0] = np.nan
+        datetime_frame.loc[10:15, 1] = np.nan
+        datetime_frame.loc[15:, 2] = np.nan
+
+        # axis = 0
+        cummax = datetime_frame.cummax()
+        expected = datetime_frame.apply(Series.cummax)
+        tm.assert_frame_equal(cummax, expected)
+
+        # axis = 1
+        cummax = datetime_frame.cummax(axis=1)
+        expected = datetime_frame.apply(Series.cummax, axis=1)
+        tm.assert_frame_equal(cummax, expected)
+
+        # smoke test: integer data must not raise
+        df = DataFrame({"A": np.arange(20)}, index=np.arange(20))
+        df.cummax()
+
+        # the row-wise result keeps the shape of the input
+        cummax_xs = datetime_frame.cummax(axis=1)
+        assert np.shape(cummax_xs) == np.shape(datetime_frame)
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 0eb4e8a6cfdf3..148c376eba752 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -4,7 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat.numpy import _np_version_under1p18
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -125,116 +124,6 @@ def test_argsort_stable(self):
         with pytest.raises(AssertionError, match=msg):
             tm.assert_numpy_array_equal(qindexer, mindexer)
 
-    def test_cumsum(self, datetime_series):
-        self._check_accum_op("cumsum", datetime_series)
-
-    def test_cumprod(self, datetime_series):
-        self._check_accum_op("cumprod", datetime_series)
-
-    def test_cummin(self, datetime_series):
-        tm.assert_numpy_array_equal(
-            datetime_series.cummin().values,
-            np.minimum.accumulate(np.array(datetime_series)),
-        )
-        ts = datetime_series.copy()
-        ts[::2] = np.NaN
-        result = ts.cummin()[1::2]
-        expected = np.minimum.accumulate(ts.dropna())
-
-        tm.assert_series_equal(result, expected)
-
-    def test_cummax(self, datetime_series):
-        tm.assert_numpy_array_equal(
-            datetime_series.cummax().values,
-            np.maximum.accumulate(np.array(datetime_series)),
-        )
-        ts = datetime_series.copy()
-        ts[::2] = np.NaN
-        result = ts.cummax()[1::2]
-        expected = np.maximum.accumulate(ts.dropna())
-
-        tm.assert_series_equal(result, expected)
-
-    @pytest.mark.xfail(
-        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
-    )
-    def test_cummin_datetime64(self):
-        s = pd.Series(
-            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"])
-        )
-
-        expected = pd.Series(
-            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"])
-        )
-        result = s.cummin(skipna=True)
-        tm.assert_series_equal(expected, result)
-
-        expected = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"]
-            )
-        )
-        result = s.cummin(skipna=False)
-        tm.assert_series_equal(expected, result)
-
-    @pytest.mark.xfail(
-        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
-    )
-    def test_cummax_datetime64(self):
-        s = pd.Series(
-            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"])
-        )
-
-        expected = pd.Series(
-            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"])
-        )
-        result = s.cummax(skipna=True)
-        tm.assert_series_equal(expected, result)
-
-        expected = pd.Series(
-            pd.to_datetime(
-                ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"]
-            )
-        )
-        result = s.cummax(skipna=False)
-        tm.assert_series_equal(expected, result)
-
-    @pytest.mark.xfail(
-        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
-    )
-    def test_cummin_timedelta64(self):
-        s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
-
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"])
-        )
-        result = s.cummin(skipna=True)
-        tm.assert_series_equal(expected, result)
-
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"])
-        )
-        result = s.cummin(skipna=False)
-        tm.assert_series_equal(expected, result)
-
-    @pytest.mark.xfail(
-        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
-    )
-    def test_cummax_timedelta64(self):
-        s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
-
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"])
-        )
-        result = s.cummax(skipna=True)
-        tm.assert_series_equal(expected, result)
-
-        expected = pd.Series(
-            pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"])
-        )
-        result = s.cummax(skipna=False)
-        tm.assert_series_equal(expected, result)
-
     def test_np_diff(self):
         pytest.skip("skipping due to Series no longer being an ndarray")
 
diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py
new file mode 100644
index 0000000000000..a31cc9d968f3a
--- /dev/null
+++ b/pandas/tests/series/test_cumulative.py
@@ -0,0 +1,166 @@
+"""
+Tests for Series cumulative operations.
+
+See also
+--------
+tests.frame.test_cumulative
+"""
+import numpy as np
+import pytest
+
+from pandas.compat.numpy import _np_version_under1p18
+
+import pandas as pd
+import pandas.util.testing as tm
+
+
+def _check_accum_op(name, series, check_dtype=True):
+    """
+    Check that the numpy function ``name`` gives the same values on a Series
+    as on the equivalent ndarray, with and without missing values.
+
+    Parameters
+    ----------
+    name : str
+        Name of a function looked up on the numpy namespace, e.g. "cumsum".
+    series : Series
+        Data to accumulate; it is copied before NaNs are inserted.
+    check_dtype : bool, default True
+        Whether the dtype must also match in the comparison without NaNs.
+    """
+    func = getattr(np, name)
+    tm.assert_numpy_array_equal(
+        func(series).values, func(np.array(series)), check_dtype=check_dtype,
+    )
+
+    # with missing values: the values at the remaining positions must equal
+    # the accumulation over the non-missing data alone
+    ts = series.copy()
+    ts[::2] = np.NaN
+
+    result = func(ts)[1::2]
+    expected = func(np.array(ts.dropna()))
+
+    tm.assert_numpy_array_equal(result.values, expected, check_dtype=False)
+
+
+class TestSeriesCumulativeOps:
+    """Tests for Series.cumsum, cumprod, cummin and cummax."""
+
+    def test_cumsum(self, datetime_series):
+        """np.cumsum on a Series matches the ndarray result, with and without NaNs."""
+        _check_accum_op("cumsum", datetime_series)
+
+    def test_cumprod(self, datetime_series):
+        """np.cumprod on a Series matches the ndarray result, with and without NaNs."""
+        _check_accum_op("cumprod", datetime_series)
+
+    def test_cummin(self, datetime_series):
+        """cummin matches np.minimum.accumulate, with and without NaNs."""
+        tm.assert_numpy_array_equal(
+            datetime_series.cummin().values,
+            np.minimum.accumulate(np.array(datetime_series)),
+        )
+        ts = datetime_series.copy()
+        ts[::2] = np.NaN
+        result = ts.cummin()[1::2]
+        expected = np.minimum.accumulate(ts.dropna())
+
+        tm.assert_series_equal(result, expected)
+
+    def test_cummax(self, datetime_series):
+        """cummax matches np.maximum.accumulate, with and without NaNs."""
+        tm.assert_numpy_array_equal(
+            datetime_series.cummax().values,
+            np.maximum.accumulate(np.array(datetime_series)),
+        )
+        ts = datetime_series.copy()
+        ts[::2] = np.NaN
+        result = ts.cummax()[1::2]
+        expected = np.maximum.accumulate(ts.dropna())
+
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.xfail(
+        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
+    )
+    def test_cummin_datetime64(self):
+        """cummin on datetime64 data containing NaT, for both skipna settings."""
+        s = pd.Series(
+            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"])
+        )
+
+        expected = pd.Series(
+            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"])
+        )
+        result = s.cummin(skipna=True)
+        tm.assert_series_equal(expected, result)
+
+        expected = pd.Series(
+            pd.to_datetime(
+                ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"]
+            )
+        )
+        result = s.cummin(skipna=False)
+        tm.assert_series_equal(expected, result)
+
+    @pytest.mark.xfail(
+        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
+    )
+    def test_cummax_datetime64(self):
+        """cummax on datetime64 data containing NaT, for both skipna settings."""
+        s = pd.Series(
+            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"])
+        )
+
+        expected = pd.Series(
+            pd.to_datetime(["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"])
+        )
+        result = s.cummax(skipna=True)
+        tm.assert_series_equal(expected, result)
+
+        expected = pd.Series(
+            pd.to_datetime(
+                ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"]
+            )
+        )
+        result = s.cummax(skipna=False)
+        tm.assert_series_equal(expected, result)
+
+    @pytest.mark.xfail(
+        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
+    )
+    def test_cummin_timedelta64(self):
+        """cummin on timedelta64 data containing NaT, for both skipna settings."""
+        s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
+
+        expected = pd.Series(
+            pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"])
+        )
+        result = s.cummin(skipna=True)
+        tm.assert_series_equal(expected, result)
+
+        expected = pd.Series(
+            pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"])
+        )
+        result = s.cummin(skipna=False)
+        tm.assert_series_equal(expected, result)
+
+    @pytest.mark.xfail(
+        not _np_version_under1p18, reason="numpy 1.18 changed min/max behavior for NaT"
+    )
+    def test_cummax_timedelta64(self):
+        """cummax on timedelta64 data containing NaT, for both skipna settings."""
+        s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"]))
+
+        expected = pd.Series(
+            pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"])
+        )
+        result = s.cummax(skipna=True)
+        tm.assert_series_equal(expected, result)
+
+        expected = pd.Series(
+            pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"])
+        )
+        result = s.cummax(skipna=False)
+        tm.assert_series_equal(expected, result)