From d4f8e526cec0b07329d0049e3953045da36dd508 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Dec 2019 13:07:46 -0800 Subject: [PATCH 1/3] More frame method tests --- pandas/tests/frame/methods/test_count.py | 0 pandas/tests/frame/methods/test_cov_corr.py | 289 ++++++++++++ pandas/tests/frame/methods/test_round.py | 217 +++++++++ pandas/tests/frame/test_analytics.py | 486 +------------------- 4 files changed, 509 insertions(+), 483 deletions(-) create mode 100644 pandas/tests/frame/methods/test_count.py create mode 100644 pandas/tests/frame/methods/test_cov_corr.py create mode 100644 pandas/tests/frame/methods/test_round.py diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py new file mode 100644 index 0000000000000..04bc87a243a9b --- /dev/null +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -0,0 +1,289 @@ +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Series, isna +import pandas.util.testing as tm + + +class TestDataFrameCov: + def test_cov(self, float_frame, float_string_frame): + # min_periods no NAs (corner case) + expected = float_frame.cov() + result = float_frame.cov(min_periods=len(float_frame)) + + tm.assert_frame_equal(expected, result) + + result = float_frame.cov(min_periods=len(float_frame) + 1) + assert isna(result.values).all() + + # with NAs + frame = float_frame.copy() + frame["A"][:5] = np.nan + frame["B"][5:10] = np.nan + result = float_frame.cov(min_periods=len(float_frame) - 8) + expected = float_frame.cov() + expected.loc["A", "B"] = np.nan + expected.loc["B", "A"] = np.nan + + # regular + float_frame["A"][:5] = np.nan + float_frame["B"][:10] = np.nan + cov = float_frame.cov() + + tm.assert_almost_equal(cov["A"]["C"], float_frame["A"].cov(float_frame["C"])) + + # exclude non-numeric types + result = float_string_frame.cov() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov() + tm.assert_frame_equal(result, expected) + + # Single column frame + df = DataFrame(np.linspace(0.0, 1.0, 10)) + result = df.cov() + expected = DataFrame( + np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns + ) + tm.assert_frame_equal(result, expected) + df.loc[0] = np.nan + result = df.cov() + expected = DataFrame( + np.cov(df.values[1:].T).reshape((1, 1)), + index=df.columns, + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameCorr: + # DataFrame.corr(), as opposed to DataFrame.corrwith + + @staticmethod + def _check_method(frame, method="pearson"): + correls = frame.corr(method=method) + expected = frame["A"].corr(frame["C"], method=method) + tm.assert_almost_equal(correls["A"]["C"], expected) + + @td.skip_if_no_scipy + def test_corr_pearson(self, float_frame): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + + self._check_method(float_frame, "pearson") + + @td.skip_if_no_scipy + def test_corr_kendall(self, float_frame): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + + self._check_method(float_frame, "kendall") + + @td.skip_if_no_scipy + def test_corr_spearman(self, float_frame): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + + self._check_method(float_frame, "spearman") + + # --------------------------------------------------------------------- + + @td.skip_if_no_scipy + def test_corr_non_numeric(self, float_frame, float_string_frame): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + + # exclude non-numeric types + result = float_string_frame.corr() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr() + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + def test_corr_nooverlap(self, meth): + # nothing in common + df = DataFrame( + { + "A": [1, 1.5, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1.5, 1], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) + rs = df.corr(meth) + assert isna(rs.loc["A", "B"]) + assert isna(rs.loc["B", "A"]) + assert rs.loc["A", "A"] == 1 + assert rs.loc["B", "B"] == 1 + assert isna(rs.loc["C", "C"]) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "spearman"]) + def test_corr_constant(self, meth): + # constant --> all NA + + df = DataFrame( + { + "A": [1, 1, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1, 1], + } + ) + rs = df.corr(meth) + assert isna(rs.values).all() + + @td.skip_if_no_scipy + def test_corr_int_and_boolean(self): + # when dtypes of pandas series are different + # then ndarray will have dtype=object, + # so it need to be properly handled + df = DataFrame({"a": [True, False], "b": [1, 0]}) + + expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"]) + for meth in ["pearson", "kendall", "spearman"]: + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + result = df.corr(meth) + tm.assert_frame_equal(result, expected) + + def test_corr_cov_independent_index_column(self): + # GH#14617 + df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) + for method in ["cov", "corr"]: + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) + + def test_corr_invalid_method(self): + # GH#22298 + df = pd.DataFrame(np.random.normal(size=(10, 2))) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with pytest.raises(ValueError, match=msg): + df.corr(method="____") + + def test_corr_int(self): + # dtypes other than float64 GH#1761 + df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) + + df3.cov() + df3.corr() + + +class TestDataFrameCorrWith: + def test_corrwith(self, datetime_frame): + a = datetime_frame + noise = Series(np.random.randn(len(a)), index=a.index) + + b = datetime_frame.add(noise, axis=0) + + # make sure order does not matter + b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) + del b["B"] + + colcorr = a.corrwith(b, axis=0) + tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"])) + + rowcorr = a.corrwith(b, axis=1) + tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0)) + + dropped = a.corrwith(b, axis=0, drop=True) + tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"])) + assert "B" not in dropped + + dropped = a.corrwith(b, axis=1, drop=True) + assert a.index[-1] not in dropped.index + + # non time-series data + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns) + df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) + correls = df1.corrwith(df2, axis=1) + for row in index[:4]: + tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) + + def test_corrwith_with_objects(self): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame() + cols = ["A", "B", "C", "D"] + + df1["obj"] = "foo" + df2["obj"] = "bar" + + result = df1.corrwith(df2) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols]) + tm.assert_series_equal(result, expected) + + result = df1.corrwith(df2, axis=1) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) + tm.assert_series_equal(result, expected) + + def test_corrwith_series(self, datetime_frame): + result = datetime_frame.corrwith(datetime_frame["A"]) + expected = datetime_frame.apply(datetime_frame["A"].corr) + + tm.assert_series_equal(result, expected) + + def test_corrwith_matches_corrcoef(self): + df1 = DataFrame(np.arange(10000), columns=["a"]) + df2 = DataFrame(np.arange(10000) ** 2, columns=["a"]) + c1 = df1.corrwith(df2)["a"] + c2 = np.corrcoef(df1["a"], df2["a"])[0][1] + + tm.assert_almost_equal(c1, c2) + assert c1 < 1 + + def test_corrwith_mixed_dtypes(self): + # GH#18570 + df = pd.DataFrame( + {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]} + ) + s = pd.Series([0, 6, 7, 3]) + result = df.corrwith(s) + corrs = [df["a"].corr(s), df["b"].corr(s)] + expected = pd.Series(data=corrs, index=["a", "b"]) + tm.assert_series_equal(result, expected) + + def test_corrwith_index_intersection(self): + df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=True).index.sort_values() + expected = df1.columns.intersection(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_index_union(self): + df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=False).index.sort_values() + expected = df1.columns.union(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_dup_cols(self): + # GH#21925 + df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T) + df2 = df1.copy() + df2 = pd.concat((df2, df2[0]), axis=1) + + result = df1.corrwith(df2) + expected = pd.Series(np.ones(4), index=[0, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_corrwith_spearman(self): + # GH#21925 + df = pd.DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df ** 2, method="spearman") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_corrwith_kendall(self): + # GH#21925 + df = pd.DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df ** 2, method="kendall") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py new file mode 100644 index 0000000000000..96ac012ce7892 --- /dev/null +++ b/pandas/tests/frame/methods/test_round.py @@ -0,0 +1,217 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, date_range +import pandas.util.testing as tm + + +class TestDataFrameRound: + def test_round(self): + # GH#2665 + + # Test that rounding an empty DataFrame does nothing + df = DataFrame() + tm.assert_frame_equal(df, df.round()) + + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(df.round(), expected_rounded) + + # Round with an integer + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # This should also work with np.round (since np.round dispatches to + # df.round) + tm.assert_frame_equal(np.round(df, decimals), expected_rounded) + + # Round with a list + round_list = [1, 2] + with pytest.raises(TypeError): + df.round(round_list) + + # Round with a dictionary + expected_rounded = DataFrame( + {"col1": [1.1, 2.1, 3.1], "col2": [1.23, 2.23, 3.23]} + ) + round_dict = {"col1": 1, "col2": 2} + tm.assert_frame_equal(df.round(round_dict), expected_rounded) + + # Incomplete dict + expected_partially_rounded = DataFrame( + {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} + ) + partial_round_dict = {"col2": 1} + tm.assert_frame_equal(df.round(partial_round_dict), expected_partially_rounded) + + # Dict with unknown elements + wrong_round_dict = {"col3": 2, "col2": 1} + tm.assert_frame_equal(df.round(wrong_round_dict), expected_partially_rounded) + + # float input to `decimals` + non_int_round_dict = {"col1": 1, "col2": 0.5} + with pytest.raises(TypeError): + df.round(non_int_round_dict) + + # String input + non_int_round_dict = {"col1": 1, "col2": "foo"} + with pytest.raises(TypeError): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + # List input + non_int_round_dict = {"col1": 1, "col2": [1, 2]} + with pytest.raises(TypeError): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + # Non integer Series inputs + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + # Negative numbers + negative_round_dict = {"col1": -1, "col2": -2} + big_df = df * 100 + expected_neg_rounded = DataFrame( + {"col1": [110.0, 210, 310], "col2": [100.0, 200, 300]} + ) + tm.assert_frame_equal(big_df.round(negative_round_dict), expected_neg_rounded) + + # nan in Series round + nan_round_Series = Series({"col1": np.nan, "col2": 1}) + + # TODO(wesm): unused? + expected_nan_round = DataFrame( # noqa + {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} + ) + + with pytest.raises(TypeError): + df.round(nan_round_Series) + + # Make sure this doesn't break existing Series.round + tm.assert_series_equal(df["col1"].round(1), expected_rounded["col1"]) + + # named columns + # GH#11986 + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + df.columns.name = "cols" + expected_rounded.columns.name = "cols" + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # interaction of named columns & series + tm.assert_series_equal(df["col1"].round(decimals), expected_rounded["col1"]) + tm.assert_series_equal(df.round(decimals)["col1"], expected_rounded["col1"]) + + def test_round_numpy(self): + # GH#12600 + df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) + out = np.round(df, decimals=0) + expected = DataFrame([[2.0, 1.0], [0.0, 7.0]]) + tm.assert_frame_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(df, decimals=0, out=df) + + def test_round_numpy_with_nan(self): + # See GH#14197 + df = Series([1.53, np.nan, 0.06]).to_frame() + with tm.assert_produces_warning(None): + result = df.round() + expected = Series([2.0, np.nan, 0.0]).to_frame() + tm.assert_frame_equal(result, expected) + + def test_round_mixed_type(self): + # GH#11885 + df = DataFrame( + { + "col1": [1.1, 2.2, 3.3, 4.4], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + round_0 = DataFrame( + { + "col1": [1.0, 2.0, 3.0, 4.0], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + tm.assert_frame_equal(df.round(), round_0) + tm.assert_frame_equal(df.round(1), df) + tm.assert_frame_equal(df.round({"col1": 1}), df) + tm.assert_frame_equal(df.round({"col1": 0}), round_0) + tm.assert_frame_equal(df.round({"col1": 0, "col2": 1}), round_0) + tm.assert_frame_equal(df.round({"col3": 1}), df) + + def test_round_with_duplicate_columns(self): + # GH#11611 + + df = pd.DataFrame( + np.random.random([3, 3]), + columns=["A", "B", "C"], + index=["first", "second", "third"], + ) + + dfs = pd.concat((df, df), axis=1) + rounded = dfs.round() + tm.assert_index_equal(rounded.index, dfs.index) + + decimals = pd.Series([1, 0, 2], index=["A", "B", "A"]) + msg = "Index of decimals must be unique" + with pytest.raises(ValueError, match=msg): + df.round(decimals) + + def test_round_builtin(self): + # GH#11763 + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(round(df), expected_rounded) + + def test_round_nonunique_categorical(self): + # See GH#21809 + idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) + df = pd.DataFrame(np.random.rand(6, 3), columns=list("abc")) + + expected = df.round(3) + expected.index = idx + + df_categorical = df.copy().set_index(idx) + assert df_categorical.shape == (6, 3) + result = df_categorical.round(3) + assert result.shape == (6, 3) + + tm.assert_frame_equal(result, expected) + + def test_round_interval_category_columns(self): + # GH#30063 + columns = pd.CategoricalIndex(pd.interval_range(0, 2)) + df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns) + + result = df.round() + expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1a241cd72ec43..f0e91286020b1 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1,7 +1,6 @@ from datetime import timedelta from decimal import Decimal import operator -import warnings import numpy as np import pytest @@ -257,278 +256,11 @@ def assert_bool_op_api( getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) -class TestDataFrameAnalytics: - - # --------------------------------------------------------------------- - # Correlation and covariance - - @td.skip_if_no_scipy - def test_corr_pearson(self, float_frame): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - - self._check_method(float_frame, "pearson") - - @td.skip_if_no_scipy - def test_corr_kendall(self, float_frame): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - - self._check_method(float_frame, "kendall") - - @td.skip_if_no_scipy - def test_corr_spearman(self, float_frame): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - - self._check_method(float_frame, "spearman") - - def _check_method(self, frame, method="pearson"): - correls = frame.corr(method=method) - expected = frame["A"].corr(frame["C"], method=method) - tm.assert_almost_equal(correls["A"]["C"], expected) - - @td.skip_if_no_scipy - def test_corr_non_numeric(self, float_frame, float_string_frame): - float_frame["A"][:5] = np.nan - float_frame["B"][5:10] = np.nan - - # exclude non-numeric types - result = float_string_frame.corr() - expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr() - tm.assert_frame_equal(result, expected) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) - def test_corr_nooverlap(self, meth): - # nothing in common - df = DataFrame( - { - "A": [1, 1.5, 1, np.nan, np.nan, np.nan], - "B": [np.nan, np.nan, np.nan, 1, 1.5, 1], - "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - } - ) - rs = df.corr(meth) - assert isna(rs.loc["A", "B"]) - assert isna(rs.loc["B", "A"]) - assert rs.loc["A", "A"] == 1 - assert rs.loc["B", "B"] == 1 - assert isna(rs.loc["C", "C"]) - - @td.skip_if_no_scipy - @pytest.mark.parametrize("meth", ["pearson", "spearman"]) - def test_corr_constant(self, meth): - # constant --> all NA - - df = DataFrame( - { - "A": [1, 1, 1, np.nan, np.nan, np.nan], - "B": [np.nan, np.nan, np.nan, 1, 1, 1], - } - ) - rs = df.corr(meth) - assert isna(rs.values).all() - - def test_corr_int(self): - # dtypes other than float64 #1761 - df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) - - df3.cov() - df3.corr() - - @td.skip_if_no_scipy - def test_corr_int_and_boolean(self): - # when dtypes of pandas series are different - # then ndarray will have dtype=object, - # so it need to be properly handled - df = DataFrame({"a": [True, False], "b": [1, 0]}) - - expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"]) - for meth in ["pearson", "kendall", "spearman"]: - - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - result = df.corr(meth) - tm.assert_frame_equal(result, expected) - - def test_corr_cov_independent_index_column(self): - # GH 14617 - df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) - for method in ["cov", "corr"]: - result = getattr(df, method)() - assert result.index is not result.columns - assert result.index.equals(result.columns) - - def test_corr_invalid_method(self): - # GH 22298 - df = pd.DataFrame(np.random.normal(size=(10, 2))) - msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " - with pytest.raises(ValueError, match=msg): - df.corr(method="____") - - def test_cov(self, float_frame, float_string_frame): - # min_periods no NAs (corner case) - expected = float_frame.cov() - result = float_frame.cov(min_periods=len(float_frame)) - - tm.assert_frame_equal(expected, result) - - result = float_frame.cov(min_periods=len(float_frame) + 1) - assert isna(result.values).all() - - # with NAs - frame = float_frame.copy() - frame["A"][:5] = np.nan - frame["B"][5:10] = np.nan - result = float_frame.cov(min_periods=len(float_frame) - 8) - expected = float_frame.cov() - expected.loc["A", "B"] = np.nan - expected.loc["B", "A"] = np.nan - - # regular - float_frame["A"][:5] = np.nan - float_frame["B"][:10] = np.nan - cov = float_frame.cov() - - tm.assert_almost_equal(cov["A"]["C"], float_frame["A"].cov(float_frame["C"])) - - # exclude non-numeric types - result = float_string_frame.cov() - expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov() - tm.assert_frame_equal(result, expected) - - # Single column frame - df = DataFrame(np.linspace(0.0, 1.0, 10)) - result = df.cov() - expected = DataFrame( - np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns - ) - tm.assert_frame_equal(result, expected) - df.loc[0] = np.nan - result = df.cov() - expected = DataFrame( - np.cov(df.values[1:].T).reshape((1, 1)), - index=df.columns, - columns=df.columns, - ) - tm.assert_frame_equal(result, expected) - - def test_corrwith(self, datetime_frame): - a = datetime_frame - noise = Series(np.random.randn(len(a)), index=a.index) - - b = datetime_frame.add(noise, axis=0) - - # make sure order does not matter - b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) - del b["B"] - - colcorr = a.corrwith(b, axis=0) - tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"])) - - rowcorr = a.corrwith(b, axis=1) - tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0)) - - dropped = a.corrwith(b, axis=0, drop=True) - tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"])) - assert "B" not in dropped - - dropped = a.corrwith(b, axis=1, drop=True) - assert a.index[-1] not in dropped.index - - # non time-series data - index = ["a", "b", "c", "d", "e"] - columns = ["one", "two", "three", "four"] - df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns) - df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) - correls = df1.corrwith(df2, axis=1) - for row in index[:4]: - tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) - - def test_corrwith_with_objects(self): - df1 = tm.makeTimeDataFrame() - df2 = tm.makeTimeDataFrame() - cols = ["A", "B", "C", "D"] - - df1["obj"] = "foo" - df2["obj"] = "bar" - - result = df1.corrwith(df2) - expected = df1.loc[:, cols].corrwith(df2.loc[:, cols]) - tm.assert_series_equal(result, expected) - - result = df1.corrwith(df2, axis=1) - expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) - tm.assert_series_equal(result, expected) - - def test_corrwith_series(self, datetime_frame): - result = datetime_frame.corrwith(datetime_frame["A"]) - expected = datetime_frame.apply(datetime_frame["A"].corr) - - tm.assert_series_equal(result, expected) - - def test_corrwith_matches_corrcoef(self): - df1 = DataFrame(np.arange(10000), columns=["a"]) - df2 = DataFrame(np.arange(10000) ** 2, columns=["a"]) - c1 = df1.corrwith(df2)["a"] - c2 = np.corrcoef(df1["a"], df2["a"])[0][1] +class TestDataFrameCount: + pass - tm.assert_almost_equal(c1, c2) - assert c1 < 1 - def test_corrwith_mixed_dtypes(self): - # GH 18570 - df = pd.DataFrame( - {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]} - ) - s = pd.Series([0, 6, 7, 3]) - result = df.corrwith(s) - corrs = [df["a"].corr(s), df["b"].corr(s)] - expected = pd.Series(data=corrs, index=["a", "b"]) - tm.assert_series_equal(result, expected) - - def test_corrwith_index_intersection(self): - df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) - df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) - - result = df1.corrwith(df2, drop=True).index.sort_values() - expected = df1.columns.intersection(df2.columns).sort_values() - tm.assert_index_equal(result, expected) - - def test_corrwith_index_union(self): - df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) - df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) - - result = df1.corrwith(df2, drop=False).index.sort_values() - expected = df1.columns.union(df2.columns).sort_values() - tm.assert_index_equal(result, expected) - - def test_corrwith_dup_cols(self): - # GH 21925 - df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T) - df2 = df1.copy() - df2 = pd.concat((df2, df2[0]), axis=1) - - result = df1.corrwith(df2) - expected = pd.Series(np.ones(4), index=[0, 0, 1, 2]) - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - def test_corrwith_spearman(self): - # GH 21925 - df = pd.DataFrame(np.random.random(size=(100, 3))) - result = df.corrwith(df ** 2, method="spearman") - expected = Series(np.ones(len(result))) - tm.assert_series_equal(result, expected) - - @td.skip_if_no_scipy - def test_corrwith_kendall(self): - # GH 21925 - df = pd.DataFrame(np.random.random(size=(100, 3))) - result = df.corrwith(df ** 2, method="kendall") - expected = Series(np.ones(len(result))) - tm.assert_series_equal(result, expected) +class TestDataFrameAnalytics: # --------------------------------------------------------------------- # Reductions @@ -1452,218 +1184,6 @@ def test_any_all_level_axis_none_raises(self, method): with pytest.raises(ValueError, match=xpr): getattr(df, method)(axis=None, level="out") - # --------------------------------------------------------------------- - # Rounding - - def test_round(self): - # GH 2665 - - # Test that rounding an empty DataFrame does nothing - df = DataFrame() - tm.assert_frame_equal(df, df.round()) - - # Here's the test frame we'll be working with - df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) - - # Default round to integer (i.e. decimals=0) - expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) - tm.assert_frame_equal(df.round(), expected_rounded) - - # Round with an integer - decimals = 2 - expected_rounded = DataFrame( - {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} - ) - tm.assert_frame_equal(df.round(decimals), expected_rounded) - - # This should also work with np.round (since np.round dispatches to - # df.round) - tm.assert_frame_equal(np.round(df, decimals), expected_rounded) - - # Round with a list - round_list = [1, 2] - with pytest.raises(TypeError): - df.round(round_list) - - # Round with a dictionary - expected_rounded = DataFrame( - {"col1": [1.1, 2.1, 3.1], "col2": [1.23, 2.23, 3.23]} - ) - round_dict = {"col1": 1, "col2": 2} - tm.assert_frame_equal(df.round(round_dict), expected_rounded) - - # Incomplete dict - expected_partially_rounded = DataFrame( - {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} - ) - partial_round_dict = {"col2": 1} - tm.assert_frame_equal(df.round(partial_round_dict), expected_partially_rounded) - - # Dict with unknown elements - wrong_round_dict = {"col3": 2, "col2": 1} - tm.assert_frame_equal(df.round(wrong_round_dict), expected_partially_rounded) - - # float input to `decimals` - non_int_round_dict = {"col1": 1, "col2": 0.5} - with pytest.raises(TypeError): - df.round(non_int_round_dict) - - # String input - non_int_round_dict = {"col1": 1, "col2": "foo"} - with pytest.raises(TypeError): - df.round(non_int_round_dict) - - non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): - df.round(non_int_round_Series) - - # List input - non_int_round_dict = {"col1": 1, "col2": [1, 2]} - with pytest.raises(TypeError): - df.round(non_int_round_dict) - - non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): - df.round(non_int_round_Series) - - # Non integer Series inputs - non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): - df.round(non_int_round_Series) - - non_int_round_Series = Series(non_int_round_dict) - with pytest.raises(TypeError): - df.round(non_int_round_Series) - - # Negative numbers - negative_round_dict = {"col1": -1, "col2": -2} - big_df = df * 100 - expected_neg_rounded = DataFrame( - {"col1": [110.0, 210, 310], "col2": [100.0, 200, 300]} - ) - tm.assert_frame_equal(big_df.round(negative_round_dict), expected_neg_rounded) - - # nan in Series round - nan_round_Series = Series({"col1": np.nan, "col2": 1}) - - # TODO(wesm): unused? - expected_nan_round = DataFrame( # noqa - {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} - ) - - with pytest.raises(TypeError): - df.round(nan_round_Series) - - # Make sure this doesn't break existing Series.round - tm.assert_series_equal(df["col1"].round(1), expected_rounded["col1"]) - - # named columns - # GH 11986 - decimals = 2 - expected_rounded = DataFrame( - {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} - ) - df.columns.name = "cols" - expected_rounded.columns.name = "cols" - tm.assert_frame_equal(df.round(decimals), expected_rounded) - - # interaction of named columns & series - tm.assert_series_equal(df["col1"].round(decimals), expected_rounded["col1"]) - tm.assert_series_equal(df.round(decimals)["col1"], expected_rounded["col1"]) - - def test_numpy_round(self): - # GH 12600 - df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) - out = np.round(df, decimals=0) - expected = DataFrame([[2.0, 1.0], [0.0, 7.0]]) - tm.assert_frame_equal(out, expected) - - msg = "the 'out' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.round(df, decimals=0, out=df) - - def test_numpy_round_nan(self): - # See gh-14197 - df = Series([1.53, np.nan, 0.06]).to_frame() - with tm.assert_produces_warning(None): - result = df.round() - expected = Series([2.0, np.nan, 0.0]).to_frame() - tm.assert_frame_equal(result, expected) - - def test_round_mixed_type(self): - # GH 11885 - df = DataFrame( - { - "col1": [1.1, 2.2, 3.3, 4.4], - "col2": ["1", "a", "c", "f"], - "col3": date_range("20111111", periods=4), - } - ) - round_0 = DataFrame( - { - "col1": [1.0, 2.0, 3.0, 4.0], - "col2": ["1", "a", "c", "f"], - "col3": date_range("20111111", periods=4), - } - ) - tm.assert_frame_equal(df.round(), round_0) - tm.assert_frame_equal(df.round(1), df) - tm.assert_frame_equal(df.round({"col1": 1}), df) - tm.assert_frame_equal(df.round({"col1": 0}), round_0) - tm.assert_frame_equal(df.round({"col1": 0, "col2": 1}), round_0) - tm.assert_frame_equal(df.round({"col3": 1}), df) - - def test_round_issue(self): - # GH 11611 - - df = pd.DataFrame( - np.random.random([3, 3]), - columns=["A", "B", "C"], - index=["first", "second", "third"], - ) - - dfs = pd.concat((df, df), axis=1) - rounded = dfs.round() - tm.assert_index_equal(rounded.index, dfs.index) - - decimals = pd.Series([1, 0, 2], index=["A", "B", "A"]) - msg = "Index of decimals must be unique" - with pytest.raises(ValueError, match=msg): - df.round(decimals) - - def test_built_in_round(self): - # GH 11763 - # Here's the test frame we'll be working with - df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) - - # Default round to integer (i.e. decimals=0) - expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) - tm.assert_frame_equal(round(df), expected_rounded) - - def test_round_nonunique_categorical(self): - # See GH21809 - idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) - df = pd.DataFrame(np.random.rand(6, 3), columns=list("abc")) - - expected = df.round(3) - expected.index = idx - - df_categorical = df.copy().set_index(idx) - assert df_categorical.shape == (6, 3) - result = df_categorical.round(3) - assert result.shape == (6, 3) - - tm.assert_frame_equal(result, expected) - - def test_round_interval_category_columns(self): - # GH 30063 - columns = pd.CategoricalIndex(pd.interval_range(0, 2)) - df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns) - - result = df.round() - expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) - tm.assert_frame_equal(result, expected) - # --------------------------------------------------------------------- # Matrix-like From 2423734624aac6ea577f0042f6d7b56be44ec883 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Dec 2019 13:08:41 -0800 Subject: [PATCH 2/3] test_count --- pandas/tests/frame/methods/test_count.py | 36 ++++++++++++++++++++++++ pandas/tests/frame/test_analytics.py | 36 ------------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index e69de29bb2d1d..b5d3d60579f54 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -0,0 +1,36 @@ +from pandas import DataFrame, Series +import pandas.util.testing as tm + + +class TestDataFrameCount: + def test_count(self): + # corner case + frame = DataFrame() + ct1 = frame.count(1) + assert isinstance(ct1, Series) + + ct2 = frame.count(0) + assert isinstance(ct2, Series) + + # GH#423 + df = DataFrame(index=range(10)) + result = df.count(1) + expected = Series(0, index=df.index) + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=range(10)) + result = df.count(0) + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + df = DataFrame() + result = df.count() + expected = Series(0, index=[]) + tm.assert_series_equal(result, expected) + + def test_count_objects(self, float_string_frame): + dm = DataFrame(float_string_frame._series) + df = DataFrame(float_string_frame._series) + + tm.assert_series_equal(dm.count(), df.count()) + tm.assert_series_equal(dm.count(1), df.count(1)) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f0e91286020b1..9ddb14470f6e4 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -256,10 +256,6 @@ def assert_bool_op_api( getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) -class TestDataFrameCount: - pass - - class TestDataFrameAnalytics: # --------------------------------------------------------------------- @@ -900,38 +896,6 @@ def test_sum_bools(self): # --------------------------------------------------------------------- # Miscellanea - def test_count(self): - # corner case - frame = DataFrame() - ct1 = frame.count(1) - assert isinstance(ct1, Series) - - ct2 = frame.count(0) - assert isinstance(ct2, Series) - - # GH#423 - df = DataFrame(index=range(10)) - result = df.count(1) - expected = Series(0, index=df.index) - tm.assert_series_equal(result, expected) - - df = DataFrame(columns=range(10)) - result = df.count(0) - expected = Series(0, index=df.columns) - tm.assert_series_equal(result, expected) - - df = DataFrame() - result = df.count() - expected = Series(0, index=[]) - tm.assert_series_equal(result, expected) - - def test_count_objects(self, float_string_frame): - dm = DataFrame(float_string_frame._series) - df = DataFrame(float_string_frame._series) - - tm.assert_series_equal(dm.count(), df.count()) - tm.assert_series_equal(dm.count(1), df.count(1)) - def test_pct_change(self): # GH#11150 pnl = DataFrame( From 37695e380bf2c1b67d29edd9335c7c03e9fbcff7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 23 Dec 2019 13:21:03 -0800 Subject: [PATCH 3/3] series cov, count, round methods --- pandas/tests/series/methods/test_count.py | 38 ++++ pandas/tests/series/methods/test_cov_corr.py | 158 ++++++++++++++ pandas/tests/series/methods/test_round.py | 46 ++++ pandas/tests/series/test_analytics.py | 216 ------------------- 4 files changed, 242 insertions(+), 216 deletions(-) create mode 100644 pandas/tests/series/methods/test_count.py create mode 100644 pandas/tests/series/methods/test_cov_corr.py create mode 100644 pandas/tests/series/methods/test_round.py diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py new file mode 100644 index 0000000000000..9cf776c0d9f1a --- /dev/null +++ b/pandas/tests/series/methods/test_count.py @@ -0,0 +1,38 @@ +import numpy as np + +import pandas as pd +from pandas import Categorical, MultiIndex, Series +import pandas.util.testing as tm + + +class TestSeriesCount: + def test_count(self, datetime_series): + assert datetime_series.count() == len(datetime_series) + + datetime_series[::2] = np.NaN + + assert datetime_series.count() == np.isfinite(datetime_series).sum() + + mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) + ts = Series(np.arange(len(mi)), index=mi) + + left = ts.count(level=1) + right = Series([2, 3, 1], index=[1, 2, np.nan]) + tm.assert_series_equal(left, right) + + ts.iloc[[0, 3, 5]] = np.nan + tm.assert_series_equal(ts.count(level=1), right - 1) + + # GH#29478 + with pd.option_context("use_inf_as_na", True): + assert pd.Series([pd.Timestamp("1990/1/1")]).count() == 1 + + def test_count_categorical(self): + + ser = Series( + Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + ) + result = ser.count() + assert result == 2 diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py new file mode 100644 index 0000000000000..f7dae87018419 --- /dev/null +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import Series, isna +import pandas.util.testing as tm + + +class TestSeriesCov: + def test_cov(self, datetime_series): + # full overlap + tm.assert_almost_equal( + datetime_series.cov(datetime_series), datetime_series.std() ** 2 + ) + + # partial overlap + tm.assert_almost_equal( + datetime_series[:15].cov(datetime_series[5:]), + datetime_series[5:15].std() ** 2, + ) + + # No overlap + assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) + + # all NA + cp = datetime_series[:10].copy() + cp[:] = np.nan + assert isna(cp.cov(cp)) + + # min_periods + assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) + + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) + assert isna(ts1.cov(ts2, min_periods=12)) + + +class TestSeriesCorr: + @td.skip_if_no_scipy + def test_corr(self, datetime_series): + import scipy.stats as stats + + # full overlap + tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) + + # partial overlap + tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) + + assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) + + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) + assert isna(ts1.corr(ts2, min_periods=12)) + + # No overlap + assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) + + # all NA + cp = datetime_series[:10].copy() + cp[:] = np.nan + assert isna(cp.corr(cp)) + + A = tm.makeTimeSeries() + B = tm.makeTimeSeries() + result = A.corr(B) + expected, _ = stats.pearsonr(A, B) + tm.assert_almost_equal(result, expected) + + @td.skip_if_no_scipy + def test_corr_rank(self): + import scipy.stats as stats + + # kendall and spearman + A = tm.makeTimeSeries() + B = tm.makeTimeSeries() + A[-5:] = A[:5] + result = A.corr(B, method="kendall") + expected = stats.kendalltau(A, B)[0] + tm.assert_almost_equal(result, expected) + + result = A.corr(B, method="spearman") + expected = stats.spearmanr(A, B)[0] + tm.assert_almost_equal(result, expected) + + # results from R + A = Series( + [ + -0.89926396, + 0.94209606, + -1.03289164, + -0.95445587, + 0.76910310, + -0.06430576, + -2.09704447, + 0.40660407, + -0.89926396, + 0.94209606, + ] + ) + B = Series( + [ + -1.01270225, + -0.62210117, + -1.56895827, + 0.59592943, + -0.01680292, + 1.17258718, + -1.06009347, + -0.10222060, + -0.89076239, + 0.89372375, + ] + ) + kexp = 0.4319297 + sexp = 0.5853767 + tm.assert_almost_equal(A.corr(B, method="kendall"), kexp) + tm.assert_almost_equal(A.corr(B, method="spearman"), sexp) + + def test_corr_invalid_method(self): + # GH PR #22298 + s1 = pd.Series(np.random.randn(10)) + s2 = pd.Series(np.random.randn(10)) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with pytest.raises(ValueError, match=msg): + s1.corr(s2, method="____") + + def test_corr_callable_method(self, datetime_series): + # simple correlation example + # returns 1 if exact equality, 0 otherwise + my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0 + + # simple example + s1 = Series([1, 2, 3, 4, 5]) + s2 = Series([5, 4, 3, 2, 1]) + expected = 0 + tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected) + + # full overlap + tm.assert_almost_equal( + datetime_series.corr(datetime_series, method=my_corr), 1.0 + ) + + # partial overlap + tm.assert_almost_equal( + datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0 + ) + + # No overlap + assert np.isnan( + datetime_series[::2].corr(datetime_series[1::2], method=my_corr) + ) + + # dataframe example + df = pd.DataFrame([s1, s2]) + expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}]) + tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected) diff --git a/pandas/tests/series/methods/test_round.py b/pandas/tests/series/methods/test_round.py new file mode 100644 index 0000000000000..1776468ef5a83 --- /dev/null +++ b/pandas/tests/series/methods/test_round.py @@ -0,0 +1,46 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas.util.testing as tm + + +class TestSeriesRound: + def test_round(self, datetime_series): + datetime_series.index.name = "index_name" + result = datetime_series.round(2) + expected = Series( + np.round(datetime_series.values, 2), index=datetime_series.index, name="ts" + ) + tm.assert_series_equal(result, expected) + assert result.name == datetime_series.name + + def test_round_numpy(self): + # See GH#12600 + ser = Series([1.53, 1.36, 0.06]) + out = np.round(ser, decimals=0) + expected = Series([2.0, 1.0, 0.0]) + tm.assert_series_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(ser, decimals=0, out=ser) + + def test_round_numpy_with_nan(self): + # See GH#14197 + ser = Series([1.53, np.nan, 0.06]) + with tm.assert_produces_warning(None): + result = ser.round() + expected = Series([2.0, np.nan, 0.0]) + tm.assert_series_equal(result, expected) + + def test_round_builtin(self): + ser = Series([1.123, 2.123, 3.123], index=range(3)) + result = round(ser) + expected_rounded0 = Series([1.0, 2.0, 3.0], index=range(3)) + tm.assert_series_equal(result, expected_rounded0) + + decimals = 2 + expected_rounded = Series([1.12, 2.12, 3.12], index=range(3)) + result = round(ser, decimals) + tm.assert_series_equal(result, expected_rounded) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index b43dcc5e52c55..17cf307a04d7f 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -89,218 +89,12 @@ def test_numpy_compress(self): with pytest.raises(ValueError, match=msg): np.compress(cond, s, out=s) - def test_round(self, datetime_series): - datetime_series.index.name = "index_name" - result = datetime_series.round(2) - expected = Series( - np.round(datetime_series.values, 2), index=datetime_series.index, name="ts" - ) - tm.assert_series_equal(result, expected) - assert result.name == datetime_series.name - - def test_numpy_round(self): - # See gh-12600 - s = Series([1.53, 1.36, 0.06]) - out = np.round(s, decimals=0) - expected = Series([2.0, 1.0, 0.0]) - tm.assert_series_equal(out, expected) - - msg = "the 'out' parameter is not supported" - with pytest.raises(ValueError, match=msg): - np.round(s, decimals=0, out=s) - - def test_numpy_round_nan(self): - # See gh-14197 - s = Series([1.53, np.nan, 0.06]) - with tm.assert_produces_warning(None): - result = s.round() - expected = Series([2.0, np.nan, 0.0]) - tm.assert_series_equal(result, expected) - - def test_built_in_round(self): - s = Series([1.123, 2.123, 3.123], index=range(3)) - result = round(s) - expected_rounded0 = Series([1.0, 2.0, 3.0], index=range(3)) - tm.assert_series_equal(result, expected_rounded0) - - decimals = 2 - expected_rounded = Series([1.12, 2.12, 3.12], index=range(3)) - result = round(s, decimals) - tm.assert_series_equal(result, expected_rounded) - def test_prod_numpy16_bug(self): s = Series([1.0, 1.0, 1.0], index=range(3)) result = s.prod() assert not isinstance(result, Series) - @td.skip_if_no_scipy - def test_corr(self, datetime_series): - import scipy.stats as stats - - # full overlap - tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) - - # partial overlap - tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) - - assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) - - ts1 = datetime_series[:15].reindex(datetime_series.index) - ts2 = datetime_series[5:].reindex(datetime_series.index) - assert isna(ts1.corr(ts2, min_periods=12)) - - # No overlap - assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) - - # all NA - cp = datetime_series[:10].copy() - cp[:] = np.nan - assert isna(cp.corr(cp)) - - A = tm.makeTimeSeries() - B = tm.makeTimeSeries() - result = A.corr(B) - expected, _ = stats.pearsonr(A, B) - tm.assert_almost_equal(result, expected) - - @td.skip_if_no_scipy - def test_corr_rank(self): - import scipy.stats as stats - - # kendall and spearman - A = tm.makeTimeSeries() - B = tm.makeTimeSeries() - A[-5:] = A[:5] - result = A.corr(B, method="kendall") - expected = stats.kendalltau(A, B)[0] - tm.assert_almost_equal(result, expected) - - result = A.corr(B, method="spearman") - expected = stats.spearmanr(A, B)[0] - tm.assert_almost_equal(result, expected) - - # results from R - A = Series( - [ - -0.89926396, - 0.94209606, - -1.03289164, - -0.95445587, - 0.76910310, - -0.06430576, - -2.09704447, - 0.40660407, - -0.89926396, - 0.94209606, - ] - ) - B = Series( - [ - -1.01270225, - -0.62210117, - -1.56895827, - 0.59592943, - -0.01680292, - 1.17258718, - -1.06009347, - -0.10222060, - -0.89076239, - 0.89372375, - ] - ) - kexp = 0.4319297 - sexp = 0.5853767 - tm.assert_almost_equal(A.corr(B, method="kendall"), kexp) - tm.assert_almost_equal(A.corr(B, method="spearman"), sexp) - - def test_corr_invalid_method(self): - # GH PR #22298 - s1 = pd.Series(np.random.randn(10)) - s2 = pd.Series(np.random.randn(10)) - msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " - with pytest.raises(ValueError, match=msg): - s1.corr(s2, method="____") - - def test_corr_callable_method(self, datetime_series): - # simple correlation example - # returns 1 if exact equality, 0 otherwise - my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0 - - # simple example - s1 = Series([1, 2, 3, 4, 5]) - s2 = Series([5, 4, 3, 2, 1]) - expected = 0 - tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected) - - # full overlap - tm.assert_almost_equal( - datetime_series.corr(datetime_series, method=my_corr), 1.0 - ) - - # partial overlap - tm.assert_almost_equal( - datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0 - ) - - # No overlap - assert np.isnan( - datetime_series[::2].corr(datetime_series[1::2], method=my_corr) - ) - - # dataframe example - df = pd.DataFrame([s1, s2]) - expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}]) - tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected) - - def test_cov(self, datetime_series): - # full overlap - tm.assert_almost_equal( - datetime_series.cov(datetime_series), datetime_series.std() ** 2 - ) - - # partial overlap - tm.assert_almost_equal( - datetime_series[:15].cov(datetime_series[5:]), - datetime_series[5:15].std() ** 2, - ) - - # No overlap - assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) - - # all NA - cp = datetime_series[:10].copy() - cp[:] = np.nan - assert isna(cp.cov(cp)) - - # min_periods - assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) - - ts1 = datetime_series[:15].reindex(datetime_series.index) - ts2 = datetime_series[5:].reindex(datetime_series.index) - assert isna(ts1.cov(ts2, min_periods=12)) - - def test_count(self, datetime_series): - assert datetime_series.count() == len(datetime_series) - - datetime_series[::2] = np.NaN - - assert datetime_series.count() == np.isfinite(datetime_series).sum() - - mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) - ts = Series(np.arange(len(mi)), index=mi) - - left = ts.count(level=1) - right = Series([2, 3, 1], index=[1, 2, np.nan]) - tm.assert_series_equal(left, right) - - ts.iloc[[0, 3, 5]] = np.nan - tm.assert_series_equal(ts.count(level=1), right - 1) - - # GH29478 - with pd.option_context("use_inf_as_na", True): - assert pd.Series([pd.Timestamp("1990/1/1")]).count() == 1 - def test_dot(self): a = Series(np.random.randn(4), index=["p", "q", "r", "s"]) b = DataFrame( @@ -606,16 +400,6 @@ def test_validate_stat_keepdims(self): class TestCategoricalSeriesAnalytics: - def test_count(self): - - s = Series( - Categorical( - [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True - ) - ) - result = s.count() - assert result == 2 - @pytest.mark.parametrize( "dtype", ["int_", "uint", "float_", "unicode_", "timedelta64[h]", "datetime64[D]"],