From 35423669a83aa7de53085595576ec02ee1ec6637 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 19 Dec 2019 13:12:15 -0800
Subject: [PATCH 1/7] remove dummy file

---
 pandas/tests/test_compat.py | 3 ---
 1 file changed, 3 deletions(-)
 delete mode 100644 pandas/tests/test_compat.py

diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py
deleted file mode 100644
index 4ff8b0b31e85e..0000000000000
--- a/pandas/tests/test_compat.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-Testing that functions from compat work as expected
-"""

From a18380727f86942f9adbf074e8b7be43dab1ea60 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 19 Dec 2019 13:15:14 -0800
Subject: [PATCH 2/7] CLN: remove unused assignments in test_repr_info

---
 pandas/tests/frame/test_repr_info.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index 318b1c6add91e..b944b8f954f22 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -29,17 +29,17 @@
 class TestDataFrameReprInfoEtc:
     def test_repr_empty(self):
         # empty
-        foo = repr(DataFrame())  # noqa
+        repr(DataFrame())
 
         # empty with index
         frame = DataFrame(index=np.arange(1000))
-        foo = repr(frame)  # noqa
+        repr(frame)
 
     def test_repr_mixed(self, float_string_frame):
         buf = StringIO()
 
         # mixed
-        foo = repr(float_string_frame)  # noqa
+        repr(float_string_frame)
         float_string_frame.info(verbose=False, buf=buf)
 
     @pytest.mark.slow
@@ -51,7 +51,7 @@ def test_repr_mixed_big(self):
         biggie.loc[:20, "A"] = np.nan
         biggie.loc[:20, "B"] = np.nan
 
-        foo = repr(biggie)  # noqa
+        repr(biggie)
 
     def test_repr(self, float_frame):
         buf = StringIO()
@@ -68,7 +68,7 @@ def test_repr(self, float_frame):
 
         # columns but no index
         no_index = DataFrame(columns=[0, 1, 3])
-        foo = repr(no_index)  # noqa
+        repr(no_index)
 
         # no columns or index
         DataFrame().info(buf=buf)
@@ -129,9 +129,6 @@ def test_repr_unsortable(self, float_frame):
     def test_repr_unicode(self):
         uval = "\u03c3\u03c3\u03c3\u03c3"
 
-        # TODO(wesm): is this supposed to be used?
-        bval = uval.encode("utf-8")  # noqa
-
         df = DataFrame({"A": [uval, uval]})
 
         result = repr(df)

From 58043b3af5d02d9f5fe64fce34d9857ac2dbd0c3 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 19 Dec 2019 13:22:14 -0800
Subject: [PATCH 3/7] refactor out TestDescribe

---
 pandas/tests/frame/test_analytics.py  | 939 +++++++++++++-------------
 pandas/tests/series/test_analytics.py |   4 +-
 2 files changed, 472 insertions(+), 471 deletions(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index cef389a6c4167..938c531095584 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -258,526 +258,251 @@ def assert_bool_op_api(
         getattr(bool_frame_with_na, opname)(axis=1, bool_only=False)
 
 
-class TestDataFrameAnalytics:
-
-    # ---------------------------------------------------------------------
-    # Correlation and covariance
-
-    @td.skip_if_no_scipy
-    def test_corr_pearson(self, float_frame):
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][5:10] = np.nan
-
-        self._check_method(float_frame, "pearson")
-
-    @td.skip_if_no_scipy
-    def test_corr_kendall(self, float_frame):
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][5:10] = np.nan
-
-        self._check_method(float_frame, "kendall")
+class TestDescribe:
+    def test_describe_bool_in_mixed_frame(self):
+        df = DataFrame(
+            {
+                "string_data": ["a", "b", "c", "d", "e"],
+                "bool_data": [True, True, False, False, False],
+                "int_data": [10, 20, 30, 40, 50],
+            }
+        )
 
-    @td.skip_if_no_scipy
-    def test_corr_spearman(self, float_frame):
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][5:10] = np.nan
+        # Integer data are included in .describe() output,
+        # Boolean and string data are not.
+        result = df.describe()
+        expected = DataFrame(
+            {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]},
+            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
+        )
+        tm.assert_frame_equal(result, expected)
 
-        self._check_method(float_frame, "spearman")
+        # Top value is a boolean value that is False
+        result = df.describe(include=["bool"])
 
-    def _check_method(self, frame, method="pearson"):
-        correls = frame.corr(method=method)
-        expected = frame["A"].corr(frame["C"], method=method)
-        tm.assert_almost_equal(correls["A"]["C"], expected)
+        expected = DataFrame(
+            {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"]
+        )
+        tm.assert_frame_equal(result, expected)
 
-    @td.skip_if_no_scipy
-    def test_corr_non_numeric(self, float_frame, float_string_frame):
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][5:10] = np.nan
+    def test_describe_empty_object(self):
+        # GH#27183
+        df = pd.DataFrame({"A": [None, None]}, dtype=object)
+        result = df.describe()
+        expected = pd.DataFrame(
+            {"A": [0, 0, np.nan, np.nan]},
+            dtype=object,
+            index=["count", "unique", "top", "freq"],
+        )
+        tm.assert_frame_equal(result, expected)
 
-        # exclude non-numeric types
-        result = float_string_frame.corr()
-        expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr()
+        result = df.iloc[:0].describe()
         tm.assert_frame_equal(result, expected)
 
-    @td.skip_if_no_scipy
-    @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"])
-    def test_corr_nooverlap(self, meth):
-        # nothing in common
-        df = DataFrame(
+    def test_describe_bool_frame(self):
+        # GH#13891
+        df = pd.DataFrame(
             {
-                "A": [1, 1.5, 1, np.nan, np.nan, np.nan],
-                "B": [np.nan, np.nan, np.nan, 1, 1.5, 1],
-                "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+                "bool_data_1": [False, False, True, True],
+                "bool_data_2": [False, True, True, True],
             }
         )
-        rs = df.corr(meth)
-        assert isna(rs.loc["A", "B"])
-        assert isna(rs.loc["B", "A"])
-        assert rs.loc["A", "A"] == 1
-        assert rs.loc["B", "B"] == 1
-        assert isna(rs.loc["C", "C"])
-
-    @td.skip_if_no_scipy
-    @pytest.mark.parametrize("meth", ["pearson", "spearman"])
-    def test_corr_constant(self, meth):
-        # constant --> all NA
+        result = df.describe()
+        expected = DataFrame(
+            {"bool_data_1": [4, 2, True, 2], "bool_data_2": [4, 2, True, 3]},
+            index=["count", "unique", "top", "freq"],
+        )
+        tm.assert_frame_equal(result, expected)
 
-        df = DataFrame(
+        df = pd.DataFrame(
             {
-                "A": [1, 1, 1, np.nan, np.nan, np.nan],
-                "B": [np.nan, np.nan, np.nan, 1, 1, 1],
+                "bool_data": [False, False, True, True, False],
+                "int_data": [0, 1, 2, 3, 4],
             }
         )
-        rs = df.corr(meth)
-        assert isna(rs.values).all()
-
-    def test_corr_int(self):
-        # dtypes other than float64 #1761
-        df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})
-
-        df3.cov()
-        df3.corr()
-
-    @td.skip_if_no_scipy
-    def test_corr_int_and_boolean(self):
-        # when dtypes of pandas series are different
-        # then ndarray will have dtype=object,
-        # so it need to be properly handled
-        df = DataFrame({"a": [True, False], "b": [1, 0]})
-
-        expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"])
-        for meth in ["pearson", "kendall", "spearman"]:
-
-            with warnings.catch_warnings(record=True):
-                warnings.simplefilter("ignore", RuntimeWarning)
-                result = df.corr(meth)
-            tm.assert_frame_equal(result, expected)
-
-    def test_corr_cov_independent_index_column(self):
-        # GH 14617
-        df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd"))
-        for method in ["cov", "corr"]:
-            result = getattr(df, method)()
-            assert result.index is not result.columns
-            assert result.index.equals(result.columns)
-
-    def test_corr_invalid_method(self):
-        # GH 22298
-        df = pd.DataFrame(np.random.normal(size=(10, 2)))
-        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
-        with pytest.raises(ValueError, match=msg):
-            df.corr(method="____")
+        result = df.describe()
+        expected = DataFrame(
+            {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]},
+            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
+        )
+        tm.assert_frame_equal(result, expected)
 
-    def test_cov(self, float_frame, float_string_frame):
-        # min_periods no NAs (corner case)
-        expected = float_frame.cov()
-        result = float_frame.cov(min_periods=len(float_frame))
+        df = pd.DataFrame(
+            {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]}
+        )
+        result = df.describe()
+        expected = DataFrame(
+            {"bool_data": [4, 2, True, 2], "str_data": [4, 3, "a", 2]},
+            index=["count", "unique", "top", "freq"],
+        )
+        tm.assert_frame_equal(result, expected)
 
-        tm.assert_frame_equal(expected, result)
+    def test_describe_categorical(self):
+        df = DataFrame({"value": np.random.randint(0, 10000, 100)})
+        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
+        cat_labels = Categorical(labels, labels)
 
-        result = float_frame.cov(min_periods=len(float_frame) + 1)
-        assert isna(result.values).all()
+        df = df.sort_values(by=["value"], ascending=True)
+        df["value_group"] = pd.cut(
+            df.value, range(0, 10500, 500), right=False, labels=cat_labels
+        )
+        cat = df
 
-        # with NAs
-        frame = float_frame.copy()
-        frame["A"][:5] = np.nan
-        frame["B"][5:10] = np.nan
-        result = float_frame.cov(min_periods=len(float_frame) - 8)
-        expected = float_frame.cov()
-        expected.loc["A", "B"] = np.nan
-        expected.loc["B", "A"] = np.nan
+        # Categoricals should not show up together with numerical columns
+        result = cat.describe()
+        assert len(result.columns) == 1
 
-        # regular
-        float_frame["A"][:5] = np.nan
-        float_frame["B"][:10] = np.nan
-        cov = float_frame.cov()
+        # In a frame, describe() for the cat should be the same as for string
+        # arrays (count, unique, top, freq)
 
-        tm.assert_almost_equal(cov["A"]["C"], float_frame["A"].cov(float_frame["C"]))
+        cat = Categorical(
+            ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True
+        )
+        s = Series(cat)
+        result = s.describe()
+        expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"])
+        tm.assert_series_equal(result, expected)
 
-        # exclude non-numeric types
-        result = float_string_frame.cov()
-        expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
-        tm.assert_frame_equal(result, expected)
+        cat = Series(Categorical(["a", "b", "c", "c"]))
+        df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]})
+        result = df3.describe()
+        tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)
 
-        # Single column frame
-        df = DataFrame(np.linspace(0.0, 1.0, 10))
-        result = df.cov()
+    def test_describe_empty_categorical_column(self):
+        # GH#26397
+        # Ensure the index of an empty categorical DataFrame column
+        # also contains (count, unique, top, freq)
+        df = pd.DataFrame({"empty_col": Categorical([])})
+        result = df.describe()
         expected = DataFrame(
-            np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns
+            {"empty_col": [0, 0, np.nan, np.nan]},
+            index=["count", "unique", "top", "freq"],
+            dtype="object",
         )
         tm.assert_frame_equal(result, expected)
-        df.loc[0] = np.nan
-        result = df.cov()
-        expected = DataFrame(
-            np.cov(df.values[1:].T).reshape((1, 1)),
-            index=df.columns,
-            columns=df.columns,
+        # ensure NaN, not None
+        assert np.isnan(result.iloc[2, 0])
+        assert np.isnan(result.iloc[3, 0])
+
+    def test_describe_categorical_columns(self):
+        # GH#11558
+        columns = pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX")
+        df = DataFrame(
+            {
+                "int1": [10, 20, 30, 40, 50],
+                "int2": [10, 20, 30, 40, 50],
+                "obj": ["A", 0, None, "X", 1],
+            },
+            columns=columns,
         )
-        tm.assert_frame_equal(result, expected)
+        result = df.describe()
 
-    def test_corrwith(self, datetime_frame):
-        a = datetime_frame
-        noise = Series(np.random.randn(len(a)), index=a.index)
+        exp_columns = pd.CategoricalIndex(
+            ["int1", "int2"],
+            categories=["int1", "int2", "obj"],
+            ordered=True,
+            name="XXX",
+        )
+        expected = DataFrame(
+            {
+                "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50],
+                "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50],
+            },
+            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
+            columns=exp_columns,
+        )
 
-        b = datetime_frame.add(noise, axis=0)
+        tm.assert_frame_equal(result, expected)
+        tm.assert_categorical_equal(result.columns.values, expected.columns.values)
 
-        # make sure order does not matter
-        b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:])
-        del b["B"]
-
-        colcorr = a.corrwith(b, axis=0)
-        tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"]))
-
-        rowcorr = a.corrwith(b, axis=1)
-        tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0))
-
-        dropped = a.corrwith(b, axis=0, drop=True)
-        tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"]))
-        assert "B" not in dropped
-
-        dropped = a.corrwith(b, axis=1, drop=True)
-        assert a.index[-1] not in dropped.index
-
-        # non time-series data
-        index = ["a", "b", "c", "d", "e"]
-        columns = ["one", "two", "three", "four"]
-        df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns)
-        df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns)
-        correls = df1.corrwith(df2, axis=1)
-        for row in index[:4]:
-            tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
-
-    def test_corrwith_with_objects(self):
-        df1 = tm.makeTimeDataFrame()
-        df2 = tm.makeTimeDataFrame()
-        cols = ["A", "B", "C", "D"]
-
-        df1["obj"] = "foo"
-        df2["obj"] = "bar"
-
-        result = df1.corrwith(df2)
-        expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
-        tm.assert_series_equal(result, expected)
-
-        result = df1.corrwith(df2, axis=1)
-        expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1)
-        tm.assert_series_equal(result, expected)
-
-    def test_corrwith_series(self, datetime_frame):
-        result = datetime_frame.corrwith(datetime_frame["A"])
-        expected = datetime_frame.apply(datetime_frame["A"].corr)
-
-        tm.assert_series_equal(result, expected)
-
-    def test_corrwith_matches_corrcoef(self):
-        df1 = DataFrame(np.arange(10000), columns=["a"])
-        df2 = DataFrame(np.arange(10000) ** 2, columns=["a"])
-        c1 = df1.corrwith(df2)["a"]
-        c2 = np.corrcoef(df1["a"], df2["a"])[0][1]
-
-        tm.assert_almost_equal(c1, c2)
-        assert c1 < 1
-
-    def test_corrwith_mixed_dtypes(self):
-        # GH 18570
-        df = pd.DataFrame(
-            {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]}
+    def test_describe_datetime_columns(self):
+        columns = pd.DatetimeIndex(
+            ["2011-01-01", "2011-02-01", "2011-03-01"],
+            freq="MS",
+            tz="US/Eastern",
+            name="XXX",
         )
-        s = pd.Series([0, 6, 7, 3])
-        result = df.corrwith(s)
-        corrs = [df["a"].corr(s), df["b"].corr(s)]
-        expected = pd.Series(data=corrs, index=["a", "b"])
-        tm.assert_series_equal(result, expected)
-
-    def test_corrwith_index_intersection(self):
-        df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
-        df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
-
-        result = df1.corrwith(df2, drop=True).index.sort_values()
-        expected = df1.columns.intersection(df2.columns).sort_values()
-        tm.assert_index_equal(result, expected)
-
-    def test_corrwith_index_union(self):
-        df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
-        df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
-
-        result = df1.corrwith(df2, drop=False).index.sort_values()
-        expected = df1.columns.union(df2.columns).sort_values()
-        tm.assert_index_equal(result, expected)
-
-    def test_corrwith_dup_cols(self):
-        # GH 21925
-        df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T)
-        df2 = df1.copy()
-        df2 = pd.concat((df2, df2[0]), axis=1)
-
-        result = df1.corrwith(df2)
-        expected = pd.Series(np.ones(4), index=[0, 0, 1, 2])
-        tm.assert_series_equal(result, expected)
-
-    @td.skip_if_no_scipy
-    def test_corrwith_spearman(self):
-        # GH 21925
-        df = pd.DataFrame(np.random.random(size=(100, 3)))
-        result = df.corrwith(df ** 2, method="spearman")
-        expected = Series(np.ones(len(result)))
-        tm.assert_series_equal(result, expected)
-
-    @td.skip_if_no_scipy
-    def test_corrwith_kendall(self):
-        # GH 21925
-        df = pd.DataFrame(np.random.random(size=(100, 3)))
-        result = df.corrwith(df ** 2, method="kendall")
-        expected = Series(np.ones(len(result)))
-        tm.assert_series_equal(result, expected)
-
-    # ---------------------------------------------------------------------
-    # Describe
-
-    def test_bool_describe_in_mixed_frame(self):
         df = DataFrame(
             {
-                "string_data": ["a", "b", "c", "d", "e"],
-                "bool_data": [True, True, False, False, False],
-                "int_data": [10, 20, 30, 40, 50],
+                0: [10, 20, 30, 40, 50],
+                1: [10, 20, 30, 40, 50],
+                2: ["A", 0, None, "X", 1],
             }
         )
-
-        # Integer data are included in .describe() output,
-        # Boolean and string data are not.
+        df.columns = columns
         result = df.describe()
-        expected = DataFrame(
-            {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]},
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-        tm.assert_frame_equal(result, expected)
 
-        # Top value is a boolean value that is False
-        result = df.describe(include=["bool"])
-
-        expected = DataFrame(
-            {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"]
+        exp_columns = pd.DatetimeIndex(
+            ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX"
         )
-        tm.assert_frame_equal(result, expected)
-
-    def test_describe_empty_object(self):
-        # https://github.com/pandas-dev/pandas/issues/27183
-        df = pd.DataFrame({"A": [None, None]}, dtype=object)
-        result = df.describe()
-        expected = pd.DataFrame(
-            {"A": [0, 0, np.nan, np.nan]},
-            dtype=object,
-            index=["count", "unique", "top", "freq"],
+        expected = DataFrame(
+            {
+                0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50],
+                1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50],
+            },
+            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
         )
+        expected.columns = exp_columns
         tm.assert_frame_equal(result, expected)
+        assert result.columns.freq == "MS"
+        assert result.columns.tz == expected.columns.tz
 
-        result = df.iloc[:0].describe()
-        tm.assert_frame_equal(result, expected)
+    def test_describe_timedelta_values(self):
+        # GH#6145
+        t1 = pd.timedelta_range("1 days", freq="D", periods=5)
+        t2 = pd.timedelta_range("1 hours", freq="H", periods=5)
+        df = pd.DataFrame({"t1": t1, "t2": t2})
 
-    def test_describe_bool_frame(self):
-        # GH 13891
-        df = pd.DataFrame(
-            {
-                "bool_data_1": [False, False, True, True],
-                "bool_data_2": [False, True, True, True],
-            }
-        )
-        result = df.describe()
         expected = DataFrame(
-            {"bool_data_1": [4, 2, True, 2], "bool_data_2": [4, 2, True, 3]},
-            index=["count", "unique", "top", "freq"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-        df = pd.DataFrame(
             {
-                "bool_data": [False, False, True, True, False],
-                "int_data": [0, 1, 2, 3, 4],
-            }
-        )
-        result = df.describe()
-        expected = DataFrame(
-            {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]},
+                "t1": [
+                    5,
+                    pd.Timedelta("3 days"),
+                    df.iloc[:, 0].std(),
+                    pd.Timedelta("1 days"),
+                    pd.Timedelta("2 days"),
+                    pd.Timedelta("3 days"),
+                    pd.Timedelta("4 days"),
+                    pd.Timedelta("5 days"),
+                ],
+                "t2": [
+                    5,
+                    pd.Timedelta("3 hours"),
+                    df.iloc[:, 1].std(),
+                    pd.Timedelta("1 hours"),
+                    pd.Timedelta("2 hours"),
+                    pd.Timedelta("3 hours"),
+                    pd.Timedelta("4 hours"),
+                    pd.Timedelta("5 hours"),
+                ],
+            },
             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
         )
-        tm.assert_frame_equal(result, expected)
 
-        df = pd.DataFrame(
-            {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]}
-        )
         result = df.describe()
-        expected = DataFrame(
-            {"bool_data": [4, 2, True, 2], "str_data": [4, 3, "a", 2]},
-            index=["count", "unique", "top", "freq"],
-        )
         tm.assert_frame_equal(result, expected)
 
-    def test_describe_categorical(self):
-        df = DataFrame({"value": np.random.randint(0, 10000, 100)})
-        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
-        cat_labels = Categorical(labels, labels)
-
-        df = df.sort_values(by=["value"], ascending=True)
-        df["value_group"] = pd.cut(
-            df.value, range(0, 10500, 500), right=False, labels=cat_labels
+        exp_repr = (
+            "                           t1                      t2\n"
+            "count                       5                       5\n"
+            "mean          3 days 00:00:00         0 days 03:00:00\n"
+            "std    1 days 13:56:50.394919  0 days 01:34:52.099788\n"
+            "min           1 days 00:00:00         0 days 01:00:00\n"
+            "25%           2 days 00:00:00         0 days 02:00:00\n"
+            "50%           3 days 00:00:00         0 days 03:00:00\n"
+            "75%           4 days 00:00:00         0 days 04:00:00\n"
+            "max           5 days 00:00:00         0 days 05:00:00"
         )
-        cat = df
-
-        # Categoricals should not show up together with numerical columns
-        result = cat.describe()
-        assert len(result.columns) == 1
-
-        # In a frame, describe() for the cat should be the same as for string
-        # arrays (count, unique, top, freq)
+        assert repr(result) == exp_repr
 
-        cat = Categorical(
-            ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True
-        )
-        s = Series(cat)
-        result = s.describe()
-        expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"])
-        tm.assert_series_equal(result, expected)
-
-        cat = Series(Categorical(["a", "b", "c", "c"]))
-        df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]})
-        result = df3.describe()
-        tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)
-
-    def test_describe_empty_categorical_column(self):
-        # GH 26397
-        # Ensure the index of an an empty categorical DataFrame column
-        # also contains (count, unique, top, freq)
-        df = pd.DataFrame({"empty_col": Categorical([])})
-        result = df.describe()
-        expected = DataFrame(
-            {"empty_col": [0, 0, np.nan, np.nan]},
-            index=["count", "unique", "top", "freq"],
-            dtype="object",
-        )
-        tm.assert_frame_equal(result, expected)
-        # ensure NaN, not None
-        assert np.isnan(result.iloc[2, 0])
-        assert np.isnan(result.iloc[3, 0])
-
-    def test_describe_categorical_columns(self):
-        # GH 11558
-        columns = pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX")
-        df = DataFrame(
-            {
-                "int1": [10, 20, 30, 40, 50],
-                "int2": [10, 20, 30, 40, 50],
-                "obj": ["A", 0, None, "X", 1],
-            },
-            columns=columns,
-        )
-        result = df.describe()
-
-        exp_columns = pd.CategoricalIndex(
-            ["int1", "int2"],
-            categories=["int1", "int2", "obj"],
-            ordered=True,
-            name="XXX",
-        )
-        expected = DataFrame(
-            {
-                "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50],
-                "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50],
-            },
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-            columns=exp_columns,
-        )
-
-        tm.assert_frame_equal(result, expected)
-        tm.assert_categorical_equal(result.columns.values, expected.columns.values)
-
-    def test_describe_datetime_columns(self):
-        columns = pd.DatetimeIndex(
-            ["2011-01-01", "2011-02-01", "2011-03-01"],
-            freq="MS",
-            tz="US/Eastern",
-            name="XXX",
-        )
-        df = DataFrame(
-            {
-                0: [10, 20, 30, 40, 50],
-                1: [10, 20, 30, 40, 50],
-                2: ["A", 0, None, "X", 1],
-            }
-        )
-        df.columns = columns
-        result = df.describe()
-
-        exp_columns = pd.DatetimeIndex(
-            ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX"
-        )
-        expected = DataFrame(
-            {
-                0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50],
-                1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50],
-            },
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-        expected.columns = exp_columns
-        tm.assert_frame_equal(result, expected)
-        assert result.columns.freq == "MS"
-        assert result.columns.tz == expected.columns.tz
-
-    def test_describe_timedelta_values(self):
-        # GH 6145
-        t1 = pd.timedelta_range("1 days", freq="D", periods=5)
-        t2 = pd.timedelta_range("1 hours", freq="H", periods=5)
-        df = pd.DataFrame({"t1": t1, "t2": t2})
-
-        expected = DataFrame(
-            {
-                "t1": [
-                    5,
-                    pd.Timedelta("3 days"),
-                    df.iloc[:, 0].std(),
-                    pd.Timedelta("1 days"),
-                    pd.Timedelta("2 days"),
-                    pd.Timedelta("3 days"),
-                    pd.Timedelta("4 days"),
-                    pd.Timedelta("5 days"),
-                ],
-                "t2": [
-                    5,
-                    pd.Timedelta("3 hours"),
-                    df.iloc[:, 1].std(),
-                    pd.Timedelta("1 hours"),
-                    pd.Timedelta("2 hours"),
-                    pd.Timedelta("3 hours"),
-                    pd.Timedelta("4 hours"),
-                    pd.Timedelta("5 hours"),
-                ],
-            },
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-
-        result = df.describe()
-        tm.assert_frame_equal(result, expected)
-
-        exp_repr = (
-            "                           t1                      t2\n"
-            "count                       5                       5\n"
-            "mean          3 days 00:00:00         0 days 03:00:00\n"
-            "std    1 days 13:56:50.394919  0 days 01:34:52.099788\n"
-            "min           1 days 00:00:00         0 days 01:00:00\n"
-            "25%           2 days 00:00:00         0 days 02:00:00\n"
-            "50%           3 days 00:00:00         0 days 03:00:00\n"
-            "75%           4 days 00:00:00         0 days 04:00:00\n"
-            "max           5 days 00:00:00         0 days 05:00:00"
-        )
-        assert repr(result) == exp_repr
-
-    def test_describe_tz_values(self, tz_naive_fixture):
-        # GH 21332
-        tz = tz_naive_fixture
-        s1 = Series(range(5))
-        start = Timestamp(2018, 1, 1)
-        end = Timestamp(2018, 1, 5)
-        s2 = Series(date_range(start, end, tz=tz))
-        df = pd.DataFrame({"s1": s1, "s2": s2})
+    def test_describe_tz_values(self, tz_naive_fixture):
+        # GH#21332
+        tz = tz_naive_fixture
+        s1 = Series(range(5))
+        start = Timestamp(2018, 1, 1)
+        end = Timestamp(2018, 1, 5)
+        s2 = Series(date_range(start, end, tz=tz))
+        df = pd.DataFrame({"s1": s1, "s2": s2})
 
         expected = DataFrame(
             {
@@ -832,7 +557,7 @@ def test_describe_tz_values(self, tz_naive_fixture):
         tm.assert_frame_equal(result, expected)
 
     def test_describe_percentiles_integer_idx(self):
-        # Issue 26660
+        # GH#26660
         df = pd.DataFrame({"x": [1]})
         pct = np.linspace(0, 1, 10 + 1)
         result = df.describe(percentiles=pct)
@@ -860,6 +585,280 @@ def test_describe_percentiles_integer_idx(self):
         )
         tm.assert_frame_equal(result, expected)
 
+
+class TestDataFrameAnalytics:
+
+    # ---------------------------------------------------------------------
+    # Correlation and covariance
+
+    @td.skip_if_no_scipy
+    def test_corr_pearson(self, float_frame):
+        float_frame["A"][:5] = np.nan
+        float_frame["B"][5:10] = np.nan
+
+        self._check_method(float_frame, "pearson")
+
+    @td.skip_if_no_scipy
+    def test_corr_kendall(self, float_frame):
+        float_frame["A"][:5] = np.nan
+        float_frame["B"][5:10] = np.nan
+
+        self._check_method(float_frame, "kendall")
+
+    @td.skip_if_no_scipy
+    def test_corr_spearman(self, float_frame):
+        float_frame["A"][:5] = np.nan
+        float_frame["B"][5:10] = np.nan
+
+        self._check_method(float_frame, "spearman")
+
+    def _check_method(self, frame, method="pearson"):
+        correls = frame.corr(method=method)
+        expected = frame["A"].corr(frame["C"], method=method)
+        tm.assert_almost_equal(correls["A"]["C"], expected)
+
+    @td.skip_if_no_scipy
+    def test_corr_non_numeric(self, float_frame, float_string_frame):
+        float_frame["A"][:5] = np.nan
+        float_frame["B"][5:10] = np.nan
+
+        # exclude non-numeric types
+        result = float_string_frame.corr()
+        expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr()
+        tm.assert_frame_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"])
+    def test_corr_nooverlap(self, meth):
+        # nothing in common
+        df = DataFrame(
+            {
+                "A": [1, 1.5, 1, np.nan, np.nan, np.nan],
+                "B": [np.nan, np.nan, np.nan, 1, 1.5, 1],
+                "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+            }
+        )
+        rs = df.corr(meth)
+        assert isna(rs.loc["A", "B"])
+        assert isna(rs.loc["B", "A"])
+        assert rs.loc["A", "A"] == 1
+        assert rs.loc["B", "B"] == 1
+        assert isna(rs.loc["C", "C"])
+
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize("meth", ["pearson", "spearman"])
+    def test_corr_constant(self, meth):
+        # constant --> all NA
+
+        df = DataFrame(
+            {
+                "A": [1, 1, 1, np.nan, np.nan, np.nan],
+                "B": [np.nan, np.nan, np.nan, 1, 1, 1],
+            }
+        )
+        rs = df.corr(meth)
+        assert isna(rs.values).all()
+
+    def test_corr_int(self):
+        # dtypes other than float64 #1761
+        df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})
+
+        df3.cov()
+        df3.corr()
+
+    @td.skip_if_no_scipy
+    def test_corr_int_and_boolean(self):
+        # when dtypes of pandas series are different
+        # then ndarray will have dtype=object,
+        # so it needs to be properly handled
+        df = DataFrame({"a": [True, False], "b": [1, 0]})
+
+        expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"])
+        for meth in ["pearson", "kendall", "spearman"]:
+
+            with warnings.catch_warnings(record=True):
+                warnings.simplefilter("ignore", RuntimeWarning)
+                result = df.corr(meth)
+            tm.assert_frame_equal(result, expected)
+
+    def test_corr_cov_independent_index_column(self):
+        # GH 14617
+        df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd"))
+        for method in ["cov", "corr"]:
+            result = getattr(df, method)()
+            assert result.index is not result.columns
+            assert result.index.equals(result.columns)
+
+    def test_corr_invalid_method(self):
+        # GH 22298
+        df = pd.DataFrame(np.random.normal(size=(10, 2)))
+        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
+        with pytest.raises(ValueError, match=msg):
+            df.corr(method="____")
+
+    def test_cov(self, float_frame, float_string_frame):
+        # min_periods no NAs (corner case)
+        expected = float_frame.cov()
+        result = float_frame.cov(min_periods=len(float_frame))
+
+        tm.assert_frame_equal(expected, result)
+
+        result = float_frame.cov(min_periods=len(float_frame) + 1)
+        assert isna(result.values).all()
+
+        # with NAs
+        frame = float_frame.copy()
+        frame["A"][:5] = np.nan
+        frame["B"][5:10] = np.nan
+        result = float_frame.cov(min_periods=len(float_frame) - 8)
+        expected = float_frame.cov()
+        expected.loc["A", "B"] = np.nan
+        expected.loc["B", "A"] = np.nan
+
+        # regular
+        float_frame["A"][:5] = np.nan
+        float_frame["B"][:10] = np.nan
+        cov = float_frame.cov()
+
+        tm.assert_almost_equal(cov["A"]["C"], float_frame["A"].cov(float_frame["C"]))
+
+        # exclude non-numeric types
+        result = float_string_frame.cov()
+        expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
+        tm.assert_frame_equal(result, expected)
+
+        # Single column frame
+        df = DataFrame(np.linspace(0.0, 1.0, 10))
+        result = df.cov()
+        expected = DataFrame(
+            np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns
+        )
+        tm.assert_frame_equal(result, expected)
+        df.loc[0] = np.nan
+        result = df.cov()
+        expected = DataFrame(
+            np.cov(df.values[1:].T).reshape((1, 1)),
+            index=df.columns,
+            columns=df.columns,
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_corrwith(self, datetime_frame):
+        a = datetime_frame
+        noise = Series(np.random.randn(len(a)), index=a.index)
+
+        b = datetime_frame.add(noise, axis=0)
+
+        # make sure order does not matter
+        b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:])
+        del b["B"]
+
+        colcorr = a.corrwith(b, axis=0)
+        tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"]))
+
+        rowcorr = a.corrwith(b, axis=1)
+        tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0))
+
+        dropped = a.corrwith(b, axis=0, drop=True)
+        tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"]))
+        assert "B" not in dropped
+
+        dropped = a.corrwith(b, axis=1, drop=True)
+        assert a.index[-1] not in dropped.index
+
+        # non time-series data
+        index = ["a", "b", "c", "d", "e"]
+        columns = ["one", "two", "three", "four"]
+        df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns)
+        df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns)
+        correls = df1.corrwith(df2, axis=1)
+        for row in index[:4]:
+            tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
+
+    def test_corrwith_with_objects(self):
+        df1 = tm.makeTimeDataFrame()
+        df2 = tm.makeTimeDataFrame()
+        cols = ["A", "B", "C", "D"]
+
+        df1["obj"] = "foo"
+        df2["obj"] = "bar"
+
+        result = df1.corrwith(df2)
+        expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
+        tm.assert_series_equal(result, expected)
+
+        result = df1.corrwith(df2, axis=1)
+        expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1)
+        tm.assert_series_equal(result, expected)
+
+    def test_corrwith_series(self, datetime_frame):
+        result = datetime_frame.corrwith(datetime_frame["A"])
+        expected = datetime_frame.apply(datetime_frame["A"].corr)
+
+        tm.assert_series_equal(result, expected)
+
+    def test_corrwith_matches_corrcoef(self):
+        df1 = DataFrame(np.arange(10000), columns=["a"])
+        df2 = DataFrame(np.arange(10000) ** 2, columns=["a"])
+        c1 = df1.corrwith(df2)["a"]
+        c2 = np.corrcoef(df1["a"], df2["a"])[0][1]
+
+        tm.assert_almost_equal(c1, c2)
+        assert c1 < 1
+
+    def test_corrwith_mixed_dtypes(self):
+        # GH 18570
+        df = pd.DataFrame(
+            {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]}
+        )
+        s = pd.Series([0, 6, 7, 3])
+        result = df.corrwith(s)
+        corrs = [df["a"].corr(s), df["b"].corr(s)]
+        expected = pd.Series(data=corrs, index=["a", "b"])
+        tm.assert_series_equal(result, expected)
+
+    def test_corrwith_index_intersection(self):
+        df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
+        df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
+
+        result = df1.corrwith(df2, drop=True).index.sort_values()
+        expected = df1.columns.intersection(df2.columns).sort_values()
+        tm.assert_index_equal(result, expected)
+
+    def test_corrwith_index_union(self):
+        df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
+        df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
+
+        result = df1.corrwith(df2, drop=False).index.sort_values()
+        expected = df1.columns.union(df2.columns).sort_values()
+        tm.assert_index_equal(result, expected)
+
+    def test_corrwith_dup_cols(self):
+        # GH 21925
+        df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T)
+        df2 = df1.copy()
+        df2 = pd.concat((df2, df2[0]), axis=1)
+
+        result = df1.corrwith(df2)
+        expected = pd.Series(np.ones(4), index=[0, 0, 1, 2])
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_corrwith_spearman(self):
+        # GH 21925
+        df = pd.DataFrame(np.random.random(size=(100, 3)))
+        result = df.corrwith(df ** 2, method="spearman")
+        expected = Series(np.ones(len(result)))
+        tm.assert_series_equal(result, expected)
+
+    @td.skip_if_no_scipy
+    def test_corrwith_kendall(self):
+        # GH 21925
+        df = pd.DataFrame(np.random.random(size=(100, 3)))
+        result = df.corrwith(df ** 2, method="kendall")
+        expected = Series(np.ones(len(result)))
+        tm.assert_series_equal(result, expected)
+
     # ---------------------------------------------------------------------
     # Reductions
 
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 0eb4e8a6cfdf3..2a09cac90d5ba 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -24,7 +24,7 @@
 import pandas.util.testing as tm
 
 
-class TestSeriesAnalytics:
+class TestDescribe:
     def test_describe(self):
         s = Series([0, 1, 2, 3, 4], name="int_data")
         result = s.describe()
@@ -88,6 +88,8 @@ def test_describe_with_tz(self, tz_naive_fixture):
         )
         tm.assert_series_equal(result, expected)
 
+
+class TestSeriesAnalytics:
     def test_argsort(self, datetime_series):
         self._check_accum_op("argsort", datetime_series, check_dtype=False)
         argsorted = datetime_series.argsort()

From c9f3348a583b23ad4755a1d60e0bacfe441fe0ca Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 19 Dec 2019 13:38:28 -0800
Subject: [PATCH 4/7] refactor out TestDescribe

---
 pandas/tests/frame/test_analytics.py  | 2 +-
 pandas/tests/series/test_analytics.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 938c531095584..f97ec309ed1c2 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -258,7 +258,7 @@ def assert_bool_op_api(
         getattr(bool_frame_with_na, opname)(axis=1, bool_only=False)
 
 
-class TestDescribe:
+class TestDataFrameDescribe:
     def test_describe_bool_in_mixed_frame(self):
         df = DataFrame(
             {
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 2a09cac90d5ba..5cc67f19e45f0 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -24,7 +24,7 @@
 import pandas.util.testing as tm
 
 
-class TestDescribe:
+class TestSeriesDescribe:
     def test_describe(self):
         s = Series([0, 1, 2, 3, 4], name="int_data")
         result = s.describe()

From 9c4f53c29ccc911c7e2d20cf65a9033b6c9f0f20 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 23 Dec 2019 12:46:48 -0800
Subject: [PATCH 5/7] revert

---
 pandas/tests/frame/test_analytics.py | 661 ---------------------------
 1 file changed, 661 deletions(-)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index e119a28827606..1a241cd72ec43 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -257,334 +257,6 @@ def assert_bool_op_api(
         getattr(bool_frame_with_na, opname)(axis=1, bool_only=False)
 
 
-class TestDataFrameDescribe:
-    def test_describe_bool_in_mixed_frame(self):
-        df = DataFrame(
-            {
-                "string_data": ["a", "b", "c", "d", "e"],
-                "bool_data": [True, True, False, False, False],
-                "int_data": [10, 20, 30, 40, 50],
-            }
-        )
-
-        # Integer data are included in .describe() output,
-        # Boolean and string data are not.
-        result = df.describe()
-        expected = DataFrame(
-            {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]},
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-        # Top value is a boolean value that is False
-        result = df.describe(include=["bool"])
-
-        expected = DataFrame(
-            {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"]
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_describe_empty_object(self):
-        # GH#27183
-        df = pd.DataFrame({"A": [None, None]}, dtype=object)
-        result = df.describe()
-        expected = pd.DataFrame(
-            {"A": [0, 0, np.nan, np.nan]},
-            dtype=object,
-            index=["count", "unique", "top", "freq"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-        result = df.iloc[:0].describe()
-        tm.assert_frame_equal(result, expected)
-
-    def test_describe_bool_frame(self):
-        # GH#13891
-        df = pd.DataFrame(
-            {
-                "bool_data_1": [False, False, True, True],
-                "bool_data_2": [False, True, True, True],
-            }
-        )
-        result = df.describe()
-        expected = DataFrame(
-            {"bool_data_1": [4, 2, True, 2], "bool_data_2": [4, 2, True, 3]},
-            index=["count", "unique", "top", "freq"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-        df = pd.DataFrame(
-            {
-                "bool_data": [False, False, True, True, False],
-                "int_data": [0, 1, 2, 3, 4],
-            }
-        )
-        result = df.describe()
-        expected = DataFrame(
-            {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]},
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-        df = pd.DataFrame(
-            {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]}
-        )
-        result = df.describe()
-        expected = DataFrame(
-            {"bool_data": [4, 2, True, 2], "str_data": [4, 3, "a", 2]},
-            index=["count", "unique", "top", "freq"],
-        )
-        tm.assert_frame_equal(result, expected)
-
-    def test_describe_categorical(self):
-        df = DataFrame({"value": np.random.randint(0, 10000, 100)})
-        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
-        cat_labels = Categorical(labels, labels)
-
-        df = df.sort_values(by=["value"], ascending=True)
-        df["value_group"] = pd.cut(
-            df.value, range(0, 10500, 500), right=False, labels=cat_labels
-        )
-        cat = df
-
-        # Categoricals should not show up together with numerical columns
-        result = cat.describe()
-        assert len(result.columns) == 1
-
-        # In a frame, describe() for the cat should be the same as for string
-        # arrays (count, unique, top, freq)
-
-        cat = Categorical(
-            ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True
-        )
-        s = Series(cat)
-        result = s.describe()
-        expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"])
-        tm.assert_series_equal(result, expected)
-
-        cat = Series(Categorical(["a", "b", "c", "c"]))
-        df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]})
-        result = df3.describe()
-        tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)
-
-    def test_describe_empty_categorical_column(self):
-        # GH#26397
-        # Ensure the index of an an empty categorical DataFrame column
-        # also contains (count, unique, top, freq)
-        df = pd.DataFrame({"empty_col": Categorical([])})
-        result = df.describe()
-        expected = DataFrame(
-            {"empty_col": [0, 0, np.nan, np.nan]},
-            index=["count", "unique", "top", "freq"],
-            dtype="object",
-        )
-        tm.assert_frame_equal(result, expected)
-        # ensure NaN, not None
-        assert np.isnan(result.iloc[2, 0])
-        assert np.isnan(result.iloc[3, 0])
-
-    def test_describe_categorical_columns(self):
-        # GH#11558
-        columns = pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX")
-        df = DataFrame(
-            {
-                "int1": [10, 20, 30, 40, 50],
-                "int2": [10, 20, 30, 40, 50],
-                "obj": ["A", 0, None, "X", 1],
-            },
-            columns=columns,
-        )
-        result = df.describe()
-
-        exp_columns = pd.CategoricalIndex(
-            ["int1", "int2"],
-            categories=["int1", "int2", "obj"],
-            ordered=True,
-            name="XXX",
-        )
-        expected = DataFrame(
-            {
-                "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50],
-                "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50],
-            },
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-            columns=exp_columns,
-        )
-
-        tm.assert_frame_equal(result, expected)
-        tm.assert_categorical_equal(result.columns.values, expected.columns.values)
-
-    def test_describe_datetime_columns(self):
-        columns = pd.DatetimeIndex(
-            ["2011-01-01", "2011-02-01", "2011-03-01"],
-            freq="MS",
-            tz="US/Eastern",
-            name="XXX",
-        )
-        df = DataFrame(
-            {
-                0: [10, 20, 30, 40, 50],
-                1: [10, 20, 30, 40, 50],
-                2: ["A", 0, None, "X", 1],
-            }
-        )
-        df.columns = columns
-        result = df.describe()
-
-        exp_columns = pd.DatetimeIndex(
-            ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX"
-        )
-        expected = DataFrame(
-            {
-                0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50],
-                1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50],
-            },
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-        expected.columns = exp_columns
-        tm.assert_frame_equal(result, expected)
-        assert result.columns.freq == "MS"
-        assert result.columns.tz == expected.columns.tz
-
-    def test_describe_timedelta_values(self):
-        # GH#6145
-        t1 = pd.timedelta_range("1 days", freq="D", periods=5)
-        t2 = pd.timedelta_range("1 hours", freq="H", periods=5)
-        df = pd.DataFrame({"t1": t1, "t2": t2})
-
-        expected = DataFrame(
-            {
-                "t1": [
-                    5,
-                    pd.Timedelta("3 days"),
-                    df.iloc[:, 0].std(),
-                    pd.Timedelta("1 days"),
-                    pd.Timedelta("2 days"),
-                    pd.Timedelta("3 days"),
-                    pd.Timedelta("4 days"),
-                    pd.Timedelta("5 days"),
-                ],
-                "t2": [
-                    5,
-                    pd.Timedelta("3 hours"),
-                    df.iloc[:, 1].std(),
-                    pd.Timedelta("1 hours"),
-                    pd.Timedelta("2 hours"),
-                    pd.Timedelta("3 hours"),
-                    pd.Timedelta("4 hours"),
-                    pd.Timedelta("5 hours"),
-                ],
-            },
-            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-        )
-
-        result = df.describe()
-        tm.assert_frame_equal(result, expected)
-
-        exp_repr = (
-            "                           t1                      t2\n"
-            "count                       5                       5\n"
-            "mean          3 days 00:00:00         0 days 03:00:00\n"
-            "std    1 days 13:56:50.394919  0 days 01:34:52.099788\n"
-            "min           1 days 00:00:00         0 days 01:00:00\n"
-            "25%           2 days 00:00:00         0 days 02:00:00\n"
-            "50%           3 days 00:00:00         0 days 03:00:00\n"
-            "75%           4 days 00:00:00         0 days 04:00:00\n"
-            "max           5 days 00:00:00         0 days 05:00:00"
-        )
-        assert repr(result) == exp_repr
-
-    def test_describe_tz_values(self, tz_naive_fixture):
-        # GH#21332
-        tz = tz_naive_fixture
-        s1 = Series(range(5))
-        start = Timestamp(2018, 1, 1)
-        end = Timestamp(2018, 1, 5)
-        s2 = Series(date_range(start, end, tz=tz))
-        df = pd.DataFrame({"s1": s1, "s2": s2})
-
-        expected = DataFrame(
-            {
-                "s1": [
-                    5,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    2,
-                    1.581139,
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                ],
-                "s2": [
-                    5,
-                    5,
-                    s2.value_counts().index[0],
-                    1,
-                    start.tz_localize(tz),
-                    end.tz_localize(tz),
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                    np.nan,
-                ],
-            },
-            index=[
-                "count",
-                "unique",
-                "top",
-                "freq",
-                "first",
-                "last",
-                "mean",
-                "std",
-                "min",
-                "25%",
-                "50%",
-                "75%",
-                "max",
-            ],
-        )
-        result = df.describe(include="all")
-        tm.assert_frame_equal(result, expected)
-
-    def test_describe_percentiles_integer_idx(self):
-        # GH#26660
-        df = pd.DataFrame({"x": [1]})
-        pct = np.linspace(0, 1, 10 + 1)
-        result = df.describe(percentiles=pct)
-
-        expected = DataFrame(
-            {"x": [1.0, 1.0, np.NaN, 1.0, *[1.0 for _ in pct], 1.0]},
-            index=[
-                "count",
-                "mean",
-                "std",
-                "min",
-                "0%",
-                "10%",
-                "20%",
-                "30%",
-                "40%",
-                "50%",
-                "60%",
-                "70%",
-                "80%",
-                "90%",
-                "100%",
-                "max",
-            ],
-        )
-        tm.assert_frame_equal(result, expected)
-
-
 class TestDataFrameAnalytics:
 
     # ---------------------------------------------------------------------
@@ -1780,187 +1452,6 @@ def test_any_all_level_axis_none_raises(self, method):
         with pytest.raises(ValueError, match=xpr):
             getattr(df, method)(axis=None, level="out")
 
-    # ----------------------------------------------------------------------
-    # Isin
-
-    def test_isin(self):
-        # GH 4211
-        df = DataFrame(
-            {
-                "vals": [1, 2, 3, 4],
-                "ids": ["a", "b", "f", "n"],
-                "ids2": ["a", "n", "c", "n"],
-            },
-            index=["foo", "bar", "baz", "qux"],
-        )
-        other = ["a", "b", "c"]
-
-        result = df.isin(other)
-        expected = DataFrame([df.loc[s].isin(other) for s in df.index])
-        tm.assert_frame_equal(result, expected)
-
-    @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
-    def test_isin_empty(self, empty):
-        # GH 16991
-        df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]})
-        expected = DataFrame(False, df.index, df.columns)
-
-        result = df.isin(empty)
-        tm.assert_frame_equal(result, expected)
-
-    def test_isin_dict(self):
-        df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]})
-        d = {"A": ["a"]}
-
-        expected = DataFrame(False, df.index, df.columns)
-        expected.loc[0, "A"] = True
-
-        result = df.isin(d)
-        tm.assert_frame_equal(result, expected)
-
-        # non unique columns
-        df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]})
-        df.columns = ["A", "A"]
-        expected = DataFrame(False, df.index, df.columns)
-        expected.loc[0, "A"] = True
-        result = df.isin(d)
-        tm.assert_frame_equal(result, expected)
-
-    def test_isin_with_string_scalar(self):
-        # GH 4763
-        df = DataFrame(
-            {
-                "vals": [1, 2, 3, 4],
-                "ids": ["a", "b", "f", "n"],
-                "ids2": ["a", "n", "c", "n"],
-            },
-            index=["foo", "bar", "baz", "qux"],
-        )
-        with pytest.raises(TypeError):
-            df.isin("a")
-
-        with pytest.raises(TypeError):
-            df.isin("aaa")
-
-    def test_isin_df(self):
-        df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]})
-        df2 = DataFrame({"A": [0, 2, 12, 4], "B": [2, np.nan, 4, 5]})
-        expected = DataFrame(False, df1.index, df1.columns)
-        result = df1.isin(df2)
-        expected["A"].loc[[1, 3]] = True
-        expected["B"].loc[[0, 2]] = True
-        tm.assert_frame_equal(result, expected)
-
-        # partial overlapping columns
-        df2.columns = ["A", "C"]
-        result = df1.isin(df2)
-        expected["B"] = False
-        tm.assert_frame_equal(result, expected)
-
-    def test_isin_tuples(self):
-        # GH 16394
-        df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "f"]})
-        df["C"] = list(zip(df["A"], df["B"]))
-        result = df["C"].isin([(1, "a")])
-        tm.assert_series_equal(result, Series([True, False, False], name="C"))
-
-    def test_isin_df_dupe_values(self):
-        df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]})
-        # just cols duped
-        df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=["B", "B"])
-        with pytest.raises(ValueError):
-            df1.isin(df2)
-
-        # just index duped
-        df2 = DataFrame(
-            [[0, 2], [12, 4], [2, np.nan], [4, 5]],
-            columns=["A", "B"],
-            index=[0, 0, 1, 1],
-        )
-        with pytest.raises(ValueError):
-            df1.isin(df2)
-
-        # cols and index:
-        df2.columns = ["B", "B"]
-        with pytest.raises(ValueError):
-            df1.isin(df2)
-
-    def test_isin_dupe_self(self):
-        other = DataFrame({"A": [1, 0, 1, 0], "B": [1, 1, 0, 0]})
-        df = DataFrame([[1, 1], [1, 0], [0, 0]], columns=["A", "A"])
-        result = df.isin(other)
-        expected = DataFrame(False, index=df.index, columns=df.columns)
-        expected.loc[0] = True
-        expected.iloc[1, 1] = True
-        tm.assert_frame_equal(result, expected)
-
-    def test_isin_against_series(self):
-        df = pd.DataFrame(
-            {"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}, index=["a", "b", "c", "d"]
-        )
-        s = pd.Series([1, 3, 11, 4], index=["a", "b", "c", "d"])
-        expected = DataFrame(False, index=df.index, columns=df.columns)
-        expected["A"].loc["a"] = True
-        expected.loc["d"] = True
-        result = df.isin(s)
-        tm.assert_frame_equal(result, expected)
-
-    def test_isin_multiIndex(self):
-        idx = MultiIndex.from_tuples(
-            [
-                (0, "a", "foo"),
-                (0, "a", "bar"),
-                (0, "b", "bar"),
-                (0, "b", "baz"),
-                (2, "a", "foo"),
-                (2, "a", "bar"),
-                (2, "c", "bar"),
-                (2, "c", "baz"),
-                (1, "b", "foo"),
-                (1, "b", "bar"),
-                (1, "c", "bar"),
-                (1, "c", "baz"),
-            ]
-        )
-        df1 = DataFrame({"A": np.ones(12), "B": np.zeros(12)}, index=idx)
-        df2 = DataFrame(
-            {
-                "A": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
-                "B": [1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1],
-            }
-        )
-        # against regular index
-        expected = DataFrame(False, index=df1.index, columns=df1.columns)
-        result = df1.isin(df2)
-        tm.assert_frame_equal(result, expected)
-
-        df2.index = idx
-        expected = df2.values.astype(np.bool)
-        expected[:, 1] = ~expected[:, 1]
-        expected = DataFrame(expected, columns=["A", "B"], index=idx)
-
-        result = df1.isin(df2)
-        tm.assert_frame_equal(result, expected)
-
-    def test_isin_empty_datetimelike(self):
-        # GH 15473
-        df1_ts = DataFrame({"date": pd.to_datetime(["2014-01-01", "2014-01-02"])})
-        df1_td = DataFrame({"date": [pd.Timedelta(1, "s"), pd.Timedelta(2, "s")]})
-        df2 = DataFrame({"date": []})
-        df3 = DataFrame()
-
-        expected = DataFrame({"date": [False, False]})
-
-        result = df1_ts.isin(df2)
-        tm.assert_frame_equal(result, expected)
-        result = df1_ts.isin(df3)
-        tm.assert_frame_equal(result, expected)
-
-        result = df1_td.isin(df2)
-        tm.assert_frame_equal(result, expected)
-        result = df1_td.isin(df3)
-        tm.assert_frame_equal(result, expected)
-
     # ---------------------------------------------------------------------
     # Rounding
 
@@ -2173,158 +1664,6 @@ def test_round_interval_category_columns(self):
         expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns)
         tm.assert_frame_equal(result, expected)
 
-    # ---------------------------------------------------------------------
-    # Clip
-
-    def test_clip(self, float_frame):
-        median = float_frame.median().median()
-        original = float_frame.copy()
-
-        double = float_frame.clip(upper=median, lower=median)
-        assert not (double.values != median).any()
-
-        # Verify that float_frame was not changed inplace
-        assert (float_frame.values == original.values).all()
-
-    def test_inplace_clip(self, float_frame):
-        # GH 15388
-        median = float_frame.median().median()
-        frame_copy = float_frame.copy()
-
-        frame_copy.clip(upper=median, lower=median, inplace=True)
-        assert not (frame_copy.values != median).any()
-
-    def test_dataframe_clip(self):
-        # GH 2747
-        df = DataFrame(np.random.randn(1000, 2))
-
-        for lb, ub in [(-1, 1), (1, -1)]:
-            clipped_df = df.clip(lb, ub)
-
-            lb, ub = min(lb, ub), max(ub, lb)
-            lb_mask = df.values <= lb
-            ub_mask = df.values >= ub
-            mask = ~lb_mask & ~ub_mask
-            assert (clipped_df.values[lb_mask] == lb).all()
-            assert (clipped_df.values[ub_mask] == ub).all()
-            assert (clipped_df.values[mask] == df.values[mask]).all()
-
-    def test_clip_mixed_numeric(self):
-        # TODO(jreback)
-        # clip on mixed integer or floats
-        # with integer clippers coerces to float
-        df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]})
-        result = df.clip(1, 2)
-        expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]})
-        tm.assert_frame_equal(result, expected, check_like=True)
-
-        # GH 24162, clipping now preserves numeric types per column
-        df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"])
-        expected = df.dtypes
-        result = df.clip(upper=3).dtypes
-        tm.assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize("inplace", [True, False])
-    def test_clip_against_series(self, inplace):
-        # GH 6966
-
-        df = DataFrame(np.random.randn(1000, 2))
-        lb = Series(np.random.randn(1000))
-        ub = lb + 1
-
-        original = df.copy()
-        clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)
-
-        if inplace:
-            clipped_df = df
-
-        for i in range(2):
-            lb_mask = original.iloc[:, i] <= lb
-            ub_mask = original.iloc[:, i] >= ub
-            mask = ~lb_mask & ~ub_mask
-
-            result = clipped_df.loc[lb_mask, i]
-            tm.assert_series_equal(result, lb[lb_mask], check_names=False)
-            assert result.name == i
-
-            result = clipped_df.loc[ub_mask, i]
-            tm.assert_series_equal(result, ub[ub_mask], check_names=False)
-            assert result.name == i
-
-            tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
-
-    @pytest.mark.parametrize("inplace", [True, False])
-    @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
-    @pytest.mark.parametrize(
-        "axis,res",
-        [
-            (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]),
-            (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]),
-        ],
-    )
-    def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res):
-        # GH 15390
-        original = simple_frame.copy(deep=True)
-
-        result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace)
-
-        expected = pd.DataFrame(res, columns=original.columns, index=original.index)
-        if inplace:
-            result = original
-        tm.assert_frame_equal(result, expected, check_exact=True)
-
-    @pytest.mark.parametrize("axis", [0, 1, None])
-    def test_clip_against_frame(self, axis):
-        df = DataFrame(np.random.randn(1000, 2))
-        lb = DataFrame(np.random.randn(1000, 2))
-        ub = lb + 1
-
-        clipped_df = df.clip(lb, ub, axis=axis)
-
-        lb_mask = df <= lb
-        ub_mask = df >= ub
-        mask = ~lb_mask & ~ub_mask
-
-        tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
-        tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
-        tm.assert_frame_equal(clipped_df[mask], df[mask])
-
-    def test_clip_against_unordered_columns(self):
-        # GH 20911
-        df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"])
-        df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"])
-        df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"])
-        result_upper = df1.clip(lower=0, upper=df2)
-        expected_upper = df1.clip(lower=0, upper=df2[df1.columns])
-        result_lower = df1.clip(lower=df3, upper=3)
-        expected_lower = df1.clip(lower=df3[df1.columns], upper=3)
-        result_lower_upper = df1.clip(lower=df3, upper=df2)
-        expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns])
-        tm.assert_frame_equal(result_upper, expected_upper)
-        tm.assert_frame_equal(result_lower, expected_lower)
-        tm.assert_frame_equal(result_lower_upper, expected_lower_upper)
-
-    def test_clip_with_na_args(self, float_frame):
-        """Should process np.nan argument as None """
-        # GH 17276
-        tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
-        tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)
-
-        # GH 19992
-        df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
-
-        result = df.clip(lower=[4, 5, np.nan], axis=0)
-        expected = DataFrame(
-            {"col_0": [4, 5, np.nan], "col_1": [4, 5, np.nan], "col_2": [7, 8, np.nan]}
-        )
-        tm.assert_frame_equal(result, expected)
-
-        result = df.clip(lower=[4, 5, np.nan], axis=1)
-        expected = DataFrame(
-            {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [np.nan, np.nan, np.nan]}
-        )
-        tm.assert_frame_equal(result, expected)
-
     # ---------------------------------------------------------------------
     # Matrix-like
 

From 9bf172be9a19161bfd63656a26df15e6595b87f6 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 6 Feb 2020 17:37:16 -0800
Subject: [PATCH 6/7] CLN: generic tests

---
 pandas/tests/frame/test_block_internals.py | 20 +++---
 pandas/tests/generic/test_frame.py         | 76 +++++++++++++++++++---
 pandas/tests/generic/test_generic.py       | 35 ----------
 pandas/tests/generic/test_series.py        | 62 ++++++++++++------
 4 files changed, 118 insertions(+), 75 deletions(-)

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index d301ed969789e..a5f5e6f36cd58 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -364,14 +364,14 @@ def test_pickle(self, float_string_frame, timezone_frame):
     def test_consolidate_datetime64(self):
         # numpy vstack bug
 
-        data = """\
-starting,ending,measure
-2012-06-21 00:00,2012-06-23 07:00,77
-2012-06-23 07:00,2012-06-23 16:30,65
-2012-06-23 16:30,2012-06-25 08:00,77
-2012-06-25 08:00,2012-06-26 12:00,0
-2012-06-26 12:00,2012-06-27 08:00,77
-"""
+        data = (
+            "starting,ending,measure\n"
+            "2012-06-21 00:00,2012-06-23 07:00,77\n"
+            "2012-06-23 07:00,2012-06-23 16:30,65\n"
+            "2012-06-23 16:30,2012-06-25 08:00,77\n"
+            "2012-06-25 08:00,2012-06-26 12:00,0\n"
+            "2012-06-26 12:00,2012-06-27 08:00,77\n"
+        )
         df = pd.read_csv(StringIO(data), parse_dates=[0, 1])
 
         ser_starting = df.starting
@@ -397,9 +397,6 @@ def test_is_mixed_type(self, float_frame, float_string_frame):
         assert float_string_frame._is_mixed_type
 
     def test_get_numeric_data(self):
-        # TODO(wesm): unused?
-        intname = np.dtype(np.int_).name  # noqa
-        floatname = np.dtype(np.float_).name  # noqa
 
         datetime64name = np.dtype("M8[ns]").name
         objectname = np.dtype(np.object_).name
@@ -581,6 +578,7 @@ def test_get_X_columns(self):
         tm.assert_index_equal(df._get_numeric_data().columns, pd.Index(["a", "b", "e"]))
 
     def test_strange_column_corruption_issue(self):
+        # FIXME: don't leave commented-out
         # (wesm) Unclear how exactly this is related to internal matters
         df = DataFrame(index=[0, 1])
         df[0] = np.nan
diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py
index 7fe22e77c5bf3..72e13db03f190 100644
--- a/pandas/tests/generic/test_frame.py
+++ b/pandas/tests/generic/test_frame.py
@@ -160,7 +160,7 @@ def finalize(self, other, method=None, **kwargs):
 
         # reset
         DataFrame._metadata = _metadata
-        DataFrame.__finalize__ = _finalize
+        DataFrame.__finalize__ = _finalize  # FIXME: use monkeypatch
 
     def test_set_attribute(self):
         # Test for consistent setattr behavior when an attribute and a column
@@ -174,6 +174,72 @@ def test_set_attribute(self):
         assert df.y == 5
         tm.assert_series_equal(df["y"], Series([2, 4, 6], name="y"))
 
+    def test_deepcopy_empty(self):
+        # This test covers empty frame copying with non-empty column sets
+        # as reported in issue GH15370
+        empty_frame = DataFrame(data=[], index=[], columns=["A"])
+        empty_frame_copy = deepcopy(empty_frame)
+
+        self._compare(empty_frame_copy, empty_frame)
+
+
+# formerly in Generic, but these tests only exercise DataFrame
+class TestDataFrame2:
+    def test_validate_bool_args(self):
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        invalid_values = [1, "True", [1, 2, 3], 5.0]
+
+        for value in invalid_values:
+            with pytest.raises(ValueError):
+                super(DataFrame, df).rename_axis(
+                    mapper={"a": "x", "b": "y"}, axis=1, inplace=value
+                )
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df).drop("a", axis=1, inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df).sort_index(inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df)._consolidate(inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df).fillna(value=0, inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df).replace(to_replace=1, value=7, inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df).interpolate(inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df)._where(cond=df.a > 2, inplace=value)
+
+            with pytest.raises(ValueError):
+                super(DataFrame, df).mask(cond=df.a > 2, inplace=value)
+
+    def test_unexpected_keyword(self):
+        # GH8597
+        df = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"])
+        ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
+        ts = df["joe"].copy()
+        ts[2] = np.nan
+
+        with pytest.raises(TypeError, match="unexpected keyword"):
+            df.drop("joe", axis=1, in_place=True)
+
+        with pytest.raises(TypeError, match="unexpected keyword"):
+            df.reindex([1, 0], inplace=True)
+
+        with pytest.raises(TypeError, match="unexpected keyword"):
+            ca.fillna(0, inplace=True)
+
+        with pytest.raises(TypeError, match="unexpected keyword"):
+            ts.fillna(0, in_place=True)
+
+
+class TestToXArray:
     @pytest.mark.skipif(
         not _XARRAY_INSTALLED
         or _XARRAY_INSTALLED
@@ -272,11 +338,3 @@ def test_to_xarray(self):
         expected["f"] = expected["f"].astype(object)
         expected.columns.name = None
         tm.assert_frame_equal(result, expected, check_index_type=False)
-
-    def test_deepcopy_empty(self):
-        # This test covers empty frame copying with non-empty column sets
-        # as reported in issue GH15370
-        empty_frame = DataFrame(data=[], index=[], columns=["A"])
-        empty_frame_copy = deepcopy(empty_frame)
-
-        self._compare(empty_frame_copy, empty_frame)
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
index efb04c7f63c66..4533afb05179b 100644
--- a/pandas/tests/generic/test_generic.py
+++ b/pandas/tests/generic/test_generic.py
@@ -103,23 +103,6 @@ def test_get_numeric_data(self):
         # _get_numeric_data is includes _get_bool_data, so can't test for
         # non-inclusion
 
-    def test_get_default(self):
-
-        # GH 7725
-        d0 = "a", "b", "c", "d"
-        d1 = np.arange(4, dtype="int64")
-        others = "e", 10
-
-        for data, index in ((d0, d1), (d1, d0)):
-            s = Series(data, index=index)
-            for i, d in zip(index, data):
-                assert s.get(i) == d
-                assert s.get(i, d) == d
-                assert s.get(i, "z") == d
-                for other in others:
-                    assert s.get(other, "z") == "z"
-                    assert s.get(other, other) == other
-
     def test_nonzero(self):
 
         # GH 4633
@@ -460,24 +443,6 @@ def test_split_compat(self):
         assert len(np.array_split(o, 5)) == 5
         assert len(np.array_split(o, 2)) == 2
 
-    def test_unexpected_keyword(self):  # GH8597
-        df = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"])
-        ca = pd.Categorical([0, 0, 2, 2, 3, np.nan])
-        ts = df["joe"].copy()
-        ts[2] = np.nan
-
-        with pytest.raises(TypeError, match="unexpected keyword"):
-            df.drop("joe", axis=1, in_place=True)
-
-        with pytest.raises(TypeError, match="unexpected keyword"):
-            df.reindex([1, 0], inplace=True)
-
-        with pytest.raises(TypeError, match="unexpected keyword"):
-            ca.fillna(0, inplace=True)
-
-        with pytest.raises(TypeError, match="unexpected keyword"):
-            ts.fillna(0, in_place=True)
-
     # See gh-12301
     def test_stat_unexpected_keyword(self):
         obj = self._construct(5)
diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py
index 8ad8355f2d530..ce0daf8522687 100644
--- a/pandas/tests/generic/test_series.py
+++ b/pandas/tests/generic/test_series.py
@@ -181,8 +181,49 @@ def finalize(self, other, method=None, **kwargs):
 
         # reset
         Series._metadata = _metadata
-        Series.__finalize__ = _finalize
+        Series.__finalize__ = _finalize  # FIXME: use monkeypatch
 
+    @pytest.mark.parametrize(
+        "s",
+        [
+            Series([np.arange(5)]),
+            pd.date_range("1/1/2011", periods=24, freq="H"),
+            pd.Series(range(5), index=pd.date_range("2017", periods=5)),
+        ],
+    )
+    @pytest.mark.parametrize("shift_size", [0, 1, 2])
+    def test_shift_always_copy(self, s, shift_size):
+        # GH22397
+        assert s.shift(shift_size) is not s
+
+    @pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")])
+    def test_datetime_shift_always_copy(self, move_by_freq):
+        # GH22397
+        s = pd.Series(range(5), index=pd.date_range("2017", periods=5))
+        assert s.shift(freq=move_by_freq) is not s
+
+
+class TestSeries2:
+    # moved from Generic
+    def test_get_default(self):
+
+        # GH#7725
+        d0 = ["a", "b", "c", "d"]
+        d1 = np.arange(4, dtype="int64")
+        others = ["e", 10]
+
+        for data, index in ((d0, d1), (d1, d0)):
+            s = Series(data, index=index)
+            for i, d in zip(index, data):
+                assert s.get(i) == d
+                assert s.get(i, d) == d
+                assert s.get(i, "z") == d
+                for other in others:
+                    assert s.get(other, "z") == "z"
+                    assert s.get(other, other) == other
+
+
+class TestToXArray:
     @pytest.mark.skipif(
         not _XARRAY_INSTALLED
         or _XARRAY_INSTALLED
@@ -242,22 +283,3 @@ def test_to_xarray(self):
         tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
         assert isinstance(result, DataArray)
         tm.assert_series_equal(result.to_series(), s)
-
-    @pytest.mark.parametrize(
-        "s",
-        [
-            Series([np.arange(5)]),
-            pd.date_range("1/1/2011", periods=24, freq="H"),
-            pd.Series(range(5), index=pd.date_range("2017", periods=5)),
-        ],
-    )
-    @pytest.mark.parametrize("shift_size", [0, 1, 2])
-    def test_shift_always_copy(self, s, shift_size):
-        # GH22397
-        assert s.shift(shift_size) is not s
-
-    @pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")])
-    def test_datetime_shift_always_copy(self, move_by_freq):
-        # GH22397
-        s = pd.Series(range(5), index=pd.date_range("2017", periods=5))
-        assert s.shift(freq=move_by_freq) is not s

From 6ffad92e04c7a11983e3049e6633fc7399a7d5a6 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 10 Feb 2020 18:16:04 -0800
Subject: [PATCH 7/7] merge fixup

---
 pandas/tests/generic/test_frame.py   |  3 ---
 pandas/tests/generic/test_generic.py | 31 ----------------------------
 2 files changed, 34 deletions(-)

diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py
index 72e13db03f190..d8f4257566f84 100644
--- a/pandas/tests/generic/test_frame.py
+++ b/pandas/tests/generic/test_frame.py
@@ -198,9 +198,6 @@ def test_validate_bool_args(self):
             with pytest.raises(ValueError):
                 super(DataFrame, df).drop("a", axis=1, inplace=value)
 
-            with pytest.raises(ValueError):
-                super(DataFrame, df).sort_index(inplace=value)
-
             with pytest.raises(ValueError):
                 super(DataFrame, df)._consolidate(inplace=value)
 
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
index 5754f39be9c9e..d574660d21c0d 100644
--- a/pandas/tests/generic/test_generic.py
+++ b/pandas/tests/generic/test_generic.py
@@ -509,37 +509,6 @@ def test_truncate_out_of_bounds(self):
         self._compare(big.truncate(before=0, after=3e6), big)
         self._compare(big.truncate(before=-1, after=2e6), big)
 
-    def test_validate_bool_args(self):
-        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
-        invalid_values = [1, "True", [1, 2, 3], 5.0]
-
-        for value in invalid_values:
-            with pytest.raises(ValueError):
-                super(DataFrame, df).rename_axis(
-                    mapper={"a": "x", "b": "y"}, axis=1, inplace=value
-                )
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df).drop("a", axis=1, inplace=value)
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df)._consolidate(inplace=value)
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df).fillna(value=0, inplace=value)
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df).replace(to_replace=1, value=7, inplace=value)
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df).interpolate(inplace=value)
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df)._where(cond=df.a > 2, inplace=value)
-
-            with pytest.raises(ValueError):
-                super(DataFrame, df).mask(cond=df.a > 2, inplace=value)
-
     def test_copy_and_deepcopy(self):
         # GH 15444
         for shape in [0, 1, 2]: