From 07478cee3e3cc86b7c32cf5ed3a22900be031a3b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 15 Jan 2021 13:50:00 -0800
Subject: [PATCH] TST/REF: split large categorical indexing test

---
 pandas/_testing/__init__.py                   |   8 +
 .../tests/frame/indexing/test_categorical.py  | 300 ++++++------------
 pandas/tests/indexing/test_loc.py             |  53 ++--
 3 files changed, 130 insertions(+), 231 deletions(-)

diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index b36e790f8023b..549a3c8e4a681 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -977,3 +977,11 @@ def loc(x):
 
 def iloc(x):
     return x.iloc
+
+
+def at(x):
+    return x.at
+
+
+def iat(x):
+    return x.iat
diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py
index 6137cadc93125..b3e0783d7388f 100644
--- a/pandas/tests/frame/indexing/test_categorical.py
+++ b/pandas/tests/frame/indexing/test_categorical.py
@@ -7,6 +7,9 @@
 from pandas import Categorical, DataFrame, Index, Series
 import pandas._testing as tm
 
+msg1 = "Cannot setitem on a Categorical with a new category, set the categories first"
+msg2 = "Cannot set a Categorical with another, without identical categories"
+
 
 class TestDataFrameIndexingCategorical:
     def test_assignment(self):
@@ -54,47 +57,44 @@ def test_assignment(self):
         cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
         df = DataFrame(Series(cat))
 
-    def test_assigning_ops(self):
-        # systematically test the assigning operations:
-        # for all slicing ops:
-        #  for value in categories and value not in categories:
-
-        #   - assign a single value -> exp_single_cats_value
-
-        #   - assign a complete row (mixed values) -> exp_single_row
-
-        # assign multiple rows (mixed values) (-> array) -> exp_multi_row
-
-        # assign a part of a column with dtype == categorical ->
-        # exp_parts_cats_col
-
-        # assign a part of a column with dtype != categorical ->
-        # exp_parts_cats_col
-
+    @pytest.fixture
+    def orig(self):
         cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"])
         idx = Index(["h", "i", "j", "k", "l", "m", "n"])
         values = [1, 1, 1, 1, 1, 1, 1]
         orig = DataFrame({"cats": cats, "values": values}, index=idx)
+        return orig
 
-        # the expected values
-        # changed single row
+    @pytest.fixture
+    def exp_single_row(self):
+        # The expected values if we change a single row
         cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
         idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
         values1 = [1, 1, 2, 1, 1, 1, 1]
         exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1)
+        return exp_single_row
 
+    @pytest.fixture
+    def exp_multi_row(self):
+        # assign multiple rows (mixed values) (-> array) -> exp_multi_row
         # changed multiple rows
         cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
         idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
         values2 = [1, 1, 2, 2, 1, 1, 1]
         exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2)
+        return exp_multi_row
 
+    @pytest.fixture
+    def exp_parts_cats_col(self):
         # changed part of the cats column
         cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
         idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
         values3 = [1, 1, 1, 1, 1, 1, 1]
         exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3)
+        return exp_parts_cats_col
 
+    @pytest.fixture
+    def exp_single_cats_value(self):
         # changed single value in cats col
         cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
         idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
@@ -102,222 +102,129 @@ def test_assigning_ops(self):
         exp_single_cats_value = DataFrame(
             {"cats": cats4, "values": values4}, index=idx4
         )
+        return exp_single_cats_value
 
-        #  iloc
-        # ###############
-        #   - assign a single value -> exp_single_cats_value
-        df = orig.copy()
-        df.iloc[2, 0] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
-
-        df = orig.copy()
-        df.iloc[df.index == "j", 0] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
-
-        #   - assign a single value not in the current categories set
-        msg1 = (
-            "Cannot setitem on a Categorical with a new category, "
-            "set the categories first"
-        )
-        msg2 = "Cannot set a Categorical with another, without identical categories"
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.iloc[2, 0] = "c"
-
-        #   - assign a complete row (mixed values) -> exp_single_row
-        df = orig.copy()
-        df.iloc[2, :] = ["b", 2]
-        tm.assert_frame_equal(df, exp_single_row)
-
-        #   - assign a complete row (mixed values) not in categories set
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.iloc[2, :] = ["c", 2]
-
+    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
+    def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer):
         #   - assign multiple rows (mixed values) -> exp_multi_row
         df = orig.copy()
-        df.iloc[2:4, :] = [["b", 2], ["b", 2]]
-        tm.assert_frame_equal(df, exp_multi_row)
 
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.iloc[2:4, :] = [["c", 2], ["c", 2]]
+        key = slice(2, 4)
+        if indexer is tm.loc:
+            key = slice("j", "k")
 
-        # assign a part of a column with dtype == categorical ->
-        # exp_parts_cats_col
-        df = orig.copy()
-        df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
-        tm.assert_frame_equal(df, exp_parts_cats_col)
-
-        with pytest.raises(ValueError, match=msg2):
-            # different categories -> not sure if this should fail or pass
-            df = orig.copy()
-            df.iloc[2:4, 0] = Categorical(list("bb"), categories=list("abc"))
-
-        with pytest.raises(ValueError, match=msg2):
-            # different values
-            df = orig.copy()
-            df.iloc[2:4, 0] = Categorical(list("cc"), categories=list("abc"))
+        indexer(df)[key, :] = [["b", 2], ["b", 2]]
+        tm.assert_frame_equal(df, exp_multi_row)
 
-        # assign a part of a column with dtype != categorical ->
-        # exp_parts_cats_col
         df = orig.copy()
-        df.iloc[2:4, 0] = ["b", "b"]
-        tm.assert_frame_equal(df, exp_parts_cats_col)
-
         with pytest.raises(ValueError, match=msg1):
-            df.iloc[2:4, 0] = ["c", "c"]
+            indexer(df)[key, :] = [["c", 2], ["c", 2]]
 
-        #  loc
-        # ##############
+    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc, tm.at, tm.iat])
+    def test_loc_iloc_at_iat_setitem_single_value_in_categories(
+        self, orig, exp_single_cats_value, indexer
+    ):
         #   - assign a single value -> exp_single_cats_value
         df = orig.copy()
-        df.loc["j", "cats"] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
-
-        df = orig.copy()
-        df.loc[df.index == "j", "cats"] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
-
-        #   - assign a single value not in the current categories set
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.loc["j", "cats"] = "c"
-
-        #   - assign a complete row (mixed values) -> exp_single_row
-        df = orig.copy()
-        df.loc["j", :] = ["b", 2]
-        tm.assert_frame_equal(df, exp_single_row)
 
-        #   - assign a complete row (mixed values) not in categories set
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.loc["j", :] = ["c", 2]
+        key = (2, 0)
+        if indexer in [tm.loc, tm.at]:
+            key = (df.index[2], df.columns[0])
 
-        #   - assign multiple rows (mixed values) -> exp_multi_row
-        df = orig.copy()
-        df.loc["j":"k", :] = [["b", 2], ["b", 2]]
-        tm.assert_frame_equal(df, exp_multi_row)
+        # "b" is among the categories for df["cats"]
+        indexer(df)[key] = "b"
+        tm.assert_frame_equal(df, exp_single_cats_value)
 
+        # "c" is not among the categories for df["cats"]
         with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.loc["j":"k", :] = [["c", 2], ["c", 2]]
+            indexer(df)[key] = "c"
 
-        # assign a part of a column with dtype == categorical ->
-        # exp_parts_cats_col
+    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
+    def test_loc_iloc_setitem_mask_single_value_in_categories(
+        self, orig, exp_single_cats_value, indexer
+    ):
+        # mask with single True
         df = orig.copy()
-        df.loc["j":"k", "cats"] = Categorical(["b", "b"], categories=["a", "b"])
-        tm.assert_frame_equal(df, exp_parts_cats_col)
 
-        with pytest.raises(ValueError, match=msg2):
-            # different categories -> not sure if this should fail or pass
-            df = orig.copy()
-            df.loc["j":"k", "cats"] = Categorical(
-                ["b", "b"], categories=["a", "b", "c"]
-            )
-
-        with pytest.raises(ValueError, match=msg2):
-            # different values
-            df = orig.copy()
-            df.loc["j":"k", "cats"] = Categorical(
-                ["c", "c"], categories=["a", "b", "c"]
-            )
+        mask = df.index == "j"
+        key = 0
+        if indexer is tm.loc:
+            key = df.columns[key]
 
-        # assign a part of a column with dtype != categorical ->
-        # exp_parts_cats_col
-        df = orig.copy()
-        df.loc["j":"k", "cats"] = ["b", "b"]
-        tm.assert_frame_equal(df, exp_parts_cats_col)
-
-        with pytest.raises(ValueError, match=msg1):
-            df.loc["j":"k", "cats"] = ["c", "c"]
-
-        #  loc
-        # ##############
-        #   - assign a single value -> exp_single_cats_value
-        df = orig.copy()
-        df.loc["j", df.columns[0]] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
-
-        df = orig.copy()
-        df.loc[df.index == "j", df.columns[0]] = "b"
+        indexer(df)[mask, key] = "b"
         tm.assert_frame_equal(df, exp_single_cats_value)
 
-        #   - assign a single value not in the current categories set
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.loc["j", df.columns[0]] = "c"
-
+    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
+    def test_iloc_setitem_full_row_non_categorical_rhs(
+        self, orig, exp_single_row, indexer
+    ):
         #   - assign a complete row (mixed values) -> exp_single_row
         df = orig.copy()
-        df.loc["j", :] = ["b", 2]
-        tm.assert_frame_equal(df, exp_single_row)
 
-        #   - assign a complete row (mixed values) not in categories set
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.loc["j", :] = ["c", 2]
+        key = 2
+        if indexer is tm.loc:
+            key = df.index[2]
 
-        #   - assign multiple rows (mixed values) -> exp_multi_row
-        df = orig.copy()
-        df.loc["j":"k", :] = [["b", 2], ["b", 2]]
-        tm.assert_frame_equal(df, exp_multi_row)
+        # not categorical dtype, but "b" _is_ among the categories for df["cats"]
+        indexer(df)[key, :] = ["b", 2]
+        tm.assert_frame_equal(df, exp_single_row)
 
+        # "c" is not among the categories for df["cats"]
         with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.loc["j":"k", :] = [["c", 2], ["c", 2]]
+            indexer(df)[key, :] = ["c", 2]
 
+    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
+    def test_loc_iloc_setitem_partial_col_categorical_rhs(
+        self, orig, exp_parts_cats_col, indexer
+    ):
         # assign a part of a column with dtype == categorical ->
         # exp_parts_cats_col
         df = orig.copy()
-        df.loc["j":"k", df.columns[0]] = Categorical(["b", "b"], categories=["a", "b"])
+
+        key = (slice(2, 4), 0)
+        if indexer is tm.loc:
+            key = (slice("j", "k"), df.columns[0])
+
+        # same categories as we currently have in df["cats"]
+        compat = Categorical(["b", "b"], categories=["a", "b"])
+        indexer(df)[key] = compat
         tm.assert_frame_equal(df, exp_parts_cats_col)
 
+        # categories do not match df["cats"]'s, but "b" is among them
+        semi_compat = Categorical(list("bb"), categories=list("abc"))
         with pytest.raises(ValueError, match=msg2):
-            # different categories -> not sure if this should fail or pass
-            df = orig.copy()
-            df.loc["j":"k", df.columns[0]] = Categorical(
-                ["b", "b"], categories=["a", "b", "c"]
-            )
+            # different categories but holdable values
+            #  -> not sure if this should fail or pass
+            indexer(df)[key] = semi_compat
 
+        # categories do not match df["cats"]'s, and "c" is not among them
+        incompat = Categorical(list("cc"), categories=list("abc"))
         with pytest.raises(ValueError, match=msg2):
             # different values
-            df = orig.copy()
-            df.loc["j":"k", df.columns[0]] = Categorical(
-                ["c", "c"], categories=["a", "b", "c"]
-            )
-
-        # assign a part of a column with dtype != categorical ->
-        # exp_parts_cats_col
-        df = orig.copy()
-        df.loc["j":"k", df.columns[0]] = ["b", "b"]
-        tm.assert_frame_equal(df, exp_parts_cats_col)
-
-        with pytest.raises(ValueError, match=msg1):
-            df.loc["j":"k", df.columns[0]] = ["c", "c"]
+            indexer(df)[key] = incompat
 
-        # iat
+    @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
+    def test_loc_iloc_setitem_non_categorical_rhs(
+        self, orig, exp_parts_cats_col, indexer
+    ):
+        # assign a part of a column with dtype != categorical -> exp_parts_cats_col
         df = orig.copy()
-        df.iat[2, 0] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
 
-        #   - assign a single value not in the current categories set
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.iat[2, 0] = "c"
+        key = (slice(2, 4), 0)
+        if indexer is tm.loc:
+            key = (slice("j", "k"), df.columns[0])
 
-        # at
-        #   - assign a single value -> exp_single_cats_value
-        df = orig.copy()
-        df.at["j", "cats"] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
+        # "b" is among the categories for df["cats"]
+        indexer(df)[key] = ["b", "b"]
+        tm.assert_frame_equal(df, exp_parts_cats_col)
 
-        #   - assign a single value not in the current categories set
+        # "c" not part of the categories
         with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.at["j", "cats"] = "c"
+            indexer(df)[key] = ["c", "c"]
 
+    def test_setitem_mask_categorical(self, exp_multi_row):
         # fancy indexing
+
         catsf = Categorical(
             ["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"]
         )
@@ -331,19 +238,12 @@ def test_assigning_ops(self):
         )
         assert return_value is None
 
-        df[df["cats"] == "c"] = ["b", 2]
+        mask = df["cats"] == "c"
+        df[mask] = ["b", 2]
         # category c is kept in .categories
         tm.assert_frame_equal(df, exp_fancy)
 
-        # set_value
-        df = orig.copy()
-        df.at["j", "cats"] = "b"
-        tm.assert_frame_equal(df, exp_single_cats_value)
-
-        with pytest.raises(ValueError, match=msg1):
-            df = orig.copy()
-            df.at["j", "cats"] = "c"
-
+    def test_loc_setitem_categorical_values_partial_column_slice(self):
         # Assigning a Category to parts of a int/... column uses the values of
         # the Categorical
         df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 7c73917e44b22..8b13bafdd012f 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -395,73 +395,64 @@ def test_loc_general(self):
         tm.assert_series_equal(result, expected)
         assert result.dtype == object
 
-    def test_loc_setitem_consistency(self):
-        # GH 6149
-        # coerce similarly for setitem and loc when rows have a null-slice
-        expected = DataFrame(
+    @pytest.fixture
+    def frame_for_consistency(self):
+        return DataFrame(
             {
-                "date": Series(0, index=range(5), dtype=np.int64),
+                "date": date_range("2000-01-01", "2000-01-5"),
                 "val": Series(range(5), dtype=np.int64),
             }
         )
 
-        df = DataFrame(
+    def test_loc_setitem_consistency(self, frame_for_consistency):
+        # GH 6149
+        # coerce similarly for setitem and loc when rows have a null-slice
+        expected = DataFrame(
             {
-                "date": date_range("2000-01-01", "2000-01-5"),
+                "date": Series(0, index=range(5), dtype=np.int64),
                 "val": Series(range(5), dtype=np.int64),
             }
         )
+        df = frame_for_consistency.copy()
         df.loc[:, "date"] = 0
         tm.assert_frame_equal(df, expected)
 
-        df = DataFrame(
-            {
-                "date": date_range("2000-01-01", "2000-01-5"),
-                "val": Series(range(5), dtype=np.int64),
-            }
-        )
+        df = frame_for_consistency.copy()
         df.loc[:, "date"] = np.array(0, dtype=np.int64)
         tm.assert_frame_equal(df, expected)
 
-        df = DataFrame(
-            {
-                "date": date_range("2000-01-01", "2000-01-5"),
-                "val": Series(range(5), dtype=np.int64),
-            }
-        )
+        df = frame_for_consistency.copy()
         df.loc[:, "date"] = np.array([0, 0, 0, 0, 0], dtype=np.int64)
         tm.assert_frame_equal(df, expected)
 
+    def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
+        # GH 6149
+        # coerce similarly for setitem and loc when rows have a null-slice
+
         expected = DataFrame(
             {
                 "date": Series("foo", index=range(5)),
                 "val": Series(range(5), dtype=np.int64),
             }
         )
-        df = DataFrame(
-            {
-                "date": date_range("2000-01-01", "2000-01-5"),
-                "val": Series(range(5), dtype=np.int64),
-            }
-        )
+        df = frame_for_consistency.copy()
         df.loc[:, "date"] = "foo"
         tm.assert_frame_equal(df, expected)
 
+    def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
+        # GH 6149
+        # coerce similarly for setitem and loc when rows have a null-slice
         expected = DataFrame(
             {
                 "date": Series(1.0, index=range(5)),
                 "val": Series(range(5), dtype=np.int64),
             }
         )
-        df = DataFrame(
-            {
-                "date": date_range("2000-01-01", "2000-01-5"),
-                "val": Series(range(5), dtype=np.int64),
-            }
-        )
+        df = frame_for_consistency.copy()
         df.loc[:, "date"] = 1.0
         tm.assert_frame_equal(df, expected)
 
+    def test_loc_setitem_consistency_single_row(self):
         # GH 15494
         # setting on frame with single row
         df = DataFrame({"date": Series([Timestamp("20180101")])})