TST/CLN: Remove makeCategoricalIndex (#56186)

mroeschke · web-flow · commit e973b4222240 · 2023-11-28T09:07:34.000-08:00
* TST/CLN: Remove makeCategoricalIndex

* Remove makeCategoricalIndex usage in asv_bench benchmarks

* Adjust xarray test to use 4 rows so the sliced categorical index fixture covers all 4 of its categories

* Remove rands_array
diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -6,8 +6,6 @@
 
 import pandas as pd
 
-from .pandas_vb_common import tm
-
 try:
     from pandas.api.types import union_categoricals
 except ImportError:
@@ -189,7 +187,7 @@ def setup(self):
         N = 10**5
         ncats = 15
 
-        self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
+        self.s_str = pd.Series(np.random.randint(0, ncats, size=N).astype(str))
         self.s_str_cat = pd.Series(self.s_str, dtype="category")
         with warnings.catch_warnings(record=True):
             str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True)
@@ -242,7 +240,7 @@ def time_categorical_series_is_monotonic_decreasing(self):
 class Contains:
     def setup(self):
         N = 10**5
-        self.ci = tm.makeCategoricalIndex(N)
+        self.ci = pd.CategoricalIndex(np.arange(N))
         self.c = self.ci.values
         self.key = self.ci.categories[0]
 
@@ -325,7 +323,7 @@ def time_sort_values(self):
 class SearchSorted:
     def setup(self):
         N = 10**5
-        self.ci = tm.makeCategoricalIndex(N).sort_values()
+        self.ci = pd.CategoricalIndex(np.arange(N)).sort_values()
         self.c = self.ci.values
         self.key = self.ci.categories[1]
 
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -39,7 +39,6 @@
 from pandas import (
     ArrowDtype,
     Categorical,
-    CategoricalIndex,
     DataFrame,
     DatetimeIndex,
     Index,
@@ -348,36 +347,10 @@ def to_array(obj):
 # Others
 
 
-def rands_array(
-    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
-) -> np.ndarray:
-    """
-    Generate an array of byte strings.
-    """
-    chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
-    retval = (
-        np.random.default_rng(2)
-        .choice(chars, size=nchars * np.prod(size), replace=replace)
-        .view((np.str_, nchars))
-        .reshape(size)
-    )
-    return retval.astype(dtype)
-
-
 def getCols(k) -> str:
     return string.ascii_uppercase[:k]
 
 
-def makeCategoricalIndex(
-    k: int = 10, n: int = 3, name=None, **kwargs
-) -> CategoricalIndex:
-    """make a length k index or n categories"""
-    x = rands_array(nchars=4, size=n, replace=False)
-    return CategoricalIndex(
-        Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs
-    )
-
-
 def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
     dtype = pandas_dtype(dtype)
     assert isinstance(dtype, np.dtype)
@@ -998,7 +971,6 @@ def shares_memory(left, right) -> bool:
     "iat",
     "iloc",
     "loc",
-    "makeCategoricalIndex",
     "makeCustomDataframe",
     "makeCustomIndex",
     "makeDataFrame",
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -59,6 +59,7 @@
 
 import pandas as pd
 from pandas import (
+    CategoricalIndex,
     DataFrame,
     Interval,
     IntervalIndex,
@@ -632,7 +633,7 @@ def _create_mi_with_dt64tz_level():
     "bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
     "complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"),
     "complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"),
-    "categorical": tm.makeCategoricalIndex(100),
+    "categorical": CategoricalIndex(list("abcd") * 25),
     "interval": IntervalIndex.from_breaks(np.linspace(0, 100, num=101)),
     "empty": Index([]),
     "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py
@@ -12,6 +12,7 @@
 
 from pandas import (
     Categorical,
+    CategoricalIndex,
     DataFrame,
     DatetimeIndex,
     Index,
@@ -398,8 +399,7 @@ def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append):
         tm.assert_frame_equal(result, expected)
 
     def test_construction_with_categorical_index(self):
-        ci = tm.makeCategoricalIndex(10)
-        ci.name = "B"
+        ci = CategoricalIndex(list("ab") * 5, name="B")
 
         # with Categorical
         df = DataFrame(
diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py
@@ -18,14 +18,14 @@ class TestDataFrameToXArray:
     def df(self):
         return DataFrame(
             {
-                "a": list("abc"),
-                "b": list(range(1, 4)),
-                "c": np.arange(3, 6).astype("u1"),
-                "d": np.arange(4.0, 7.0, dtype="float64"),
-                "e": [True, False, True],
-                "f": Categorical(list("abc")),
-                "g": date_range("20130101", periods=3),
-                "h": date_range("20130101", periods=3, tz="US/Eastern"),
+                "a": list("abcd"),
+                "b": list(range(1, 5)),
+                "c": np.arange(3, 7).astype("u1"),
+                "d": np.arange(4.0, 8.0, dtype="float64"),
+                "e": [True, False, True, False],
+                "f": Categorical(list("abcd")),
+                "g": date_range("20130101", periods=4),
+                "h": date_range("20130101", periods=4, tz="US/Eastern"),
             }
         )
 
@@ -37,11 +37,11 @@ def test_to_xarray_index_types(self, index_flat, df, using_infer_string):
 
         from xarray import Dataset
 
-        df.index = index[:3]
+        df.index = index[:4]
         df.index.name = "foo"
         df.columns.name = "bar"
         result = df.to_xarray()
-        assert result.dims["foo"] == 3
+        assert result.dims["foo"] == 4
         assert len(result.coords) == 1
         assert len(result.data_vars) == 8
         tm.assert_almost_equal(list(result.coords.keys()), ["foo"])
@@ -69,10 +69,10 @@ def test_to_xarray_with_multiindex(self, df, using_infer_string):
         from xarray import Dataset
 
         # MultiIndex
-        df.index = MultiIndex.from_product([["a"], range(3)], names=["one", "two"])
+        df.index = MultiIndex.from_product([["a"], range(4)], names=["one", "two"])
         result = df.to_xarray()
         assert result.dims["one"] == 1
-        assert result.dims["two"] == 3
+        assert result.dims["two"] == 4
         assert len(result.coords) == 2
         assert len(result.data_vars) == 8
         tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"])
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
@@ -248,7 +248,7 @@ def test_ensure_copied_data(self):
         #
         # Must be tested separately from other indexes because
         # self.values is not an ndarray.
-        index = tm.makeCategoricalIndex(10)
+        index = CategoricalIndex(list("ab") * 5)
 
         result = CategoricalIndex(index.values, copy=True)
         tm.assert_index_equal(index, result)
@@ -261,7 +261,7 @@ def test_ensure_copied_data(self):
 class TestCategoricalIndex2:
     def test_view_i8(self):
         # GH#25464
-        ci = tm.makeCategoricalIndex(100)
+        ci = CategoricalIndex(list("ab") * 50)
         msg = "When changing to a larger dtype, its size must be a divisor"
         with pytest.raises(ValueError, match=msg):
             ci.view("i8")
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -69,8 +69,8 @@ def test_tab_completion_with_categorical(self):
     @pytest.mark.parametrize(
         "index",
         [
+            Index(list("ab") * 5, dtype="category"),
             Index([str(i) for i in range(10)]),
-            tm.makeCategoricalIndex(10),
             Index(["foo", "bar", "baz"] * 2),
             tm.makeDateIndex(10),
             tm.makePeriodIndex(10),