From 9ac1f1f81abcc28451d091910a440a877919a786 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 1 Aug 2023 16:03:48 -0700
Subject: [PATCH 1/4] remove tm.rands

---
 pandas/_testing/__init__.py                |  5 +----
 pandas/_testing/_io.py                     |  4 ++--
 pandas/_testing/_random.py                 | 10 ----------
 pandas/tests/extension/base/getitem.py     |  2 +-
 pandas/tests/extension/base/setitem.py     |  2 +-
 pandas/tests/frame/test_arithmetic.py      |  2 +-
 pandas/tests/io/formats/test_format.py     |  7 +++----
 pandas/tests/io/pytables/test_errors.py    |  2 +-
 pandas/tests/series/methods/test_astype.py | 16 +++++++++++++---
 pandas/tests/util/test_util.py             |  5 -----
 10 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 78c882dc94a99..4ce0a3e7836e7 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -54,10 +54,7 @@
     round_trip_pickle,
     write_to_compressed,
 )
-from pandas._testing._random import (
-    rands,
-    rands_array,
-)
+from pandas._testing._random import rands_array
 from pandas._testing._warnings import (
     assert_produces_warning,
     maybe_produces_warning,
diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py
index 49fde6d08fa11..edbba9452b50a 100644
--- a/pandas/_testing/_io.py
+++ b/pandas/_testing/_io.py
@@ -9,6 +9,7 @@
     Any,
     Callable,
 )
+import uuid
 import zipfile
 
 from pandas.compat import (
@@ -18,7 +19,6 @@
 from pandas.compat._optional import import_optional_dependency
 
 import pandas as pd
-from pandas._testing._random import rands
 from pandas._testing.contexts import ensure_clean
 
 if TYPE_CHECKING:
@@ -56,7 +56,7 @@ def round_trip_pickle(
     """
     _path = path
     if _path is None:
-        _path = f"__{rands(10)}__.pickle"
+        _path = f"__{uuid.uuid4()}__.pickle"
     with ensure_clean(_path) as temp_path:
         pd.to_pickle(obj, temp_path)
         return pd.read_pickle(temp_path)
diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py
index 4306a72700aff..fabcd62739c69 100644
--- a/pandas/_testing/_random.py
+++ b/pandas/_testing/_random.py
@@ -23,13 +23,3 @@ def rands_array(
         .reshape(size)
     )
     return retval.astype(dtype)
-
-
-def rands(nchars) -> str:
-    """
-    Generate one random byte string.
-
-    See `rands_array` if you want to create an array of random strings.
-
-    """
-    return "".join(np.random.default_rng(2).choice(RANDS_CHARS, nchars))
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
index 73c8afee4083a..faa38a7c03447 100644
--- a/pandas/tests/extension/base/getitem.py
+++ b/pandas/tests/extension/base/getitem.py
@@ -272,7 +272,7 @@ def test_getitem_series_integer_with_missing_raises(self, data, idx):
         msg = "Cannot index with an integer indexer containing NA values"
         # TODO: this raises KeyError about labels not found (it tries label-based)
 
-        ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
+        ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
         with pytest.raises(ValueError, match=msg):
             ser[idx]
 
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index 76aa560fd17a2..1085ada920ccc 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -197,7 +197,7 @@ def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
         # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
         # for list of labels with Series
         if box_in_series:
-            arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
+            arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
 
         msg = "Cannot index with an integer indexer containing NA values"
         with pytest.raises(ValueError, match=msg):
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 0394241955e9b..262ed69ca7099 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -203,7 +203,7 @@ def test_timestamp_compare(self, left, right):
                 "dates2": pd.date_range("20010102", periods=10),
                 "intcol": np.random.default_rng(2).integers(1000000000, size=10),
                 "floatcol": np.random.default_rng(2).standard_normal(10),
-                "stringcol": list(tm.rands(10)),
+                "stringcol": [chr(100 + i) for i in range(10)],
             }
         )
         df.loc[np.random.default_rng(2).random(len(df)) > 0.5, "dates2"] = pd.NaT
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 0938e7fc6f28b..46892af86ec49 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -214,10 +214,9 @@ def test_repr_truncation(self):
                 {
                     "A": np.random.default_rng(2).standard_normal(10),
                     "B": [
-                        tm.rands(
-                            np.random.default_rng(2).integers(max_len - 1, max_len + 1)
-                        )
-                        for i in range(10)
+                        "a"
+                        * np.random.default_rng(2).integers(max_len - 1, max_len + 1)
+                        for _ in range(10)
                     ],
                 }
             )
diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py
index e8e62d3fdd33b..44bdbfc3fdd7e 100644
--- a/pandas/tests/io/pytables/test_errors.py
+++ b/pandas/tests/io/pytables/test_errors.py
@@ -153,7 +153,7 @@ def test_append_with_diff_col_name_types_raises_value_error(setup_path):
     df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})
 
     with ensure_clean_store(setup_path) as store:
-        name = f"df_{tm.rands(10)}"
+        name = "df_diff_valerror"
         store.append(name, df)
 
         for d in (df2, df3, df4, df5):
diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
index f367d611d592a..b6c409397c9fb 100644
--- a/pandas/tests/series/methods/test_astype.py
+++ b/pandas/tests/series/methods/test_astype.py
@@ -29,6 +29,16 @@
 import pandas._testing as tm
 
 
+def rand_str(nchars: int) -> str:
+    """
+    Build a ``str`` of ``nchars`` alphanumeric characters (seeded, so deterministic).
+    """
+    RANDS_CHARS = np.array(
+        list(string.ascii_letters + string.digits), dtype=(np.str_, 1)
+    )
+    return "".join(np.random.default_rng(2).choice(RANDS_CHARS, nchars))
+
+
 class TestAstypeAPI:
     def test_astype_unitless_dt64_raises(self):
         # GH#47844
@@ -129,8 +139,8 @@ def test_astype_empty_constructor_equality(self, dtype):
     @pytest.mark.parametrize(
         "series",
         [
-            Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
-            Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
+            Series([string.digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
+            Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
         ],
     )
     def test_astype_str_map(self, dtype, series):
@@ -382,7 +392,7 @@ def test_astype_unicode(self):
         # default encoding to utf-8
         digits = string.digits
         test_series = [
-            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
+            Series([digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
             Series(["データーサイエンス、お前はもう死んでいる"]),
         ]
 
diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py
index 802be634192a3..1d146771e6a42 100644
--- a/pandas/tests/util/test_util.py
+++ b/pandas/tests/util/test_util.py
@@ -6,11 +6,6 @@
 import pandas._testing as tm
 
 
-def test_rands():
-    r = tm.rands(10)
-    assert len(r) == 10
-
-
 def test_rands_array_1d():
     arr = tm.rands_array(5, size=10)
     assert arr.shape == (10,)

From 4ab42cc9f2f2b4b3eb7dec86afe189d068750f4c Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 1 Aug 2023 17:32:05 -0700
Subject: [PATCH 2/4] remove rands array

---
 asv_bench/benchmarks/array.py                |  8 +++----
 asv_bench/benchmarks/series_methods.py       |  2 +-
 asv_bench/benchmarks/strings.py              |  6 ++---
 pandas/_testing/__init__.py                  | 17 ++++++++++++-
 pandas/_testing/_random.py                   | 25 --------------------
 pandas/tests/arithmetic/test_numeric.py      |  2 +-
 pandas/tests/frame/test_repr_info.py         |  2 +-
 pandas/tests/groupby/test_groupby.py         |  2 +-
 pandas/tests/groupby/test_rank.py            |  4 ++--
 pandas/tests/io/formats/test_format.py       | 24 +++++++++----------
 pandas/tests/io/pytables/test_round_trip.py  |  4 +---
 pandas/tests/reshape/merge/test_multi.py     |  2 +-
 pandas/tests/series/indexing/test_getitem.py |  2 +-
 pandas/tests/series/indexing/test_setitem.py |  2 +-
 pandas/tests/util/test_hashing.py            |  2 +-
 pandas/tests/util/test_util.py               | 12 ----------
 16 files changed, 44 insertions(+), 72 deletions(-)
 delete mode 100644 pandas/_testing/_random.py

diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py
index ecd8c26ba6ca5..506f334d83d01 100644
--- a/asv_bench/benchmarks/array.py
+++ b/asv_bench/benchmarks/array.py
@@ -2,8 +2,6 @@
 
 import pandas as pd
 
-from .pandas_vb_common import tm
-
 
 class BooleanArray:
     def setup(self):
@@ -56,7 +54,7 @@ def time_from_tuples(self):
 class StringArray:
     def setup(self):
         N = 100_000
-        values = tm.rands_array(3, N)
+        values = np.array(["a"] * N, dtype=object)
         self.values_obj = np.array(values, dtype="object")
         self.values_str = np.array(values, dtype="U")
         self.values_list = values.tolist()
@@ -80,7 +78,7 @@ def setup(self, multiple_chunks):
             import pyarrow as pa
         except ImportError:
             raise NotImplementedError
-        strings = tm.rands_array(3, 10_000)
+        strings = np.array(["a"] * 10_000, dtype=object)
         if multiple_chunks:
             chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
             self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
@@ -127,7 +125,7 @@ def setup(self, dtype, hasna):
         elif dtype == "int64[pyarrow]":
             data = np.arange(N)
         elif dtype == "string[pyarrow]":
-            data = tm.rands_array(10, N)
+            data = np.array(["a"] * N, dtype=object)
         elif dtype == "timestamp[ns][pyarrow]":
             data = pd.date_range("2000-01-01", freq="s", periods=N)
         else:
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 492d075173e17..76cc803ecc2da 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -104,7 +104,7 @@ def setup(self, dtype):
             data = np.arange(N)
             na_value = NA
         elif dtype in ("string", "string[pyarrow]"):
-            data = tm.rands_array(5, N)
+            data = np.array(["a"] * N, dtype=object)
             na_value = NA
         else:
             raise NotImplementedError
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 9f1aeb7670628..1cc3939c8080b 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -34,9 +34,7 @@ class Construction:
     dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
 
     def setup(self, pd_type, dtype):
-        series_arr = tm.rands_array(
-            nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
-        )
+        series_arr = np.array(["a"] * 10_000, dtype=self.dtype_mapping[dtype])
         if pd_type == "series":
             self.arr = series_arr
         elif pd_type == "frame":
@@ -276,7 +274,7 @@ def time_iter(self, dtype):
 
 class StringArrayConstruction:
     def setup(self):
-        self.series_arr = tm.rands_array(nchars=10, size=10**5)
+        self.series_arr = np.array(["a"] * 10**5, dtype=object)
         self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
 
     def time_string_array_construction(self):
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 4ce0a3e7836e7..03bcbaaf2e25b 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -54,7 +54,6 @@
     round_trip_pickle,
     write_to_compressed,
 )
-from pandas._testing._random import rands_array
 from pandas._testing._warnings import (
     assert_produces_warning,
     maybe_produces_warning,
@@ -346,6 +345,22 @@ def to_array(obj):
 # Others
 
 
+def rands_array(
+    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
+) -> np.ndarray:
+    """
+    Generate a ``size``-shaped array of ``nchars``-character alphanumeric strings.
+    """
+    chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
+    retval = (
+        np.random.default_rng(2)
+        .choice(chars, size=nchars * np.prod(size), replace=replace)
+        .view((np.str_, nchars))
+        .reshape(size)
+    )
+    return retval.astype(dtype)
+
+
 def getCols(k) -> str:
     return string.ascii_uppercase[:k]
 
diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py
deleted file mode 100644
index fabcd62739c69..0000000000000
--- a/pandas/_testing/_random.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from __future__ import annotations
-
-import string
-from typing import TYPE_CHECKING
-
-import numpy as np
-
-if TYPE_CHECKING:
-    from pandas._typing import NpDtype
-RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
-
-
-def rands_array(
-    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
-) -> np.ndarray:
-    """
-    Generate an array of byte strings.
-    """
-    retval = (
-        np.random.default_rng(2)
-        .choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace)
-        .view((np.str_, nchars))
-        .reshape(size)
-    )
-    return retval.astype(dtype)
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
index 42fa03b38f6ff..7f0996da2e2f2 100644
--- a/pandas/tests/arithmetic/test_numeric.py
+++ b/pandas/tests/arithmetic/test_numeric.py
@@ -881,7 +881,7 @@ def test_add_frames(self, first, second, expected):
     # TODO: This came from series.test.test_operators, needs cleanup
     def test_series_frame_radd_bug(self, fixed_now_ts):
         # GH#353
-        vals = Series(tm.rands_array(5, 10))
+        vals = Series(tm.makeStringIndex())
         result = "foo_" + vals
         expected = vals.map(lambda x: "foo_" + x)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index 0c9e5e01fa644..49375658abfee 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -265,7 +265,7 @@ def test_str_to_bytes_raises(self):
     def test_very_wide_info_repr(self):
         df = DataFrame(
             np.random.default_rng(2).standard_normal((10, 20)),
-            columns=tm.rands_array(10, 20),
+            columns=np.array(["a" * 10] * 20, dtype=object),
         )
         repr(df)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index fc0efb74a9b62..09d1814e1b0a1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1361,7 +1361,7 @@ def test_cython_grouper_series_bug_noncontig():
 
 
 def test_series_grouper_noncontig_index():
-    index = Index(tm.rands_array(10, 100))
+    index = Index(["a" * 10] * 100)
 
     values = Series(np.random.default_rng(2).standard_normal(50), index=index[::2])
     labels = np.random.default_rng(2).integers(0, 5, 50)
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 41bfa121624ea..26881bdd18274 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -31,8 +31,8 @@ def test_rank_unordered_categorical_typeerror():
 
 
 def test_rank_apply():
-    lev1 = tm.rands_array(10, 100)
-    lev2 = tm.rands_array(10, 130)
+    lev1 = np.array(["a" * 10] * 100, dtype=object)
+    lev2 = np.array(["b" * 10] * 130, dtype=object)
     lab1 = np.random.default_rng(2).integers(0, 100, size=500, dtype=int)
     lab2 = np.random.default_rng(2).integers(0, 130, size=500, dtype=int)
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 46892af86ec49..592b8d206fa30 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -1176,7 +1176,7 @@ def test_wide_repr(self):
             20,
         ):
             max_cols = get_option("display.max_columns")
-            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
+            df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
             with option_context("display.expand_frame_repr", False):
                 rep_str = repr(df)
 
@@ -1202,7 +1202,7 @@ def test_wide_repr_wide_columns(self):
     def test_wide_repr_named(self):
         with option_context("mode.sim_interactive", True, "display.max_columns", 20):
             max_cols = get_option("display.max_columns")
-            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
+            df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
             df.index.name = "DataFrame Index"
             with option_context("display.expand_frame_repr", False):
                 rep_str = repr(df)
@@ -1219,9 +1219,9 @@ def test_wide_repr_named(self):
 
     def test_wide_repr_multiindex(self):
         with option_context("mode.sim_interactive", True, "display.max_columns", 20):
-            midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
+            midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
             max_cols = get_option("display.max_columns")
-            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), index=midx)
+            df = DataFrame([["a" * 25] * (max_cols - 1)] * 10, index=midx)
             df.index.names = ["Level 0", "Level 1"]
             with option_context("display.expand_frame_repr", False):
                 rep_str = repr(df)
@@ -1239,10 +1239,10 @@ def test_wide_repr_multiindex(self):
     def test_wide_repr_multiindex_cols(self):
         with option_context("mode.sim_interactive", True, "display.max_columns", 20):
             max_cols = get_option("display.max_columns")
-            midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
-            mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - 1)))
+            midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
+            mcols = MultiIndex.from_arrays([["b" * 3] * (max_cols - 1)] * 2)
             df = DataFrame(
-                tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols
+                [["c" * 25] * (max_cols - 1)] * 10, index=midx, columns=mcols
             )
             df.index.names = ["Level 0", "Level 1"]
             with option_context("display.expand_frame_repr", False):
@@ -1258,7 +1258,7 @@ def test_wide_repr_multiindex_cols(self):
     def test_wide_repr_unicode(self):
         with option_context("mode.sim_interactive", True, "display.max_columns", 20):
             max_cols = 20
-            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
+            df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
             with option_context("display.expand_frame_repr", False):
                 rep_str = repr(df)
             with option_context("display.expand_frame_repr", True):
@@ -1896,11 +1896,11 @@ def test_repr_html_mathjax(self):
 
     def test_repr_html_wide(self):
         max_cols = 20
-        df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
+        df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
         with option_context("display.max_rows", 60, "display.max_columns", 20):
             assert "..." not in df._repr_html_()
 
-        wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1)))
+        wide_df = DataFrame([["a" * 25] * (max_cols + 1)] * 10)
         with option_context("display.max_rows", 60, "display.max_columns", 20):
             assert "..." in wide_df._repr_html_()
 
@@ -1910,14 +1910,14 @@ def test_repr_html_wide_multiindex_cols(self):
         mcols = MultiIndex.from_product(
             [np.arange(max_cols // 2), ["foo", "bar"]], names=["first", "second"]
         )
-        df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols)
+        df = DataFrame([["a" * 25] * len(mcols)] * 10, columns=mcols)
         reg_repr = df._repr_html_()
         assert "..." not in reg_repr
 
         mcols = MultiIndex.from_product(
             (np.arange(1 + (max_cols // 2)), ["foo", "bar"]), names=["first", "second"]
         )
-        df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols)
+        df = DataFrame([["a" * 25] * len(mcols)] * 10, columns=mcols)
         with option_context("display.max_rows", 60, "display.max_columns", 20):
             assert "..." in df._repr_html_()
 
diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py
index 84c8c0a314342..8ffdc421492a5 100644
--- a/pandas/tests/io/pytables/test_round_trip.py
+++ b/pandas/tests/io/pytables/test_round_trip.py
@@ -54,9 +54,7 @@ def roundtrip(key, obj, **kwargs):
 
 def test_long_strings(setup_path):
     # GH6166
-    df = DataFrame(
-        {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
-    )
+    df = DataFrame({"a": ["a" * 100] * 10}, index=["b" * 100] * 10)
 
     with ensure_clean_store(setup_path) as store:
         store.append("df", df, data_columns=["a"])
diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py
index b43275f3ce4af..088d1e7e3c85e 100644
--- a/pandas/tests/reshape/merge/test_multi.py
+++ b/pandas/tests/reshape/merge/test_multi.py
@@ -193,7 +193,7 @@ def test_merge_multiple_cols_with_mixed_cols_index(self):
 
     def test_compress_group_combinations(self):
         # ~ 40000000 possible unique groups
-        key1 = tm.rands_array(10, 10000)
+        key1 = tm.makeStringIndex(10000)
         key1 = np.tile(key1, 2)
         key2 = key1[::-1]
 
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
index 93ccc336468ea..458988491aae8 100644
--- a/pandas/tests/series/indexing/test_getitem.py
+++ b/pandas/tests/series/indexing/test_getitem.py
@@ -69,7 +69,7 @@ def test_getitem_unrecognized_scalar(self):
         assert result == 2
 
     def test_getitem_negative_out_of_bounds(self):
-        ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
+        ser = Series(["a"] * 10, index=["a"] * 10)
 
         msg = "index -11 is out of bounds for axis 0 with size 10"
         warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index b40e4276ccfe7..f1e66212c131a 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -173,7 +173,7 @@ def test_object_series_setitem_dt64array_exact_match(self):
 
 class TestSetitemScalarIndexer:
     def test_setitem_negative_out_of_bounds(self):
-        ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
+        ser = Series(["a"] * 10, index=["a"] * 10)
 
         msg = "index -11 is out of bounds for axis 0 with size 10"
         warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py
index a23d0c1c13e09..e78b042a09231 100644
--- a/pandas/tests/util/test_hashing.py
+++ b/pandas/tests/util/test_hashing.py
@@ -328,7 +328,7 @@ def test_alternate_encoding(index):
 @pytest.mark.parametrize("l_add", [0, 1])
 def test_same_len_hash_collisions(l_exp, l_add):
     length = 2 ** (l_exp + 8) + l_add
-    s = tm.rands_array(length, 2)
+    s = np.array(["a" * length, "b" * length], dtype=object)
 
     result = hash_array(s, "utf8")
     assert not result[0] == result[1]
diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py
index 1d146771e6a42..5718480fdec5e 100644
--- a/pandas/tests/util/test_util.py
+++ b/pandas/tests/util/test_util.py
@@ -6,18 +6,6 @@
 import pandas._testing as tm
 
 
-def test_rands_array_1d():
-    arr = tm.rands_array(5, size=10)
-    assert arr.shape == (10,)
-    assert len(arr[0]) == 5
-
-
-def test_rands_array_2d():
-    arr = tm.rands_array(7, size=(10, 10))
-    assert arr.shape == (10, 10)
-    assert len(arr[1, 1]) == 7
-
-
 def test_numpy_err_state_is_default():
     expected = {"over": "warn", "divide": "warn", "invalid": "warn", "under": "ignore"}
     import numpy as np

From 1b250b44926de41105d260eba605fb97bbd0aa7e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 2 Aug 2023 09:22:52 -0700
Subject: [PATCH 3/4] Address failures

---
 asv_bench/benchmarks/strings.py | 4 ++--
 pandas/_testing/__init__.py     | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 1cc3939c8080b..712d2afb81d08 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -34,7 +34,7 @@ class Construction:
     dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
 
     def setup(self, pd_type, dtype):
-        series_arr = np.array(["a"] * 10_000, dtype=self.dtype_mapping[dtype])
+        series_arr = np.array(["a" * 10] * 100_000, dtype=self.dtype_mapping[dtype])
         if pd_type == "series":
             self.arr = series_arr
         elif pd_type == "frame":
@@ -274,7 +274,7 @@ def time_iter(self, dtype):
 
 class StringArrayConstruction:
     def setup(self):
-        self.series_arr = np.array(["a"] * 10**5, dtype=object)
+        self.series_arr = np.array(["a" * 10] * 10**5, dtype=object)
         self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
 
     def time_string_array_construction(self):
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index 03bcbaaf2e25b..483c5ad59872f 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -1139,7 +1139,6 @@ def shares_memory(left, right) -> bool:
     "NULL_OBJECTS",
     "OBJECT_DTYPES",
     "raise_assert_detail",
-    "rands",
     "reset_display_options",
     "raises_chained_assignment_error",
     "round_trip_localpath",

From e360fea192c7c22934b0617a13a6352fae264ccb Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 2 Aug 2023 15:00:11 -0700
Subject: [PATCH 4/4] Use unique values

---
 asv_bench/benchmarks/array.py          | 6 +++---
 asv_bench/benchmarks/series_methods.py | 2 +-
 asv_bench/benchmarks/strings.py        | 6 ++++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py
index 506f334d83d01..09c4acc0ab309 100644
--- a/asv_bench/benchmarks/array.py
+++ b/asv_bench/benchmarks/array.py
@@ -54,7 +54,7 @@ def time_from_tuples(self):
 class StringArray:
     def setup(self):
         N = 100_000
-        values = np.array(["a"] * N, dtype=object)
+        values = np.array([str(i) for i in range(N)], dtype=object)
         self.values_obj = np.array(values, dtype="object")
         self.values_str = np.array(values, dtype="U")
         self.values_list = values.tolist()
@@ -78,7 +78,7 @@ def setup(self, multiple_chunks):
             import pyarrow as pa
         except ImportError:
             raise NotImplementedError
-        strings = np.array(["a"] * 10_000, dtype=object)
+        strings = np.array([str(i) for i in range(10_000)], dtype=object)
         if multiple_chunks:
             chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
             self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
@@ -125,7 +125,7 @@ def setup(self, dtype, hasna):
         elif dtype == "int64[pyarrow]":
             data = np.arange(N)
         elif dtype == "string[pyarrow]":
-            data = np.array(["a"] * N, dtype=object)
+            data = np.array([str(i) for i in range(N)], dtype=object)
         elif dtype == "timestamp[ns][pyarrow]":
             data = pd.date_range("2000-01-01", freq="s", periods=N)
         else:
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 76cc803ecc2da..288369145576e 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -104,7 +104,7 @@ def setup(self, dtype):
             data = np.arange(N)
             na_value = NA
         elif dtype in ("string", "string[pyarrow]"):
-            data = np.array(["a"] * N, dtype=object)
+            data = np.array([str(i) * 5 for i in range(N)], dtype=object)
             na_value = NA
         else:
             raise NotImplementedError
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
index 712d2afb81d08..d70d9d0aa5227 100644
--- a/asv_bench/benchmarks/strings.py
+++ b/asv_bench/benchmarks/strings.py
@@ -34,7 +34,9 @@ class Construction:
     dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
 
     def setup(self, pd_type, dtype):
-        series_arr = np.array(["a" * 10] * 100_000, dtype=self.dtype_mapping[dtype])
+        series_arr = np.array(
+            [str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype]
+        )
         if pd_type == "series":
             self.arr = series_arr
         elif pd_type == "frame":
@@ -274,7 +276,7 @@ def time_iter(self, dtype):
 
 class StringArrayConstruction:
     def setup(self):
-        self.series_arr = np.array(["a" * 10] * 10**5, dtype=object)
+        self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object)
         self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
 
     def time_string_array_construction(self):