Skip to content

Commit 818618e

Browse files
authored
TST: Remove tm.rands/rands_array (pandas-dev#54368)
* remove tm.rands
* remove rands array
* Address failures
* Use unique values
1 parent 0d0073a commit 818618e

22 files changed

+67
-103
lines changed

asv_bench/benchmarks/array.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import pandas as pd
44

5-
from .pandas_vb_common import tm
6-
75

86
class BooleanArray:
97
def setup(self):
@@ -56,7 +54,7 @@ def time_from_tuples(self):
5654
class StringArray:
5755
def setup(self):
5856
N = 100_000
59-
values = tm.rands_array(3, N)
57+
values = np.array([str(i) for i in range(N)], dtype=object)
6058
self.values_obj = np.array(values, dtype="object")
6159
self.values_str = np.array(values, dtype="U")
6260
self.values_list = values.tolist()
@@ -80,7 +78,7 @@ def setup(self, multiple_chunks):
8078
import pyarrow as pa
8179
except ImportError:
8280
raise NotImplementedError
83-
strings = tm.rands_array(3, 10_000)
81+
strings = np.array([str(i) for i in range(10_000)], dtype=object)
8482
if multiple_chunks:
8583
chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
8684
self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
@@ -127,7 +125,7 @@ def setup(self, dtype, hasna):
127125
elif dtype == "int64[pyarrow]":
128126
data = np.arange(N)
129127
elif dtype == "string[pyarrow]":
130-
data = tm.rands_array(10, N)
128+
data = np.array([str(i) for i in range(N)], dtype=object)
131129
elif dtype == "timestamp[ns][pyarrow]":
132130
data = pd.date_range("2000-01-01", freq="s", periods=N)
133131
else:

asv_bench/benchmarks/series_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def setup(self, dtype):
104104
data = np.arange(N)
105105
na_value = NA
106106
elif dtype in ("string", "string[pyarrow]"):
107-
data = tm.rands_array(5, N)
107+
data = np.array([str(i) * 5 for i in range(N)], dtype=object)
108108
na_value = NA
109109
else:
110110
raise NotImplementedError

asv_bench/benchmarks/strings.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ class Construction:
3434
dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}
3535

3636
def setup(self, pd_type, dtype):
37-
series_arr = tm.rands_array(
38-
nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
37+
series_arr = np.array(
38+
[str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype]
3939
)
4040
if pd_type == "series":
4141
self.arr = series_arr
@@ -276,7 +276,7 @@ def time_iter(self, dtype):
276276

277277
class StringArrayConstruction:
278278
def setup(self):
279-
self.series_arr = tm.rands_array(nchars=10, size=10**5)
279+
self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object)
280280
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
281281

282282
def time_string_array_construction(self):

pandas/_testing/__init__.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,6 @@
5454
round_trip_pickle,
5555
write_to_compressed,
5656
)
57-
from pandas._testing._random import (
58-
rands,
59-
rands_array,
60-
)
6157
from pandas._testing._warnings import (
6258
assert_produces_warning,
6359
maybe_produces_warning,
@@ -349,6 +345,22 @@ def to_array(obj):
349345
# Others
350346

351347

348+
def rands_array(
349+
nchars, size: int, dtype: NpDtype = "O", replace: bool = True
350+
) -> np.ndarray:
351+
"""
352+
Generate an array of random strings, each ``nchars`` characters long.
353+
"""
354+
chars = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1))
355+
retval = (
356+
np.random.default_rng(2)
357+
.choice(chars, size=nchars * np.prod(size), replace=replace)
358+
.view((np.str_, nchars))
359+
.reshape(size)
360+
)
361+
return retval.astype(dtype)
362+
363+
352364
def getCols(k) -> str:
353365
return string.ascii_uppercase[:k]
354366

@@ -1127,7 +1139,6 @@ def shares_memory(left, right) -> bool:
11271139
"NULL_OBJECTS",
11281140
"OBJECT_DTYPES",
11291141
"raise_assert_detail",
1130-
"rands",
11311142
"reset_display_options",
11321143
"raises_chained_assignment_error",
11331144
"round_trip_localpath",

pandas/_testing/_io.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Any,
1010
Callable,
1111
)
12+
import uuid
1213
import zipfile
1314

1415
from pandas.compat import (
@@ -18,7 +19,6 @@
1819
from pandas.compat._optional import import_optional_dependency
1920

2021
import pandas as pd
21-
from pandas._testing._random import rands
2222
from pandas._testing.contexts import ensure_clean
2323

2424
if TYPE_CHECKING:
@@ -56,7 +56,7 @@ def round_trip_pickle(
5656
"""
5757
_path = path
5858
if _path is None:
59-
_path = f"__{rands(10)}__.pickle"
59+
_path = f"__{uuid.uuid4()}__.pickle"
6060
with ensure_clean(_path) as temp_path:
6161
pd.to_pickle(obj, temp_path)
6262
return pd.read_pickle(temp_path)

pandas/_testing/_random.py

-35
This file was deleted.

pandas/tests/arithmetic/test_numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -881,7 +881,7 @@ def test_add_frames(self, first, second, expected):
881881
# TODO: This came from series.test.test_operators, needs cleanup
882882
def test_series_frame_radd_bug(self, fixed_now_ts):
883883
# GH#353
884-
vals = Series(tm.rands_array(5, 10))
884+
vals = Series(tm.makeStringIndex())
885885
result = "foo_" + vals
886886
expected = vals.map(lambda x: "foo_" + x)
887887
tm.assert_series_equal(result, expected)

pandas/tests/extension/base/getitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def test_getitem_series_integer_with_missing_raises(self, data, idx):
272272
msg = "Cannot index with an integer indexer containing NA values"
273273
# TODO: this raises KeyError about labels not found (it tries label-based)
274274

275-
ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
275+
ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
276276
with pytest.raises(ValueError, match=msg):
277277
ser[idx]
278278

pandas/tests/extension/base/setitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
197197
# TODO(xfail) this raises KeyError about labels not found (it tries label-based)
198198
# for list of labels with Series
199199
if box_in_series:
200-
arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
200+
arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
201201

202202
msg = "Cannot index with an integer indexer containing NA values"
203203
with pytest.raises(ValueError, match=msg):

pandas/tests/frame/test_arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def test_timestamp_compare(self, left, right):
203203
"dates2": pd.date_range("20010102", periods=10),
204204
"intcol": np.random.default_rng(2).integers(1000000000, size=10),
205205
"floatcol": np.random.default_rng(2).standard_normal(10),
206-
"stringcol": list(tm.rands(10)),
206+
"stringcol": [chr(100 + i) for i in range(10)],
207207
}
208208
)
209209
df.loc[np.random.default_rng(2).random(len(df)) > 0.5, "dates2"] = pd.NaT

pandas/tests/frame/test_repr_info.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def test_str_to_bytes_raises(self):
265265
def test_very_wide_info_repr(self):
266266
df = DataFrame(
267267
np.random.default_rng(2).standard_normal((10, 20)),
268-
columns=tm.rands_array(10, 20),
268+
columns=np.array(["a" * 10] * 20, dtype=object),
269269
)
270270
repr(df)
271271

pandas/tests/groupby/test_groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,7 @@ def test_cython_grouper_series_bug_noncontig():
13611361

13621362

13631363
def test_series_grouper_noncontig_index():
1364-
index = Index(tm.rands_array(10, 100))
1364+
index = Index(["a" * 10] * 100)
13651365

13661366
values = Series(np.random.default_rng(2).standard_normal(50), index=index[::2])
13671367
labels = np.random.default_rng(2).integers(0, 5, 50)

pandas/tests/groupby/test_rank.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def test_rank_unordered_categorical_typeerror():
3131

3232

3333
def test_rank_apply():
34-
lev1 = tm.rands_array(10, 100)
35-
lev2 = tm.rands_array(10, 130)
34+
lev1 = np.array(["a" * 10] * 100, dtype=object)
35+
lev2 = np.array(["b" * 10] * 130, dtype=object)
3636
lab1 = np.random.default_rng(2).integers(0, 100, size=500, dtype=int)
3737
lab2 = np.random.default_rng(2).integers(0, 130, size=500, dtype=int)
3838

pandas/tests/io/formats/test_format.py

+15-16
Original file line numberDiff line numberDiff line change
@@ -214,10 +214,9 @@ def test_repr_truncation(self):
214214
{
215215
"A": np.random.default_rng(2).standard_normal(10),
216216
"B": [
217-
tm.rands(
218-
np.random.default_rng(2).integers(max_len - 1, max_len + 1)
219-
)
220-
for i in range(10)
217+
"a"
218+
* np.random.default_rng(2).integers(max_len - 1, max_len + 1)
219+
for _ in range(10)
221220
],
222221
}
223222
)
@@ -1177,7 +1176,7 @@ def test_wide_repr(self):
11771176
20,
11781177
):
11791178
max_cols = get_option("display.max_columns")
1180-
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
1179+
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
11811180
with option_context("display.expand_frame_repr", False):
11821181
rep_str = repr(df)
11831182

@@ -1203,7 +1202,7 @@ def test_wide_repr_wide_columns(self):
12031202
def test_wide_repr_named(self):
12041203
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
12051204
max_cols = get_option("display.max_columns")
1206-
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
1205+
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
12071206
df.index.name = "DataFrame Index"
12081207
with option_context("display.expand_frame_repr", False):
12091208
rep_str = repr(df)
@@ -1220,9 +1219,9 @@ def test_wide_repr_named(self):
12201219

12211220
def test_wide_repr_multiindex(self):
12221221
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
1223-
midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
1222+
midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
12241223
max_cols = get_option("display.max_columns")
1225-
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), index=midx)
1224+
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10, index=midx)
12261225
df.index.names = ["Level 0", "Level 1"]
12271226
with option_context("display.expand_frame_repr", False):
12281227
rep_str = repr(df)
@@ -1240,10 +1239,10 @@ def test_wide_repr_multiindex(self):
12401239
def test_wide_repr_multiindex_cols(self):
12411240
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
12421241
max_cols = get_option("display.max_columns")
1243-
midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
1244-
mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - 1)))
1242+
midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
1243+
mcols = MultiIndex.from_arrays([["b" * 3] * (max_cols - 1)] * 2)
12451244
df = DataFrame(
1246-
tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols
1245+
[["c" * 25] * (max_cols - 1)] * 10, index=midx, columns=mcols
12471246
)
12481247
df.index.names = ["Level 0", "Level 1"]
12491248
with option_context("display.expand_frame_repr", False):
@@ -1259,7 +1258,7 @@ def test_wide_repr_multiindex_cols(self):
12591258
def test_wide_repr_unicode(self):
12601259
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
12611260
max_cols = 20
1262-
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
1261+
df = DataFrame([["a" * 25] * 10] * (max_cols - 1))
12631262
with option_context("display.expand_frame_repr", False):
12641263
rep_str = repr(df)
12651264
with option_context("display.expand_frame_repr", True):
@@ -1897,11 +1896,11 @@ def test_repr_html_mathjax(self):
18971896

18981897
def test_repr_html_wide(self):
18991898
max_cols = 20
1900-
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
1899+
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
19011900
with option_context("display.max_rows", 60, "display.max_columns", 20):
19021901
assert "..." not in df._repr_html_()
19031902

1904-
wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1)))
1903+
wide_df = DataFrame([["a" * 25] * (max_cols + 1)] * 10)
19051904
with option_context("display.max_rows", 60, "display.max_columns", 20):
19061905
assert "..." in wide_df._repr_html_()
19071906

@@ -1911,14 +1910,14 @@ def test_repr_html_wide_multiindex_cols(self):
19111910
mcols = MultiIndex.from_product(
19121911
[np.arange(max_cols // 2), ["foo", "bar"]], names=["first", "second"]
19131912
)
1914-
df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols)
1913+
df = DataFrame([["a" * 25] * len(mcols)] * 10, columns=mcols)
19151914
reg_repr = df._repr_html_()
19161915
assert "..." not in reg_repr
19171916

19181917
mcols = MultiIndex.from_product(
19191918
(np.arange(1 + (max_cols // 2)), ["foo", "bar"]), names=["first", "second"]
19201919
)
1921-
df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols)
1920+
df = DataFrame([["a" * 25] * len(mcols)] * 10, columns=mcols)
19221921
with option_context("display.max_rows", 60, "display.max_columns", 20):
19231922
assert "..." in df._repr_html_()
19241923

pandas/tests/io/pytables/test_errors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def test_append_with_diff_col_name_types_raises_value_error(setup_path):
153153
df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})
154154

155155
with ensure_clean_store(setup_path) as store:
156-
name = f"df_{tm.rands(10)}"
156+
name = "df_diff_valerror"
157157
store.append(name, df)
158158

159159
for d in (df2, df3, df4, df5):

pandas/tests/io/pytables/test_round_trip.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,7 @@ def roundtrip(key, obj, **kwargs):
5454

5555
def test_long_strings(setup_path):
5656
# GH6166
57-
df = DataFrame(
58-
{"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
59-
)
57+
df = DataFrame({"a": tm.makeStringIndex(10)}, index=tm.makeStringIndex(10))
6058

6159
with ensure_clean_store(setup_path) as store:
6260
store.append("df", df, data_columns=["a"])

pandas/tests/reshape/merge/test_multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def test_merge_multiple_cols_with_mixed_cols_index(self):
193193

194194
def test_compress_group_combinations(self):
195195
# ~ 40000000 possible unique groups
196-
key1 = tm.rands_array(10, 10000)
196+
key1 = tm.makeStringIndex(10000)
197197
key1 = np.tile(key1, 2)
198198
key2 = key1[::-1]
199199

pandas/tests/series/indexing/test_getitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def test_getitem_unrecognized_scalar(self):
6969
assert result == 2
7070

7171
def test_getitem_negative_out_of_bounds(self):
72-
ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
72+
ser = Series(["a"] * 10, index=["a"] * 10)
7373

7474
msg = "index -11 is out of bounds for axis 0 with size 10"
7575
warn_msg = "Series.__getitem__ treating keys as positions is deprecated"

pandas/tests/series/indexing/test_setitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def test_object_series_setitem_dt64array_exact_match(self):
173173

174174
class TestSetitemScalarIndexer:
175175
def test_setitem_negative_out_of_bounds(self):
176-
ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
176+
ser = Series(["a"] * 10, index=["a"] * 10)
177177

178178
msg = "index -11 is out of bounds for axis 0 with size 10"
179179
warn_msg = "Series.__setitem__ treating keys as positions is deprecated"

0 commit comments

Comments
 (0)