Skip to content

TST: Remove tm.rands/rands_array #54368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions asv_bench/benchmarks/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import pandas as pd

from .pandas_vb_common import tm


class BooleanArray:
def setup(self):
Expand Down Expand Up @@ -56,7 +54,7 @@ def time_from_tuples(self):
class StringArray:
def setup(self):
N = 100_000
values = tm.rands_array(3, N)
values = np.array([str(i) for i in range(N)], dtype=object)
self.values_obj = np.array(values, dtype="object")
self.values_str = np.array(values, dtype="U")
self.values_list = values.tolist()
Expand All @@ -80,7 +78,7 @@ def setup(self, multiple_chunks):
import pyarrow as pa
except ImportError:
raise NotImplementedError
strings = tm.rands_array(3, 10_000)
strings = np.array([str(i) for i in range(10_000)], dtype=object)
if multiple_chunks:
chunks = [strings[i : i + 100] for i in range(0, len(strings), 100)]
self.array = pd.arrays.ArrowStringArray(pa.chunked_array(chunks))
Expand Down Expand Up @@ -127,7 +125,7 @@ def setup(self, dtype, hasna):
elif dtype == "int64[pyarrow]":
data = np.arange(N)
elif dtype == "string[pyarrow]":
data = tm.rands_array(10, N)
data = np.array([str(i) for i in range(N)], dtype=object)
elif dtype == "timestamp[ns][pyarrow]":
data = pd.date_range("2000-01-01", freq="s", periods=N)
else:
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/series_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def setup(self, dtype):
data = np.arange(N)
na_value = NA
elif dtype in ("string", "string[pyarrow]"):
data = tm.rands_array(5, N)
data = np.array([str(i) * 5 for i in range(N)], dtype=object)
na_value = NA
else:
raise NotImplementedError
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class Construction:
dtype_mapping = {"str": "str", "string[python]": object, "string[pyarrow]": object}

def setup(self, pd_type, dtype):
series_arr = tm.rands_array(
nchars=10, size=10**5, dtype=self.dtype_mapping[dtype]
series_arr = np.array(
[str(i) * 10 for i in range(100_000)], dtype=self.dtype_mapping[dtype]
)
if pd_type == "series":
self.arr = series_arr
Expand Down Expand Up @@ -276,7 +276,7 @@ def time_iter(self, dtype):

class StringArrayConstruction:
def setup(self):
self.series_arr = tm.rands_array(nchars=10, size=10**5)
self.series_arr = np.array([str(i) * 10 for i in range(10**5)], dtype=object)
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])

def time_string_array_construction(self):
Expand Down
21 changes: 16 additions & 5 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@
round_trip_pickle,
write_to_compressed,
)
from pandas._testing._random import (
rands,
rands_array,
)
from pandas._testing._warnings import (
assert_produces_warning,
maybe_produces_warning,
Expand Down Expand Up @@ -349,6 +345,22 @@ def to_array(obj):
# Others


def rands_array(
    nchars, size: int, dtype: NpDtype = "O", replace: bool = True
) -> np.ndarray:
    """
    Generate an array of pseudo-random alphanumeric strings.

    Every element is a text (``str``) value of exactly ``nchars`` characters
    drawn from the ASCII letters and digits; these are not byte strings.

    The generator is re-created with a fixed seed on every call, so calling
    this function twice with the same arguments returns equal arrays.

    Parameters
    ----------
    nchars : int
        Number of characters in each generated string.
    size : int or tuple of int
        Shape of the returned array. It is passed to ``np.prod`` and
        ``reshape``, so a tuple yields a multi-dimensional result.
    dtype : NpDtype, default "O"
        dtype the result is cast to before being returned.
    replace : bool, default True
        Forwarded to ``Generator.choice``; whether characters are sampled
        with replacement.

    Returns
    -------
    np.ndarray
        Array of shape ``size`` holding the generated strings.
    """
    alphabet = np.array(
        list(string.ascii_letters + string.digits), dtype=(np.str_, 1)
    )
    # One flat draw of single characters, enough to fill every string.
    n_total = nchars * np.prod(size)
    rng = np.random.default_rng(2)
    flat_chars = rng.choice(alphabet, size=n_total, replace=replace)
    # Reinterpret each run of ``nchars`` one-character cells as one string,
    # then give the result its requested shape.
    strings = flat_chars.view((np.str_, nchars)).reshape(size)
    return strings.astype(dtype)


def getCols(k) -> str:
    """Return the first ``k`` uppercase ASCII letters joined as one string."""
    uppercase = string.ascii_uppercase
    return uppercase[:k]

Expand Down Expand Up @@ -1127,7 +1139,6 @@ def shares_memory(left, right) -> bool:
"NULL_OBJECTS",
"OBJECT_DTYPES",
"raise_assert_detail",
"rands",
"reset_display_options",
"raises_chained_assignment_error",
"round_trip_localpath",
Expand Down
4 changes: 2 additions & 2 deletions pandas/_testing/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Any,
Callable,
)
import uuid
import zipfile

from pandas.compat import (
Expand All @@ -18,7 +19,6 @@
from pandas.compat._optional import import_optional_dependency

import pandas as pd
from pandas._testing._random import rands
from pandas._testing.contexts import ensure_clean

if TYPE_CHECKING:
Expand Down Expand Up @@ -56,7 +56,7 @@ def round_trip_pickle(
"""
_path = path
if _path is None:
_path = f"__{rands(10)}__.pickle"
_path = f"__{uuid.uuid4()}__.pickle"
with ensure_clean(_path) as temp_path:
pd.to_pickle(obj, temp_path)
return pd.read_pickle(temp_path)
Expand Down
35 changes: 0 additions & 35 deletions pandas/_testing/_random.py

This file was deleted.

2 changes: 1 addition & 1 deletion pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ def test_add_frames(self, first, second, expected):
# TODO: This came from series.test.test_operators, needs cleanup
def test_series_frame_radd_bug(self, fixed_now_ts):
# GH#353
vals = Series(tm.rands_array(5, 10))
vals = Series(tm.makeStringIndex())
result = "foo_" + vals
expected = vals.map(lambda x: "foo_" + x)
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def test_getitem_series_integer_with_missing_raises(self, data, idx):
msg = "Cannot index with an integer indexer containing NA values"
# TODO: this raises KeyError about labels not found (it tries label-based)

ser = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
ser = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])
with pytest.raises(ValueError, match=msg):
ser[idx]

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
# TODO(xfail) this raises KeyError about labels not found (it tries label-based)
# for list of labels with Series
if box_in_series:
arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
arr = pd.Series(data, index=[chr(100 + i) for i in range(len(data))])

msg = "Cannot index with an integer indexer containing NA values"
with pytest.raises(ValueError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def test_timestamp_compare(self, left, right):
"dates2": pd.date_range("20010102", periods=10),
"intcol": np.random.default_rng(2).integers(1000000000, size=10),
"floatcol": np.random.default_rng(2).standard_normal(10),
"stringcol": list(tm.rands(10)),
"stringcol": [chr(100 + i) for i in range(10)],
}
)
df.loc[np.random.default_rng(2).random(len(df)) > 0.5, "dates2"] = pd.NaT
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def test_str_to_bytes_raises(self):
def test_very_wide_info_repr(self):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 20)),
columns=tm.rands_array(10, 20),
columns=np.array(["a" * 10] * 20, dtype=object),
)
repr(df)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ def test_cython_grouper_series_bug_noncontig():


def test_series_grouper_noncontig_index():
index = Index(tm.rands_array(10, 100))
index = Index(["a" * 10] * 100)

values = Series(np.random.default_rng(2).standard_normal(50), index=index[::2])
labels = np.random.default_rng(2).integers(0, 5, 50)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def test_rank_unordered_categorical_typeerror():


def test_rank_apply():
lev1 = tm.rands_array(10, 100)
lev2 = tm.rands_array(10, 130)
lev1 = np.array(["a" * 10] * 100, dtype=object)
lev2 = np.array(["b" * 10] * 130, dtype=object)
lab1 = np.random.default_rng(2).integers(0, 100, size=500, dtype=int)
lab2 = np.random.default_rng(2).integers(0, 130, size=500, dtype=int)

Expand Down
31 changes: 15 additions & 16 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,9 @@ def test_repr_truncation(self):
{
"A": np.random.default_rng(2).standard_normal(10),
"B": [
tm.rands(
np.random.default_rng(2).integers(max_len - 1, max_len + 1)
)
for i in range(10)
"a"
* np.random.default_rng(2).integers(max_len - 1, max_len + 1)
for _ in range(10)
],
}
)
Expand Down Expand Up @@ -1177,7 +1176,7 @@ def test_wide_repr(self):
20,
):
max_cols = get_option("display.max_columns")
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)

Expand All @@ -1203,7 +1202,7 @@ def test_wide_repr_wide_columns(self):
def test_wide_repr_named(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
max_cols = get_option("display.max_columns")
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
df.index.name = "DataFrame Index"
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
Expand All @@ -1220,9 +1219,9 @@ def test_wide_repr_named(self):

def test_wide_repr_multiindex(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
max_cols = get_option("display.max_columns")
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), index=midx)
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10, index=midx)
df.index.names = ["Level 0", "Level 1"]
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
Expand All @@ -1240,10 +1239,10 @@ def test_wide_repr_multiindex(self):
def test_wide_repr_multiindex_cols(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
max_cols = get_option("display.max_columns")
midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - 1)))
midx = MultiIndex.from_arrays([["a" * 5] * 10] * 2)
mcols = MultiIndex.from_arrays([["b" * 3] * (max_cols - 1)] * 2)
df = DataFrame(
tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols
[["c" * 25] * (max_cols - 1)] * 10, index=midx, columns=mcols
)
df.index.names = ["Level 0", "Level 1"]
with option_context("display.expand_frame_repr", False):
Expand All @@ -1259,7 +1258,7 @@ def test_wide_repr_multiindex_cols(self):
def test_wide_repr_unicode(self):
with option_context("mode.sim_interactive", True, "display.max_columns", 20):
max_cols = 20
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
df = DataFrame([["a" * 25] * 10] * (max_cols - 1))
with option_context("display.expand_frame_repr", False):
rep_str = repr(df)
with option_context("display.expand_frame_repr", True):
Expand Down Expand Up @@ -1897,11 +1896,11 @@ def test_repr_html_mathjax(self):

def test_repr_html_wide(self):
max_cols = 20
df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
df = DataFrame([["a" * 25] * (max_cols - 1)] * 10)
with option_context("display.max_rows", 60, "display.max_columns", 20):
assert "..." not in df._repr_html_()

wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1)))
wide_df = DataFrame([["a" * 25] * (max_cols + 1)] * 10)
with option_context("display.max_rows", 60, "display.max_columns", 20):
assert "..." in wide_df._repr_html_()

Expand All @@ -1911,14 +1910,14 @@ def test_repr_html_wide_multiindex_cols(self):
mcols = MultiIndex.from_product(
[np.arange(max_cols // 2), ["foo", "bar"]], names=["first", "second"]
)
df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols)
df = DataFrame([["a" * 25] * len(mcols)] * 10, columns=mcols)
reg_repr = df._repr_html_()
assert "..." not in reg_repr

mcols = MultiIndex.from_product(
(np.arange(1 + (max_cols // 2)), ["foo", "bar"]), names=["first", "second"]
)
df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols)
df = DataFrame([["a" * 25] * len(mcols)] * 10, columns=mcols)
with option_context("display.max_rows", 60, "display.max_columns", 20):
assert "..." in df._repr_html_()

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def test_append_with_diff_col_name_types_raises_value_error(setup_path):
df5 = DataFrame({("1", 2, object): np.random.default_rng(2).standard_normal(10)})

with ensure_clean_store(setup_path) as store:
name = f"df_{tm.rands(10)}"
name = "df_diff_valerror"
store.append(name, df)

for d in (df2, df3, df4, df5):
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/io/pytables/test_round_trip.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ def roundtrip(key, obj, **kwargs):

def test_long_strings(setup_path):
# GH6166
df = DataFrame(
{"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
)
df = DataFrame({"a": tm.makeStringIndex(10)}, index=tm.makeStringIndex(10))

with ensure_clean_store(setup_path) as store:
store.append("df", df, data_columns=["a"])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/reshape/merge/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_merge_multiple_cols_with_mixed_cols_index(self):

def test_compress_group_combinations(self):
# ~ 40000000 possible unique groups
key1 = tm.rands_array(10, 10000)
key1 = tm.makeStringIndex(10000)
key1 = np.tile(key1, 2)
key2 = key1[::-1]

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_getitem_unrecognized_scalar(self):
assert result == 2

def test_getitem_negative_out_of_bounds(self):
ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
ser = Series(["a"] * 10, index=["a"] * 10)

msg = "index -11 is out of bounds for axis 0 with size 10"
warn_msg = "Series.__getitem__ treating keys as positions is deprecated"
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def test_object_series_setitem_dt64array_exact_match(self):

class TestSetitemScalarIndexer:
def test_setitem_negative_out_of_bounds(self):
ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))
ser = Series(["a"] * 10, index=["a"] * 10)

msg = "index -11 is out of bounds for axis 0 with size 10"
warn_msg = "Series.__setitem__ treating keys as positions is deprecated"
Expand Down
Loading