Skip to content

CLN: Use np.random.RandomState instead of tm.RNGContext #50915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@
get_obj,
)
from pandas._testing.contexts import (
RNGContext,
decompress_file,
ensure_clean,
ensure_safe_environment_variables,
Expand Down Expand Up @@ -1125,7 +1124,6 @@ def shares_memory(left, right) -> bool:
"raise_assert_detail",
"rands",
"reset_display_options",
"RNGContext",
"raises_chained_assignment_error",
"round_trip_localpath",
"round_trip_pathlib",
Expand Down
37 changes: 0 additions & 37 deletions pandas/_testing/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@
import os
from pathlib import Path
import tempfile
from types import TracebackType
from typing import (
IO,
Any,
Generator,
)
import uuid

import numpy as np

from pandas.compat import PYPY
from pandas.errors import ChainedAssignmentError

Expand Down Expand Up @@ -198,40 +195,6 @@ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
set_option("compute.use_numexpr", olduse)


class RNGContext:
    """
    Context manager that temporarily seeds numpy's global random generator.

    Entering the block saves the current global generator state and then
    seeds the generator with ``seed``; leaving the block puts the saved
    state back.

    Parameters
    ----------
    seed : int
        Seed for numpy.random.seed

    Examples
    --------
    with RNGContext(42):
        np.random.randn()
    """

    def __init__(self, seed) -> None:
        self.seed = seed

    def __enter__(self) -> None:
        # Snapshot first so that __exit__ can undo the reseed below.
        self.start_state = np.random.get_state()
        np.random.seed(self.seed)

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Returns None, so an exception raised in the block still propagates.
        np.random.set_state(self.start_state)


def raises_chained_assignment_error():

if PYPY:
Expand Down
41 changes: 20 additions & 21 deletions pandas/tests/plotting/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,30 @@
DataFrame,
to_datetime,
)
import pandas._testing as tm


@pytest.fixture
def hist_df():
    """
    Return a DataFrame of 100 seeded pseudo-random rows.

    Columns are ``gender``, ``classroom``, ``height``, ``weight``,
    ``category`` and ``datetime``.  Every column is drawn from the same
    ``RandomState(42)`` so the fixture yields identical data on every call
    and neither reads nor advances numpy's global generator.
    """
    n = 100
    # One private generator for all columns.  Mixing it with the global
    # ``np.random`` functions would leave the numeric columns unseeded.
    np_random = np.random.RandomState(42)
    gender = np_random.choice(["Male", "Female"], size=n)
    classroom = np_random.choice(["A", "B", "C"], size=n)

    hist_df = DataFrame(
        {
            "gender": gender,
            "classroom": classroom,
            "height": np_random.normal(66, 4, size=n),
            "weight": np_random.normal(161, 32, size=n),
            "category": np_random.randint(4, size=n),
            "datetime": to_datetime(
                np_random.randint(
                    812419200000000000,
                    819331200000000000,
                    size=n,
                    dtype=np.int64,
                )
            ),
        }
    )
    return hist_df
106 changes: 52 additions & 54 deletions pandas/tests/plotting/frame/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,51 +366,51 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines):

@pytest.mark.parametrize("kind", ["line", "area"])
def test_line_area_stacked(self, kind):
    """
    Compare stacked and unstacked line/area plots on seeded data.

    All frames are drawn, in order, from a single ``RandomState(42)``.
    """
    rng = np.random.RandomState(42)
    cols = ["w", "x", "y", "z"]

    df = DataFrame(rng.rand(6, 4), columns=cols)
    neg_df = -df
    # each column has either positive or negative value
    sep_df = DataFrame(
        {
            "w": rng.rand(6),
            "x": rng.rand(6),
            "y": -rng.rand(6),
            "z": -rng.rand(6),
        }
    )
    # each column has positive-negative mixed value
    mixed_df = DataFrame(
        rng.randn(6, 4),
        index=list(string.ascii_letters[:6]),
        columns=cols,
    )

    # Same-signed frames: compare every line of the two plots.
    for frame in (df, neg_df):
        unstacked = _check_plot_works(frame.plot, kind=kind, stacked=False)
        stacked = _check_plot_works(frame.plot, kind=kind, stacked=True)
        self._compare_stacked_y_cood(unstacked.lines, stacked.lines)

    # Two positive then two negative columns: compare each pair separately.
    unstacked = _check_plot_works(sep_df.plot, kind=kind, stacked=False)
    stacked = _check_plot_works(sep_df.plot, kind=kind, stacked=True)
    self._compare_stacked_y_cood(unstacked.lines[:2], stacked.lines[:2])
    self._compare_stacked_y_cood(unstacked.lines[2:], stacked.lines[2:])

    _check_plot_works(mixed_df.plot, stacked=False)
    msg = (
        "When stacked is True, each column must be either all positive or "
        "all negative. Column 'w' contains both positive and negative "
        "values"
    )
    with pytest.raises(ValueError, match=msg):
        mixed_df.plot(stacked=True)

    # Use an index with strictly positive values, preventing
    # matplotlib from warning about ignoring xlim
    shifted = df.set_index(df.index + 1)
    _check_plot_works(shifted.plot, kind=kind, logx=True, stacked=True)

def test_line_area_nan_df(self):
values1 = [1, 2, np.nan, 3]
Expand Down Expand Up @@ -1237,20 +1237,18 @@ def test_all_invalid_plot_data(self):
df.plot(kind=kind)

def test_partially_invalid_plot_data(self):
    """
    Plotting an object-dtype frame with some rows set to "a" raises TypeError.

    Both the values and the row mask come from one ``RandomState(42)`` per
    section, so the rows overwritten with ``"a"`` are the same on every run.
    """
    # Draw the mask from the seeded generator too.  Taking it from the
    # global ``np.random`` would make the overwritten rows vary per run.
    np_random = np.random.RandomState(42)
    df = DataFrame(np_random.randn(10, 2), dtype=object)
    df[np_random.rand(df.shape[0]) > 0.5] = "a"
    for kind in plotting.PlotAccessor._common_kinds:
        msg = "no numeric data to plot"
        with pytest.raises(TypeError, match=msg):
            df.plot(kind=kind)

    # area plot doesn't support positive/negative mixed data
    # Restart from the same seed for the second frame.
    np_random = np.random.RandomState(42)
    df = DataFrame(np_random.rand(10, 2), dtype=object)
    df[np_random.rand(df.shape[0]) > 0.5] = "a"
    with pytest.raises(TypeError, match="no numeric data to plot"):
        df.plot(kind="area")

def test_invalid_kind(self):
df = DataFrame(np.random.randn(10, 2))
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/plotting/test_boxplot_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,7 @@ def test_grouped_plot_fignums(self):
n = 10
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
with tm.RNGContext(42):
gender = np.random.choice(["male", "female"], size=n)
gender = np.random.RandomState(42).choice(["male", "female"], size=n)
df = DataFrame({"height": height, "weight": weight, "gender": gender})
gb = df.groupby("gender")

Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/plotting/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ def test_series_groupby_plotting_nominally_works(self):
n = 10
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
with tm.RNGContext(42):
gender = np.random.choice(["male", "female"], size=n)
gender = np.random.RandomState(42).choice(["male", "female"], size=n)

weight.groupby(gender).plot()
tm.close()
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/plotting/test_hist_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,9 @@ def test_hist_df_kwargs(self):

def test_hist_df_with_nonnumerics(self):
# GH 9853
with tm.RNGContext(1):
df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
df = DataFrame(
np.random.RandomState(42).randn(10, 4), columns=["A", "B", "C", "D"]
)
df["E"] = ["x", "y"] * 5
_, ax = self.plt.subplots()
ax = df.plot.hist(bins=5, ax=ax)
Expand Down Expand Up @@ -665,8 +666,7 @@ def test_grouped_hist_legacy2(self):
n = 10
weight = Series(np.random.normal(166, 20, size=n))
height = Series(np.random.normal(60, 10, size=n))
with tm.RNGContext(42):
gender_int = np.random.choice([0, 1], size=n)
gender_int = np.random.RandomState(42).choice([0, 1], size=n)
df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int})
gb = df_int.groupby("gender")
axes = gb.hist()
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/plotting/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,7 @@ def test_scatter_matrix_axis(self, pass_axis):
if pass_axis:
_, ax = self.plt.subplots(3, 3)

with tm.RNGContext(42):
df = DataFrame(np.random.randn(100, 3))
df = DataFrame(np.random.RandomState(42).randn(100, 3))

# we are plotting multiples on a sub-plot
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/util/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,6 @@ def test_datapath(datapath):
assert result == expected


def test_rng_context():
    """Check that nested RNGContext blocks reseed and then restore state."""
    import numpy as np

    first_draw_seed0 = 1.764052345967664
    first_draw_seed1 = 1.6243453636632417

    with tm.RNGContext(0):
        with tm.RNGContext(1):
            # The inner context has reseeded the global generator with 1.
            assert np.random.randn() == first_draw_seed1
        # Leaving the inner context restored the state seeded with 0.
        assert np.random.randn() == first_draw_seed0


def test_external_error_raised():
with tm.external_error_raised(TypeError):
raise TypeError("Should not check this error message, so it will pass")