CLN: Use np.random.RandomState instead of tm.RNGContext (pandas-dev#50915)

mroeschke · web-flow · commit d0221cb44498 · 2023-01-30T12:04:47.000-08:00
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -100,7 +100,6 @@
     get_obj,
 )
 from pandas._testing.contexts import (
-    RNGContext,
     decompress_file,
     ensure_clean,
     ensure_safe_environment_variables,
@@ -1135,7 +1134,6 @@ def shares_memory(left, right) -> bool:
     "raise_assert_detail",
     "rands",
     "reset_display_options",
-    "RNGContext",
     "raises_chained_assignment_error",
     "round_trip_localpath",
     "round_trip_pathlib",
diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
@@ -4,16 +4,13 @@
 import os
 from pathlib import Path
 import tempfile
-from types import TracebackType
 from typing import (
     IO,
     Any,
     Generator,
 )
 import uuid
 
-import numpy as np
-
 from pandas.compat import PYPY
 from pandas.errors import ChainedAssignmentError
 
@@ -198,40 +195,6 @@ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
         set_option("compute.use_numexpr", olduse)
 
 
-class RNGContext:
-    """
-    Context manager to set the numpy random number generator speed. Returns
-    to the original value upon exiting the context manager.
-
-    Parameters
-    ----------
-    seed : int
-        Seed for numpy.random.seed
-
-    Examples
-    --------
-    with RNGContext(42):
-        np.random.randn()
-    """
-
-    def __init__(self, seed) -> None:
-        self.seed = seed
-
-    def __enter__(self) -> None:
-
-        self.start_state = np.random.get_state()
-        np.random.seed(self.seed)
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc_value: BaseException | None,
-        traceback: TracebackType | None,
-    ) -> None:
-
-        np.random.set_state(self.start_state)
-
-
 def raises_chained_assignment_error():
 
     if PYPY:
diff --git a/pandas/tests/plotting/conftest.py b/pandas/tests/plotting/conftest.py
@@ -5,31 +5,30 @@
     DataFrame,
     to_datetime,
 )
-import pandas._testing as tm
 
 
 @pytest.fixture
 def hist_df():
     n = 100
-    with tm.RNGContext(42):
-        gender = np.random.choice(["Male", "Female"], size=n)
-        classroom = np.random.choice(["A", "B", "C"], size=n)
+    np_random = np.random.RandomState(42)  # one seeded stream for every column
+    gender = np_random.choice(["Male", "Female"], size=n)
+    classroom = np_random.choice(["A", "B", "C"], size=n)
 
-        hist_df = DataFrame(
-            {
-                "gender": gender,
-                "classroom": classroom,
-                "height": np.random.normal(66, 4, size=n),
-                "weight": np.random.normal(161, 32, size=n),
-                "category": np.random.randint(4, size=n),
-                "datetime": to_datetime(
-                    np.random.randint(
-                        812419200000000000,
-                        819331200000000000,
-                        size=n,
-                        dtype=np.int64,
-                    )
-                ),
-            }
-        )
+    hist_df = DataFrame(
+        {
+            "gender": gender,
+            "classroom": classroom,
+            "height": np_random.normal(66, 4, size=n),
+            "weight": np_random.normal(161, 32, size=n),
+            "category": np_random.randint(4, size=n),
+            "datetime": to_datetime(
+                np_random.randint(
+                    812419200000000000,
+                    819331200000000000,
+                    size=n,
+                    dtype=np.int64,
+                )
+            ),
+        }
+    )
     return hist_df
diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py
@@ -366,51 +366,51 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
 
     @pytest.mark.parametrize("kind", ["line", "area"])
     def test_line_area_stacked(self, kind):
-        with tm.RNGContext(42):
-            df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"])
-            neg_df = -df
-            # each column has either positive or negative value
-            sep_df = DataFrame(
-                {
-                    "w": np.random.rand(6),
-                    "x": np.random.rand(6),
-                    "y": -np.random.rand(6),
-                    "z": -np.random.rand(6),
-                }
-            )
-            # each column has positive-negative mixed value
-            mixed_df = DataFrame(
-                np.random.randn(6, 4),
-                index=list(string.ascii_letters[:6]),
-                columns=["w", "x", "y", "z"],
-            )
+        np_random = np.random.RandomState(42)  # seeded source for all frames below
+        df = DataFrame(np_random.rand(6, 4), columns=["w", "x", "y", "z"])
+        neg_df = -df
+        # each column has either positive or negative value
+        sep_df = DataFrame(
+            {
+                "w": np_random.rand(6),
+                "x": np_random.rand(6),
+                "y": -np_random.rand(6),
+                "z": -np_random.rand(6),
+            }
+        )
+        # each column has positive-negative mixed value
+        mixed_df = DataFrame(
+            np_random.randn(6, 4),
+            index=list(string.ascii_letters[:6]),
+            columns=["w", "x", "y", "z"],
+        )
 
-            ax1 = _check_plot_works(df.plot, kind=kind, stacked=False)
-            ax2 = _check_plot_works(df.plot, kind=kind, stacked=True)
-            self._compare_stacked_y_cood(ax1.lines, ax2.lines)
+        ax1 = _check_plot_works(df.plot, kind=kind, stacked=False)
+        ax2 = _check_plot_works(df.plot, kind=kind, stacked=True)
+        self._compare_stacked_y_cood(ax1.lines, ax2.lines)
 
-            ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False)
-            ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True)
-            self._compare_stacked_y_cood(ax1.lines, ax2.lines)
+        ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False)
+        ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True)
+        self._compare_stacked_y_cood(ax1.lines, ax2.lines)
 
-            ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False)
-            ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True)
-            self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2])
-            self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:])
+        ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False)
+        ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True)
+        self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2])
+        self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:])
 
-            _check_plot_works(mixed_df.plot, stacked=False)
-            msg = (
-                "When stacked is True, each column must be either all positive or "
-                "all negative. Column 'w' contains both positive and negative "
-                "values"
-            )
-            with pytest.raises(ValueError, match=msg):
-                mixed_df.plot(stacked=True)
+        _check_plot_works(mixed_df.plot, stacked=False)
+        msg = (
+            "When stacked is True, each column must be either all positive or "
+            "all negative. Column 'w' contains both positive and negative "
+            "values"
+        )
+        with pytest.raises(ValueError, match=msg):
+            mixed_df.plot(stacked=True)
 
-            # Use an index with strictly positive values, preventing
-            #  matplotlib from warning about ignoring xlim
-            df2 = df.set_index(df.index + 1)
-            _check_plot_works(df2.plot, kind=kind, logx=True, stacked=True)
+        # Use an index with strictly positive values, preventing
+        #  matplotlib from warning about ignoring xlim
+        df2 = df.set_index(df.index + 1)
+        _check_plot_works(df2.plot, kind=kind, logx=True, stacked=True)
 
     def test_line_area_nan_df(self):
         values1 = [1, 2, np.nan, 3]
@@ -1237,20 +1237,22 @@ def test_all_invalid_plot_data(self):
                 df.plot(kind=kind)
 
     def test_partially_invalid_plot_data(self):
-        with tm.RNGContext(42):
-            df = DataFrame(np.random.randn(10, 2), dtype=object)
-            df[np.random.rand(df.shape[0]) > 0.5] = "a"
-            for kind in plotting.PlotAccessor._common_kinds:
-                msg = "no numeric data to plot"
-                with pytest.raises(TypeError, match=msg):
-                    df.plot(kind=kind)
-
-        with tm.RNGContext(42):
-            # area plot doesn't support positive/negative mixed data
-            df = DataFrame(np.random.rand(10, 2), dtype=object)
-            df[np.random.rand(df.shape[0]) > 0.5] = "a"
-            with pytest.raises(TypeError, match="no numeric data to plot"):
-                df.plot(kind="area")
+        # Values and row mask share one seeded generator, so the rows that get
+        # overwritten with "a" are identical on every run.
+        np_random = np.random.RandomState(42)
+        df = DataFrame(np_random.randn(10, 2), dtype=object)
+        df[np_random.rand(df.shape[0]) > 0.5] = "a"
+        for kind in plotting.PlotAccessor._common_kinds:
+            msg = "no numeric data to plot"
+            with pytest.raises(TypeError, match=msg):
+                df.plot(kind=kind)
+
+        # area plot doesn't support positive/negative mixed data
+        # Fresh generator with the same seed for this second frame and its mask.
+        np_random = np.random.RandomState(42)
+        df = DataFrame(np_random.rand(10, 2), dtype=object)
+        df[np_random.rand(df.shape[0]) > 0.5] = "a"
+        with pytest.raises(TypeError, match="no numeric data to plot"):
+            df.plot(kind="area")
 
     def test_invalid_kind(self):
         df = DataFrame(np.random.randn(10, 2))
diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py
@@ -362,8 +362,7 @@ def test_grouped_plot_fignums(self):
         n = 10
         weight = Series(np.random.normal(166, 20, size=n))
         height = Series(np.random.normal(60, 10, size=n))
-        with tm.RNGContext(42):
-            gender = np.random.choice(["male", "female"], size=n)
+        gender = np.random.RandomState(42).choice(["male", "female"], size=n)
         df = DataFrame({"height": height, "weight": weight, "gender": gender})
         gb = df.groupby("gender")
 
diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py
@@ -21,8 +21,7 @@ def test_series_groupby_plotting_nominally_works(self):
         n = 10
         weight = Series(np.random.normal(166, 20, size=n))
         height = Series(np.random.normal(60, 10, size=n))
-        with tm.RNGContext(42):
-            gender = np.random.choice(["male", "female"], size=n)
+        gender = np.random.RandomState(42).choice(["male", "female"], size=n)
 
         weight.groupby(gender).plot()
         tm.close()
diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py
@@ -510,8 +510,9 @@ def test_hist_df_kwargs(self):
 
     def test_hist_df_with_nonnumerics(self):
         # GH 9853
-        with tm.RNGContext(1):
-            df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
+        df = DataFrame(
+            np.random.RandomState(42).randn(10, 4), columns=["A", "B", "C", "D"]
+        )
         df["E"] = ["x", "y"] * 5
         _, ax = self.plt.subplots()
         ax = df.plot.hist(bins=5, ax=ax)
@@ -665,8 +666,7 @@ def test_grouped_hist_legacy2(self):
         n = 10
         weight = Series(np.random.normal(166, 20, size=n))
         height = Series(np.random.normal(60, 10, size=n))
-        with tm.RNGContext(42):
-            gender_int = np.random.choice([0, 1], size=n)
+        gender_int = np.random.RandomState(42).choice([0, 1], size=n)
         df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int})
         gb = df_int.groupby("gender")
         axes = gb.hist()
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
@@ -102,8 +102,7 @@ def test_scatter_matrix_axis(self, pass_axis):
         if pass_axis:
             _, ax = self.plt.subplots(3, 3)
 
-        with tm.RNGContext(42):
-            df = DataFrame(np.random.randn(100, 3))
+        df = DataFrame(np.random.RandomState(42).randn(100, 3))
 
         # we are plotting multiples on a sub-plot
         with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py
@@ -58,18 +58,6 @@ def test_datapath(datapath):
     assert result == expected
 
 
-def test_rng_context():
-    import numpy as np
-
-    expected0 = 1.764052345967664
-    expected1 = 1.6243453636632417
-
-    with tm.RNGContext(0):
-        with tm.RNGContext(1):
-            assert np.random.randn() == expected1
-        assert np.random.randn() == expected0
-
-
 def test_external_error_raised():
     with tm.external_error_raised(TypeError):
         raise TypeError("Should not check this error message, so it will pass")