pandas-dev
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
Lines changed: 15 additions & 41 deletions
diff --git a/pandas/_testing/_random.py b/pandas/_testing/_random.py
Lines changed: 3 additions & 2 deletions
diff --git a/pandas/conftest.py b/pandas/conftest.py
Lines changed: 7 additions & 5 deletions
diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
Lines changed: 12 additions & 8 deletions
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
Lines changed: 4 additions & 4 deletions
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
Lines changed: 1 addition & 1 deletion
@@ -251,6 +251,9 @@ repos:
 
             # os.remove
             |os\.remove
+
+            # Unseeded numpy default_rng
+            |default_rng\(\)
         files: ^pandas/tests/
         types_or: [python, cython, rst]
     -   id: unwanted-patterns-in-ea-tests
 
@@ -391,9 +391,9 @@ def makeNumericIndex(k: int = 10, *, name=None, dtype: Dtype | None) -> Index:
         if is_unsigned_integer_dtype(dtype):
             values += 2 ** (dtype.itemsize * 8 - 1)
     elif dtype.kind == "f":
-        values = np.random.random_sample(k) - np.random.random_sample(1)
+        values = np.random.default_rng(2).random(k) - np.random.default_rng(2).random(1)
         values.sort()
-        values = values * (10 ** np.random.randint(0, 9))
+        values = values * (10 ** np.random.default_rng(2).integers(0, 9))
     else:
         raise NotImplementedError(f"wrong dtype {dtype}")
 
@@ -487,7 +487,7 @@ def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]:
 # make series
 def make_rand_series(name=None, dtype=np.float64) -> Series:
     index = makeStringIndex(_N)
-    data = np.random.randn(_N)
+    data = np.random.default_rng(2).standard_normal(_N)
     with np.errstate(invalid="ignore"):
         data = data.astype(dtype, copy=False)
     return Series(data, index=index, name=name)
@@ -510,21 +510,30 @@ def makeObjectSeries(name=None) -> Series:
 
 def getSeriesData() -> dict[str, Series]:
     index = makeStringIndex(_N)
-    return {c: Series(np.random.randn(_N), index=index) for c in getCols(_K)}
+    return {
+        c: Series(np.random.default_rng(i).standard_normal(_N), index=index)
+        for i, c in enumerate(getCols(_K))
+    }
 
 
 def makeTimeSeries(nper=None, freq: Frequency = "B", name=None) -> Series:
     if nper is None:
         nper = _N
     return Series(
-        np.random.randn(nper), index=makeDateIndex(nper, freq=freq), name=name
+        np.random.default_rng(2).standard_normal(nper),
+        index=makeDateIndex(nper, freq=freq),
+        name=name,
     )
 
 
 def makePeriodSeries(nper=None, name=None) -> Series:
     if nper is None:
         nper = _N
-    return Series(np.random.randn(nper), index=makePeriodIndex(nper), name=name)
+    return Series(
+        np.random.default_rng(2).standard_normal(nper),
+        index=makePeriodIndex(nper),
+        name=name,
+    )
 
 
 def getTimeSeriesData(nper=None, freq: Frequency = "B") -> dict[str, Series]:
@@ -787,40 +796,6 @@ def makeCustomDataframe(
     return DataFrame(data, index, columns, dtype=dtype)
 
 
-def _create_missing_idx(nrows, ncols, density: float, random_state=None):
-    if random_state is None:
-        random_state = np.random
-    else:
-        random_state = np.random.RandomState(random_state)
-
-    # below is cribbed from scipy.sparse
-    size = round((1 - density) * nrows * ncols)
-    # generate a few more to ensure unique values
-    min_rows = 5
-    fac = 1.02
-    extra_size = min(size + min_rows, fac * size)
-
-    def _gen_unique_rand(rng, _extra_size):
-        ind = rng.rand(int(_extra_size))
-        return np.unique(np.floor(ind * nrows * ncols))[:size]
-
-    ind = _gen_unique_rand(random_state, extra_size)
-    while ind.size < size:
-        extra_size *= 1.05
-        ind = _gen_unique_rand(random_state, extra_size)
-
-    j = np.floor(ind * 1.0 / nrows).astype(int)
-    i = (ind - j * nrows).astype(int)
-    return i.tolist(), j.tolist()
-
-
-def makeMissingDataframe(density: float = 0.9, random_state=None) -> DataFrame:
-    df = makeDataFrame()
-    i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
-    df.iloc[i, j] = np.nan
-    return df
-
-
 class SubclassedSeries(Series):
     _metadata = ["testattr", "name"]
 
@@ -1131,7 +1106,6 @@ def shares_memory(left, right) -> bool:
     "makeFloatSeries",
     "makeIntervalIndex",
     "makeIntIndex",
-    "makeMissingDataframe",
     "makeMixedDataFrame",
     "makeMultiIndex",
     "makeNumericIndex",
 
@@ -17,7 +17,8 @@ def rands_array(
     Generate an array of byte strings.
     """
     retval = (
-        np.random.choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace)
+        np.random.default_rng(2)
+        .choice(RANDS_CHARS, size=nchars * np.prod(size), replace=replace)
         .view((np.str_, nchars))
         .reshape(size)
     )
@@ -31,4 +32,4 @@ def rands(nchars) -> str:
     See `rands_array` if you want to create an array of random strings.
 
     """
-    return "".join(np.random.choice(RANDS_CHARS, nchars))
+    return "".join(np.random.default_rng(2).choice(RANDS_CHARS, nchars))
@@ -561,7 +561,9 @@ def multiindex_dataframe_random_data(
     """DataFrame with 2 level MultiIndex with random data"""
     index = lexsorted_two_level_string_multiindex
     return DataFrame(
-        np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp")
+        np.random.default_rng(2).standard_normal((10, 3)),
+        index=index,
+        columns=Index(["A", "B", "C"], name="exp"),
     )
 
 
@@ -614,7 +616,7 @@ def _create_mi_with_dt64tz_level():
     "float32": tm.makeFloatIndex(100, dtype="float32"),
     "float64": tm.makeFloatIndex(100, dtype="float64"),
     "bool-object": tm.makeBoolIndex(10).astype(object),
-    "bool-dtype": Index(np.random.randn(10) < 0),
+    "bool-dtype": Index(np.random.default_rng(2).standard_normal(10) < 0),
     "complex64": tm.makeNumericIndex(100, dtype="float64").astype("complex64"),
     "complex128": tm.makeNumericIndex(100, dtype="float64").astype("complex128"),
     "categorical": tm.makeCategoricalIndex(100),
@@ -744,7 +746,7 @@ def datetime_series() -> Series:
 def _create_series(index):
     """Helper for the _series dict"""
     size = len(index)
-    data = np.random.randn(size)
+    data = np.random.default_rng(2).standard_normal(size)
     return Series(data, index=index, name="a", copy=False)
 
 
@@ -773,7 +775,7 @@ def series_with_multilevel_index() -> Series:
     ]
     tuples = zip(*arrays)
     index = MultiIndex.from_tuples(tuples)
-    data = np.random.randn(8)
+    data = np.random.default_rng(2).standard_normal(8)
     ser = Series(data, index=index)
     ser.iloc[3] = np.NaN
     return ser
@@ -946,7 +948,7 @@ def rand_series_with_duplicate_datetimeindex() -> Series:
         datetime(2000, 1, 5),
     ]
 
-    return Series(np.random.randn(len(dates)), index=dates)
+    return Series(np.random.default_rng(2).standard_normal(len(dates)), index=dates)
 
 
 # ----------------------------------------------------------------
 
@@ -376,7 +376,7 @@ def test_apply_reduce_to_dict():
 
 
 def test_apply_differently_indexed():
-    df = DataFrame(np.random.randn(20, 10))
+    df = DataFrame(np.random.default_rng(2).standard_normal((20, 10)))
 
     result = df.apply(Series.describe, axis=0)
     expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns)
@@ -463,9 +463,9 @@ def test_apply_convert_objects():
                 "shiny",
                 "shiny",
             ],
-            "D": np.random.randn(11),
-            "E": np.random.randn(11),
-            "F": np.random.randn(11),
+            "D": np.random.default_rng(2).standard_normal(11),
+            "E": np.random.default_rng(2).standard_normal(11),
+            "F": np.random.default_rng(2).standard_normal(11),
         }
     )
 
@@ -659,7 +659,7 @@ def test_apply_category_equalness(val):
 def test_infer_row_shape():
     # GH 17437
     # if row shape is changing, infer it
-    df = DataFrame(np.random.rand(10, 2))
+    df = DataFrame(np.random.default_rng(2).random((10, 2)))
     result = df.apply(np.fft.fft, axis=0).shape
     assert result == (10, 2)
 
@@ -816,7 +816,7 @@ def test_with_listlike_columns():
     # GH 17348
     df = DataFrame(
         {
-            "a": Series(np.random.randn(4)),
+            "a": Series(np.random.default_rng(2).standard_normal(4)),
             "b": ["a", "list", "of", "words"],
             "ts": date_range("2016-10-01", periods=4, freq="H"),
         }
@@ -862,7 +862,9 @@ def test_infer_output_shape_columns():
 def test_infer_output_shape_listlike_columns():
     # GH 16353
 
-    df = DataFrame(np.random.randn(6, 3), columns=["A", "B", "C"])
+    df = DataFrame(
+        np.random.default_rng(2).standard_normal((6, 3)), columns=["A", "B", "C"]
+    )
 
     result = df.apply(lambda x: [1, 2, 3], axis=1)
     expected = Series([[1, 2, 3] for t in df.itertuples()])
@@ -911,7 +913,9 @@ def fun(x):
 def test_consistent_coerce_for_shapes(lst):
     # we want column names to NOT be propagated
     # just because the shape matches the input shape
-    df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])
+    df = DataFrame(
+        np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"]
+    )
 
     result = df.apply(lambda x: lst, axis=1)
     expected = Series([lst for t in df.itertuples()])
 
@@ -93,7 +93,7 @@ def test_series_nested_renamer(renamer):
 
 def test_apply_dict_depr():
     tsdf = DataFrame(
-        np.random.randn(10, 3),
+        np.random.default_rng(2).standard_normal((10, 3)),
         columns=["A", "B", "C"],
         index=date_range("1/1/2000", periods=10),
     )
@@ -190,9 +190,9 @@ def test_apply_modify_traceback():
                 "shiny",
                 "shiny",
             ],
-            "D": np.random.randn(11),
-            "E": np.random.randn(11),
-            "F": np.random.randn(11),
+            "D": np.random.default_rng(2).standard_normal(11),
+            "E": np.random.default_rng(2).standard_normal(11),
+            "F": np.random.default_rng(2).standard_normal(11),
         }
     )
 
 
@@ -76,7 +76,7 @@ def f(x):
 
 @pytest.mark.parametrize("convert_dtype", [True, False])
 def test_apply_convert_dtype_deprecated(convert_dtype):
-    ser = Series(np.random.randn(10))
+    ser = Series(np.random.default_rng(2).standard_normal(10))
 
     def func(x):
         return x if x > 0 else np.nan
Original file line number	Diff line number	Diff line change
`@@ -93,7 +93,7 @@ def test_series_nested_renamer(renamer):`
`93`	`93`
`94`	`94`	`def test_apply_dict_depr():`
`95`	`95`	`tsdf = DataFrame(`
`96`		`- np.random.randn(10, 3),`
	`96`	`+ np.random.default_rng(2).standard_normal((10, 3)),`
`97`	`97`	`columns=["A", "B", "C"],`
`98`	`98`	`index=date_range("1/1/2000", periods=10),`
`99`	`99`	`)`
`@@ -190,9 +190,9 @@ def test_apply_modify_traceback():`
`190`	`190`	`"shiny",`
`191`	`191`	`"shiny",`
`192`	`192`	`],`
`193`		`- "D": np.random.randn(11),`
`194`		`- "E": np.random.randn(11),`
`195`		`- "F": np.random.randn(11),`
	`193`	`+ "D": np.random.default_rng(2).standard_normal(11),`
	`194`	`+ "E": np.random.default_rng(2).standard_normal(11),`
	`195`	`+ "F": np.random.default_rng(2).standard_normal(11),`
`196`	`196`	`}`
`197`	`197`	`)`
`198`	`198`