diff --git a/pandas/core/sample.py b/pandas/core/sample.py
index e4bad22e8e43c..63b8789f3f551 100644
--- a/pandas/core/sample.py
+++ b/pandas/core/sample.py
@@ -63,7 +63,11 @@ def preprocess_weights(obj: FrameOrSeries, weights, axis: int) -> np.ndarray:
     if (weights < 0).any():
         raise ValueError("weight vector many not include negative values")
 
-    weights[np.isnan(weights)] = 0
+    missing = np.isnan(weights)
+    if missing.any():
+        # Don't modify weights in place
+        weights = weights.copy()
+        weights[missing] = 0
     return weights
 
 
diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py
index 366722531329a..d5d1f975deefa 100644
--- a/pandas/tests/frame/methods/test_sample.py
+++ b/pandas/tests/frame/methods/test_sample.py
@@ -339,6 +339,24 @@ def test_sample_is_copy(self):
         with tm.assert_produces_warning(None):
             df2["d"] = 1
 
+    def test_sample_does_not_modify_weights(self):
+        # GH-42843
+        result = np.array([np.nan, 1, np.nan])
+        expected = result.copy()
+        ser = Series([1, 2, 3])
+
+        # Test numpy array weights won't be modified in place
+        ser.sample(weights=result)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # Test DataFrame column won't be modified in place
+        df = DataFrame({"values": [1, 1, 1], "weights": [1, np.nan, np.nan]})
+        expected = df["weights"].copy()
+
+        df.sample(frac=1.0, replace=True, weights="weights")
+        result = df["weights"]
+        tm.assert_series_equal(result, expected)
+
     def test_sample_ignore_index(self):
         # GH 38581
         df = DataFrame(