From db3a9e8648d66a02a900d9f60d8b7892d1f85551 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 1 Aug 2021 14:11:57 -0400 Subject: [PATCH 1/2] REGR: sample modifying weights inplace --- pandas/core/sample.py | 6 +++++- pandas/tests/frame/methods/test_sample.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/pandas/core/sample.py b/pandas/core/sample.py index e4bad22e8e43c..63b8789f3f551 100644 --- a/pandas/core/sample.py +++ b/pandas/core/sample.py @@ -63,7 +63,11 @@ def preprocess_weights(obj: FrameOrSeries, weights, axis: int) -> np.ndarray: if (weights < 0).any(): raise ValueError("weight vector many not include negative values") - weights[np.isnan(weights)] = 0 + missing = np.isnan(weights) + if missing.any(): + # Don't modify weights in place + weights = weights.copy() + weights[missing] = 0 return weights diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index 366722531329a..f65f7e495c53c 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -339,6 +339,24 @@ def test_sample_is_copy(self): with tm.assert_produces_warning(None): df2["d"] = 1 + def test_sample_does_not_modify_weights(self): + # GH-? + result = np.array([np.nan, 1, np.nan]) + expected = result.copy() + ser = Series([1, 2, 3]) + + # Test numpy array weights won't be modified in place + ser.sample(weights=result) + tm.assert_numpy_array_equal(result, expected) + + # Test DataFrame column won't be modified in place + df = DataFrame({"values": [1, 1, 1], "weights": [1, np.nan, np.nan]}) + expected = df["weights"].copy() + + df.sample(frac=1.0, replace=True, weights="weights") + result = df["weights"] + tm.assert_series_equal(result, expected) + def test_sample_ignore_index(self): # GH 38581 df = DataFrame( From 2d06c5651dd046e12e74f5457ae734af0289073a Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 1 Aug 2021 14:14:40 -0400 Subject: [PATCH 2/2] Add issue ref --- pandas/tests/frame/methods/test_sample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index f65f7e495c53c..d5d1f975deefa 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -340,7 +340,7 @@ def test_sample_is_copy(self): df2["d"] = 1 def test_sample_does_not_modify_weights(self): - # GH-? + # GH-42843 result = np.array([np.nan, 1, np.nan]) expected = result.copy() ser = Series([1, 2, 3])