-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
amend sample to return copy and align weight axis #10738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -374,7 +374,7 @@ def test_sample(self): | |
|
||
self._compare(o.sample(frac=0.7,random_state=np.random.RandomState(test)), | ||
o.sample(frac=0.7, random_state=np.random.RandomState(test))) | ||
|
||
|
||
# Check for error when random_state argument invalid. | ||
with tm.assertRaises(ValueError): | ||
|
@@ -415,6 +415,10 @@ def test_sample(self): | |
bad_weights = [0.5]*11 | ||
o.sample(n=3, weights=bad_weights) | ||
|
||
with tm.assertRaises(ValueError): | ||
bad_weight_series = Series([0,0,0.2]) | ||
o.sample(n=4, weights=bad_weight_series) | ||
|
||
# Check won't accept negative weights | ||
with tm.assertRaises(ValueError): | ||
bad_weights = [-0.1]*10 | ||
|
@@ -431,6 +435,16 @@ def test_sample(self): | |
weights_with_ninf[0] = -np.inf | ||
o.sample(n=3, weights=weights_with_ninf) | ||
|
||
# All zeros raises errors | ||
zero_weights = [0]*10 | ||
with tm.assertRaises(ValueError): | ||
o.sample(n=3, weights=zero_weights) | ||
|
||
# All missing weights | ||
nan_weights = [np.nan]*10 | ||
with tm.assertRaises(ValueError): | ||
o.sample(n=3, weights=nan_weights) | ||
|
||
|
||
# A few DataFrame tests with degenerate weights. | ||
easy_weight_list = [0]*10 | ||
|
@@ -496,7 +510,6 @@ def test_sample(self): | |
assert_frame_equal(df.sample(n=1, axis='index', weights=weight), | ||
df.iloc[5:6]) | ||
|
||
|
||
# Check out of range axis values | ||
with tm.assertRaises(ValueError): | ||
df.sample(n=1, axis=2) | ||
|
@@ -527,6 +540,26 @@ def test_sample(self): | |
assert_panel_equal(p.sample(n=3, random_state=42), p.sample(n=3, axis=1, random_state=42)) | ||
assert_frame_equal(df.sample(n=3, random_state=42), df.sample(n=3, axis=0, random_state=42)) | ||
|
||
# Test that function aligns weights with frame | ||
df = DataFrame({'col1':[5,6,7], 'col2':['a','b','c'], }, index = [9,5,3]) | ||
s = Series([1,0,0], index=[3,5,9]) | ||
assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also test the case where you have a missing indexer (so it should be NaN-filled), e.g. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added. |
||
|
||
# Weights have index values to be dropped because not in | ||
# sampled DataFrame | ||
s2 = Series([0.001,0,10000], index=[3,5,10]) | ||
assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) | ||
|
||
# Weights have empty values to be filled with zeros | ||
s3 = Series([0.01,0], index=[3,5]) | ||
assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) | ||
|
||
# No overlap in weight and sampled DataFrame indices | ||
s4 = Series([1,0], index=[1,2]) | ||
with tm.assertRaises(ValueError): | ||
df.sample(1, weights=s4) | ||
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jreback You had a comment here that I think I addressed but am not sure I quite understood. You said: "can you add a test where none of the weights are there (e.g. weights.sum == 0 so you raise), and also one that combines the passed weights and does the computation." I added simpler tests for weights summing to zero above (around line 438). Does that address your concern? If not, could you please expand on what you had in mind? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's OK. |
||
def test_size_compat(self): | ||
# GH8846 | ||
# size property should be defined | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
New simple tests for all zero or all np.nan weights.