@@ -4297,8 +4297,8 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
4297
4297
Default = 1 if `frac` = None.
4298
4298
frac : float, optional
4299
4299
Fraction of axis items to return. Cannot be used with `n`.
4300
- replace : boolean, optional
4301
- Sample with or without replacement. Default = False.
4300
+ replace : bool, default False
4301
+ Sample with or without replacement.
4302
4302
weights : str or ndarray-like, optional
4303
4303
Default 'None' results in equal probability weighting.
4304
4304
If passed a Series, will align with target object on index. Index
@@ -4311,7 +4311,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
4311
4311
being sampled.
4312
4312
If weights do not sum to 1, they will be normalized to sum to 1.
4313
4313
Missing values in the weights column will be treated as zero.
4314
- inf and -inf values not allowed.
4314
+ Infinite values are not allowed.
4315
4315
random_state : int or numpy.random.RandomState, optional
4316
4316
Seed for the random number generator (if int), or numpy RandomState
4317
4317
object.
@@ -4321,58 +4321,52 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
4321
4321
4322
4322
Returns
4323
4323
-------
4324
- A new object of same type as caller.
4324
+ Series or DataFrame
4325
+ A new object of same type as caller containing `n` items randomly
4326
+ sampled from the caller object.
4325
4327
4326
- Examples
4328
+ See Also
4327
4329
--------
4328
- Generate an example ``Series`` and ``DataFrame``:
4329
-
4330
- >>> s = pd.Series(np.random.randn(50))
4331
- >>> s.head()
4332
- 0 -0.038497
4333
- 1 1.820773
4334
- 2 -0.972766
4335
- 3 -1.598270
4336
- 4 -1.095526
4337
- dtype: float64
4338
- >>> df = pd.DataFrame(np.random.randn(50, 4), columns=list('ABCD'))
4339
- >>> df.head()
4340
- A B C D
4341
- 0 0.016443 -2.318952 -0.566372 -1.028078
4342
- 1 -1.051921 0.438836 0.658280 -0.175797
4343
- 2 -1.243569 -0.364626 -0.215065 0.057736
4344
- 3 1.768216 0.404512 -0.385604 -1.457834
4345
- 4 1.072446 -1.137172 0.314194 -0.046661
4346
-
4347
- Next extract a random sample from both of these objects...
4330
+ numpy.random.choice: Generates a random sample from a given 1-D numpy
4331
+ array.
4348
4332
4349
- 3 random elements from the ``Series``:
4350
-
4351
- >>> s.sample(n=3)
4352
- 27 -0.994689
4353
- 55 -1.049016
4354
- 67 -0.224565
4355
- dtype: float64
4356
-
4357
- And a random 10% of the ``DataFrame`` with replacement:
4358
-
4359
- >>> df.sample(frac=0.1, replace=True)
4360
- A B C D
4361
- 35 1.981780 0.142106 1.817165 -0.290805
4362
- 49 -1.336199 -0.448634 -0.789640 0.217116
4363
- 40 0.823173 -0.078816 1.009536 1.015108
4364
- 15 1.421154 -0.055301 -1.922594 -0.019696
4365
- 6 -0.148339 0.832938 1.787600 -1.383767
4366
-
4367
- You can use `random state` for reproducibility:
4368
-
4369
- >>> df.sample(random_state=1)
4370
- A B C D
4371
- 37 -2.027662 0.103611 0.237496 -0.165867
4372
- 43 -0.259323 -0.583426 1.516140 -0.479118
4373
- 12 -1.686325 -0.579510 0.985195 -0.460286
4374
- 8 1.167946 0.429082 1.215742 -1.636041
4375
- 9 1.197475 -0.864188 1.554031 -1.505264
4333
+ Examples
4334
+ --------
4335
+ >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
4336
+ ... 'num_wings': [2, 0, 0, 0],
4337
+ ... 'num_specimen_seen': [10, 2, 1, 8]},
4338
+ ... index=['falcon', 'dog', 'spider', 'fish'])
4339
+ >>> df
4340
+ num_legs num_wings num_specimen_seen
4341
+ falcon 2 2 10
4342
+ dog 4 0 2
4343
+ spider 8 0 1
4344
+ fish 0 0 8
4345
+
4346
+ Extract 3 random elements from the ``Series`` ``df['num_legs']``.
4347
+ Note that we use `random_state` to ensure the reproducibility of
4348
+ the examples:
4349
+
4350
+ >>> df['num_legs'].sample(n=3, random_state=1)
4351
+ fish 0
4352
+ spider 8
4353
+ falcon 2
4354
+ Name: num_legs, dtype: int64
4355
+
4356
+ A random 50% sample of the ``DataFrame`` with replacement:
4357
+
4358
+ >>> df.sample(frac=0.5, replace=True, random_state=1)
4359
+ num_legs num_wings num_specimen_seen
4360
+ dog 4 0 2
4361
+ fish 0 0 8
4362
+
4363
+ Using a DataFrame column as weights. Rows with larger values in the
4364
+ `num_specimen_seen` column are more likely to be sampled.
4365
+
4366
+ >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
4367
+ num_legs num_wings num_specimen_seen
4368
+ falcon 2 2 10
4369
+ fish 0 0 8
4376
4370
"""
4377
4371
4378
4372
if axis is None :
0 commit comments