Skip to content

Commit ce1f81f

Browse files
Moisan and datapythonista
authored and committed
DOC: Fix DataFrame.sample doctests and reformat the docstring (#22937)
1 parent b0a2667 commit ce1f81f

File tree

2 files changed

+47
-53
lines changed

2 files changed

+47
-53
lines changed

ci/doctests.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then
3535
fi
3636

3737
pytest --doctest-modules -v pandas/core/generic.py \
38-
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs"
38+
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
3939

4040
if [ $? -ne "0" ]; then
4141
RET=1

pandas/core/generic.py

+46-52
Original file line number | Diff line number | Diff line change
@@ -4297,8 +4297,8 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
42974297
Default = 1 if `frac` = None.
42984298
frac : float, optional
42994299
Fraction of axis items to return. Cannot be used with `n`.
4300-
replace : boolean, optional
4301-
Sample with or without replacement. Default = False.
4300+
replace : bool, default False
4301+
Sample with or without replacement.
43024302
weights : str or ndarray-like, optional
43034303
Default 'None' results in equal probability weighting.
43044304
If passed a Series, will align with target object on index. Index
@@ -4311,7 +4311,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
43114311
being sampled.
43124312
If weights do not sum to 1, they will be normalized to sum to 1.
43134313
Missing values in the weights column will be treated as zero.
4314-
inf and -inf values not allowed.
4314+
Infinite values not allowed.
43154315
random_state : int or numpy.random.RandomState, optional
43164316
Seed for the random number generator (if int), or numpy RandomState
43174317
object.
@@ -4321,58 +4321,52 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
43214321
43224322
Returns
43234323
-------
4324-
A new object of same type as caller.
4324+
Series or DataFrame
4325+
A new object of same type as caller containing `n` items randomly
4326+
sampled from the caller object.
43254327
4326-
Examples
4328+
See Also
43274329
--------
4328-
Generate an example ``Series`` and ``DataFrame``:
4329-
4330-
>>> s = pd.Series(np.random.randn(50))
4331-
>>> s.head()
4332-
0 -0.038497
4333-
1 1.820773
4334-
2 -0.972766
4335-
3 -1.598270
4336-
4 -1.095526
4337-
dtype: float64
4338-
>>> df = pd.DataFrame(np.random.randn(50, 4), columns=list('ABCD'))
4339-
>>> df.head()
4340-
A B C D
4341-
0 0.016443 -2.318952 -0.566372 -1.028078
4342-
1 -1.051921 0.438836 0.658280 -0.175797
4343-
2 -1.243569 -0.364626 -0.215065 0.057736
4344-
3 1.768216 0.404512 -0.385604 -1.457834
4345-
4 1.072446 -1.137172 0.314194 -0.046661
4346-
4347-
Next extract a random sample from both of these objects...
4330+
numpy.random.choice: Generates a random sample from a given 1-D numpy
4331+
array.
43484332
4349-
3 random elements from the ``Series``:
4350-
4351-
>>> s.sample(n=3)
4352-
27 -0.994689
4353-
55 -1.049016
4354-
67 -0.224565
4355-
dtype: float64
4356-
4357-
And a random 10% of the ``DataFrame`` with replacement:
4358-
4359-
>>> df.sample(frac=0.1, replace=True)
4360-
A B C D
4361-
35 1.981780 0.142106 1.817165 -0.290805
4362-
49 -1.336199 -0.448634 -0.789640 0.217116
4363-
40 0.823173 -0.078816 1.009536 1.015108
4364-
15 1.421154 -0.055301 -1.922594 -0.019696
4365-
6 -0.148339 0.832938 1.787600 -1.383767
4366-
4367-
You can use `random state` for reproducibility:
4368-
4369-
>>> df.sample(random_state=1)
4370-
A B C D
4371-
37 -2.027662 0.103611 0.237496 -0.165867
4372-
43 -0.259323 -0.583426 1.516140 -0.479118
4373-
12 -1.686325 -0.579510 0.985195 -0.460286
4374-
8 1.167946 0.429082 1.215742 -1.636041
4375-
9 1.197475 -0.864188 1.554031 -1.505264
4333+
Examples
4334+
--------
4335+
>>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
4336+
... 'num_wings': [2, 0, 0, 0],
4337+
... 'num_specimen_seen': [10, 2, 1, 8]},
4338+
... index=['falcon', 'dog', 'spider', 'fish'])
4339+
>>> df
4340+
num_legs num_wings num_specimen_seen
4341+
falcon 2 2 10
4342+
dog 4 0 2
4343+
spider 8 0 1
4344+
fish 0 0 8
4345+
4346+
Extract 3 random elements from the ``Series`` ``df['num_legs']``:
4347+
Note that we use `random_state` to ensure the reproducibility of
4348+
the examples.
4349+
4350+
>>> df['num_legs'].sample(n=3, random_state=1)
4351+
fish 0
4352+
spider 8
4353+
falcon 2
4354+
Name: num_legs, dtype: int64
4355+
4356+
A random 50% sample of the ``DataFrame`` with replacement:
4357+
4358+
>>> df.sample(frac=0.5, replace=True, random_state=1)
4359+
num_legs num_wings num_specimen_seen
4360+
dog 4 0 2
4361+
fish 0 0 8
4362+
4363+
Using a DataFrame column as weights. Rows with larger value in the
4364+
`num_specimen_seen` column are more likely to be sampled.
4365+
4366+
>>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
4367+
num_legs num_wings num_specimen_seen
4368+
falcon 2 2 10
4369+
fish 0 0 8
43764370
"""
43774371

43784372
if axis is None:

0 commit comments

Comments
 (0)