DOC: Fix DataFrame.sample doctests and reformat the docstring (#22937)

Moisan · datapythonista · commit ce1f81f8bcf0 · 2018-10-08T10:01:20.000+05:30
diff --git a/ci/doctests.sh b/ci/doctests.sh
@@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then
     fi
 
     pytest --doctest-modules -v pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs"
+        -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
 
     if [ $? -ne "0" ]; then
         RET=1
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4297,8 +4297,8 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
             Default = 1 if `frac` = None.
         frac : float, optional
             Fraction of axis items to return. Cannot be used with `n`.
-        replace : boolean, optional
-            Sample with or without replacement. Default = False.
+        replace : bool, default False
+            Sample with or without replacement.
         weights : str or ndarray-like, optional
             Default 'None' results in equal probability weighting.
             If passed a Series, will align with target object on index. Index
@@ -4311,7 +4311,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
             being sampled.
             If weights do not sum to 1, they will be normalized to sum to 1.
             Missing values in the weights column will be treated as zero.
-            inf and -inf values not allowed.
+            Infinite values not allowed.
         random_state : int or numpy.random.RandomState, optional
             Seed for the random number generator (if int), or numpy RandomState
             object.
@@ -4321,58 +4321,52 @@ def sample(self, n=None, frac=None, replace=False, weights=None,
 
         Returns
         -------
-        A new object of same type as caller.
+        Series or DataFrame
+            A new object of same type as caller containing `n` items randomly
+            sampled from the caller object.
 
-        Examples
+        See Also
         --------
-        Generate an example ``Series`` and ``DataFrame``:
-
-        >>> s = pd.Series(np.random.randn(50))
-        >>> s.head()
-        0   -0.038497
-        1    1.820773
-        2   -0.972766
-        3   -1.598270
-        4   -1.095526
-        dtype: float64
-        >>> df = pd.DataFrame(np.random.randn(50, 4), columns=list('ABCD'))
-        >>> df.head()
-                  A         B         C         D
-        0  0.016443 -2.318952 -0.566372 -1.028078
-        1 -1.051921  0.438836  0.658280 -0.175797
-        2 -1.243569 -0.364626 -0.215065  0.057736
-        3  1.768216  0.404512 -0.385604 -1.457834
-        4  1.072446 -1.137172  0.314194 -0.046661
-
-        Next extract a random sample from both of these objects...
+        numpy.random.choice: Generates a random sample from a given 1-D numpy
+            array.
 
-        3 random elements from the ``Series``:
-
-        >>> s.sample(n=3)
-        27   -0.994689
-        55   -1.049016
-        67   -0.224565
-        dtype: float64
-
-        And a random 10% of the ``DataFrame`` with replacement:
-
-        >>> df.sample(frac=0.1, replace=True)
-                   A         B         C         D
-        35  1.981780  0.142106  1.817165 -0.290805
-        49 -1.336199 -0.448634 -0.789640  0.217116
-        40  0.823173 -0.078816  1.009536  1.015108
-        15  1.421154 -0.055301 -1.922594 -0.019696
-        6  -0.148339  0.832938  1.787600 -1.383767
-
-        You can use `random state` for reproducibility:
-
-        >>> df.sample(random_state=1)
-        A         B         C         D
-        37 -2.027662  0.103611  0.237496 -0.165867
-        43 -0.259323 -0.583426  1.516140 -0.479118
-        12 -1.686325 -0.579510  0.985195 -0.460286
-        8   1.167946  0.429082  1.215742 -1.636041
-        9   1.197475 -0.864188  1.554031 -1.505264
+        Examples
+        --------
+        >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
+        ...                    'num_wings': [2, 0, 0, 0],
+        ...                    'num_specimen_seen': [10, 2, 1, 8]},
+        ...                   index=['falcon', 'dog', 'spider', 'fish'])
+        >>> df
+                num_legs  num_wings  num_specimen_seen
+        falcon         2          2                 10
+        dog            4          0                  2
+        spider         8          0                  1
+        fish           0          0                  8
+
+        Extract 3 random elements from the ``Series`` ``df['num_legs']``:
+        Note that we use `random_state` to ensure the reproducibility of
+        the examples.
+
+        >>> df['num_legs'].sample(n=3, random_state=1)
+        fish      0
+        spider    8
+        falcon    2
+        Name: num_legs, dtype: int64
+
+        A random 50% sample of the ``DataFrame`` with replacement:
+
+        >>> df.sample(frac=0.5, replace=True, random_state=1)
+              num_legs  num_wings  num_specimen_seen
+        dog          4          0                  2
+        fish         0          0                  8
+
+        Using a DataFrame column as weights. Rows with larger value in the
+        `num_specimen_seen` column are more likely to be sampled.
+
+        >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
+                num_legs  num_wings  num_specimen_seen
+        falcon         2          2                 10
+        fish           0          0                  8
         """
 
         if axis is None: