DOC: update the pandas.DataFrame.plot.density docstring (pandas-dev#20236)

jonas-schulze · TomAugspurger · commit 0780193b8f99 · 2018-03-11T16:37:37.000-05:00
* DOC: update the pandas.DataFrame.plot.kde and pandas.Series.plot.kde docstrings

Unfortunately, I was not able to compute a kernel density estimate of a
two-dimensional random variable. Hence, the DataFrame example is more of
an analysis of several independent data series.

* DOC: extract similarities of kde docstrings

The `DataFrame.plot.kde` and `Series.plot.kde` methods now share a common
docstring template, into which the type-specific parts are substituted.
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
@@ -1380,6 +1380,50 @@ def orientation(self):
             return 'vertical'
 
 
+_kde_docstring = """
+        Generate Kernel Density Estimate plot using Gaussian kernels.
+
+        In statistics, `kernel density estimation`_ (KDE) is a non-parametric
+        way to estimate the probability density function (PDF) of a random
+        variable. This function uses Gaussian kernels and includes automatic
+        bandwidth determination.
+
+        .. _kernel density estimation:
+            https://en.wikipedia.org/wiki/Kernel_density_estimation
+
+        Parameters
+        ----------
+        bw_method : str, scalar or callable, optional
+            The method used to calculate the estimator bandwidth. This can be
+            'scott', 'silverman', a scalar constant or a callable.
+            If None (default), 'scott' is used.
+            See :class:`scipy.stats.gaussian_kde` for more information.
+        ind : NumPy array or integer, optional
+            Evaluation points for the estimated PDF. If None (default),
+            1000 equally spaced points are used. If `ind` is a NumPy array, the
+            KDE is evaluated at the points passed. If `ind` is an integer,
+            `ind` number of equally spaced points are used.
+        **kwds : optional
+            Additional keyword arguments are documented in
+            :meth:`pandas.%(this-datatype)s.plot`.
+
+        Returns
+        -------
+        axes : matplotlib.AxesSubplot or np.array of them
+
+        See Also
+        --------
+        scipy.stats.gaussian_kde : Representation of a kernel-density
+            estimate using Gaussian kernels. This is the function used
+            internally to estimate the PDF.
+        %(sibling-datatype)s.plot.kde : Generate a KDE plot for a
+            %(sibling-datatype)s.
+
+        Examples
+        --------
+        %(examples)s
+        """
+
 class KdePlot(HistPlot):
     _kind = 'kde'
     orientation = 'vertical'
@@ -2616,45 +2660,12 @@ def hist(self, bins=10, **kwds):
         """
         return self(kind='hist', bins=bins, **kwds)
 
-    def kde(self, bw_method=None, ind=None, **kwds):
-        """
-        Kernel Density Estimate plot using Gaussian kernels.
-
-        In statistics, kernel density estimation (KDE) is a non-parametric way
-        to estimate the probability density function (PDF) of a random
-        variable. This function uses Gaussian kernels and includes automatic
-        bandwith determination.
-
-        Parameters
-        ----------
-        bw_method : str, scalar or callable, optional
-            The method used to calculate the estimator bandwidth. This can be
-            'scott', 'silverman', a scalar constant or a callable.
-            If None (default), 'scott' is used.
-            See :class:`scipy.stats.gaussian_kde` for more information.
-        ind : NumPy array or integer, optional
-            Evaluation points for the estimated PDF. If None (default),
-            1000 equally spaced points are used. If `ind` is a NumPy array, the
-            kde is evaluated at the points passed. If `ind` is an integer,
-            `ind` number of equally spaced points are used.
-        kwds : optional
-            Additional keyword arguments are documented in
-            :meth:`pandas.Series.plot`.
-
-        Returns
-        -------
-        axes : matplotlib.AxesSubplot or np.array of them
-
-        See also
-        --------
-        scipy.stats.gaussian_kde : Representation of a kernel-density
-            estimate using Gaussian kernels. This is the function used
-            internally to estimate the PDF.
-
-        Examples
-        --------
+    @Appender(_kde_docstring % {
+        'this-datatype': 'Series',
+        'sibling-datatype': 'DataFrame',
+        'examples': """
         Given a Series of points randomly sampled from an unknown
-        distribution, estimate this distribution using KDE with automatic
+        distribution, estimate its PDF using KDE with automatic
         bandwidth determination and plot the results, evaluating them at
         1000 equally spaced points (default):
 
@@ -2664,10 +2675,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
             >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
             >>> ax = s.plot.kde()
 
-
-        An scalar fixed bandwidth can be specified. Using a too small bandwidth
-        can lead to overfitting, while a too large bandwidth can result in
-        underfitting:
+        A scalar bandwidth can be specified. Using a small bandwidth value can
+        lead to overfitting, while using a large bandwidth value may result
+        in underfitting:
 
         .. plot::
             :context: close-figs
@@ -2686,7 +2696,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
             :context: close-figs
 
             >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
-        """
+        """.strip()
+    })
+    def kde(self, bw_method=None, ind=None, **kwds):
         return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)
 
     density = kde
@@ -2849,30 +2861,48 @@ def hist(self, by=None, bins=10, **kwds):
         """
         return self(kind='hist', by=by, bins=bins, **kwds)
 
-    def kde(self, bw_method=None, ind=None, **kwds):
-        """
-        Kernel Density Estimate plot
+    @Appender(_kde_docstring % {
+        'this-datatype': 'DataFrame',
+        'sibling-datatype': 'Series',
+        'examples': """
+        Given several Series of points randomly sampled from unknown
+        distributions, estimate their PDFs using KDE with automatic
+        bandwidth determination and plot the results, evaluating them at
+        1000 equally spaced points (default):
 
-        Parameters
-        ----------
-        bw_method: str, scalar or callable, optional
-            The method used to calculate the estimator bandwidth.  This can be
-            'scott', 'silverman', a scalar constant or a callable.
-            If None (default), 'scott' is used.
-            See :class:`scipy.stats.gaussian_kde` for more information.
-        ind : NumPy array or integer, optional
-            Evaluation points. If None (default), 1000 equally spaced points
-            are used. If `ind` is a NumPy array, the kde is evaluated at the
-            points passed. If `ind` is an integer, `ind` number of equally
-            spaced points are used.
-        `**kwds` : optional
-            Additional keyword arguments are documented in
-            :meth:`pandas.DataFrame.plot`.
+        .. plot::
+            :context: close-figs
 
-        Returns
-        -------
-        axes : matplotlib.AxesSubplot or np.array of them
-        """
+            >>> df = pd.DataFrame({
+            ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
+            ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
+            ... })
+            >>> ax = df.plot.kde()
+
+        A scalar bandwidth can be specified. Using a small bandwidth value can
+        lead to overfitting, while using a large bandwidth value may result
+        in underfitting:
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.kde(bw_method=0.3)
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.kde(bw_method=3)
+
+        Finally, the `ind` parameter determines the evaluation points for the
+        plot of the estimated PDF:
+
+        .. plot::
+            :context: close-figs
+
+            >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
+        """.strip()
+    })
+    def kde(self, bw_method=None, ind=None, **kwds):
         return self(kind='kde', bw_method=bw_method, ind=ind, **kwds)
 
     density = kde