@@ -2604,26 +2604,73 @@ def hist(self, bins=10, **kwds):
2604
2604
2605
2605
def kde (self , bw_method = None , ind = None , ** kwds ):
2606
2606
"""
2607
- Kernel Density Estimate plot
2607
+ Kernel Density Estimate plot using Gaussian kernels.
2608
+
2609
+ In statistics, kernel density estimation (KDE) is a non-parametric way
2610
+ to estimate the probability density function (PDF) of a random
2611
+ variable. This function uses Gaussian kernels and includes automatic
2612
+ bandwidth determination.
2608
2613
2609
2614
Parameters
2610
2615
----------
2611
- bw_method: str, scalar or callable, optional
2612
- The method used to calculate the estimator bandwidth. This can be
2616
+ bw_method : str, scalar or callable, optional
2617
+ The method used to calculate the estimator bandwidth. This can be
2613
2618
'scott', 'silverman', a scalar constant or a callable.
2614
2619
If None (default), 'scott' is used.
2615
2620
See :class:`scipy.stats.gaussian_kde` for more information.
2616
2621
ind : NumPy array or integer, optional
2617
- Evaluation points. If None (default), 1000 equally spaced points
2618
- are used. If `ind` is a NumPy array, the kde is evaluated at the
2619
- points passed. If `ind` is an integer, `ind` number of equally
2620
- spaced points are used.
2621
- `** kwds` : optional
2622
+ Evaluation points for the estimated PDF. If None (default),
2623
+ 1000 equally spaced points are used. If `ind` is a NumPy array, the
2624
+ kde is evaluated at the points passed. If `ind` is an integer,
2625
+ `ind` number of equally spaced points are used.
2626
+ kwds : optional
2622
2627
Keyword arguments to pass on to :py:meth:`pandas.Series.plot`.
2623
2628
2624
2629
Returns
2625
2630
-------
2626
2631
axes : matplotlib.AxesSubplot or np.array of them
2632
+
2633
+ See Also
2634
+ --------
2635
+ :class:`scipy.stats.gaussian_kde` : Representation of a kernel-density
2636
+ estimate using Gaussian kernels. This is the function used
2637
+ internally to estimate the PDF.
2638
+
2639
+ Examples
2640
+ --------
2641
+ Given a ``Series`` of points randomly sampled from an unknown
2642
+ distribution, estimate this distribution using KDE with automatic
2643
+ bandwidth determination and plot the results, evaluating them at
2644
+ 1000 equally spaced points (default):
2645
+
2646
+ .. plot::
2647
+ :context: close-figs
2648
+
2649
+ >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
2650
+ >>> p = s.plot.kde()
2651
+
2652
+
2653
+ A fixed scalar bandwidth can be specified. Using a bandwidth that is
2654
+ too small can lead to overfitting, while one that is too large can
2655
+ result in underfitting:
2656
+
2657
+ .. plot::
2658
+ :context: close-figs
2659
+
2660
+ >>> p = s.plot.kde(bw_method=3)
2661
+
2662
+ .. plot::
2663
+ :context: close-figs
2664
+
2665
+ >>> p = s.plot.kde(bw_method=0.3)
2666
+
2667
+ Finally, the `ind` parameter determines the evaluation points for the
2668
+ plot of the estimated PDF:
2669
+
2670
+ .. plot::
2671
+ :context: close-figs
2672
+
2673
+ >>> p = s.plot.kde(ind=[1, 2, 3, 4, 5])
2627
2674
"""
2628
2675
return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
2629
2676
0 commit comments