@@ -2791,26 +2791,77 @@ def hist(self, by=None, bins=10, **kwds):
2791
2791
2792
2792
def kde (self , bw_method = None , ind = None , ** kwds ):
2793
2793
"""
2794
- Kernel Density Estimate plot
2794
+ Generate Kernel Density Estimate plot using Gaussian kernels.
2795
+
2796
+ In statistics, kernel density estimation (KDE) is a non-parametric way
2797
+ to estimate the probability density function (PDF) of a random
2798
+ variable. This function uses Gaussian kernels and includes automatic
2799
+ bandwidth determination.
2795
2800
2796
2801
Parameters
2797
2802
----------
2798
- bw_method: str, scalar or callable, optional
2799
- The method used to calculate the estimator bandwidth. This can be
2803
+ bw_method : str, scalar or callable, optional
2804
+ The method used to calculate the estimator bandwidth. This can be
2800
2805
'scott', 'silverman', a scalar constant or a callable.
2801
2806
If None (default), 'scott' is used.
2802
2807
See :class:`scipy.stats.gaussian_kde` for more information.
2803
2808
ind : NumPy array or integer, optional
2804
- Evaluation points. If None (default), 1000 equally spaced points
2805
- are used. If `ind` is a NumPy array, the kde is evaluated at the
2806
- points passed. If `ind` is an integer, `ind` number of equally
2807
- spaced points are used.
2808
- `**kwds` : optional
2809
- Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`.
2809
+ Evaluation points for the estimated PDF. If None (default),
2810
+ 1000 equally spaced points are used. If `ind` is a NumPy array, the
2811
+ kde is evaluated at the points passed. If `ind` is an integer,
2812
+ `ind` number of equally spaced points are used.
2813
+ **kwds : optional
2814
+ Additional keyword arguments are documented in
2815
+ :meth:`pandas.DataFrame.plot`.
2810
2816
2811
2817
Returns
2812
2818
-------
2813
2819
axes : matplotlib.AxesSubplot or np.array of them
2820
+
2821
+ See Also
2822
+ --------
2823
+ scipy.stats.gaussian_kde : Representation of a kernel-density
2824
+ estimate using Gaussian kernels. This is the function used
2825
+ internally to estimate the PDF.
2826
+ :meth:`pandas.Series.plot.kde` : Generate a KDE plot for a Series.
2827
+
2828
+ Examples
2829
+ --------
2830
+ Given several Series of points randomly sampled from unknown
2831
+ distributions, estimate their distribution using KDE with automatic
2832
+ bandwidth determination and plot the results, evaluating them at
2833
+ 1000 equally spaced points (default):
2834
+
2835
+ .. plot::
2836
+ :context: close-figs
2837
+
2838
+ >>> df = pd.DataFrame({
2839
+ ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
2840
+ ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
2841
+ ... })
2842
+ >>> ax = df.plot.kde()
2843
+
2844
+ A scalar bandwidth can be specified. Using a small bandwidth value can
2845
+ lead to overfitting, while using a large bandwidth value can result
2846
+ in underfitting:
2847
+
2848
+ .. plot::
2849
+ :context: close-figs
2850
+
2851
+ >>> ax = df.plot.kde(bw_method=0.3)
2852
+
2853
+ .. plot::
2854
+ :context: close-figs
2855
+
2856
+ >>> ax = df.plot.kde(bw_method=3)
2857
+
2858
+ Finally, the `ind` parameter determines the evaluation points for the
2859
+ plot of the estimated PDF:
2860
+
2861
+ .. plot::
2862
+ :context: close-figs
2863
+
2864
+ >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
2814
2865
"""
2815
2866
return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
2816
2867
0 commit comments