@@ -1380,6 +1380,50 @@ def orientation(self):
1380
1380
return 'vertical'
1381
1381
1382
1382
1383
+ _kde_docstring = """
1384
+ Generate Kernel Density Estimate plot using Gaussian kernels.
1385
+
1386
+ In statistics, `kernel density estimation`_ (KDE) is a non-parametric
1387
+ way to estimate the probability density function (PDF) of a random
1388
+ variable. This function uses Gaussian kernels and includes automatic
1389
+ bandwidth determination.
1390
+
1391
+ .. _kernel density estimation:
1392
+ https://en.wikipedia.org/wiki/Kernel_density_estimation
1393
+
1394
+ Parameters
1395
+ ----------
1396
+ bw_method : str, scalar or callable, optional
1397
+ The method used to calculate the estimator bandwidth. This can be
1398
+ 'scott', 'silverman', a scalar constant or a callable.
1399
+ If None (default), 'scott' is used.
1400
+ See :class:`scipy.stats.gaussian_kde` for more information.
1401
+ ind : NumPy array or integer, optional
1402
+ Evaluation points for the estimated PDF. If None (default),
1403
+ 1000 equally spaced points are used. If `ind` is a NumPy array, the
1404
+ KDE is evaluated at the points passed. If `ind` is an integer,
1405
+ `ind` number of equally spaced points are used.
1406
+ **kwds : optional
1407
+ Additional keyword arguments are documented in
1408
+ :meth:`pandas.%(this-datatype)s.plot`.
1409
+
1410
+ Returns
1411
+ -------
1412
+ axes : matplotlib.AxesSubplot or np.array of them
1413
+
1414
+ See Also
1415
+ --------
1416
+ scipy.stats.gaussian_kde : Representation of a kernel-density
1417
+ estimate using Gaussian kernels. This is the function used
1418
+ internally to estimate the PDF.
1419
+ %(sibling-datatype)s.plot.kde : Generate a KDE plot for a
1420
+ %(sibling-datatype)s.
1421
+
1422
+ Examples
1423
+ --------
1424
+ %(examples)s
1425
+ """
1426
+
1383
1427
class KdePlot (HistPlot ):
1384
1428
_kind = 'kde'
1385
1429
orientation = 'vertical'
@@ -2616,45 +2660,12 @@ def hist(self, bins=10, **kwds):
2616
2660
"""
2617
2661
return self (kind = 'hist' , bins = bins , ** kwds )
2618
2662
2619
- def kde (self , bw_method = None , ind = None , ** kwds ):
2620
- """
2621
- Kernel Density Estimate plot using Gaussian kernels.
2622
-
2623
- In statistics, kernel density estimation (KDE) is a non-parametric way
2624
- to estimate the probability density function (PDF) of a random
2625
- variable. This function uses Gaussian kernels and includes automatic
2626
- bandwith determination.
2627
-
2628
- Parameters
2629
- ----------
2630
- bw_method : str, scalar or callable, optional
2631
- The method used to calculate the estimator bandwidth. This can be
2632
- 'scott', 'silverman', a scalar constant or a callable.
2633
- If None (default), 'scott' is used.
2634
- See :class:`scipy.stats.gaussian_kde` for more information.
2635
- ind : NumPy array or integer, optional
2636
- Evaluation points for the estimated PDF. If None (default),
2637
- 1000 equally spaced points are used. If `ind` is a NumPy array, the
2638
- kde is evaluated at the points passed. If `ind` is an integer,
2639
- `ind` number of equally spaced points are used.
2640
- kwds : optional
2641
- Additional keyword arguments are documented in
2642
- :meth:`pandas.Series.plot`.
2643
-
2644
- Returns
2645
- -------
2646
- axes : matplotlib.AxesSubplot or np.array of them
2647
-
2648
- See also
2649
- --------
2650
- scipy.stats.gaussian_kde : Representation of a kernel-density
2651
- estimate using Gaussian kernels. This is the function used
2652
- internally to estimate the PDF.
2653
-
2654
- Examples
2655
- --------
2663
+ @Appender (_kde_docstring % {
2664
+ 'this-datatype' : 'Series' ,
2665
+ 'sibling-datatype' : 'DataFrame' ,
2666
+ 'examples' : """
2656
2667
Given a Series of points randomly sampled from an unknown
2657
- distribution, estimate this distribution using KDE with automatic
2668
+ distribution, estimate its PDF using KDE with automatic
2658
2669
bandwidth determination and plot the results, evaluating them at
2659
2670
1000 equally spaced points (default):
2660
2671
@@ -2664,10 +2675,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
2664
2675
>>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
2665
2676
>>> ax = s.plot.kde()
2666
2677
2667
-
2668
- An scalar fixed bandwidth can be specified. Using a too small bandwidth
2669
- can lead to overfitting, while a too large bandwidth can result in
2670
- underfitting:
2678
+ A scalar bandwidth can be specified. Using a small bandwidth value can
2679
+ lead to overfitting, while using a large bandwidth value may result
2680
+ in underfitting:
2671
2681
2672
2682
.. plot::
2673
2683
:context: close-figs
@@ -2686,7 +2696,9 @@ def kde(self, bw_method=None, ind=None, **kwds):
2686
2696
:context: close-figs
2687
2697
2688
2698
>>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])
2689
- """
2699
+ """ .strip ()
2700
+ })
2701
+ def kde (self , bw_method = None , ind = None , ** kwds ):
2690
2702
return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
2691
2703
2692
2704
density = kde
@@ -2849,30 +2861,48 @@ def hist(self, by=None, bins=10, **kwds):
2849
2861
"""
2850
2862
return self (kind = 'hist' , by = by , bins = bins , ** kwds )
2851
2863
2852
- def kde (self , bw_method = None , ind = None , ** kwds ):
2853
- """
2854
- Kernel Density Estimate plot
2864
+ @Appender (_kde_docstring % {
2865
+ 'this-datatype' : 'DataFrame' ,
2866
+ 'sibling-datatype' : 'Series' ,
2867
+ 'examples' : """
2868
+ Given several Series of points randomly sampled from unknown
2869
+ distributions, estimate their PDFs using KDE with automatic
2870
+ bandwidth determination and plot the results, evaluating them at
2871
+ 1000 equally spaced points (default):
2855
2872
2856
- Parameters
2857
- ----------
2858
- bw_method: str, scalar or callable, optional
2859
- The method used to calculate the estimator bandwidth. This can be
2860
- 'scott', 'silverman', a scalar constant or a callable.
2861
- If None (default), 'scott' is used.
2862
- See :class:`scipy.stats.gaussian_kde` for more information.
2863
- ind : NumPy array or integer, optional
2864
- Evaluation points. If None (default), 1000 equally spaced points
2865
- are used. If `ind` is a NumPy array, the kde is evaluated at the
2866
- points passed. If `ind` is an integer, `ind` number of equally
2867
- spaced points are used.
2868
- `**kwds` : optional
2869
- Additional keyword arguments are documented in
2870
- :meth:`pandas.DataFrame.plot`.
2873
+ .. plot::
2874
+ :context: close-figs
2871
2875
2872
- Returns
2873
- -------
2874
- axes : matplotlib.AxesSubplot or np.array of them
2875
- """
2876
+ >>> df = pd.DataFrame({
2877
+ ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
2878
+ ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
2879
+ ... })
2880
+ >>> ax = df.plot.kde()
2881
+
2882
+ A scalar bandwidth can be specified. Using a small bandwidth value can
2883
+ lead to overfitting, while using a large bandwidth value may result
2884
+ in underfitting:
2885
+
2886
+ .. plot::
2887
+ :context: close-figs
2888
+
2889
+ >>> ax = df.plot.kde(bw_method=0.3)
2890
+
2891
+ .. plot::
2892
+ :context: close-figs
2893
+
2894
+ >>> ax = df.plot.kde(bw_method=3)
2895
+
2896
+ Finally, the `ind` parameter determines the evaluation points for the
2897
+ plot of the estimated PDF:
2898
+
2899
+ .. plot::
2900
+ :context: close-figs
2901
+
2902
+ >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
2903
+ """ .strip ()
2904
+ })
2905
+ def kde (self , bw_method = None , ind = None , ** kwds ):
2876
2906
return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
2877
2907
2878
2908
density = kde
0 commit comments