25
25
_try_sort , _pfixed , _default_index ,
26
26
_infer_dtype , _stringify , _maybe_upcast )
27
27
from pandas .core .daterange import DateRange
28
- from pandas .core .generic import AxisProperty , NDFrame
28
+ from pandas .core .generic import NDFrame
29
29
from pandas .core .index import Index , MultiIndex , NULL_INDEX , _ensure_index
30
30
from pandas .core .indexing import _NDFrameIndexer , _maybe_droplevels
31
31
from pandas .core .internals import BlockManager , make_block , form_blocks
60
60
result : DataFrame
61
61
"""
62
62
63
+ _stat_doc = """
64
+ Return %(name)s over requested axis.
65
+ %(na_action)s
66
+
67
+ Parameters
68
+ ----------
69
+ axis : {0, 1}
70
+ 0 for row-wise, 1 for column-wise
71
+ skipna : boolean, default True
72
+ Exclude NA/null values. If an entire row/column is NA, the result
73
+ will be NA
74
+ level : int, default None
75
+ If the axis is a MultiIndex (hierarchical), count along a
76
+ particular level, collapsing into a DataFrame
77
+ %(extras)s
78
+ Returns
79
+ -------
80
+ %(shortname)s : Series (or DataFrame if level specified)
81
+ """
82
+
83
+ _doc_exclude_na = "NA/null values are excluded"
84
+
85
+ _numeric_only_doc = """numeric_only : boolean, default False
86
+ Include only float, int, boolean data
87
+ """
88
+
89
+ def _add_stat_doc (f , name , shortname , na_action = _doc_exclude_na ,
90
+ extras = '' ):
91
+ doc = _stat_doc % {'name' : name ,
92
+ 'shortname' : shortname ,
93
+ 'na_action' : na_action ,
94
+ 'extras' : extras }
95
+ f .__doc__ = doc
63
96
64
97
def _arith_method (func , name , default_axis = 'columns' ):
65
98
def f (self , other , axis = default_axis , fill_value = None ):
@@ -2426,38 +2459,6 @@ def _count_level(self, level, axis=0, numeric_only=False):
2426
2459
return DataFrame (result , index = index , columns = columns )
2427
2460
2428
2461
def sum (self , axis = 0 , numeric_only = False , skipna = True , level = None ):
2429
- """
2430
- Return sum over requested axis
2431
-
2432
- Parameters
2433
- ----------
2434
- axis : {0, 1}
2435
- 0 for row-wise, 1 for column-wise
2436
- numeric_only : boolean, default False
2437
- Include only float, int, boolean data
2438
- skipna : boolean, default True
2439
- Exclude NA/null values. If an entire row/column is NA, the result
2440
- will be NA
2441
- level : integer, default None
2442
- Choose a level to groupby before applying operation
2443
-
2444
- Examples
2445
- --------
2446
- >>> df
2447
- c1 c2
2448
- a 1 0
2449
- b 0 2
2450
- c 3 0
2451
- d 0 4
2452
-
2453
- >>> df.sum(axis=0)
2454
- c1 4
2455
- c2 6
2456
-
2457
- Returns
2458
- -------
2459
- sum : Series
2460
- """
2461
2462
if not level is None :
2462
2463
sumfunc = lambda x : x .sum (skipna = skipna )
2463
2464
return self .groupby (level = level ).aggregate (sumfunc )
@@ -2484,25 +2485,9 @@ def sum(self, axis=0, numeric_only=False, skipna=True, level=None):
2484
2485
the_sum [ct_mask ] = nan
2485
2486
2486
2487
return Series (the_sum , index = axis_labels )
2488
+ _add_stat_doc (sum , 'sum' , 'sum' , extras = _numeric_only_doc )
2487
2489
2488
2490
def min (self , axis = 0 , skipna = True , level = None ):
2489
- """
2490
- Return minimum over requested axis. NA/null values are excluded
2491
-
2492
- Parameters
2493
- ----------
2494
- axis : {0, 1}
2495
- 0 for row-wise, 1 for column-wise
2496
- skipna : boolean, default True
2497
- Exclude NA/null values. If an entire row/column is NA, the result
2498
- will be NA
2499
- level : integer, default None
2500
- Choose a level to groupby before applying operation
2501
-
2502
- Returns
2503
- -------
2504
- min : Series
2505
- """
2506
2491
values = self .values .copy ()
2507
2492
if skipna and not issubclass (values .dtype .type , np .integer ):
2508
2493
np .putmask (values , - np .isfinite (values ), np .inf )
@@ -2512,25 +2497,9 @@ def min(self, axis=0, skipna=True, level=None):
2512
2497
return self .groupby (level = level ).aggregate (minfunc )
2513
2498
2514
2499
return Series (values .min (axis ), index = self ._get_agg_axis (axis ))
2500
+ _add_stat_doc (min , 'minimum' , 'min' )
2515
2501
2516
2502
def max (self , axis = 0 , skipna = True , level = None ):
2517
- """
2518
- Return maximum over requested axis. NA/null values are excluded
2519
-
2520
- Parameters
2521
- ----------
2522
- axis : {0, 1}
2523
- 0 for row-wise, 1 for column-wise
2524
- skipna : boolean, default True
2525
- Exclude NA/null values. If an entire row/column is NA, the result
2526
- will be NA
2527
- level : integer, default None
2528
- Choose a level to groupby before applying operation
2529
-
2530
- Returns
2531
- -------
2532
- max : Series
2533
- """
2534
2503
values = self .values .copy ()
2535
2504
if skipna and not issubclass (values .dtype .type , np .integer ):
2536
2505
np .putmask (values , - np .isfinite (values ), - np .inf )
@@ -2540,25 +2509,9 @@ def max(self, axis=0, skipna=True, level=None):
2540
2509
return self .groupby (level = level ).aggregate (maxfunc )
2541
2510
2542
2511
return Series (values .max (axis ), index = self ._get_agg_axis (axis ))
2512
+ _add_stat_doc (max , 'maximum' , 'max' )
2543
2513
2544
2514
def prod (self , axis = 0 , skipna = True , level = None ):
2545
- """
2546
- Return product over requested axis. NA/null values are treated as 1
2547
-
2548
- Parameters
2549
- ----------
2550
- axis : {0, 1}
2551
- 0 for row-wise, 1 for column-wise
2552
- skipna : boolean, default True
2553
- Exclude NA/null values. If an entire row/column is NA, the result
2554
- will be NA
2555
- level : integer, default None
2556
- Choose a level to groupby before applying operation
2557
-
2558
- Returns
2559
- -------
2560
- product : Series
2561
- """
2562
2515
if not level is None :
2563
2516
prodfunc = lambda x : x .prod (skipna = skipna )
2564
2517
return self .groupby (level = level ).aggregate (prodfunc )
@@ -2572,33 +2525,19 @@ def prod(self, axis=0, skipna=True, level=None):
2572
2525
result [count == 0 ] = nan
2573
2526
2574
2527
return Series (result , index = self ._get_agg_axis (axis ))
2528
+ _add_stat_doc (prod , 'product' , 'product' ,
2529
+ na_action = 'NA/null values are treated as 1' )
2575
2530
product = prod
2576
2531
2577
2532
def mean(self, axis=0, skipna=True, level=None):
    # No inline docstring here: __doc__ is set by the
    # _add_stat_doc(mean, 'mean', 'mean') call that follows this definition.
    if level is not None:
        # Level-wise mean: group on the requested level and let each group
        # compute its own mean with the same skipna setting.
        # NOTE(review): `axis` is not forwarded to groupby on this path --
        # confirm that is intended.
        def _group_mean(x):
            return x.mean(skipna=skipna)

        return self.groupby(level=level).aggregate(_group_mean)

    # Both operands are restricted to numeric data so that they line up.
    summed = self.sum(axis, numeric_only=True, skipna=skipna)
    # Cast the counts to float before dividing (presumably to avoid
    # integer division -- TODO confirm).
    count = self.count(axis, numeric_only=True).astype(float)
    return summed / count
2540
+ _add_stat_doc (mean , 'mean' , 'mean' )
2602
2541
2603
2542
def quantile (self , q = 0.5 , axis = 0 ):
2604
2543
"""
@@ -2632,23 +2571,6 @@ def f(arr):
2632
2571
return self .apply (f , axis = axis )
2633
2572
2634
2573
def median (self , axis = 0 , skipna = True , level = None ):
2635
- """
2636
- Return median over requested axis, NA/null are exluded
2637
-
2638
- Parameters
2639
- ----------
2640
- axis : {0, 1}
2641
- 0 for row-wise, 1 for column-wise
2642
- skipna : boolean, default True
2643
- Exclude NA/null values. If an entire row/column is NA, the result
2644
- will be NA
2645
- level : integer, default None
2646
- Choose a level to groupby before applying operation
2647
-
2648
- Returns
2649
- -------
2650
- Series or TimeSeries
2651
- """
2652
2574
if not level is None :
2653
2575
medianfunc = lambda x : x .median (skipna = skipna )
2654
2576
return self .groupby (level = level ).aggregate (medianfunc )
@@ -2661,25 +2583,9 @@ def median(self, axis=0, skipna=True, level=None):
2661
2583
return Series (med , index = self .index )
2662
2584
else :
2663
2585
raise Exception ('Must have 0<= axis <= 1' )
2586
+ _add_stat_doc (median , 'median' , 'median' )
2664
2587
2665
2588
def mad (self , axis = 0 , skipna = True , level = None ):
2666
- """
2667
- Return mean absolute deviation over requested axis
2668
-
2669
- Parameters
2670
- ----------
2671
- axis : {0, 1}
2672
- 0 for row-wise, 1 for column-wise
2673
- skipna : boolean, default True
2674
- Exclude NA/null values. If an entire row/column is NA, the result
2675
- will be NA
2676
- level : integer, default None
2677
- Choose a level to groupby before applying operation
2678
-
2679
- Returns
2680
- -------
2681
- mad : Series
2682
- """
2683
2589
if not level is None :
2684
2590
madfunc = lambda x : x .mad (skipna = skipna )
2685
2591
return self .groupby (level = level ).aggregate (madfunc )
@@ -2689,25 +2595,9 @@ def mad(self, axis=0, skipna=True, level=None):
2689
2595
else :
2690
2596
demeaned = self .sub (self .mean (axis = 1 ), axis = 0 )
2691
2597
return np .abs (demeaned ).mean (axis = axis , skipna = skipna )
2598
+ _add_stat_doc (mad , 'mean absolute deviation' , 'mad' )
2692
2599
2693
2600
def var (self , axis = 0 , skipna = True , level = None ):
2694
- """
2695
- Return unbiased variance over requested axis
2696
-
2697
- Parameters
2698
- ----------
2699
- axis : {0, 1}
2700
- 0 for row-wise, 1 for column-wise
2701
- skipna : boolean, default True
2702
- Exclude NA/null values. If an entire row/column is NA, the result
2703
- will be NA
2704
- level : integer, default None
2705
- Choose a level to groupby before applying operation
2706
-
2707
- Returns
2708
- -------
2709
- var : Series
2710
- """
2711
2601
if not level is None :
2712
2602
varfunc = lambda x : x .var (skipna = skipna )
2713
2603
return self .groupby (level = level ).aggregate (varfunc )
@@ -2726,49 +2616,17 @@ def var(self, axis=0, skipna=True, level=None):
2726
2616
theVar = (XX - X ** 2 / count ) / (count - 1 )
2727
2617
2728
2618
return Series (theVar , index = axis_labels )
2619
+ _add_stat_doc (var , 'unbiased variance' , 'var' )
2729
2620
2730
2621
def std(self, axis=0, skipna=True, level=None):
    # No inline docstring here: __doc__ is set by the
    # _add_stat_doc(std, 'unbiased standard deviation', 'std') call that
    # follows this definition.
    if level is not None:
        # Level-wise std: group on the requested level and let each group
        # compute its own standard deviation with the same skipna setting.
        # NOTE(review): `axis` is not forwarded to groupby on this path --
        # confirm that is intended.
        def _group_std(x):
            return x.std(skipna=skipna)

        return self.groupby(level=level).aggregate(_group_std)

    # Standard deviation is the square root of the variance computed by
    # self.var with the same axis and skipna arguments.
    return np.sqrt(self.var(axis=axis, skipna=skipna))
2627
+ _add_stat_doc (std , 'unbiased standard deviation' , 'std' )
2753
2628
2754
2629
def skew (self , axis = 0 , skipna = True , level = None ):
2755
- """
2756
- Return unbiased skewness over requested axis
2757
-
2758
- Parameters
2759
- ----------
2760
- axis : {0, 1}
2761
- 0 for row-wise, 1 for column-wise
2762
- skipna : boolean, default True
2763
- Exclude NA/null values. If an entire row/column is NA, the result
2764
- will be NA
2765
- level : integer, default None
2766
- Choose a level to groupby before applying operation
2767
-
2768
- Returns
2769
- -------
2770
- skew : Series
2771
- """
2772
2630
if not level is None :
2773
2631
skewfunc = lambda x : x .skew (skipna = skipna )
2774
2632
return self .groupby (level = level ).aggregate (skewfunc )
@@ -2795,6 +2653,7 @@ def skew(self, axis=0, skipna=True, level=None):
2795
2653
result = np .where (B == 0 , 0 , result )
2796
2654
2797
2655
return Series (result , index = axis_labels )
2656
+ _add_stat_doc (skew , 'unbiased skewness' , 'skew' )
2798
2657
2799
2658
def _get_agg_data (self , axis , numeric_only = True , copy = True ):
2800
2659
num_cols = self ._get_numeric_columns ()
0 commit comments