31
31
from pandas .core .internals import BlockManager , make_block , form_blocks
32
32
from pandas .core .series import Series , _is_bool_indexer
33
33
from pandas .util import py3compat
34
+ import pandas .core .nanops as nanops
34
35
import pandas .core .common as common
35
36
import pandas .core .datetools as datetools
36
37
import pandas ._tseries as lib
@@ -2710,57 +2711,36 @@ def _count_level(self, level, axis=0, numeric_only=False):
2710
2711
else :
2711
2712
return result
2712
2713
2713
def sum(self, axis=0, numeric_only=None, skipna=True, level=None):
    # No inline docstring: the _add_stat_doc(sum, ...) call that follows this
    # definition is presumably what attaches the user-facing documentation.
    #
    # Without a level, reduce with nanops.nansum through self._reduce,
    # forwarding axis, skipna and numeric_only unchanged.
    if level is None:
        return self._reduce(nanops.nansum, axis=axis, skipna=skipna,
                            numeric_only=numeric_only)

    # With a level, delegate to the per-level aggregation helper. Note that
    # numeric_only is not forwarded on this path.
    return self._agg_by_level('sum', axis=axis, level=level, skipna=skipna)
2739
2720
_add_stat_doc (sum , 'sum' , 'sum' , extras = _numeric_only_doc )
2740
2721
2722
def mean(self, axis=0, skipna=True, level=None):
    # No inline docstring: the _add_stat_doc(mean, ...) call that follows this
    # definition is presumably what attaches the user-facing documentation.
    #
    # Without a level, reduce with nanops.nanmean through self._reduce.
    # numeric_only=None selects _reduce's "try all values first" mode.
    if level is None:
        return self._reduce(nanops.nanmean, axis=axis, skipna=skipna,
                            numeric_only=None)

    # With a level, delegate to the per-level aggregation helper.
    return self._agg_by_level('mean', axis=axis, level=level, skipna=skipna)
2728
+ _add_stat_doc (mean , 'mean' , 'mean' )
2729
+
2741
2730
def min(self, axis=0, skipna=True, level=None):
    # No inline docstring: the _add_stat_doc(min, ...) call that follows this
    # definition is presumably what attaches the user-facing documentation.
    #
    # Without a level, reduce with nanops.nanmin through self._reduce.
    # numeric_only=None selects _reduce's "try all values first" mode.
    if level is None:
        return self._reduce(nanops.nanmin, axis=axis, skipna=skipna,
                            numeric_only=None)

    # With a level, delegate to the per-level aggregation helper.
    return self._agg_by_level('min', axis=axis, level=level, skipna=skipna)
2752
2736
_add_stat_doc (min , 'minimum' , 'min' )
2753
2737
2754
2738
def max(self, axis=0, skipna=True, level=None):
    # No inline docstring: the _add_stat_doc(max, ...) call that follows this
    # definition is presumably what attaches the user-facing documentation.
    #
    # Without a level, reduce with nanops.nanmax through self._reduce.
    # numeric_only=None selects _reduce's "try all values first" mode.
    if level is None:
        return self._reduce(nanops.nanmax, axis=axis, skipna=skipna,
                            numeric_only=None)

    # With a level, delegate to the per-level aggregation helper.
    return self._agg_by_level('max', axis=axis, level=level, skipna=skipna)
2764
2744
_add_stat_doc (max , 'maximum' , 'max' )
2765
2745
2766
2746
def prod (self , axis = 0 , skipna = True , level = None ):
@@ -2781,16 +2761,6 @@ def prod(self, axis=0, skipna=True, level=None):
2781
2761
na_action = 'NA/null values are treated as 1' )
2782
2762
product = prod
2783
2763
2784
- def mean (self , axis = 0 , skipna = True , level = None ):
2785
- if level is not None :
2786
- return self ._agg_by_level ('mean' , axis = axis , level = level ,
2787
- skipna = skipna )
2788
-
2789
- summed = self .sum (axis , numeric_only = True , skipna = skipna )
2790
- count = self .count (axis , numeric_only = True ).astype (float )
2791
- return summed / count
2792
- _add_stat_doc (mean , 'mean' , 'mean' )
2793
-
2794
2764
def median (self , axis = 0 , skipna = True , level = None ):
2795
2765
if level is not None :
2796
2766
return self ._agg_by_level ('median' , axis = axis , level = level ,
@@ -2839,59 +2809,51 @@ def var(self, axis=0, skipna=True, level=None):
2839
2809
if level is not None :
2840
2810
return self ._agg_by_level ('var' , axis = axis , level = level ,
2841
2811
skipna = skipna )
2842
-
2843
- y , axis_labels = self ._get_agg_data (axis , numeric_only = True )
2844
-
2845
- mask = np .isnan (y )
2846
- count = (y .shape [axis ] - mask .sum (axis )).astype (float )
2847
-
2848
- if skipna :
2849
- np .putmask (y , mask , 0 )
2850
-
2851
- X = y .sum (axis )
2852
- XX = (y ** 2 ).sum (axis )
2853
-
2854
- theVar = (XX - X ** 2 / count ) / (count - 1 )
2855
-
2856
- return Series (theVar , index = axis_labels )
2812
+ return self ._reduce (nanops .nanvar , axis = axis , skipna = skipna ,
2813
+ numeric_only = None )
2857
2814
_add_stat_doc (var , 'unbiased variance' , 'var' )
2858
2815
2859
2816
def std(self, axis=0, skipna=True, level=None):
    # No inline docstring: the _add_stat_doc(std, ...) call that follows this
    # definition is presumably what attaches the user-facing documentation.
    #
    # Without a level, the standard deviation is the element-wise square
    # root of whatever self.var returns for the same axis/skipna.
    if level is None:
        variance = self.var(axis=axis, skipna=skipna)
        return np.sqrt(variance)

    # With a level, delegate to the per-level aggregation helper.
    return self._agg_by_level('std', axis=axis, level=level, skipna=skipna)
2865
2821
_add_stat_doc (std , 'unbiased standard deviation' , 'std' )
2866
2822
2867
2823
def skew(self, axis=0, skipna=True, level=None):
    # No inline docstring: the _add_stat_doc(skew, ...) call that follows this
    # definition is presumably what attaches the user-facing documentation.
    #
    # Without a level, reduce with nanops.nanskew through self._reduce.
    # numeric_only=None selects _reduce's "try all values first" mode.
    if level is None:
        return self._reduce(nanops.nanskew, axis=axis, skipna=skipna,
                            numeric_only=None)

    # With a level, delegate to the per-level aggregation helper.
    return self._agg_by_level('skew', axis=axis, level=level, skipna=skipna)
2829
+ _add_stat_doc (skew , 'unbiased skewness' , 'skew' )
2871
2830
2872
- y , axis_labels = self ._get_agg_data (axis , numeric_only = True )
2873
-
2874
- mask = np .isnan (y )
2875
- count = (y .shape [axis ] - mask .sum (axis )).astype (float )
2876
-
2877
- if skipna :
2878
- np .putmask (y , mask , 0 )
2879
-
2880
- A = y .sum (axis ) / count
2881
- B = (y ** 2 ).sum (axis ) / count - A ** 2
2882
- C = (y ** 3 ).sum (axis ) / count - A ** 3 - 3 * A * B
2883
-
2884
- # floating point error
2885
- B = np .where (np .abs (B ) < 1e-14 , 0 , B )
2886
- C = np .where (np .abs (C ) < 1e-14 , 0 , C )
2887
-
2888
- result = ((np .sqrt ((count ** 2 - count )) * C ) /
2889
- ((count - 2 ) * np .sqrt (B ) ** 3 ))
2831
+ def _reduce (self , op , axis = 0 , skipna = True , numeric_only = None ):
2890
2832
2891
- result = np .where (B == 0 , 0 , result )
2833
+ f = lambda x : op (x , axis = axis , skipna = skipna , copy = True )
2834
+ labels = self ._get_agg_axis (axis )
2835
+ if numeric_only is None :
2836
+ try :
2837
+ values = self .values
2838
+ if not self ._is_mixed_type :
2839
+ values = values .copy ()
2840
+ result = f (values )
2841
+ except Exception :
2842
+ data = self ._get_numeric_data ()
2843
+ result = f (data .values )
2844
+ labels = data ._get_agg_axis (axis )
2845
+ else :
2846
+ if numeric_only :
2847
+ data = self ._get_numeric_data ()
2848
+ values = data .values
2849
+ labels = data ._get_agg_axis (axis )
2850
+ else :
2851
+ values = self .values
2852
+ result = f (values )
2892
2853
2893
- return Series (result , index = axis_labels )
2894
- _add_stat_doc (skew , 'unbiased skewness' , 'skew' )
2854
+ if result .dtype == np .object_ :
2855
+ result = result .astype ('f8' )
2856
+ return Series (result , index = labels )
2895
2857
2896
2858
def idxmin (self , axis = 0 , skipna = True ):
2897
2859
"""
0 commit comments