1
- # -*- coding: utf-8 -*-
1
+ # -*- coding: utf-8 -*-
2
2
3
3
from datetime import timedelta
4
4
import operator
@@ -704,7 +704,7 @@ def test_stat_op_api(self, float_frame, float_string_frame):
704
704
has_numeric_only = True )
705
705
assert_stat_op_api ('sum' , float_frame , float_string_frame ,
706
706
has_numeric_only = True )
707
-
707
+
708
708
assert_stat_op_api ('nunique' , float_frame , float_string_frame )
709
709
assert_stat_op_api ('mean' , float_frame , float_string_frame )
710
710
assert_stat_op_api ('product' , float_frame , float_string_frame )
@@ -723,7 +723,71 @@ def test_stat_op_api(self, float_frame, float_string_frame):
723
723
assert_stat_op_api ('kurt' , float_frame , float_string_frame )
724
724
except ImportError :
725
725
pass
726
-
726
+
727
+ def test_stat_op_calc (self , float_frame_with_na , mixed_float_frame ):
728
+
729
+ def count (s ):
730
+ return notna (s ).sum ()
731
+
732
+ def nunique (s ):
733
+ return len (algorithms .unique1d (s .dropna ()))
734
+
735
+ def mad (x ):
736
+ return np .abs (x - x .mean ()).mean ()
737
+
738
+ def var (x ):
739
+ return np .var (x , ddof = 1 )
740
+
741
+ def std (x ):
742
+ return np .std (x , ddof = 1 )
743
+
744
+ def sem (x ):
745
+ return np .std (x , ddof = 1 ) / np .sqrt (len (x ))
746
+
747
+ def skew (x ):
748
+ from scipy .stats import skew
749
+ if len (x ) < 3 :
750
+ return np .nan
751
+ return skew (x , bias = False )
752
+
753
+ def kurt (x ):
754
+ from scipy .stats import kurtosis
755
+ if len (x ) < 4 :
756
+ return np .nan
757
+ return kurtosis (x , bias = False )
758
+
759
+ assert_stat_op_calc ('nunique' , nunique , float_frame_with_na ,
760
+ has_skipna = False , check_dtype = False ,
761
+ check_dates = True )
762
+
763
+ # mixed types (with upcasting happening)
764
+ assert_stat_op_calc ('sum' , np .sum , mixed_float_frame .astype ('float32' ),
765
+ check_dtype = False , check_less_precise = True )
766
+
767
+ assert_stat_op_calc ('sum' , np .sum , float_frame_with_na ,
768
+ skipna_alternative = np .nansum )
769
+ assert_stat_op_calc ('mean' , np .mean , float_frame_with_na ,
770
+ check_dates = True )
771
+ assert_stat_op_calc ('product' , np .prod , float_frame_with_na )
772
+
773
+
774
+ assert_stat_op_calc ('mad' , mad , float_frame_with_na )
775
+ assert_stat_op_calc ('var' , var , float_frame_with_na )
776
+ assert_stat_op_calc ('std' , std , float_frame_with_na )
777
+ assert_stat_op_calc ('sem' , sem , float_frame_with_na )
778
+
779
+
780
+ assert_stat_op_calc ('count' , count , float_frame_with_na ,
781
+ has_skipna = False , check_dtype = False ,
782
+ check_dates = True )
783
+
784
+ try :
785
+ import scipy .stats # noqa: F401
786
+ assert_stat_op_calc ('skew' , skew , float_frame_with_na )
787
+ assert_stat_op_calc ('kurt' , kurt , float_frame_with_na )
788
+ except ImportError :
789
+ pass
790
+
727
791
@pytest .mark .parametrize ('method' , ['sum' , 'mean' , 'prod' , 'var' ,
728
792
'std' , 'skew' , 'min' , 'max' ])
729
793
def test_stat_operators_attempt_obj_array (self , method ):
@@ -776,12 +840,7 @@ def test_reduce_mixed_frame(self):
776
840
np .array ([2 , 150 , 'abcde' ], dtype = object ))
777
841
tm .assert_series_equal (test , df .T .sum (axis = 1 ))
778
842
779
- def test_nunique (self , float_frame_with_na ):
780
- f = lambda s : len (algorithms .unique1d (s .dropna ()))
781
- assert_stat_op_calc ('nunique' , f , float_frame_with_na ,
782
- has_skipna = False , check_dtype = False ,
783
- check_dates = True )
784
-
843
+ def test_nunique (self ):
785
844
df = DataFrame ({'A' : [1 , 1 , 1 ],
786
845
'B' : [1 , 2 , 3 ],
787
846
'C' : [1 , np .nan , 3 ]})
@@ -792,20 +851,6 @@ def test_nunique(self, float_frame_with_na):
792
851
tm .assert_series_equal (df .nunique (axis = 1 , dropna = False ),
793
852
Series ({0 : 1 , 1 : 3 , 2 : 2 }))
794
853
795
- def test_sum (self , float_frame_with_na , mixed_float_frame ):
796
- assert_stat_op_calc ('sum' , np .sum , float_frame_with_na ,
797
- skipna_alternative = np .nansum )
798
- # mixed types (with upcasting happening)
799
- assert_stat_op_calc ('sum' , np .sum , mixed_float_frame .astype ('float32' ),
800
- check_dtype = False , check_less_precise = True )
801
-
802
- def test_mean (self , float_frame_with_na ):
803
- assert_stat_op_calc ('mean' , np .mean , float_frame_with_na ,
804
- check_dates = True )
805
-
806
- def test_product (self , float_frame_with_na ):
807
- assert_stat_op_calc ('product' , np .prod , float_frame_with_na )
808
-
809
854
@pytest .mark .parametrize ('tz' , [None , 'UTC' ])
810
855
def test_mean_mixed_datetime_numeric (self , tz ):
811
856
# https://github.com/pandas-dev/pandas/issues/24752
@@ -861,17 +906,7 @@ def test_max(self, float_frame_with_na, int_frame):
861
906
check_dates = True )
862
907
assert_stat_op_calc ('max' , np .max , int_frame )
863
908
864
- def test_mad (self , float_frame_with_na ):
865
- f = lambda x : np .abs (x - x .mean ()).mean ()
866
- assert_stat_op_calc ('mad' , f , float_frame_with_na )
867
-
868
- def test_var_std (self , float_frame_with_na , datetime_frame ):
869
- alt = lambda x : np .var (x , ddof = 1 )
870
- assert_stat_op_calc ('var' , alt , float_frame_with_na )
871
-
872
- alt = lambda x : np .std (x , ddof = 1 )
873
- assert_stat_op_calc ('std' , alt , float_frame_with_na )
874
-
909
+ def test_var_std (self , datetime_frame ):
875
910
result = datetime_frame .std (ddof = 4 )
876
911
expected = datetime_frame .apply (lambda x : x .std (ddof = 4 ))
877
912
tm .assert_almost_equal (result , expected )
@@ -914,10 +949,7 @@ def test_numeric_only_flag(self, meth):
914
949
pytest .raises (TypeError , lambda : getattr (df2 , meth )(
915
950
axis = 1 , numeric_only = False ))
916
951
917
- def test_sem (self , float_frame_with_na , datetime_frame ):
918
- alt = lambda x : np .std (x , ddof = 1 ) / np .sqrt (len (x ))
919
- assert_stat_op_calc ('sem' , alt , float_frame_with_na )
920
-
952
+ def test_sem (self , datetime_frame ):
921
953
result = datetime_frame .sem (ddof = 4 )
922
954
expected = datetime_frame .apply (
923
955
lambda x : x .std (ddof = 4 ) / np .sqrt (len (x )))
@@ -932,27 +964,7 @@ def test_sem(self, float_frame_with_na, datetime_frame):
932
964
assert not (result < 0 ).any ()
933
965
934
966
@td .skip_if_no_scipy
935
- def test_skew (self , float_frame_with_na , float_frame ):
936
- from scipy .stats import skew
937
-
938
- def alt (x ):
939
- if len (x ) < 3 :
940
- return np .nan
941
- return skew (x , bias = False )
942
-
943
- assert_stat_op_calc ('skew' , alt , float_frame_with_na )
944
-
945
- @td .skip_if_no_scipy
946
- def test_kurt (self , float_frame_with_na , float_frame ):
947
- from scipy .stats import kurtosis
948
-
949
- def alt (x ):
950
- if len (x ) < 4 :
951
- return np .nan
952
- return kurtosis (x , bias = False )
953
-
954
- assert_stat_op_calc ('kurt' , alt , float_frame_with_na )
955
-
967
+ def test_kurt (self ):
956
968
index = MultiIndex (levels = [['bar' ], ['one' , 'two' , 'three' ], [0 , 1 ]],
957
969
codes = [[0 , 0 , 0 , 0 , 0 , 0 ],
958
970
[0 , 1 , 2 , 0 , 1 , 2 ],
@@ -1329,11 +1341,7 @@ def test_cummax(self, datetime_frame):
1329
1341
# ---------------------------------------------------------------------
1330
1342
# Miscellanea
1331
1343
1332
- def test_count (self , float_frame_with_na ):
1333
- f = lambda s : notna (s ).sum ()
1334
- assert_stat_op_calc ('count' , f , float_frame_with_na , has_skipna = False ,
1335
- check_dtype = False , check_dates = True )
1336
-
1344
+ def test_count (self ):
1337
1345
# corner case
1338
1346
frame = DataFrame ()
1339
1347
ct1 = frame .count (1 )
0 commit comments