@@ -642,173 +642,6 @@ def test_cumprod(self):
642
642
df .cumprod (0 )
643
643
df .cumprod (1 )
644
644
645
- def test_rank (self ):
646
- tm ._skip_if_no_scipy ()
647
- from scipy .stats import rankdata
648
-
649
- self .frame ['A' ][::2 ] = np .nan
650
- self .frame ['B' ][::3 ] = np .nan
651
- self .frame ['C' ][::4 ] = np .nan
652
- self .frame ['D' ][::5 ] = np .nan
653
-
654
- ranks0 = self .frame .rank ()
655
- ranks1 = self .frame .rank (1 )
656
- mask = np .isnan (self .frame .values )
657
-
658
- fvals = self .frame .fillna (np .inf ).values
659
-
660
- exp0 = np .apply_along_axis (rankdata , 0 , fvals )
661
- exp0 [mask ] = np .nan
662
-
663
- exp1 = np .apply_along_axis (rankdata , 1 , fvals )
664
- exp1 [mask ] = np .nan
665
-
666
- tm .assert_almost_equal (ranks0 .values , exp0 )
667
- tm .assert_almost_equal (ranks1 .values , exp1 )
668
-
669
- # integers
670
- df = DataFrame (np .random .randint (0 , 5 , size = 40 ).reshape ((10 , 4 )))
671
-
672
- result = df .rank ()
673
- exp = df .astype (float ).rank ()
674
- tm .assert_frame_equal (result , exp )
675
-
676
- result = df .rank (1 )
677
- exp = df .astype (float ).rank (1 )
678
- tm .assert_frame_equal (result , exp )
679
-
680
- def test_rank2 (self ):
681
- df = DataFrame ([[1 , 3 , 2 ], [1 , 2 , 3 ]])
682
- expected = DataFrame ([[1.0 , 3.0 , 2.0 ], [1 , 2 , 3 ]]) / 3.0
683
- result = df .rank (1 , pct = True )
684
- tm .assert_frame_equal (result , expected )
685
-
686
- df = DataFrame ([[1 , 3 , 2 ], [1 , 2 , 3 ]])
687
- expected = df .rank (0 ) / 2.0
688
- result = df .rank (0 , pct = True )
689
- tm .assert_frame_equal (result , expected )
690
-
691
- df = DataFrame ([['b' , 'c' , 'a' ], ['a' , 'c' , 'b' ]])
692
- expected = DataFrame ([[2.0 , 3.0 , 1.0 ], [1 , 3 , 2 ]])
693
- result = df .rank (1 , numeric_only = False )
694
- tm .assert_frame_equal (result , expected )
695
-
696
- expected = DataFrame ([[2.0 , 1.5 , 1.0 ], [1 , 1.5 , 2 ]])
697
- result = df .rank (0 , numeric_only = False )
698
- tm .assert_frame_equal (result , expected )
699
-
700
- df = DataFrame ([['b' , np .nan , 'a' ], ['a' , 'c' , 'b' ]])
701
- expected = DataFrame ([[2.0 , nan , 1.0 ], [1.0 , 3.0 , 2.0 ]])
702
- result = df .rank (1 , numeric_only = False )
703
- tm .assert_frame_equal (result , expected )
704
-
705
- expected = DataFrame ([[2.0 , nan , 1.0 ], [1.0 , 1.0 , 2.0 ]])
706
- result = df .rank (0 , numeric_only = False )
707
- tm .assert_frame_equal (result , expected )
708
-
709
- # f7u12, this does not work without extensive workaround
710
- data = [[datetime (2001 , 1 , 5 ), nan , datetime (2001 , 1 , 2 )],
711
- [datetime (2000 , 1 , 2 ), datetime (2000 , 1 , 3 ),
712
- datetime (2000 , 1 , 1 )]]
713
- df = DataFrame (data )
714
-
715
- # check the rank
716
- expected = DataFrame ([[2. , nan , 1. ],
717
- [2. , 3. , 1. ]])
718
- result = df .rank (1 , numeric_only = False , ascending = True )
719
- tm .assert_frame_equal (result , expected )
720
-
721
- expected = DataFrame ([[1. , nan , 2. ],
722
- [2. , 1. , 3. ]])
723
- result = df .rank (1 , numeric_only = False , ascending = False )
724
- tm .assert_frame_equal (result , expected )
725
-
726
- # mixed-type frames
727
- self .mixed_frame ['datetime' ] = datetime .now ()
728
- self .mixed_frame ['timedelta' ] = timedelta (days = 1 , seconds = 1 )
729
-
730
- result = self .mixed_frame .rank (1 )
731
- expected = self .mixed_frame .rank (1 , numeric_only = True )
732
- tm .assert_frame_equal (result , expected )
733
-
734
- df = DataFrame ({"a" : [1e-20 , - 5 , 1e-20 + 1e-40 , 10 ,
735
- 1e60 , 1e80 , 1e-30 ]})
736
- exp = DataFrame ({"a" : [3.5 , 1. , 3.5 , 5. , 6. , 7. , 2. ]})
737
- tm .assert_frame_equal (df .rank (), exp )
738
-
739
- def test_rank_na_option (self ):
740
- tm ._skip_if_no_scipy ()
741
- from scipy .stats import rankdata
742
-
743
- self .frame ['A' ][::2 ] = np .nan
744
- self .frame ['B' ][::3 ] = np .nan
745
- self .frame ['C' ][::4 ] = np .nan
746
- self .frame ['D' ][::5 ] = np .nan
747
-
748
- # bottom
749
- ranks0 = self .frame .rank (na_option = 'bottom' )
750
- ranks1 = self .frame .rank (1 , na_option = 'bottom' )
751
-
752
- fvals = self .frame .fillna (np .inf ).values
753
-
754
- exp0 = np .apply_along_axis (rankdata , 0 , fvals )
755
- exp1 = np .apply_along_axis (rankdata , 1 , fvals )
756
-
757
- tm .assert_almost_equal (ranks0 .values , exp0 )
758
- tm .assert_almost_equal (ranks1 .values , exp1 )
759
-
760
- # top
761
- ranks0 = self .frame .rank (na_option = 'top' )
762
- ranks1 = self .frame .rank (1 , na_option = 'top' )
763
-
764
- fval0 = self .frame .fillna ((self .frame .min () - 1 ).to_dict ()).values
765
- fval1 = self .frame .T
766
- fval1 = fval1 .fillna ((fval1 .min () - 1 ).to_dict ()).T
767
- fval1 = fval1 .fillna (np .inf ).values
768
-
769
- exp0 = np .apply_along_axis (rankdata , 0 , fval0 )
770
- exp1 = np .apply_along_axis (rankdata , 1 , fval1 )
771
-
772
- tm .assert_almost_equal (ranks0 .values , exp0 )
773
- tm .assert_almost_equal (ranks1 .values , exp1 )
774
-
775
- # descending
776
-
777
- # bottom
778
- ranks0 = self .frame .rank (na_option = 'top' , ascending = False )
779
- ranks1 = self .frame .rank (1 , na_option = 'top' , ascending = False )
780
-
781
- fvals = self .frame .fillna (np .inf ).values
782
-
783
- exp0 = np .apply_along_axis (rankdata , 0 , - fvals )
784
- exp1 = np .apply_along_axis (rankdata , 1 , - fvals )
785
-
786
- tm .assert_almost_equal (ranks0 .values , exp0 )
787
- tm .assert_almost_equal (ranks1 .values , exp1 )
788
-
789
- # descending
790
-
791
- # top
792
- ranks0 = self .frame .rank (na_option = 'bottom' , ascending = False )
793
- ranks1 = self .frame .rank (1 , na_option = 'bottom' , ascending = False )
794
-
795
- fval0 = self .frame .fillna ((self .frame .min () - 1 ).to_dict ()).values
796
- fval1 = self .frame .T
797
- fval1 = fval1 .fillna ((fval1 .min () - 1 ).to_dict ()).T
798
- fval1 = fval1 .fillna (np .inf ).values
799
-
800
- exp0 = np .apply_along_axis (rankdata , 0 , - fval0 )
801
- exp1 = np .apply_along_axis (rankdata , 1 , - fval1 )
802
-
803
- tm .assert_numpy_array_equal (ranks0 .values , exp0 )
804
- tm .assert_numpy_array_equal (ranks1 .values , exp1 )
805
-
806
- def test_rank_axis (self ):
807
- # check if using axes' names gives the same result
808
- df = pd .DataFrame ([[2 , 1 ], [4 , 3 ]])
809
- tm .assert_frame_equal (df .rank (axis = 0 ), df .rank (axis = 'index' ))
810
- tm .assert_frame_equal (df .rank (axis = 1 ), df .rank (axis = 'columns' ))
811
-
812
645
def test_sem (self ):
813
646
alt = lambda x : np .std (x , ddof = 1 ) / np .sqrt (len (x ))
814
647
self ._check_stat_op ('sem' , alt )
0 commit comments