|
2 | 2 |
|
3 | 3 | from __future__ import print_function
|
4 | 4 |
|
5 |
| -from datetime import timedelta, datetime |
| 5 | +from datetime import timedelta |
6 | 6 | from distutils.version import LooseVersion
|
7 | 7 | import sys
|
8 | 8 | import pytest
|
@@ -642,173 +642,6 @@ def test_cumprod(self):
|
642 | 642 | df.cumprod(0)
|
643 | 643 | df.cumprod(1)
|
644 | 644 |
|
645 |
| - def test_rank(self): |
646 |
| - tm._skip_if_no_scipy() |
647 |
| - from scipy.stats import rankdata |
648 |
| - |
649 |
| - self.frame['A'][::2] = np.nan |
650 |
| - self.frame['B'][::3] = np.nan |
651 |
| - self.frame['C'][::4] = np.nan |
652 |
| - self.frame['D'][::5] = np.nan |
653 |
| - |
654 |
| - ranks0 = self.frame.rank() |
655 |
| - ranks1 = self.frame.rank(1) |
656 |
| - mask = np.isnan(self.frame.values) |
657 |
| - |
658 |
| - fvals = self.frame.fillna(np.inf).values |
659 |
| - |
660 |
| - exp0 = np.apply_along_axis(rankdata, 0, fvals) |
661 |
| - exp0[mask] = np.nan |
662 |
| - |
663 |
| - exp1 = np.apply_along_axis(rankdata, 1, fvals) |
664 |
| - exp1[mask] = np.nan |
665 |
| - |
666 |
| - tm.assert_almost_equal(ranks0.values, exp0) |
667 |
| - tm.assert_almost_equal(ranks1.values, exp1) |
668 |
| - |
669 |
| - # integers |
670 |
| - df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) |
671 |
| - |
672 |
| - result = df.rank() |
673 |
| - exp = df.astype(float).rank() |
674 |
| - tm.assert_frame_equal(result, exp) |
675 |
| - |
676 |
| - result = df.rank(1) |
677 |
| - exp = df.astype(float).rank(1) |
678 |
| - tm.assert_frame_equal(result, exp) |
679 |
| - |
680 |
| - def test_rank2(self): |
681 |
| - df = DataFrame([[1, 3, 2], [1, 2, 3]]) |
682 |
| - expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 |
683 |
| - result = df.rank(1, pct=True) |
684 |
| - tm.assert_frame_equal(result, expected) |
685 |
| - |
686 |
| - df = DataFrame([[1, 3, 2], [1, 2, 3]]) |
687 |
| - expected = df.rank(0) / 2.0 |
688 |
| - result = df.rank(0, pct=True) |
689 |
| - tm.assert_frame_equal(result, expected) |
690 |
| - |
691 |
| - df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) |
692 |
| - expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) |
693 |
| - result = df.rank(1, numeric_only=False) |
694 |
| - tm.assert_frame_equal(result, expected) |
695 |
| - |
696 |
| - expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) |
697 |
| - result = df.rank(0, numeric_only=False) |
698 |
| - tm.assert_frame_equal(result, expected) |
699 |
| - |
700 |
| - df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) |
701 |
| - expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) |
702 |
| - result = df.rank(1, numeric_only=False) |
703 |
| - tm.assert_frame_equal(result, expected) |
704 |
| - |
705 |
| - expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) |
706 |
| - result = df.rank(0, numeric_only=False) |
707 |
| - tm.assert_frame_equal(result, expected) |
708 |
| - |
709 |
| - # f7u12, this does not work without extensive workaround |
710 |
| - data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], |
711 |
| - [datetime(2000, 1, 2), datetime(2000, 1, 3), |
712 |
| - datetime(2000, 1, 1)]] |
713 |
| - df = DataFrame(data) |
714 |
| - |
715 |
| - # check the rank |
716 |
| - expected = DataFrame([[2., nan, 1.], |
717 |
| - [2., 3., 1.]]) |
718 |
| - result = df.rank(1, numeric_only=False, ascending=True) |
719 |
| - tm.assert_frame_equal(result, expected) |
720 |
| - |
721 |
| - expected = DataFrame([[1., nan, 2.], |
722 |
| - [2., 1., 3.]]) |
723 |
| - result = df.rank(1, numeric_only=False, ascending=False) |
724 |
| - tm.assert_frame_equal(result, expected) |
725 |
| - |
726 |
| - # mixed-type frames |
727 |
| - self.mixed_frame['datetime'] = datetime.now() |
728 |
| - self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) |
729 |
| - |
730 |
| - result = self.mixed_frame.rank(1) |
731 |
| - expected = self.mixed_frame.rank(1, numeric_only=True) |
732 |
| - tm.assert_frame_equal(result, expected) |
733 |
| - |
734 |
| - df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, |
735 |
| - 1e60, 1e80, 1e-30]}) |
736 |
| - exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) |
737 |
| - tm.assert_frame_equal(df.rank(), exp) |
738 |
| - |
739 |
| - def test_rank_na_option(self): |
740 |
| - tm._skip_if_no_scipy() |
741 |
| - from scipy.stats import rankdata |
742 |
| - |
743 |
| - self.frame['A'][::2] = np.nan |
744 |
| - self.frame['B'][::3] = np.nan |
745 |
| - self.frame['C'][::4] = np.nan |
746 |
| - self.frame['D'][::5] = np.nan |
747 |
| - |
748 |
| - # bottom |
749 |
| - ranks0 = self.frame.rank(na_option='bottom') |
750 |
| - ranks1 = self.frame.rank(1, na_option='bottom') |
751 |
| - |
752 |
| - fvals = self.frame.fillna(np.inf).values |
753 |
| - |
754 |
| - exp0 = np.apply_along_axis(rankdata, 0, fvals) |
755 |
| - exp1 = np.apply_along_axis(rankdata, 1, fvals) |
756 |
| - |
757 |
| - tm.assert_almost_equal(ranks0.values, exp0) |
758 |
| - tm.assert_almost_equal(ranks1.values, exp1) |
759 |
| - |
760 |
| - # top |
761 |
| - ranks0 = self.frame.rank(na_option='top') |
762 |
| - ranks1 = self.frame.rank(1, na_option='top') |
763 |
| - |
764 |
| - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values |
765 |
| - fval1 = self.frame.T |
766 |
| - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T |
767 |
| - fval1 = fval1.fillna(np.inf).values |
768 |
| - |
769 |
| - exp0 = np.apply_along_axis(rankdata, 0, fval0) |
770 |
| - exp1 = np.apply_along_axis(rankdata, 1, fval1) |
771 |
| - |
772 |
| - tm.assert_almost_equal(ranks0.values, exp0) |
773 |
| - tm.assert_almost_equal(ranks1.values, exp1) |
774 |
| - |
775 |
| - # descending |
776 |
| - |
777 |
| - # bottom |
778 |
| - ranks0 = self.frame.rank(na_option='top', ascending=False) |
779 |
| - ranks1 = self.frame.rank(1, na_option='top', ascending=False) |
780 |
| - |
781 |
| - fvals = self.frame.fillna(np.inf).values |
782 |
| - |
783 |
| - exp0 = np.apply_along_axis(rankdata, 0, -fvals) |
784 |
| - exp1 = np.apply_along_axis(rankdata, 1, -fvals) |
785 |
| - |
786 |
| - tm.assert_almost_equal(ranks0.values, exp0) |
787 |
| - tm.assert_almost_equal(ranks1.values, exp1) |
788 |
| - |
789 |
| - # descending |
790 |
| - |
791 |
| - # top |
792 |
| - ranks0 = self.frame.rank(na_option='bottom', ascending=False) |
793 |
| - ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) |
794 |
| - |
795 |
| - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values |
796 |
| - fval1 = self.frame.T |
797 |
| - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T |
798 |
| - fval1 = fval1.fillna(np.inf).values |
799 |
| - |
800 |
| - exp0 = np.apply_along_axis(rankdata, 0, -fval0) |
801 |
| - exp1 = np.apply_along_axis(rankdata, 1, -fval1) |
802 |
| - |
803 |
| - tm.assert_numpy_array_equal(ranks0.values, exp0) |
804 |
| - tm.assert_numpy_array_equal(ranks1.values, exp1) |
805 |
| - |
806 |
| - def test_rank_axis(self): |
807 |
| - # check if using axes' names gives the same result |
808 |
| - df = pd.DataFrame([[2, 1], [4, 3]]) |
809 |
| - tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) |
810 |
| - tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) |
811 |
| - |
812 | 645 | def test_sem(self):
|
813 | 646 | alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
|
814 | 647 | self._check_stat_op('sem', alt)
|
|
0 commit comments