Skip to content

Commit e0b37f9

Browse files
rouzazari authored and jreback committed
CLN: Cleanup tests for .rank()
closes #15640

Author: Rouz Azari <[email protected]>

Closes #15658 from rouzazari/GH15640 and squashes the following commits:

d0a2abc [Rouz Azari] Fixed linting error with datetime.datetime import
9580af0 [Rouz Azari] CLN: Cleanup tests for .rank()
1 parent 5eac08a commit e0b37f9

File tree

5 files changed: +592 -554 lines changed

pandas/tests/frame/test_analytics.py

+1 -168
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from __future__ import print_function
44

5-
from datetime import timedelta, datetime
5+
from datetime import timedelta
66
from distutils.version import LooseVersion
77
import sys
88
import pytest
@@ -642,173 +642,6 @@ def test_cumprod(self):
642642
df.cumprod(0)
643643
df.cumprod(1)
644644

645-
def test_rank(self):
646-
tm._skip_if_no_scipy()
647-
from scipy.stats import rankdata
648-
649-
self.frame['A'][::2] = np.nan
650-
self.frame['B'][::3] = np.nan
651-
self.frame['C'][::4] = np.nan
652-
self.frame['D'][::5] = np.nan
653-
654-
ranks0 = self.frame.rank()
655-
ranks1 = self.frame.rank(1)
656-
mask = np.isnan(self.frame.values)
657-
658-
fvals = self.frame.fillna(np.inf).values
659-
660-
exp0 = np.apply_along_axis(rankdata, 0, fvals)
661-
exp0[mask] = np.nan
662-
663-
exp1 = np.apply_along_axis(rankdata, 1, fvals)
664-
exp1[mask] = np.nan
665-
666-
tm.assert_almost_equal(ranks0.values, exp0)
667-
tm.assert_almost_equal(ranks1.values, exp1)
668-
669-
# integers
670-
df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4)))
671-
672-
result = df.rank()
673-
exp = df.astype(float).rank()
674-
tm.assert_frame_equal(result, exp)
675-
676-
result = df.rank(1)
677-
exp = df.astype(float).rank(1)
678-
tm.assert_frame_equal(result, exp)
679-
680-
def test_rank2(self):
681-
df = DataFrame([[1, 3, 2], [1, 2, 3]])
682-
expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0
683-
result = df.rank(1, pct=True)
684-
tm.assert_frame_equal(result, expected)
685-
686-
df = DataFrame([[1, 3, 2], [1, 2, 3]])
687-
expected = df.rank(0) / 2.0
688-
result = df.rank(0, pct=True)
689-
tm.assert_frame_equal(result, expected)
690-
691-
df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']])
692-
expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]])
693-
result = df.rank(1, numeric_only=False)
694-
tm.assert_frame_equal(result, expected)
695-
696-
expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]])
697-
result = df.rank(0, numeric_only=False)
698-
tm.assert_frame_equal(result, expected)
699-
700-
df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']])
701-
expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]])
702-
result = df.rank(1, numeric_only=False)
703-
tm.assert_frame_equal(result, expected)
704-
705-
expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]])
706-
result = df.rank(0, numeric_only=False)
707-
tm.assert_frame_equal(result, expected)
708-
709-
# f7u12, this does not work without extensive workaround
710-
data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)],
711-
[datetime(2000, 1, 2), datetime(2000, 1, 3),
712-
datetime(2000, 1, 1)]]
713-
df = DataFrame(data)
714-
715-
# check the rank
716-
expected = DataFrame([[2., nan, 1.],
717-
[2., 3., 1.]])
718-
result = df.rank(1, numeric_only=False, ascending=True)
719-
tm.assert_frame_equal(result, expected)
720-
721-
expected = DataFrame([[1., nan, 2.],
722-
[2., 1., 3.]])
723-
result = df.rank(1, numeric_only=False, ascending=False)
724-
tm.assert_frame_equal(result, expected)
725-
726-
# mixed-type frames
727-
self.mixed_frame['datetime'] = datetime.now()
728-
self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1)
729-
730-
result = self.mixed_frame.rank(1)
731-
expected = self.mixed_frame.rank(1, numeric_only=True)
732-
tm.assert_frame_equal(result, expected)
733-
734-
df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10,
735-
1e60, 1e80, 1e-30]})
736-
exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]})
737-
tm.assert_frame_equal(df.rank(), exp)
738-
739-
def test_rank_na_option(self):
740-
tm._skip_if_no_scipy()
741-
from scipy.stats import rankdata
742-
743-
self.frame['A'][::2] = np.nan
744-
self.frame['B'][::3] = np.nan
745-
self.frame['C'][::4] = np.nan
746-
self.frame['D'][::5] = np.nan
747-
748-
# bottom
749-
ranks0 = self.frame.rank(na_option='bottom')
750-
ranks1 = self.frame.rank(1, na_option='bottom')
751-
752-
fvals = self.frame.fillna(np.inf).values
753-
754-
exp0 = np.apply_along_axis(rankdata, 0, fvals)
755-
exp1 = np.apply_along_axis(rankdata, 1, fvals)
756-
757-
tm.assert_almost_equal(ranks0.values, exp0)
758-
tm.assert_almost_equal(ranks1.values, exp1)
759-
760-
# top
761-
ranks0 = self.frame.rank(na_option='top')
762-
ranks1 = self.frame.rank(1, na_option='top')
763-
764-
fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values
765-
fval1 = self.frame.T
766-
fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T
767-
fval1 = fval1.fillna(np.inf).values
768-
769-
exp0 = np.apply_along_axis(rankdata, 0, fval0)
770-
exp1 = np.apply_along_axis(rankdata, 1, fval1)
771-
772-
tm.assert_almost_equal(ranks0.values, exp0)
773-
tm.assert_almost_equal(ranks1.values, exp1)
774-
775-
# descending
776-
777-
# bottom
778-
ranks0 = self.frame.rank(na_option='top', ascending=False)
779-
ranks1 = self.frame.rank(1, na_option='top', ascending=False)
780-
781-
fvals = self.frame.fillna(np.inf).values
782-
783-
exp0 = np.apply_along_axis(rankdata, 0, -fvals)
784-
exp1 = np.apply_along_axis(rankdata, 1, -fvals)
785-
786-
tm.assert_almost_equal(ranks0.values, exp0)
787-
tm.assert_almost_equal(ranks1.values, exp1)
788-
789-
# descending
790-
791-
# top
792-
ranks0 = self.frame.rank(na_option='bottom', ascending=False)
793-
ranks1 = self.frame.rank(1, na_option='bottom', ascending=False)
794-
795-
fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values
796-
fval1 = self.frame.T
797-
fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T
798-
fval1 = fval1.fillna(np.inf).values
799-
800-
exp0 = np.apply_along_axis(rankdata, 0, -fval0)
801-
exp1 = np.apply_along_axis(rankdata, 1, -fval1)
802-
803-
tm.assert_numpy_array_equal(ranks0.values, exp0)
804-
tm.assert_numpy_array_equal(ranks1.values, exp1)
805-
806-
def test_rank_axis(self):
807-
# check if using axes' names gives the same result
808-
df = pd.DataFrame([[2, 1], [4, 3]])
809-
tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index'))
810-
tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns'))
811-
812645
def test_sem(self):
813646
alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
814647
self._check_stat_op('sem', alt)

0 commit comments

Comments
 (0)