BUG: use infinity sentinel in #742, but having some datetime woes

wesm · wesm · commit a4e90e0ea0dc · 2012-02-05T17:54:15.000-05:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3591,7 +3591,7 @@ def clip_lower(self, threshold):
         """
         return self.apply(lambda x: x.clip_lower(threshold))
 
-    def rank(self, axis=0, numeric_only=True):
+    def rank(self, axis=0, numeric_only=None):
         """
         Compute numerical data ranks (1 through n) along axis. Equal values are
         assigned a rank that is the average of the ranks of those values
@@ -3607,6 +3607,16 @@ def rank(self, axis=0, numeric_only=True):
         -------
         ranks : DataFrame
         """
+        if numeric_only is None:
+            try:
+                values = self.values
+                if issubclass(values.dtype.type, np.floating):
+                    ranks = lib.rank_2d_float64(values, axis=axis)
+                else:
+                    ranks = lib.rank_2d_generic(values, axis=axis)
+            except TypeError:
+                numeric_only = True
+
         if numeric_only:
             data = self._get_numeric_data()
             ranks = lib.rank_2d_float64(data.values.astype('f8'), axis=axis)
diff --git a/pandas/src/stats.pyx b/pandas/src/stats.pyx
@@ -106,9 +106,11 @@ def rank_1d_generic(object in_arr):
 
     values = np.asarray(in_arr).copy()
 
-    nan_value = np.inf
+    if values.dtype != np.object_:
+        values = values.astype('O')
 
-    mask = isnullobj(values.astype('O'))
+    nan_value = Infinity() # always greater than everything
+    mask = isnullobj(values)
     np.putmask(values, mask, nan_value)
 
     n = len(values)
@@ -123,7 +125,7 @@ def rank_1d_generic(object in_arr):
         sum_ranks += i + 1
         dups += 1
         val = util.get_value_at(sorted_data, i)
-        if val == nan_value:
+        if val is nan_value:
             ranks[argsorted[i]] = nan
             continue
         if (i == n - 1 or
@@ -133,6 +135,18 @@ def rank_1d_generic(object in_arr):
             sum_ranks = dups = 0
     return ranks
 
+class Infinity(object):
+    # Sentinel that orders after any other object; rank_*_generic fills nulls with it.
+    return_false = lambda self, other: False
+    return_true = lambda self, other: True
+    __lt__ = return_false  # never sorts before another value
+    __le__ = return_false
+    __eq__ = return_false  # never equal, even to itself; callers test with `is`
+    __ne__ = return_true
+    __gt__ = return_true  # always sorts after another value
+    __ge__ = return_true
+    __cmp__ = return_false  # NOTE(review): False is 0, i.e. "equal" to cmp(); confirm intent
+
 def rank_2d_generic(object in_arr, axis=0):
     """
     Fast NaN-friendly version of scipy.stats.rankdata
@@ -154,7 +168,10 @@ def rank_2d_generic(object in_arr, axis=0):
     else:
         values = in_arr.copy()
 
-    nan_value = -np.inf # subtlety, infs are ranked before alphanumeric!
+    if values.dtype != np.object_:
+        values = values.astype('O')
+
+    nan_value = Infinity() # always greater than everything
     mask = isnullobj2d(values)
     np.putmask(values, mask, nan_value)
 
@@ -167,7 +184,7 @@ def rank_2d_generic(object in_arr, axis=0):
         dups = sum_ranks = infs = 0
         for j in range(k):
             val = values[i, j]
-            if val == nan_value:
+            if val is nan_value:
                 ranks[i, argsorted[i, j]] = nan
                 infs += 1
                 continue
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -4166,6 +4166,8 @@ def test_rank(self):
         assert_almost_equal(ranks1.values, exp1)
 
     def test_rank2(self):
+        from datetime import datetime
+
         df = DataFrame([['b','c','a'],['a','c','b']])
         expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]])
         result = df.rank(1, numeric_only=False)
@@ -4184,6 +4186,15 @@ def test_rank2(self):
         result = df.rank(0, numeric_only=False)
         assert_frame_equal(result, expected)
 
+        data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)],
+                [datetime(2000, 1, 2), datetime(2000, 1, 3),
+                 datetime(2000, 1, 1)]]
+        df = DataFrame(data)
+        expected = DataFrame([[2., nan, 1.],
+                              [2., 3., 1.]])
+        result = df.rank(1, numeric_only=False)
+        assert_frame_equal(result, expected)
+
     def test_describe(self):
         desc = self.tsframe.describe()
         desc = self.mixed_frame.describe()