Skip to content

Commit a4e90e0

Browse files
committed
BUG: use infinity sentinel in #742, but having some datetime woes
1 parent 70c7509 commit a4e90e0

File tree

3 files changed

+44
-6
lines changed

3 files changed

+44
-6
lines changed

pandas/core/frame.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -3591,7 +3591,7 @@ def clip_lower(self, threshold):
35913591
"""
35923592
return self.apply(lambda x: x.clip_lower(threshold))
35933593

3594-
def rank(self, axis=0, numeric_only=True):
3594+
def rank(self, axis=0, numeric_only=None):
35953595
"""
35963596
Compute numerical data ranks (1 through n) along axis. Equal values are
35973597
assigned a rank that is the average of the ranks of those values
@@ -3607,6 +3607,16 @@ def rank(self, axis=0, numeric_only=True):
36073607
-------
36083608
ranks : DataFrame
36093609
"""
3610+
if numeric_only is None:
3611+
try:
3612+
values = self.values
3613+
if issubclass(values.dtype.type, np.floating):
3614+
ranks = lib.rank_2d_float64(values, axis=axis)
3615+
else:
3616+
ranks = lib.rank_2d_generic(values, axis=axis)
3617+
except TypeError:
3618+
numeric_only = True
3619+
36103620
if numeric_only:
36113621
data = self._get_numeric_data()
36123622
ranks = lib.rank_2d_float64(data.values.astype('f8'), axis=axis)

pandas/src/stats.pyx

+22-5
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,11 @@ def rank_1d_generic(object in_arr):
106106

107107
values = np.asarray(in_arr).copy()
108108

109-
nan_value = np.inf
109+
if values.dtype != np.object_:
110+
values = values.astype('O')
110111

111-
mask = isnullobj(values.astype('O'))
112+
nan_value = Infinity() # always greater than everything
113+
mask = isnullobj(values)
112114
np.putmask(values, mask, nan_value)
113115

114116
n = len(values)
@@ -123,7 +125,7 @@ def rank_1d_generic(object in_arr):
123125
sum_ranks += i + 1
124126
dups += 1
125127
val = util.get_value_at(sorted_data, i)
126-
if val == nan_value:
128+
if val is nan_value:
127129
ranks[argsorted[i]] = nan
128130
continue
129131
if (i == n - 1 or
@@ -133,6 +135,18 @@ def rank_1d_generic(object in_arr):
133135
sum_ranks = dups = 0
134136
return ranks
135137

138+
class Infinity(object):
    """
    Sentinel object that compares greater than every other value.

    The ranking routines substitute a single instance of this class for
    missing entries in object arrays, so those entries sort after all real
    values whatever their type, and are then recognised by identity.

    Comparisons against any non-Infinity value behave as "self is larger".
    Comparisons between two Infinity objects treat them as equal, which
    keeps the rich comparisons mutually consistent (``x == x``, ``x <= x``,
    and neither ``x > x`` nor ``x != x``).
    """

    # Kept as public helpers for backward compatibility with code that
    # referenced them as class attributes.
    def return_false(self, other):
        return False

    def return_true(self, other):
        return True

    # Nothing is ever greater than Infinity, and Infinity is never smaller
    # than anything, so these two answers do not depend on ``other``.
    __lt__ = return_false
    __ge__ = return_true

    def __le__(self, other):
        # Only another Infinity can be "at least as large".
        return isinstance(other, Infinity)

    def __eq__(self, other):
        return isinstance(other, Infinity)

    def __ne__(self, other):
        return not isinstance(other, Infinity)

    def __gt__(self, other):
        return not isinstance(other, Infinity)

    def __cmp__(self, other):
        # Python 2 fallback protocol: 0 means "equal", positive means
        # "greater".  Must agree with the rich comparisons above.
        return 0 if isinstance(other, Infinity) else 1

    def __hash__(self):
        # Defining __eq__ would otherwise make instances unhashable on
        # Python 3; all Infinity objects compare equal, so share one hash.
        return hash(Infinity)
149+
136150
def rank_2d_generic(object in_arr, axis=0):
137151
"""
138152
Fast NaN-friendly version of scipy.stats.rankdata
@@ -154,7 +168,10 @@ def rank_2d_generic(object in_arr, axis=0):
154168
else:
155169
values = in_arr.copy()
156170

157-
nan_value = -np.inf # subtlety, infs are ranked before alphanumeric!
171+
if values.dtype != np.object_:
172+
values = values.astype('O')
173+
174+
nan_value = Infinity() # always greater than everything
158175
mask = isnullobj2d(values)
159176
np.putmask(values, mask, nan_value)
160177

@@ -167,7 +184,7 @@ def rank_2d_generic(object in_arr, axis=0):
167184
dups = sum_ranks = infs = 0
168185
for j in range(k):
169186
val = values[i, j]
170-
if val == nan_value:
187+
if val is nan_value:
171188
ranks[i, argsorted[i, j]] = nan
172189
infs += 1
173190
continue

pandas/tests/test_frame.py

+11
Original file line numberDiff line numberDiff line change
@@ -4166,6 +4166,8 @@ def test_rank(self):
41664166
assert_almost_equal(ranks1.values, exp1)
41674167

41684168
def test_rank2(self):
4169+
from datetime import datetime
4170+
41694171
df = DataFrame([['b','c','a'],['a','c','b']])
41704172
expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]])
41714173
result = df.rank(1, numeric_only=False)
@@ -4184,6 +4186,15 @@ def test_rank2(self):
41844186
result = df.rank(0, numeric_only=False)
41854187
assert_frame_equal(result, expected)
41864188

4189+
data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)],
4190+
[datetime(2000, 1, 2), datetime(2000, 1, 3),
4191+
datetime(2000, 1, 1)]]
4192+
df = DataFrame(data)
4193+
expected = DataFrame([[2., nan, 1.],
4194+
[2., 3., 1.]])
4195+
result = df.rank(1, numeric_only=False)
4196+
assert_frame_equal(result, expected)
4197+
41874198
def test_describe(self):
41884199
desc = self.tsframe.describe()
41894200
desc = self.mixed_frame.describe()

0 commit comments

Comments
 (0)