Skip to content

Commit ca6e471

Browse files
committed
rank functions now handle int64 and missing values
1 parent e1385d8 commit ca6e471

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

pandas/algos.pyx

+37-4
Original file line numberDiff line numberDiff line change
@@ -226,14 +226,27 @@ def rank_1d_int64(object in_arr, ties_method='average', ascending=True,
226226
ndarray[int64_t] sorted_data, values
227227
ndarray[float64_t] ranks
228228
ndarray[int64_t] argsorted
229-
int64_t val
229+
int64_t val, nan_value
230230
float64_t sum_ranks = 0
231+
bint keep_na
231232
int tiebreak = 0
232233
float count = 0.0
233234
tiebreak = tiebreakers[ties_method]
234235

236+
keep_na = na_option == 'keep'
237+
235238
values = np.asarray(in_arr)
236239

240+
if ascending ^ (na_option == 'top'):
241+
nan_value = np.iinfo('int64').max
242+
else:
243+
nan_value = np.iinfo('int64').min
244+
245+
# Unlike floats, which have np.inf, -np.inf, and np.nan, int64 has no
246+
# such special values, so iNaT entries are replaced with nan_value below.
247+
mask = values == iNaT
248+
np.putmask(values, mask, nan_value)
249+
237250
n = len(values)
238251
ranks = np.empty(n, dtype='f8')
239252

@@ -256,6 +269,9 @@ def rank_1d_int64(object in_arr, ties_method='average', ascending=True,
256269
sum_ranks += i + 1
257270
dups += 1
258271
val = sorted_data[i]
272+
if (val == nan_value) and keep_na:
273+
ranks[argsorted[i]] = nan
274+
continue
259275
count += 1.0
260276
if i == n - 1 or fabs(sorted_data[i + 1] - val) > 0:
261277
if tiebreak == TIEBREAK_AVERAGE:
@@ -387,16 +403,30 @@ def rank_2d_int64(object in_arr, axis=0, ties_method='average',
387403
ndarray[float64_t, ndim=2] ranks
388404
ndarray[int64_t, ndim=2] argsorted
389405
ndarray[int64_t, ndim=2, cast=True] values
390-
int64_t val
406+
int64_t val, nan_value
391407
float64_t sum_ranks = 0
408+
bint keep_na = 0
392409
int tiebreak = 0
393410
float count = 0.0
394411
tiebreak = tiebreakers[ties_method]
395412

413+
keep_na = na_option == 'keep'
414+
415+
in_arr = np.asarray(in_arr)
416+
396417
if axis == 0:
397-
values = np.asarray(in_arr).T
418+
values = in_arr.T.copy()
419+
else:
420+
values = in_arr.copy()
421+
422+
if ascending ^ (na_option == 'top'):
423+
nan_value = np.iinfo('int64').max
398424
else:
399-
values = np.asarray(in_arr)
425+
nan_value = np.iinfo('int64').min
426+
427+
# Unlike floats, which have np.inf, -np.inf, and np.nan, int64 has no
428+
# such special values, so iNaT entries are replaced with nan_value below.
429+
np.putmask(values, values == iNaT, nan_value)
400430

401431
n, k = (<object> values).shape
402432
ranks = np.empty((n, k), dtype='f8')
@@ -423,6 +453,9 @@ def rank_2d_int64(object in_arr, axis=0, ties_method='average',
423453
sum_ranks += j + 1
424454
dups += 1
425455
val = values[i, j]
456+
if val == nan_value and keep_na:
457+
ranks[i, argsorted[i, j]] = nan
458+
continue
426459
count += 1.0
427460
if j == k - 1 or fabs(values[i, j + 1] - val) > FP_ERR:
428461
if tiebreak == TIEBREAK_AVERAGE:

pandas/tests/frame/test_analytics.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,12 @@ def test_rank2(self):
787787
# check the rank
788788
expected = DataFrame([[2., nan, 1.],
789789
[2., 3., 1.]])
790-
result = df.rank(1, numeric_only=False)
790+
result = df.rank(1, numeric_only=False, ascending=True)
791+
assert_frame_equal(result, expected)
792+
793+
expected = DataFrame([[1., nan, 2.],
794+
[2., 1., 3.]])
795+
result = df.rank(1, numeric_only=False, ascending=False)
791796
assert_frame_equal(result, expected)
792797

793798
# mixed-type frames

0 commit comments

Comments
 (0)