Skip to content

Commit bc2790b

Browse files
committed
ENH: tweak na_option checking per #2159
1 parent 72f0758 commit bc2790b

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ pandas 0.9.1
3030
**New features**
3131

3232
- Can specify multiple sort orders in DataFrame/Series.sort/sort_index (#928)
33+
- New `top` and `bottom` values for `na_option` to control where NAs are placed in rank (#1508, #2159)
3334

3435
**Improvements to existing features**
3536

@@ -50,6 +51,7 @@ pandas 0.9.1
5051
- Fix Series.str failures when using pipe pattern '|' (#2119)
5152
- Fix pretty-printing of dict entries in Series, DataFrame (#2144)
5253
- Cast other datetime64 values to nanoseconds in DataFrame ctor (#2095)
54+
- Alias Timestamp.astimezone to tz_convert, so that it yields a Timestamp (#2060)
5355
5456
pandas 0.9.0
5557
============

pandas/src/stats.pyx

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,13 @@ def rank_1d_float64(object in_arr, ties_method='average', ascending=True,
8383
float64_t val, nan_value
8484
float64_t sum_ranks = 0
8585
int tiebreak = 0
86+
bint keep_na = 0
8687
tiebreak = tiebreakers[ties_method]
8788

8889
values = np.asarray(in_arr).copy()
8990

91+
keep_na = na_option == 'keep'
92+
9093
if ascending ^ (na_option == 'top'):
9194
nan_value = np.inf
9295
else:
@@ -116,7 +119,7 @@ def rank_1d_float64(object in_arr, ties_method='average', ascending=True,
116119
sum_ranks += i + 1
117120
dups += 1
118121
val = sorted_data[i]
119-
if (val == nan_value) and (na_option == 'keep'):
122+
if (val == nan_value) and keep_na:
120123
ranks[argsorted[i]] = nan
121124
continue
122125
if i == n - 1 or fabs(sorted_data[i + 1] - val) > FP_ERR:
@@ -212,8 +215,12 @@ def rank_2d_float64(object in_arr, axis=0, ties_method='average',
212215
float64_t val, nan_value
213216
float64_t sum_ranks = 0
214217
int tiebreak = 0
218+
bint keep_na = 0
219+
215220
tiebreak = tiebreakers[ties_method]
216221

222+
keep_na = na_option == 'keep'
223+
217224
in_arr = np.asarray(in_arr)
218225

219226
if axis == 0:
@@ -251,7 +258,7 @@ def rank_2d_float64(object in_arr, axis=0, ties_method='average',
251258
sum_ranks += j + 1
252259
dups += 1
253260
val = values[i, j]
254-
if val == nan_value and na_option == 'keep':
261+
if val == nan_value and keep_na:
255262
ranks[i, argsorted[i, j]] = nan
256263
continue
257264
if j == k - 1 or fabs(values[i, j + 1] - val) > FP_ERR:
@@ -360,8 +367,12 @@ def rank_1d_generic(object in_arr, bint retry=1, ties_method='average',
360367
object val, nan_value
361368
float64_t sum_ranks = 0
362369
int tiebreak = 0
370+
bint keep_na = 0
371+
363372
tiebreak = tiebreakers[ties_method]
364373

374+
keep_na = na_option == 'keep'
375+
365376
values = np.array(in_arr, copy=True)
366377

367378
if values.dtype != np.object_:
@@ -403,7 +414,7 @@ def rank_1d_generic(object in_arr, bint retry=1, ties_method='average',
403414
sum_ranks += i + 1
404415
dups += 1
405416
val = util.get_value_at(sorted_data, i)
406-
if val is nan_value and na_option=='keep':
417+
if val is nan_value and keep_na:
407418
ranks[argsorted[i]] = nan
408419
continue
409420
if (i == n - 1 or
@@ -465,8 +476,12 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
465476
object val, nan_value
466477
float64_t sum_ranks = 0
467478
int tiebreak = 0
479+
bint keep_na = 0
480+
468481
tiebreak = tiebreakers[ties_method]
469482

483+
keep_na = na_option == 'keep'
484+
470485
in_arr = np.asarray(in_arr)
471486

472487
if axis == 0:
@@ -512,7 +527,7 @@ def rank_2d_generic(object in_arr, axis=0, ties_method='average',
512527
dups = sum_ranks = infs = 0
513528
for j in range(k):
514529
val = values[i, j]
515-
if val is nan_value and na_option == 'keep':
530+
if val is nan_value and keep_na:
516531
ranks[i, argsorted[i, j]] = nan
517532
infs += 1
518533
continue

0 commit comments

Comments
 (0)