Skip to content

Commit 0990a89

Browse files
committed
ERR: Consistent errors for non-numeric ranking. (pandas-dev#19560)
1 parent 2d8e03b commit 0990a89

File tree

3 files changed

+63
-56
lines changed

3 files changed

+63
-56
lines changed

pandas/core/algorithms.py

+11
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,17 @@ def rank(values, axis=0, method='average', na_option='keep',
859859
Whether or not to display the returned rankings in integer form
860860
(e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
861861
"""
862+
if is_object_dtype(values):
863+
def raise_non_numeric_error():
864+
raise ValueError("pandas.core.algorithms.rank "
865+
"not supported for unordered "
866+
"non-numeric data")
867+
if is_categorical_dtype(values):
868+
if not values.ordered:
869+
raise_non_numeric_error()
870+
else:
871+
raise_non_numeric_error()
872+
862873
if values.ndim == 1:
863874
f, values = _get_data_algo(values, _rank1d_functions)
864875
ranks = f(values, ties_method=method, ascending=ascending,

pandas/tests/frame/test_rank.py

+21-27
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,22 @@ def test_rank2(self):
7171
result = df.rank(0, pct=True)
7272
tm.assert_frame_equal(result, expected)
7373

74-
df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']])
75-
expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]])
76-
result = df.rank(1, numeric_only=False)
77-
tm.assert_frame_equal(result, expected)
74+
# See #19560
75+
error_msg = ("pandas.core.algorithms.rank "
76+
"not supported for unordered "
77+
"non-numeric data")
7878

79-
expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]])
80-
result = df.rank(0, numeric_only=False)
81-
tm.assert_frame_equal(result, expected)
79+
df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']])
80+
with tm.assert_raises_regex(ValueError, error_msg):
81+
df.rank(1, numeric_only=False)
82+
with tm.assert_raises_regex(ValueError, error_msg):
83+
df.rank(0, numeric_only=False)
8284

8385
df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']])
84-
expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]])
85-
result = df.rank(1, numeric_only=False)
86-
tm.assert_frame_equal(result, expected)
87-
88-
expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]])
89-
result = df.rank(0, numeric_only=False)
90-
tm.assert_frame_equal(result, expected)
86+
with tm.assert_raises_regex(ValueError, error_msg):
87+
df.rank(1, numeric_only=False)
88+
with tm.assert_raises_regex(ValueError, error_msg):
89+
df.rank(0, numeric_only=False)
9190

9291
# f7u12, this does not work without extensive workaround
9392
data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)],
@@ -110,9 +109,9 @@ def test_rank2(self):
110109
self.mixed_frame['datetime'] = datetime.now()
111110
self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1)
112111

113-
result = self.mixed_frame.rank(1)
114-
expected = self.mixed_frame.rank(1, numeric_only=True)
115-
tm.assert_frame_equal(result, expected)
112+
# mixed_frame["foo"] is of string-type
113+
with tm.assert_raises_regex(ValueError, error_msg):
114+
self.mixed_frame.rank(1)
116115

117116
df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10,
118117
1e60, 1e80, 1e-30]})
@@ -218,7 +217,7 @@ def test_rank_methods_frame(self):
218217
tm.assert_frame_equal(result, expected)
219218

220219
def test_rank_descending(self):
221-
dtypes = ['O', 'f8', 'i8']
220+
dtypes = ['f8', 'i8']
222221

223222
for dtype, method in product(dtypes, self.results):
224223
if 'i' in dtype:
@@ -230,15 +229,11 @@ def test_rank_descending(self):
230229
expected = (df.max() - df).rank()
231230
assert_frame_equal(res, expected)
232231

233-
if method == 'first' and dtype == 'O':
234-
continue
235-
236232
expected = (df.max() - df).rank(method=method)
237233

238-
if dtype != 'O':
239-
res2 = df.rank(method=method, ascending=False,
240-
numeric_only=True)
241-
assert_frame_equal(res2, expected)
234+
res2 = df.rank(method=method, ascending=False,
235+
numeric_only=True)
236+
assert_frame_equal(res2, expected)
242237

243238
res3 = df.rank(method=method, ascending=False,
244239
numeric_only=False)
@@ -258,11 +253,10 @@ def _check2d(df, expected, method='average', axis=0):
258253
assert_frame_equal(result, exp_df)
259254

260255
dtypes = [None, object]
261-
disabled = set([(object, 'first')])
262256
results = self.results
263257

264258
for method, axis, dtype in product(results, [0, 1], dtypes):
265-
if (dtype, method) in disabled:
259+
if dtype == object:
266260
continue
267261
frame = df if dtype is None else df.astype(dtype)
268262
_check2d(frame, results[method], method=method, axis=axis)

pandas/tests/series/test_rank.py

+31-29
Original file line numberDiff line numberDiff line change
@@ -134,22 +134,27 @@ def test_rank_categorical(self):
134134
assert_series_equal(ordered.rank(), exp)
135135
assert_series_equal(ordered.rank(ascending=False), exp_desc)
136136

137-
# Unordered categoricals should be ranked as objects
137+
# See #19560
138+
error_msg = ("pandas.core.algorithms.rank "
139+
"not supported for unordered "
140+
"non-numeric data")
141+
142+
# Ranking unordered categoricals deprecated per #19560
138143
unordered = Series(['first', 'second', 'third', 'fourth',
139144
'fifth', 'sixth']).astype(
140145
CategoricalDtype(categories=['first', 'second', 'third',
141146
'fourth', 'fifth', 'sixth'],
142147
ordered=False))
143-
exp_unordered = Series([2., 4., 6., 3., 1., 5.])
144-
res = unordered.rank()
145-
assert_series_equal(res, exp_unordered)
148+
149+
with tm.assert_raises_regex(ValueError, error_msg):
150+
unordered.rank()
146151

147152
unordered1 = Series(
148153
[1, 2, 3, 4, 5, 6],
149154
).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
150-
exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
151-
res1 = unordered1.rank()
152-
assert_series_equal(res1, exp_unordered1)
155+
156+
# Won't raise ValueError because the entries are not objects.
157+
unordered1.rank()
153158

154159
# Test na_option for rank data
155160
na_ser = Series(
@@ -213,16 +218,13 @@ def test_rank_signature(self):
213218
'int64',
214219
marks=pytest.mark.xfail(
215220
reason="iNaT is equivalent to minimum value of dtype"
216-
"int64 pending issue #16674")),
217-
([NegInfinity(), '1', 'A', 'BA', 'Ba', 'C', Infinity()],
218-
'object')
221+
"int64 pending issue #16674"))
219222
])
220223
def test_rank_inf(self, contents, dtype):
221224
dtype_na_map = {
222225
'float64': np.nan,
223226
'float32': np.nan,
224-
'int64': iNaT,
225-
'object': None
227+
'int64': iNaT
226228
}
227229
# Insert nans at random positions if underlying dtype has missing
228230
# value. Then adjust the expected order by adding nans accordingly
@@ -249,13 +251,10 @@ def _check(s, expected, method='average'):
249251
result = s.rank(method=method)
250252
tm.assert_series_equal(result, Series(expected))
251253

252-
dtypes = [None, object]
253-
disabled = set([(object, 'first')])
254+
dtypes = [None]
254255
results = self.results
255256

256257
for method, dtype in product(results, dtypes):
257-
if (dtype, method) in disabled:
258-
continue
259258
series = s if dtype is None else s.astype(dtype)
260259
_check(series, results[method], method=method)
261260

@@ -294,7 +293,7 @@ def _check(s, method, na_option, ascending):
294293
for dtype, na_value, pos_inf, neg_inf in dtypes:
295294
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
296295
iseries = Series(in_arr, dtype=dtype)
297-
if (dtype, method) in disabled:
296+
if dtype == 'object':
298297
continue
299298
_check(iseries, method, na_option, ascending)
300299

@@ -330,7 +329,7 @@ def test_rank_methods_series(self):
330329
tm.assert_series_equal(result, expected)
331330

332331
def test_rank_dense_method(self):
333-
dtypes = ['O', 'f8', 'i8']
332+
dtypes = ['f8', 'i8']
334333
in_out = [([1], [1]),
335334
([2], [1]),
336335
([0], [1]),
@@ -348,7 +347,7 @@ def test_rank_dense_method(self):
348347
assert_series_equal(result, expected)
349348

350349
def test_rank_descending(self):
351-
dtypes = ['O', 'f8', 'i8']
350+
dtypes = ['f8', 'i8']
352351

353352
for dtype, method in product(dtypes, self.results):
354353
if 'i' in dtype:
@@ -360,9 +359,6 @@ def test_rank_descending(self):
360359
expected = (s.max() - s).rank()
361360
assert_series_equal(res, expected)
362361

363-
if method == 'first' and dtype == 'O':
364-
continue
365-
366362
expected = (s.max() - s).rank(method=method)
367363
res2 = s.rank(method=method, ascending=False)
368364
assert_series_equal(res2, expected)
@@ -379,9 +375,15 @@ def test_rank_int(self):
379375
def test_rank_object_bug(self):
380376
# GH 13445
381377

382-
# smoke tests
383-
Series([np.nan] * 32).astype(object).rank(ascending=True)
384-
Series([np.nan] * 32).astype(object).rank(ascending=False)
378+
# See #19560
379+
error_msg = ("pandas.core.algorithms.rank "
380+
"not supported for unordered "
381+
"non-numeric data")
382+
383+
with tm.assert_raises_regex(ValueError, error_msg):
384+
Series([np.nan] * 32).astype(object).rank(ascending=True)
385+
with tm.assert_raises_regex(ValueError, error_msg):
386+
Series([np.nan] * 32).astype(object).rank(ascending=False)
385387

386388
def test_rank_modify_inplace(self):
387389
# GH 18521
@@ -396,7 +398,7 @@ def test_rank_modify_inplace(self):
396398

397399
# GH15630, pct should be on 100% basis when method='dense'
398400

399-
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
401+
@pytest.mark.parametrize('dtype', ['f8', 'i8'])
400402
@pytest.mark.parametrize('ser, exp', [
401403
([1], [1.]),
402404
([1, 2], [1. / 2, 2. / 2]),
@@ -414,7 +416,7 @@ def test_rank_dense_pct(dtype, ser, exp):
414416
assert_series_equal(result, expected)
415417

416418

417-
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
419+
@pytest.mark.parametrize('dtype', ['f8', 'i8'])
418420
@pytest.mark.parametrize('ser, exp', [
419421
([1], [1.]),
420422
([1, 2], [1. / 2, 2. / 2]),
@@ -432,7 +434,7 @@ def test_rank_min_pct(dtype, ser, exp):
432434
assert_series_equal(result, expected)
433435

434436

435-
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
437+
@pytest.mark.parametrize('dtype', ['f8', 'i8'])
436438
@pytest.mark.parametrize('ser, exp', [
437439
([1], [1.]),
438440
([1, 2], [1. / 2, 2. / 2]),
@@ -450,7 +452,7 @@ def test_rank_max_pct(dtype, ser, exp):
450452
assert_series_equal(result, expected)
451453

452454

453-
@pytest.mark.parametrize('dtype', ['O', 'f8', 'i8'])
455+
@pytest.mark.parametrize('dtype', ['f8', 'i8'])
454456
@pytest.mark.parametrize('ser, exp', [
455457
([1], [1.]),
456458
([1, 2], [1. / 2, 2. / 2]),

0 commit comments

Comments
 (0)