Skip to content

Commit 8523cfa

Browse files
authored
Merge pull request #82 from pandas-dev/master
TST: refactored test_factorize (pandas-dev#32311)
2 parents 0f93848 + 3d08aa5 commit 8523cfa

File tree

1 file changed

+17
-56
lines changed

1 file changed

+17
-56
lines changed

pandas/tests/base/test_ops.py

+17 -56
Original file line number | Diff line number | Diff line change
@@ -547,66 +547,27 @@ def test_value_counts_datetime64(self, index_or_series):
547 547
result2 = td2.value_counts()
548 548
tm.assert_series_equal(result2, expected_s)
549 549

550-
def test_factorize(self):
551-
for orig in self.objs:
552-
o = orig.copy()
553-
554-
if isinstance(o, Index) and o.is_boolean():
555-
exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp)
556-
exp_uniques = o
557-
exp_uniques = Index([False, True])
558-
else:
559-
exp_arr = np.array(range(len(o)), dtype=np.intp)
560-
exp_uniques = o
561-
codes, uniques = o.factorize()
562-
563-
tm.assert_numpy_array_equal(codes, exp_arr)
564-
if isinstance(o, Series):
565-
tm.assert_index_equal(uniques, Index(orig), check_names=False)
566-
else:
567-
# factorize explicitly resets name
568-
tm.assert_index_equal(uniques, exp_uniques, check_names=False)
569-
570-
def test_factorize_repeated(self):
571-
for orig in self.objs:
572-
o = orig.copy()
550+
@pytest.mark.parametrize("sort", [True, False])
551+
def test_factorize(self, index_or_series_obj, sort):
552+
obj = index_or_series_obj
553+
result_codes, result_uniques = obj.factorize(sort=sort)
573 554

574-
# don't test boolean
575-
if isinstance(o, Index) and o.is_boolean():
576-
continue
555+
constructor = pd.Index
556+
if isinstance(obj, pd.MultiIndex):
557+
constructor = pd.MultiIndex.from_tuples
558+
expected_uniques = constructor(obj.unique())
577 559

578-
# sort by value, and create duplicates
579-
if isinstance(o, Series):
580-
o = o.sort_values()
581-
n = o.iloc[5:].append(o)
582-
else:
583-
indexer = o.argsort()
584-
o = o.take(indexer)
585-
n = o[5:].append(o)
586-
587-
exp_arr = np.array(
588-
[5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp
589-
)
590-
codes, uniques = n.factorize(sort=True)
591-
592-
tm.assert_numpy_array_equal(codes, exp_arr)
593-
if isinstance(o, Series):
594-
tm.assert_index_equal(
595-
uniques, Index(orig).sort_values(), check_names=False
596-
)
597-
else:
598-
tm.assert_index_equal(uniques, o, check_names=False)
560+
if sort:
561+
expected_uniques = expected_uniques.sort_values()
599 562

600-
exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], np.intp)
601-
codes, uniques = n.factorize(sort=False)
602-
tm.assert_numpy_array_equal(codes, exp_arr)
563+
# construct an integer ndarray so that
564+
# `expected_uniques.take(expected_codes)` is equal to `obj`
565+
expected_uniques_list = list(expected_uniques)
566+
expected_codes = [expected_uniques_list.index(val) for val in obj]
567+
expected_codes = np.asarray(expected_codes, dtype=np.intp)
603 568

604-
if isinstance(o, Series):
605-
expected = Index(o.iloc[5:10].append(o.iloc[:5]))
606-
tm.assert_index_equal(uniques, expected, check_names=False)
607-
else:
608-
expected = o[5:10].append(o[:5])
609-
tm.assert_index_equal(uniques, expected, check_names=False)
569+
tm.assert_numpy_array_equal(result_codes, expected_codes)
570+
tm.assert_index_equal(result_uniques, expected_uniques)
610 571

611 572
def test_duplicated_drop_duplicates_index(self):
612 573
# GH 4060

0 commit comments

Comments (0)