|
17 | 17 | from pandas._libs.hashtable import unique_label_indices
|
18 | 18 | from pandas.compat import lrange, range
|
19 | 19 | import pandas.core.algorithms as algos
|
| 20 | +from pandas.core.common import _asarray_tuplesafe |
20 | 21 | import pandas.util.testing as tm
|
21 | 22 | from pandas.compat.numpy import np_array_datetime64_compat
|
22 | 23 | from pandas.util.testing import assert_almost_equal
|
@@ -190,6 +191,33 @@ def test_factorize_nan(self):
|
190 | 191 | assert len(set(key)) == len(set(expected))
|
191 | 192 | tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
|
192 | 193 |
|
@pytest.mark.parametrize("data,expected_label,expected_level", [
    # 2-tuples mixed with a plain string value
    (
        [(1, 1), (1, 2), (0, 0), (1, 2), 'nonsense'],
        [0, 1, 2, 1, 3],
        [(1, 1), (1, 2), (0, 0), 'nonsense']
    ),
    # tuples of differing lengths (a 3-tuple among 2-tuples)
    (
        [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)],
        [0, 1, 2, 1, 3],
        [(1, 1), (1, 2), (0, 0), (1, 2, 3)]
    ),
    # only 2-tuples, with one repeated entry
    (
        [(1, 1), (1, 2), (0, 0), (1, 2)],
        [0, 1, 2, 1],
        [(1, 1), (1, 2), (0, 0)]
    )
])
def test_factorize_tuple_list(self, data, expected_label, expected_level):
    """Check ``pd.factorize`` on a list whose elements are tuples (GH9454).

    Parameters
    ----------
    data : list
        Input passed to ``pd.factorize``.
    expected_label : list of int
        Labels the first element of the result must equal, compared as an
        ``np.intp`` array.
    expected_level : list
        Values the second element of the result must equal, compared as an
        object-dtype array.
    """
    labels, uniques = pd.factorize(data)

    want_labels = np.array(expected_label, dtype=np.intp)
    tm.assert_numpy_array_equal(labels, want_labels)

    # NOTE(review): presumably _asarray_tuplesafe keeps each tuple as a
    # single element of a 1-D object array -- confirm against its definition.
    want_uniques = _asarray_tuplesafe(expected_level, dtype=object)
    tm.assert_numpy_array_equal(uniques, want_uniques)

| 220 | + |
193 | 221 | def test_complex_sorting(self):
|
194 | 222 | # gh 12666 - check no segfault
|
195 | 223 | # Test not valid numpy versions older than 1.11
|
|
0 commit comments