|
4 | 4 | import numpy as np
|
5 | 5 | from numpy.random import RandomState
|
6 | 6 |
|
7 |
| -from pandas.core.api import Series, Categorical |
| 7 | +from pandas.core.api import Series, Categorical, CategoricalIndex |
8 | 8 | import pandas as pd
|
9 | 9 |
|
10 | 10 | import pandas.core.algorithms as algos
|
@@ -246,9 +246,15 @@ def test_value_counts(self):
|
246 | 246 | factor = cut(arr, 4)
|
247 | 247 |
|
248 | 248 | tm.assertIsInstance(factor, Categorical)
|
249 |
| - |
250 | 249 | result = algos.value_counts(factor)
|
251 |
| - expected = algos.value_counts(np.asarray(factor)) |
| 250 | + cats = ['(-1.194, -0.535]', |
| 251 | + '(-0.535, 0.121]', |
| 252 | + '(0.121, 0.777]', |
| 253 | + '(0.777, 1.433]' |
| 254 | + ] |
| 255 | + expected_index = CategoricalIndex(cats, cats, ordered=True) |
| 256 | + expected = Series([1, 1, 1, 1], |
| 257 | + index=expected_index) |
252 | 258 | tm.assert_series_equal(result.sort_index(), expected.sort_index())
|
253 | 259 |
|
254 | 260 | def test_value_counts_bins(self):
|
@@ -288,6 +294,57 @@ def test_value_counts_nat(self):
|
288 | 294 | tm.assert_series_equal(algos.value_counts(dt), exp_dt)
|
289 | 295 | # TODO same for (timedelta)
|
290 | 296 |
|
def test_categorical(self):
    """Check value_counts on a categorical Series, unordered then ordered.

    In both cases the result is compared, including index type, against
    a Series indexed by a CategoricalIndex over 'a', 'b' and 'c'.
    """
    unordered = Series(pd.Categorical(list('aaabbc')))
    counts_index = pd.CategoricalIndex(['a', 'b', 'c'])
    expected = pd.Series([3, 2, 1], index=counts_index)
    tm.assert_series_equal(unordered.value_counts(), expected,
                           check_index_type=True)

    # preserve order?
    ordered = unordered.cat.as_ordered()
    expected.index = expected.index.as_ordered()
    tm.assert_series_equal(ordered.value_counts(), expected,
                           check_index_type=True)
| 308 | + |
def test_categorical_nans(self):
    """Check value_counts on categorical data holding one missing value.

    The default call and ``dropna=False`` are exercised first on an
    unordered categorical, then on an ordered categorical whose
    categories are declared as ``['b', 'a', 'c']``.
    """
    letters = list('aaaaabbbcc')

    # One 'a' is overwritten with NaN: counts become 4, 3, 2 and 1 (nan).
    unordered = Series(pd.Categorical(letters))
    unordered.iloc[1] = np.nan

    index_without_nan = pd.CategoricalIndex(['a', 'b', 'c'],
                                            categories=['a', 'b', 'c'])
    tm.assert_series_equal(unordered.value_counts(),
                           pd.Series([4, 3, 2], index=index_without_nan),
                           check_index_type=True)

    index_with_nan = pd.CategoricalIndex(['a', 'b', 'c', np.nan])
    tm.assert_series_equal(unordered.value_counts(dropna=False),
                           pd.Series([4, 3, 2, 1], index=index_with_nan),
                           check_index_type=True)

    # out of order
    ordered = Series(pd.Categorical(letters, ordered=True,
                                    categories=['b', 'a', 'c']))
    ordered.iloc[1] = np.nan

    index_without_nan = pd.CategoricalIndex(['a', 'b', 'c'],
                                            categories=['b', 'a', 'c'],
                                            ordered=True)
    tm.assert_series_equal(ordered.value_counts(),
                           pd.Series([4, 3, 2], index=index_without_nan),
                           check_index_type=True)

    index_with_nan = pd.CategoricalIndex(['a', 'b', 'c', np.nan],
                                         categories=['b', 'a', 'c'],
                                         ordered=True)
    tm.assert_series_equal(ordered.value_counts(dropna=False),
                           pd.Series([4, 3, 2, 1], index=index_with_nan),
                           check_index_type=True)
| 337 | + |
def test_categorical_zeroes(self):
    """Check that value_counts keeps a declared but unobserved category.

    The input declares categories 'a' through 'd' but never contains
    'd'; the expected result still lists 'd', with a count of 0.
    """
    # keep the `d` category with 0
    categories = list('abcd')
    s = Series(pd.Categorical(list('bbbaac'), categories=categories,
                              ordered=True))
    result = s.value_counts()

    # Build the expected index as a CategoricalIndex directly, as the
    # other categorical tests in this file do, rather than passing a bare
    # Categorical and relying on Series to coerce it.
    expected_index = pd.CategoricalIndex(['b', 'a', 'c', 'd'],
                                         categories=categories,
                                         ordered=True)
    expected = Series([3, 2, 1, 0], index=expected_index)
    tm.assert_series_equal(result, expected, check_index_type=True)
| 346 | + |
| 347 | + |
291 | 348 | def test_dropna(self):
|
292 | 349 | # https://github.com/pydata/pandas/issues/9443#issuecomment-73719328
|
293 | 350 |
|
|
0 commit comments