|
4 | 4 | import numpy as np
|
5 | 5 | from numpy.random import RandomState
|
6 | 6 |
|
7 |
| -from pandas.core.api import Series, Categorical |
| 7 | +from pandas.core.api import Series, Categorical, CategoricalIndex |
8 | 8 | import pandas as pd
|
9 | 9 |
|
10 | 10 | import pandas.core.algorithms as algos
|
@@ -290,9 +290,15 @@ def test_value_counts(self):
|
290 | 290 | factor = cut(arr, 4)
|
291 | 291 |
|
292 | 292 | tm.assertIsInstance(factor, Categorical)
|
293 |
| - |
294 | 293 | result = algos.value_counts(factor)
|
295 |
| - expected = algos.value_counts(np.asarray(factor)) |
| 294 | + cats = ['(-1.194, -0.535]', |
| 295 | + '(-0.535, 0.121]', |
| 296 | + '(0.121, 0.777]', |
| 297 | + '(0.777, 1.433]' |
| 298 | + ] |
| 299 | + expected_index = CategoricalIndex(cats, cats, ordered=True) |
| 300 | + expected = Series([1, 1, 1, 1], |
| 301 | + index=expected_index) |
296 | 302 | tm.assert_series_equal(result.sort_index(), expected.sort_index())
|
297 | 303 |
|
298 | 304 | def test_value_counts_bins(self):
|
@@ -332,6 +338,57 @@ def test_value_counts_nat(self):
|
332 | 338 | tm.assert_series_equal(algos.value_counts(dt), exp_dt)
|
333 | 339 | # TODO same for (timedelta)
|
334 | 340 |
|
| 341 | + def test_categorical(self): |
| 342 | + s = Series(pd.Categorical(list('aaabbc'))) |
| 343 | + result = s.value_counts() |
| 344 | + expected = pd.Series([3, 2, 1], index=pd.CategoricalIndex(['a', 'b', 'c'])) |
| 345 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 346 | + |
| 347 | + # the ordered flag must carry over to the index of the result |
| 348 | + s = s.cat.as_ordered() |
| 349 | + result = s.value_counts() |
| 350 | + expected.index = expected.index.as_ordered() |
| 351 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 352 | + |
| 353 | + def test_categorical_nans(self): |
| 354 | + s = Series(pd.Categorical(list('aaaaabbbcc'))) # after the NaN assignment below: a=4, b=3, c=2, NaN=1 |
| 355 | + s.iloc[1] = np.nan |
| 356 | + result = s.value_counts() |
| 357 | + expected = pd.Series([4, 3, 2], |
| 358 | + index=pd.CategoricalIndex(['a', 'b', 'c'], |
| 359 | + categories=['a', 'b', 'c'])) |
| 360 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 361 | + result = s.value_counts(dropna=False) |
| 362 | + expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( |
| 363 | + ['a', 'b', 'c', np.nan])) |
| 364 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 365 | + |
| 366 | + # categories declared out of lexical order (['b', 'a', 'c']) |
| 367 | + s = Series(pd.Categorical(list('aaaaabbbcc'), |
| 368 | + ordered=True, categories=['b', 'a', 'c'])) |
| 369 | + s.iloc[1] = np.nan |
| 370 | + result = s.value_counts() |
| 371 | + expected = pd.Series([4, 3, 2], |
| 372 | + index=pd.CategoricalIndex(['a', 'b', 'c'], |
| 373 | + categories=['b', 'a', 'c'], |
| 374 | + ordered=True)) |
| 375 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 376 | + |
| 377 | + result = s.value_counts(dropna=False) |
| 378 | + expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( |
| 379 | + ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) |
| 380 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 381 | + |
| 382 | + def test_categorical_zeroes(self): |
| 383 | + # the unused category `d` must still be reported, with a count of 0 |
| 384 | + s = Series(pd.Categorical(list('bbbaac'), categories=list('abcd'), |
| 385 | + ordered=True)) |
| 386 | + result = s.value_counts() |
| 387 | + expected = Series([3, 2, 1, 0], index=pd.Categorical( |
| 388 | + ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) |
| 389 | + tm.assert_series_equal(result, expected, check_index_type=True) |
| 390 | + |
| 391 | + |
335 | 392 | def test_dropna(self):
|
336 | 393 | # https://github.com/pydata/pandas/issues/9443#issuecomment-73719328
|
337 | 394 |
|
|
0 commit comments