|
1 | 1 | # -*- coding: utf-8 -*-
|
2 | 2 | # pylint: disable=E1101,E1103,W0232
|
3 | 3 |
|
4 |
| -import os |
5 | 4 | import sys
|
6 | 5 | from datetime import datetime
|
7 | 6 | from distutils.version import LooseVersion
|
@@ -2906,54 +2905,41 @@ def test_value_counts(self):
|
2906 | 2905 | tm.assert_series_equal(res, exp)
|
2907 | 2906 |
|
def test_value_counts_with_nan(self):
    """Check ``Series.value_counts`` on categorical data with missing values.

    Exercises ``dropna=True``, ``dropna=False`` and ``sort=False`` on
    category-dtype Series, with and without missing entries (see gh-9443).
    """
    # see gh-9443

    # sanity check: with no missing values present, ``dropna`` must not
    # change the result, so both settings are compared to the same ``exp``
    s = pd.Series(["a", "b", "a"], dtype="category")
    exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))

    res = s.value_counts(dropna=True)
    tm.assert_series_equal(res, exp)

    # dropna=False on purpose: repeating dropna=True here would leave the
    # "keep NaN" code path untested for data without missing values
    res = s.value_counts(dropna=False)
    tm.assert_series_equal(res, exp)

    # same Series via two different constructions --> same behaviour
    series = [
        pd.Series(["a", "b", None, "a", None, None], dtype="category"),
        pd.Series(pd.Categorical(["a", "b", None, "a", None, None],
                                 categories=["a", "b"]))
    ]

    for s in series:
        # None is a NaN value, so we exclude its count here
        exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))
        res = s.value_counts(dropna=True)
        tm.assert_series_equal(res, exp)

        # we don't exclude the count of None and sort by counts
        exp = pd.Series([3, 2, 1],
                        index=pd.CategoricalIndex([np.nan, "a", "b"]))
        res = s.value_counts(dropna=False)
        tm.assert_series_equal(res, exp)

        # When we aren't sorting by counts, and np.nan isn't a
        # category, it should be last.
        exp = pd.Series([2, 1, 3],
                        index=pd.CategoricalIndex(["a", "b", np.nan]))
        res = s.value_counts(dropna=False, sort=False)
        tm.assert_series_equal(res, exp)
2957 | 2943 |
|
2958 | 2944 | def test_groupby(self):
|
2959 | 2945 |
|
@@ -4113,16 +4099,11 @@ def f():
|
4113 | 4099 | res = df.dropna()
|
4114 | 4100 | tm.assert_frame_equal(res, df_exp_drop_all)
|
4115 | 4101 |
|
4116 |
| - # make sure that fillna takes both missing values and NA categories |
4117 |
| - # into account |
4118 |
| - c = Categorical(["a", "b", np.nan]) |
4119 |
| - with tm.assert_produces_warning(FutureWarning): |
4120 |
| - c.set_categories(["a", "b", np.nan], rename=True, inplace=True) |
4121 |
| - |
4122 |
| - c[0] = np.nan |
| 4102 | + # make sure that fillna takes missing values into account |
| 4103 | + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) |
4123 | 4104 | df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]})
|
4124 | 4105 |
|
4125 |
| - cat_exp = Categorical(["a", "b", "a"], categories=["a", "b", np.nan]) |
| 4106 | + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) |
4126 | 4107 | df_exp = pd.DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})
|
4127 | 4108 |
|
4128 | 4109 | res = df.fillna("a")
|
|
0 commit comments