|
| 1 | +import warnings |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pytest |
| 5 | + |
| 6 | +from pandas import ( |
| 7 | + Categorical, |
| 8 | + DataFrame, |
| 9 | + DatetimeIndex, |
| 10 | + Index, |
| 11 | + Series, |
| 12 | + TimedeltaIndex, |
| 13 | + Timestamp, |
| 14 | + date_range, |
| 15 | + period_range, |
| 16 | + timedelta_range, |
| 17 | +) |
| 18 | +import pandas._testing as tm |
| 19 | +from pandas.core.arrays import PeriodArray |
| 20 | +from pandas.core.arrays.categorical import CategoricalAccessor |
| 21 | +from pandas.core.indexes.accessors import Properties |
| 22 | + |
| 23 | + |
class TestCatAccessor:
    """Tests for the ``Series.cat`` accessor and for ``Series.dt`` on
    categorical Series whose categories are datetimelike."""

    @pytest.mark.parametrize(
        "method",
        [
            lambda x: x.cat.set_categories([1, 2, 3]),
            lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
            lambda x: x.cat.rename_categories([1, 2, 3]),
            lambda x: x.cat.remove_unused_categories(),
            lambda x: x.cat.remove_categories([2]),
            lambda x: x.cat.add_categories([4]),
            lambda x: x.cat.as_ordered(),
            lambda x: x.cat.as_unordered(),
        ],
    )
    def test_getname_categorical_accessor(self, method):
        """The result of each parametrized ``.cat`` method keeps the name "A"."""
        # GH#17509
        ser = Series([1, 2, 3], name="A").astype("category")
        expected = "A"
        result = method(ser).name
        assert result == expected

    def test_cat_accessor(self):
        """Exercise ``categories``, ``ordered``, ``set_categories`` (in-place
        and copying) and ``remove_unused_categories`` through ``.cat``."""
        ser = Series(Categorical(["a", "b", np.nan, "a"]))
        tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
        assert not ser.cat.ordered

        exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
        # The in-place variant must mutate ``ser`` and return None.
        return_value = ser.cat.set_categories(["b", "a"], inplace=True)
        assert return_value is None
        tm.assert_categorical_equal(ser.values, exp)

        res = ser.cat.set_categories(["b", "a"])
        tm.assert_categorical_equal(res.values, exp)

        # Once every value is "a", dropping unused categories leaves only "a".
        ser[:] = "a"
        ser = ser.cat.remove_unused_categories()
        tm.assert_index_equal(ser.cat.categories, Index(["a"]))

    def test_cat_accessor_api(self):
        """``Series.cat`` is ``CategoricalAccessor`` and is unavailable on a
        non-categorical Series."""
        # GH#9322

        assert Series.cat is CategoricalAccessor
        ser = Series(list("aabbcde")).astype("category")
        assert isinstance(ser.cat, CategoricalAccessor)

        invalid = Series([1])
        with pytest.raises(AttributeError, match=r"only use \.cat accessor"):
            invalid.cat
        assert not hasattr(invalid, "cat")

    def test_cat_accessor_no_new_attributes(self):
        """Setting an unknown attribute on the accessor raises AttributeError."""
        # https://github.com/pandas-dev/pandas/issues/10673
        cat = Series(list("aabbcde")).astype("category")
        with pytest.raises(AttributeError, match="You cannot add any new attribute"):
            cat.cat.xlabel = "a"

    def test_cat_accessor_updates_on_inplace(self):
        """After an in-place ``drop``, in-place ``remove_unused_categories``
        leaves two categories."""
        ser = Series(list("abc")).astype("category")
        return_value = ser.drop(0, inplace=True)
        assert return_value is None
        return_value = ser.cat.remove_unused_categories(inplace=True)
        assert return_value is None
        assert len(ser.cat.categories) == 2

    def test_categorical_delegations(self):
        """``.cat`` rejects non-categorical data and delegates properties and
        methods to the underlying Categorical."""

        # invalid accessor
        msg = r"Can only use \.cat accessor with a 'category' dtype"
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat()
        with pytest.raises(AttributeError, match=msg):
            Series(["a", "b", "c"]).cat
        with pytest.raises(AttributeError, match=msg):
            Series(np.arange(5.0)).cat
        with pytest.raises(AttributeError, match=msg):
            Series([Timestamp("20130101")]).cat

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'remove_unused_categories()' to
        # the categorical
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        ser.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(ser.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype="int8")
        tm.assert_series_equal(ser.cat.codes, exp_codes)

        assert ser.cat.ordered
        ser = ser.cat.as_unordered()
        assert not ser.cat.ordered
        return_value = ser.cat.as_ordered(inplace=True)
        assert return_value is None
        assert ser.cat.ordered

        # reorder
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        ser = ser.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(ser.__array__(), exp_values)

        # remove unused categories
        ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        ser = ser.cat.remove_unused_categories()
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(ser.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        msg = "'Series' object has no attribute 'set_categories'"
        with pytest.raises(AttributeError, match=msg):
            ser.set_categories([4, 3, 2, 1])

        # right: ser.cat.set_categories([4,3,2,1])

        # GH#18862 (let Series.cat.rename_categories take callables)
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = ser.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
        )
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_api_for_categorical(self):
        """``.dt`` on a categorical Series gives the same results as ``.dt``
        on the equivalent non-categorical Series, for methods and attributes."""
        # https://github.com/pandas-dev/pandas/issues/10661

        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        # Each entry: (label, attribute names, plain Series, categorical Series)
        test_data = [
            ("Datetime", DatetimeIndex._datetimelike_ops, s_dr, c_dr),
            ("Period", PeriodArray._datetimelike_ops, s_pr, c_pr),
            ("Timedelta", TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        # Methods that cannot be called without arguments: (name, args, kwargs)
        special_func_defs = [
            ("strftime", ("%Y-%m-%d",), {}),
            ("tz_convert", ("EST",), {}),
            ("round", ("D",), {}),
            ("floor", ("D",), {}),
            ("ceil", ("D",), {}),
            ("asfreq", ("D",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # tz_localize is skipped because the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for _label, attr_names, s, c in test_data:
            accessor_names = dir(s.dt)

            # Public accessor members that are not plain attributes, not
            # special-cased above and not ignored are called with no arguments.
            func_names = [
                f
                for f in accessor_names
                if not (
                    f.startswith("_")
                    or f in attr_names
                    or f in _special_func_names
                    or f in _ignore_names
                )
            ]

            func_defs = [(f, (), {}) for f in func_names]
            # Only add the special methods this particular accessor exposes.
            func_defs.extend(
                f_def for f_def in special_func_defs if f_def[0] in accessor_names
            )

            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                tm.assert_equal(res, exp)

            for attr in attr_names:
                if attr in ["week", "weekofyear"]:
                    # GH#33595 Deprecate week and weekofyear
                    continue
                res = getattr(c.dt, attr)
                exp = getattr(s.dt, attr)

                if isinstance(res, DataFrame):
                    tm.assert_frame_equal(res, exp)
                elif isinstance(res, Series):
                    tm.assert_series_equal(res, exp)
                else:
                    tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = r"Can only use \.dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        # Probe the accessor under test: hasattr must agree with the raise above.
        assert not hasattr(invalid, "dt")
0 commit comments