From 08322a5dd1a029b331a1573f38e10b64118b0087 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 31 Mar 2020 15:49:04 +0300 Subject: [PATCH 01/13] DOC: Fixed examples in pandas/core/arrays/ --- ci/code_checks.sh | 13 +-- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/categorical.py | 116 +++++++++++++++++++++----- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 19 +++-- pandas/core/arrays/masked.py | 4 +- pandas/core/arrays/period.py | 1 + pandas/core/arrays/sparse/accessor.py | 89 ++++++++++++++------ pandas/core/arrays/sparse/array.py | 12 +-- pandas/core/arrays/sparse/dtype.py | 2 +- 10 files changed, 182 insertions(+), 78 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 5401cc81785ab..3074369a212e7 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -288,26 +288,17 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests interval classes' ; echo $MSG - pytest -q --doctest-modules \ - pandas/core/indexes/interval.py \ - pandas/core/arrays/interval.py + pytest -q --doctest-modules pandas/core/indexes/interval.py RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests arrays'; echo $MSG - pytest -q --doctest-modules \ - pandas/core/arrays/string_.py \ - pandas/core/arrays/integer.py \ - pandas/core/arrays/boolean.py + pytest -q --doctest-modules pandas/core/arrays/ RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests dtypes'; echo $MSG pytest -q --doctest-modules pandas/core/dtypes/ RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests arrays/boolean.py' ; echo $MSG - pytest -q --doctest-modules pandas/core/arrays/boolean.py - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests base.py' ; echo $MSG pytest -q --doctest-modules pandas/core/base.py RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index af897e86a14d4..9915d394af1c1 100644 --- a/pandas/core/arrays/base.py +++ 
b/pandas/core/arrays/base.py @@ -1161,7 +1161,7 @@ def _create_method(cls, op, coerce_to_dtype=True): -------- Given an ExtensionArray subclass called MyExtensionArray, use - >>> __add__ = cls._create_method(operator.add) + >>> __add__ = cls._create_method(operator.add) # doctest: +SKIP in the class definition of MyExtensionArray to create the operator for addition, that will be based on the operator implementation diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c11d879840fb9..59a3dd584735b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1598,19 +1598,19 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"): >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5]) >>> c - [NaN, 2.0, 2.0, NaN, 5.0] + [NaN, 2, 2, NaN, 5] Categories (2, int64): [2, 5] >>> c.sort_values() - [2.0, 2.0, 5.0, NaN, NaN] + [2, 2, 5, NaN, NaN] Categories (2, int64): [2, 5] >>> c.sort_values(ascending=False) - [5.0, 2.0, 2.0, NaN, NaN] + [5, 2, 2, NaN, NaN] Categories (2, int64): [2, 5] >>> c.sort_values(na_position='first') - [NaN, NaN, 2.0, 2.0, 5.0] + [NaN, NaN, 2, 2, 5] Categories (2, int64): [2, 5] >>> c.sort_values(ascending=False, na_position='first') - [NaN, NaN, 5.0, 2.0, 2.0] + [NaN, NaN, 5, 2, 2] Categories (2, int64): [2, 5] """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -1835,7 +1835,7 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None): >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a') [a, a, a] - Categories (3, object): [a, b] + Categories (2, object): [a, b] Specifying a fill value that's not in ``self.categories`` will raise a ``TypeError``. @@ -2237,21 +2237,20 @@ def unique(self): order of appearance. 
>>> pd.Categorical(list('baabc')) - [b, a, c] - Categories (3, object): [b, a, c] + [b, a, a, b, c] + Categories (3, object): [a, b, c] >>> pd.Categorical(list('baabc'), categories=list('abc')) - [b, a, c] - Categories (3, object): [b, a, c] + [b, a, a, b, c] + Categories (3, object): [a, b, c] An ordered Categorical preserves the category ordering. - >>> pd.Categorical(list('baabc'), - ... categories=list('abc'), - ... ordered=True) - [b, a, c] + >>> pd.Categorical(list('baabc'), categories=list('abc'), ordered=True) + [b, a, a, b, c] Categories (3, object): [a < b < c] + See Also -------- unique @@ -2438,7 +2437,7 @@ def replace(self, to_replace, value, inplace: bool = False): -------- >>> s = pd.Categorical([1, 2, 1, 3]) >>> s.replace(1, 3) - [3, 3, 2, 3] + [3, 2, 3, 3] Categories (2, int64): [2, 3] """ inplace = validate_bool_kwarg(inplace, "inplace") @@ -2506,16 +2505,91 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): Examples -------- + >>> s = pd.Series(list("aabc")).astype("category") + >>> s + 0 a + 1 a + 2 b + 3 c + dtype: category + Categories (3, object): [a, b, c] + >>> s.cat.categories - >>> s.cat.categories = list('abc') - >>> s.cat.rename_categories(list('cab')) - >>> s.cat.reorder_categories(list('cab')) - >>> s.cat.add_categories(['d','e']) + Index(['a', 'b', 'c'], dtype='object') + + >>> s.cat.categories = list("bcd") + >>> s + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (3, object): [b, c, d] + + >>> s.cat.rename_categories(list("abc")) + 0 a + 1 a + 2 b + 3 c + dtype: category + Categories (3, object): [a, b, c] + + >>> s.cat.reorder_categories(list("cdb")) + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (3, object): [c, d, b] + + >>> s.cat.add_categories(["e", "f"]) + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (5, object): [b, c, d, e, f] + >>> s.cat.remove_categories(['d']) + 0 b + 1 b + 2 c + 3 NaN + dtype: category + Categories (2, object): [b, c] + >>> 
s.cat.remove_unused_categories() - >>> s.cat.set_categories(list('abcde')) + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (3, object): [b, c, d] + + >>> s.cat.set_categories(list("abcde")) + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (5, object): [a, b, c, d, e] + >>> s.cat.as_ordered() + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (3, object): [b < c < d] + >>> s.cat.as_unordered() + 0 b + 1 b + 2 c + 3 d + dtype: category + Categories (3, object): [b, c, d] + """ def __init__(self, data): @@ -2603,7 +2677,7 @@ def _recode_for_categories(codes: np.ndarray, old_categories, new_categories): >>> new_cat = pd.Index(['a', 'b']) >>> codes = np.array([0, 1, 1, 2]) >>> _recode_for_categories(codes, old_cat, new_cat) - array([ 1, 0, 0, -1]) + array([ 1, 0, 0, -1], dtype=int8) """ if len(old_categories) == 0: # All null anyway, so just retain the nulls diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a153b4e06157b..1f2962ef54ca0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -181,7 +181,7 @@ def _unbox_scalar(self, value: Union[Period, Timestamp, Timedelta, NaTType]) -> Examples -------- - >>> self._unbox_scalar(Timedelta('10s')) # DOCTEST: +SKIP + >>> _unbox_scalar(Timedelta('10s')) # doctest: +SKIP 10000000000 """ raise AbstractMethodError(self) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e2a13df069ae2..e6a17491e9378 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -922,9 +922,10 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): ... '2018-10-28 02:36:00', ... 
'2018-10-28 03:46:00'])) >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) - 0 2015-03-29 03:00:00+02:00 - 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] + 0 2018-10-28 01:20:00+02:00 + 1 2018-10-28 02:36:00+02:00 + 2 2018-10-28 03:46:00+01:00 + dtype: datetime64[ns, CET] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` @@ -935,15 +936,17 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 0 2015-03-29 03:00:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, 'Europe/Warsaw'] + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 0 2015-03-29 01:59:59.999999999+01:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, 'Europe/Warsaw'] + dtype: datetime64[ns, Europe/Warsaw] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) 0 2015-03-29 03:30:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, 'Europe/Warsaw'] + dtype: datetime64[ns, Europe/Warsaw] """ nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( @@ -1604,9 +1607,9 @@ def date(self): DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], dtype='datetime64[ns]', freq='A-DEC') >>> idx.is_leap_year - array([ True, False, False], dtype=bool) + array([ True, False, False]) - >>> dates = pd.Series(idx) + >>> dates_series = pd.Series(idx) >>> dates_series 0 2012-12-31 1 2013-12-31 diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index cf6c16d4cad5d..d23d26d870f75 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -94,7 +94,7 @@ def to_numpy( >>> a = pd.array([True, False, pd.NA], dtype="boolean") >>> a.to_numpy() - array([True, False, NA], dtype=object) + array([True, False, 
<NA>], dtype=object) When no missing values are present, an equivalent dtype can be used. @@ -110,7 +110,7 @@ def to_numpy( >>> a = pd.array([True, False, pd.NA], dtype="boolean") >>> a - [True, False, NA] + [True, False, <NA>] Length: 3, dtype: boolean >>> a.to_numpy(dtype="bool") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index be9cc53d33d6f..d9bd567f88845 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -818,6 +818,7 @@ def period_array( Integers that look like years are handled >>> period_array([2000, 2001, 2002], freq='D') + <PeriodArray> ['2000-01-01', '2001-01-01', '2002-01-01'] Length: 3, dtype: period[D] diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 787407060c7f1..39c06e374fa43 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -67,24 +67,25 @@ def from_coo(cls, A, dense_index=False): Examples -------- >>> from scipy import sparse - >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), - shape=(3, 4)) + + >>> A = sparse.coo_matrix( + ... ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(3, 4) + ... 
) >>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' - with 3 stored elements in COOrdinate format> + with 3 stored elements in COOrdinate format> + >>> A.todense() - matrix([[ 0., 0., 1., 2.], - [ 3., 0., 0., 0.], - [ 0., 0., 0., 0.]]) + matrix([[0., 0., 1., 2.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + >>> ss = pd.Series.sparse.from_coo(A) >>> ss - 0 2 1 - 3 2 - 1 0 3 - dtype: float64 - BlockIndex - Block locations: array([0], dtype=int32) - Block lengths: array([3], dtype=int32) + 0 2 1.0 + 3 2.0 + 1 0 3.0 + dtype: Sparse[float64, nan] """ from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series from pandas import Series @@ -119,24 +120,58 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): Examples -------- >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) - >>> s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), - (1, 2, 'a', 1), - (1, 1, 'b', 0), - (1, 1, 'b', 1), - (2, 1, 'b', 0), - (2, 1, 'b', 1)], - names=['A', 'B', 'C', 'D']) + >>> s + 0 3.0 + 1 NaN + 2 1.0 + 3 3.0 + 4 NaN + 5 NaN + dtype: float64 + + >>> s.index = pd.MultiIndex.from_tuples( + ... [ + ... (1, 2, "a", 0), + ... (1, 2, "a", 1), + ... (1, 1, "b", 0), + ... (1, 1, "b", 1), + ... (2, 1, "b", 0), + ... (2, 1, "b", 1) + ... ], + ... names=["A", "B", "C", "D"], + ... ) + >>> s + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: float64 + >>> ss = s.astype("Sparse") - >>> A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'], - ... column_levels=['C', 'D'], - ... sort_labels=True) + >>> ss + A B C D + 1 2 a 0 3.0 + 1 NaN + 1 b 0 1.0 + 1 3.0 + 2 1 b 0 NaN + 1 NaN + dtype: Sparse[float64, nan] + + >>> A, rows, columns = ss.sparse.to_coo( + ... row_levels=["A", "B"], column_levels=["C", "D"], sort_labels=True + ... 
) >>> A <3x4 sparse matrix of type '<class 'numpy.float64'>' - with 3 stored elements in COOrdinate format> + with 3 stored elements in COOrdinate format> >>> A.todense() - matrix([[ 0., 0., 1., 3.], - [ 3., 0., 0., 0.], - [ 0., 0., 0., 0.]]) + matrix([[0., 0., 1., 3.], + [3., 0., 0., 0.], + [0., 0., 0., 0.]]) + >>> rows [(1, 1), (1, 2), (2, 1)] >>> columns diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 8021e0babe4e0..79f3ff73032d9 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1048,7 +1048,7 @@ def astype(self, dtype=None, copy=True): Examples -------- - >>> arr = SparseArray([0, 0, 1, 2]) + >>> arr = pd.arrays.SparseArray([0, 0, 1, 2]) >>> arr [0, 0, 1, 2] Fill: 0 @@ -1066,8 +1066,8 @@ def astype(self, dtype=None, copy=True): >>> arr.astype(np.dtype('float64')) ... # doctest: +NORMALIZE_WHITESPACE - [0, 0, 1.0, 2.0] - Fill: 0 + [0.0, 0.0, 1.0, 2.0] + Fill: 0.0 IntIndex Indices: array([2, 3], dtype=int32) @@ -1107,19 +1107,19 @@ def map(self, mapper): Examples -------- >>> arr = pd.arrays.SparseArray([0, 1, 2]) - >>> arr.apply(lambda x: x + 10) + >>> arr.map(lambda x: x + 10) [10, 11, 12] Fill: 10 IntIndex Indices: array([1, 2], dtype=int32) - >>> arr.apply({0: 10, 1: 11, 2: 12}) + >>> arr.map({0: 10, 1: 11, 2: 12}) [10, 11, 12] Fill: 10 IntIndex Indices: array([1, 2], dtype=int32) - >>> arr.apply(pd.Series([10, 11, 12], index=[0, 1, 2])) + >>> arr.map(pd.Series([10, 11, 12], index=[0, 1, 2])) [10, 11, 12] Fill: 10 IntIndex diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 135514e334920..afa11586fda04 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -347,7 +347,7 @@ def _subtype_with_str(self): dtype('O') >>> dtype._subtype_with_str - str + <class 'str'> """ if isinstance(self.fill_value, str): return type(self.fill_value) From 1855f140bb1c0f7cd05e49d4967a43e18f02dbda Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Tue, 
31 Mar 2020 16:15:11 +0300 Subject: [PATCH 02/13] Lint complain --- pandas/core/arrays/categorical.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 59a3dd584735b..f849b71a7ea0f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2589,7 +2589,6 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 3 d dtype: category Categories (3, object): [b, c, d] - """ def __init__(self, data): From 4c87ad54b96a06819e0a3ea6eb208dfe8214b063 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 14:49:15 +0300 Subject: [PATCH 03/13] Removed extra blank lines XREF: https://github.com/pandas-dev/pandas/pull/33179#discussion_r401174033 --- pandas/core/arrays/categorical.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f849b71a7ea0f..27cc0c0e6d954 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2250,13 +2250,11 @@ def unique(self): [b, a, a, b, c] Categories (3, object): [a < b < c] - See Also -------- unique CategoricalIndex.unique Series.unique - """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) From 435564e4a95fc4106828bed98a9e3995619d0f13 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 14:50:14 +0300 Subject: [PATCH 04/13] Fixed order of docstring headers --- pandas/core/arrays/categorical.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 27cc0c0e6d954..fa9e1ddec5f21 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2231,6 +2231,12 @@ def unique(self): ------- unique values : ``Categorical`` + See Also + -------- + unique + CategoricalIndex.unique + Series.unique + Examples -------- An unordered 
Categorical will return categories in the @@ -2249,12 +2255,6 @@ def unique(self): >>> pd.Categorical(list('baabc'), categories=list('abc'), ordered=True) [b, a, a, b, c] Categories (3, object): [a < b < c] - - See Also - -------- - unique - CategoricalIndex.unique - Series.unique """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) From 3f03fde2e7d29bdf3896379ea587c0a0d526819c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 14:52:42 +0300 Subject: [PATCH 05/13] Reverted deleted "self" XREF: https://github.com/pandas-dev/pandas/pull/33179#discussion_r401174584 --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1f2962ef54ca0..c0bbbebac7c33 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -181,7 +181,7 @@ def _unbox_scalar(self, value: Union[Period, Timestamp, Timedelta, NaTType]) -> Examples -------- - >>> _unbox_scalar(Timedelta('10s')) # doctest: +SKIP + >>> self._unbox_scalar(Timedelta("10s")) # doctest: +SKIP 10000000000 """ raise AbstractMethodError(self) From 3aeed895e88951a7ba25c0f3ed5d9ae006033eec Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Wed, 1 Apr 2020 14:52:47 +0300 Subject: [PATCH 06/13] Update pandas/core/arrays/base.py Co-Authored-By: Joris Van den Bossche --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9915d394af1c1..6cb597ba75852 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1161,7 +1161,7 @@ def _create_method(cls, op, coerce_to_dtype=True): -------- Given an ExtensionArray subclass called MyExtensionArray, use - >>> __add__ = cls._create_method(operator.add) # doctest: +SKIP + __add__ = cls._create_method(operator.add) in 
the class definition of MyExtensionArray to create the operator for addition, that will be based on the operator implementation From b2c0dc724222a319b56600c676107882649d52ba Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 15:04:23 +0300 Subject: [PATCH 07/13] Fixed documentation examples for `categorical.unique` XREF: https://github.com/pandas-dev/pandas/pull/33179#discussion_r401544329 --- pandas/core/arrays/categorical.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index fa9e1ddec5f21..c175eb3ecc77e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2242,18 +2242,20 @@ def unique(self): An unordered Categorical will return categories in the order of appearance. - >>> pd.Categorical(list('baabc')) - [b, a, a, b, c] - Categories (3, object): [a, b, c] + >>> pd.Categorical(list("baabc")).unique() + [b, a, c] + Categories (3, object): [b, a, c] - >>> pd.Categorical(list('baabc'), categories=list('abc')) - [b, a, a, b, c] - Categories (3, object): [a, b, c] + >>> pd.Categorical(list("baabc"), categories=list("abc")).unique() + [b, a, c] + Categories (3, object): [b, a, c] An ordered Categorical preserves the category ordering. - >>> pd.Categorical(list('baabc'), categories=list('abc'), ordered=True) - [b, a, a, b, c] + >>> pd.Categorical( + ... list("baabc"), categories=list("abc"), ordered=True + ... 
).unique() + [b, a, c] Categories (3, object): [a < b < c] """ # unlike np.unique, unique1d does not sort From 250b267d6abad07cf4b55dc2c2e35b928dd05f14 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 15:08:10 +0300 Subject: [PATCH 08/13] Removed example with bad practices XREF: https://github.com/pandas-dev/pandas/pull/33179#discussion_r401545207 --- pandas/core/arrays/categorical.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c175eb3ecc77e..ecc19c9f4335d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2517,15 +2517,6 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): >>> s.cat.categories Index(['a', 'b', 'c'], dtype='object') - >>> s.cat.categories = list("bcd") - >>> s - 0 b - 1 b - 2 c - 3 d - dtype: category - Categories (3, object): [b, c, d] - >>> s.cat.rename_categories(list("abc")) 0 a 1 a From a7455849927739535410edec9549d3f577f1f528 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 15:28:14 +0300 Subject: [PATCH 09/13] Improved categorical documentation --- pandas/core/arrays/categorical.py | 91 +++++++++++++++++++------------ 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index ecc19c9f4335d..d40564f19ba8a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2505,81 +2505,100 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): Examples -------- - >>> s = pd.Series(list("aabc")).astype("category") + >>> s = pd.Series(list("abbccc")).astype("category") >>> s 0 a - 1 a + 1 b 2 b 3 c + 4 c + 5 c dtype: category Categories (3, object): [a, b, c] >>> s.cat.categories Index(['a', 'b', 'c'], dtype='object') - >>> s.cat.rename_categories(list("abc")) - 0 a - 1 a + >>> s.cat.rename_categories(list("cba")) + 
0 c + 1 b 2 b - 3 c + 3 a + 4 a + 5 a dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): [c, b, a] - >>> s.cat.reorder_categories(list("cdb")) - 0 b + >>> s.cat.reorder_categories(list("cba")) + 0 a 1 b - 2 c - 3 d + 2 b + 3 c + 4 c + 5 c dtype: category - Categories (3, object): [c, d, b] + Categories (3, object): [c, b, a] - >>> s.cat.add_categories(["e", "f"]) - 0 b + >>> s.cat.add_categories(["d", "e"]) + 0 a 1 b - 2 c - 3 d + 2 b + 3 c + 4 c + 5 c dtype: category - Categories (5, object): [b, c, d, e, f] + Categories (5, object): [a, b, c, d, e] + - >>> s.cat.remove_categories(['d']) - 0 b + >>> s.cat.remove_categories(["a", "c"]) + 0 NaN 1 b - 2 c + 2 b 3 NaN + 4 NaN + 5 NaN dtype: category - Categories (2, object): [b, c] + Categories (1, object): [b] >>> s.cat.remove_unused_categories() - 0 b + 0 a 1 b - 2 c - 3 d + 2 b + 3 c + 4 c + 5 c dtype: category - Categories (3, object): [b, c, d] + Categories (3, object): [a, b, c] >>> s.cat.set_categories(list("abcde")) - 0 b + 0 a 1 b - 2 c - 3 d + 2 b + 3 c + 4 c + 5 c dtype: category Categories (5, object): [a, b, c, d, e] >>> s.cat.as_ordered() - 0 b + 0 a 1 b - 2 c - 3 d + 2 b + 3 c + 4 c + 5 c dtype: category - Categories (3, object): [b < c < d] + Categories (3, object): [a < b < c] >>> s.cat.as_unordered() - 0 b + 0 a 1 b - 2 c - 3 d + 2 b + 3 c + 4 c + 5 c dtype: category - Categories (3, object): [b, c, d] + Categories (3, object): [a, b, c] """ def __init__(self, data): From a8cc997d664713921956b64c50189871374c6126 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 15:30:31 +0300 Subject: [PATCH 10/13] Made the example to show it's effect XREF: https://github.com/pandas-dev/pandas/pull/33179#discussion_r401546153 --- pandas/core/arrays/categorical.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d40564f19ba8a..6f468e471449f 100644 --- 
a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2560,7 +2560,8 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): dtype: category Categories (1, object): [b] - >>> s.cat.remove_unused_categories() + >>> s1 = s.cat.add_categories(["d", "e"]) + >>> s1.cat.remove_unused_categories() 0 a 1 b 2 b From 5fbfd46ee191d0628027e210a4e3f59c151c2db3 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 15:38:57 +0300 Subject: [PATCH 11/13] Removed verbosity of example XREF: https://github.com/pandas-dev/pandas/pull/33179#discussion_r401547517 --- pandas/core/arrays/sparse/accessor.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 39c06e374fa43..8a30d2b954b55 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -120,15 +120,6 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): Examples -------- >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) - >>> s - 0 3.0 - 1 NaN - 2 1.0 - 3 3.0 - 4 NaN - 5 NaN - dtype: float64 - >>> s.index = pd.MultiIndex.from_tuples( ... [ ... 
(1, 2, "a", 0), From b6bdad816225dbe4fadc0265b354eefea9f96764 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Wed, 1 Apr 2020 16:13:48 +0300 Subject: [PATCH 12/13] Update pandas/core/arrays/categorical.py Co-Authored-By: Joris Van den Bossche --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f93837445227b..63caa486bd05f 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2233,7 +2233,7 @@ def unique(self): See Also -------- - unique + pandas.unique CategoricalIndex.unique Series.unique From 7f730eaa914d216622d50c87d4832a50eaecb45e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 1 Apr 2020 16:17:07 +0300 Subject: [PATCH 13/13] Addressed lint issues --- pandas/core/arrays/categorical.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 63caa486bd05f..f283b6fd3b4b3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2549,7 +2549,6 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): dtype: category Categories (5, object): [a, b, c, d, e] - >>> s.cat.remove_categories(["a", "c"]) 0 NaN 1 b