From 7f8efef143a532674b8a8a00539586ef43fc8bee Mon Sep 17 00:00:00 2001 From: maikia Date: Sat, 13 May 2023 14:51:31 +0200 Subject: [PATCH 1/6] update docstring --- pandas/core/groupby/generic.py | 70 +++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d26448dffc11a..b3eadbd407e83 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1217,8 +1217,76 @@ def hist( def dtype(self) -> Series: return self.apply(lambda ser: ser.dtype) - @doc(Series.unique.__doc__) def unique(self) -> Series: + """ + Return unique values of Series object. + + Uniques are returned in order of appearance. Hash table-based unique, + therefore does NOT sort. + + Parameters + ---------- + *docstrings : None, str, or callable + The string / docstring / docstring template to be appended in order + after default docstring under callable. + **params + The string which would be used to format docstring template. + + Returns + ------- + ndarray or ExtensionArray + The unique values returned as a NumPy array. See Notes. + + See Also + -------- + Series.drop_duplicates : Return Series with duplicate values removed. + unique : Top-level unique method for any 1-d array-like object. + Index.unique : Return Index with unique values from an Index object. + + Notes + ----- + Returns the unique values as a NumPy array. In case of an + extension-array backed Series, a new ExtensionArray of that type with + just the unique values is returned. This includes + ``` + Categorical + Period + Datetime with Timezone + Datetime without Timezone + Timedelta + Interval + Sparse + IntegerNA + ``` + + See Examples section. + + Examples + -------- + >>> pd.Series([2, 1, 3, 3], name='A').unique() + array([2, 1, 3]) + + >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() + + ['2016-01-01 00:00:00'] + Length: 1, dtype: datetime64[ns] + + >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern') + ... for _ in range(3)]).unique() + + ['2016-01-01 00:00:00-05:00'] + Length: 1, dtype: datetime64[ns, US/Eastern] + + An Categorical will return categories in the order of appearance and with the same dtype. + + >>> pd.Series(pd.Categorical(list('baabc'))).unique() + ['b', 'a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), + ... ordered=True)).unique() + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] + """ result = self._op_via_apply("unique") return result From f68223b246efba66d4a071d34155f468dcececbc Mon Sep 17 00:00:00 2001 From: maikia Date: Sat, 13 May 2023 15:14:06 +0200 Subject: [PATCH 2/6] make a list of items --- pandas/core/groupby/generic.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b3eadbd407e83..103143eb3abab 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1224,14 +1224,6 @@ def unique(self) -> Series: Uniques are returned in order of appearance. Hash table-based unique, therefore does NOT sort. - Parameters - ---------- - *docstrings : None, str, or callable - The string / docstring / docstring template to be appended in order - after default docstring under callable. - **params - The string which would be used to format docstring template. - Returns ------- ndarray or ExtensionArray @@ -1248,16 +1240,15 @@ def unique(self) -> Series: Returns the unique values as a NumPy array. In case of an extension-array backed Series, a new ExtensionArray of that type with just the unique values is returned. This includes - ``` - Categorical - Period - Datetime with Timezone - Datetime without Timezone - Timedelta - Interval - Sparse - IntegerNA - ``` + + - Categorical + - Period + - Datetime with Timezone + - Datetime without Timezone + - Timedelta + - Interval + - Sparse + - IntegerNA See Examples section. From d81747ba711c6233a4e358e8050d443466d74f07 Mon Sep 17 00:00:00 2001 From: maikia Date: Sat, 13 May 2023 15:47:39 +0200 Subject: [PATCH 3/6] update example --- pandas/core/groupby/generic.py | 70 ++++++++++++---------------------- 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 103143eb3abab..228c11b94a2e8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1221,62 +1221,40 @@ def unique(self) -> Series: """ Return unique values of Series object. - Uniques are returned in order of appearance. Hash table-based unique, + TODO Uniques are returned in order of appearance. Hash table-based unique, therefore does NOT sort. Returns ------- - ndarray or ExtensionArray - The unique values returned as a NumPy array. See Notes. + Series + TODO The unique values returned as a NumPy array. See Notes. See Also -------- - Series.drop_duplicates : Return Series with duplicate values removed. - unique : Top-level unique method for any 1-d array-like object. - Index.unique : Return Index with unique values from an Index object. - - Notes - ----- - Returns the unique values as a NumPy array. In case of an - extension-array backed Series, a new ExtensionArray of that type with - just the unique values is returned. This includes - - - Categorical - - Period - - Datetime with Timezone - - Datetime without Timezone - - Timedelta - - Interval - - Sparse - - IntegerNA - - See Examples section. + Series.unique : TODO Return Series with duplicate values removed. Examples -------- - >>> pd.Series([2, 1, 3, 3], name='A').unique() - array([2, 1, 3]) - - >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() - - ['2016-01-01 00:00:00'] - Length: 1, dtype: datetime64[ns] - - >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern') - ... for _ in range(3)]).unique() - - ['2016-01-01 00:00:00-05:00'] - Length: 1, dtype: datetime64[ns, US/Eastern] - - An Categorical will return categories in the order of appearance and with the same dtype. - - >>> pd.Series(pd.Categorical(list('baabc'))).unique() - ['b', 'a', 'c'] - Categories (3, object): ['a', 'b', 'c'] - >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), - ... ordered=True)).unique() - ['b', 'a', 'c'] - Categories (3, object): ['a' < 'b' < 'c'] + >>> df = pd.DataFrame([('Chihuahua', 'dog', 6.1), + ... ('Beagle', 'dog', 15.2), + ... ('Chihuahua', 'dog', 6.9), + ... ('Persian', 'cat', 9.2), + ... ('Chihuahua', 'dog', 7), + ... ('Persian', 'cat', 8.8)], + ... columns=['breed', 'animal', 'height_in']) + >>> df + breed animal height_in + 0 Chihuahua dog 6.1 + 1 Beagle dog 15.2 + 2 Chihuahua dog 6.9 + 3 Persian cat 9.2 + 4 Chihuahua dog 7.0 + 5 Persian cat 8.8 + >>> ser = df.groupby('animal')['breed'].unique() + animal + cat [Persian] + dog [Chihuahua, Beagle] + Name: breed, dtype: object """ result = self._op_via_apply("unique") return result From 309904e9b68769bc1ea9dd8c6197c6c12b64fba4 Mon Sep 17 00:00:00 2001 From: maikia Date: Sat, 13 May 2023 15:55:08 +0200 Subject: [PATCH 4/6] update wording in docstring for more specific to groupby unique --- pandas/core/groupby/generic.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 228c11b94a2e8..3a53c8973f615 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1219,19 +1219,19 @@ def dtype(self) -> Series: def unique(self) -> Series: """ - Return unique values of Series object. + Return unique values of GroupedBy Series object. - TODO Uniques are returned in order of appearance. Hash table-based unique, - therefore does NOT sort. + It returns unique values for each of the GroupedBy values. Returned in + order of appearance. Hash table-based unique, therefore does NOT sort. Returns ------- Series - TODO The unique values returned as a NumPy array. See Notes. + Unique values for each of the GroupedBy values. See Also -------- - Series.unique : TODO Return Series with duplicate values removed. + Series.unique : Return unique values of Series object. Examples -------- From 1ed549f62e3c6f9deed9255c111e72fa3d5147b7 Mon Sep 17 00:00:00 2001 From: maikia Date: Sat, 13 May 2023 16:10:19 +0200 Subject: [PATCH 5/6] update grouped from GroupedBy --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3a53c8973f615..7fdf165741173 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1219,15 +1219,15 @@ def dtype(self) -> Series: def unique(self) -> Series: """ - Return unique values of GroupedBy Series object. + Return unique values for each group. - It returns unique values for each of the GroupedBy values. Returned in + It returns unique values for each of the grouped values. Returned in order of appearance. Hash table-based unique, therefore does NOT sort. Returns ------- Series - Unique values for each of the GroupedBy values. + Unique values for each of the grouped values. See Also -------- From 9d79078bebe987592a9afbcafe30d9e1f731fa6e Mon Sep 17 00:00:00 2001 From: maikia Date: Sat, 13 May 2023 17:17:43 +0200 Subject: [PATCH 6/6] correct output --- pandas/core/groupby/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7fdf165741173..005da7a3deca2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1251,6 +1251,7 @@ def unique(self) -> Series: 4 Chihuahua dog 7.0 5 Persian cat 8.8 >>> ser = df.groupby('animal')['breed'].unique() + >>> ser animal cat [Persian] dog [Chihuahua, Beagle]