From 28326c6a15de6f6c992c3fd6dcc0dce64a89a2e6 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 8 Feb 2023 19:28:54 +0100 Subject: [PATCH 1/3] fix algorithms.factorize docstring --- pandas/core/algorithms.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 0037cd20e8c1e..3a843ce71f130 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -688,7 +688,7 @@ def factorize( >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b']) >>> codes - array([0, 0, 1, 2, 0]...) + array([0, 0, 1, 2, 0], dtype=int64) >>> uniques array(['b', 'a', 'c'], dtype=object) @@ -697,7 +697,7 @@ def factorize( >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True) >>> codes - array([1, 1, 0, 2, 1]...) + array([1, 1, 0, 2, 1], dtype=int64) >>> uniques array(['a', 'b', 'c'], dtype=object) @@ -707,7 +707,7 @@ def factorize( >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b']) >>> codes - array([ 0, -1, 1, 2, 0]...) + array([ 0, -1, 1, 2, 0], dtype=int64) >>> uniques array(['b', 'a', 'c'], dtype=object) @@ -718,7 +718,7 @@ def factorize( >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) >>> codes, uniques = pd.factorize(cat) >>> codes - array([0, 0, 1]...) + array([0, 0, 1], dtype=int64) >>> uniques ['a', 'c'] Categories (3, object): ['a', 'b', 'c'] @@ -732,7 +732,7 @@ def factorize( >>> cat = pd.Series(['a', 'a', 'c']) >>> codes, uniques = pd.factorize(cat) >>> codes - array([0, 0, 1]...) + array([0, 0, 1], dtype=int64) >>> uniques Index(['a', 'c'], dtype='object') @@ -742,13 +742,13 @@ def factorize( >>> values = np.array([1, 2, 1, np.nan]) >>> codes, uniques = pd.factorize(values) # default: use_na_sentinel=True >>> codes - array([ 0, 1, 0, -1]) + array([ 0, 1, 0, -1], dtype=int64) >>> uniques array([1., 2.]) >>> codes, uniques = pd.factorize(values, use_na_sentinel=False) >>> codes - array([0, 1, 0, 2]) + array([0, 1, 0, 2], dtype=int64) >>> uniques array([ 1., 2., nan]) """ From 7c7c26307ed7e9ada2ba6add67aae823704d22df Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 8 Feb 2023 20:05:51 +0100 Subject: [PATCH 2/3] remove from exclusion list --- ci/code_checks.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 18f394b8e549b..143802bf21d26 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -579,9 +579,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (EX02)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX02 --ignore_functions \ pandas.DataFrame.plot.line \ - pandas.Index.factorize \ pandas.Period.strftime \ - pandas.Series.factorize \ pandas.Series.floordiv \ pandas.Series.plot.line \ pandas.Series.rfloordiv \ @@ -612,7 +610,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.api.types.is_unsigned_integer_dtype \ pandas.core.groupby.DataFrameGroupBy.take \ pandas.core.groupby.SeriesGroupBy.take \ - pandas.factorize \ pandas.io.formats.style.Styler.concat \ pandas.io.formats.style.Styler.export \ pandas.io.formats.style.Styler.set_td_classes \ From 42357829e0a417bade2b0d2a2891bd2c164632ba Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 9 Feb 2023 15:41:14 +0100 Subject: [PATCH 3/3] fix dtypes --- pandas/core/algorithms.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3a843ce71f130..636273724b57c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -688,7 +688,7 @@ def factorize( >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b']) >>> codes - array([0, 0, 1, 2, 0], dtype=int64) + array([0, 0, 1, 2, 0]) >>> uniques array(['b', 'a', 'c'], dtype=object) @@ -697,7 +697,7 @@ def factorize( >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True) >>> codes - array([1, 1, 0, 2, 1], dtype=int64) + array([1, 1, 0, 2, 1]) >>> uniques array(['a', 'b', 'c'], dtype=object) @@ -707,7 +707,7 @@ def factorize( >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b']) >>> codes - array([ 0, -1, 1, 2, 0], dtype=int64) + array([ 0, -1, 1, 2, 0]) >>> uniques array(['b', 'a', 'c'], dtype=object) @@ -718,7 +718,7 @@ def factorize( >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) >>> codes, uniques = pd.factorize(cat) >>> codes - array([0, 0, 1], dtype=int64) + array([0, 0, 1]) >>> uniques ['a', 'c'] Categories (3, object): ['a', 'b', 'c'] @@ -732,7 +732,7 @@ def factorize( >>> cat = pd.Series(['a', 'a', 'c']) >>> codes, uniques = pd.factorize(cat) >>> codes - array([0, 0, 1], dtype=int64) + array([0, 0, 1]) >>> uniques Index(['a', 'c'], dtype='object') @@ -742,13 +742,13 @@ def factorize( >>> values = np.array([1, 2, 1, np.nan]) >>> codes, uniques = pd.factorize(values) # default: use_na_sentinel=True >>> codes - array([ 0, 1, 0, -1], dtype=int64) + array([ 0, 1, 0, -1]) >>> uniques array([1., 2.]) >>> codes, uniques = pd.factorize(values, use_na_sentinel=False) >>> codes - array([0, 1, 0, 2], dtype=int64) + array([0, 1, 0, 2]) >>> uniques array([ 1., 2., nan]) """