From 690cc20d329de971f5b31fbebab183bce7bcc883 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 1 Aug 2022 15:22:26 -0500 Subject: [PATCH 1/6] gh 47911 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/strings/accessor.py | 3 +++ pandas/tests/strings/test_strings.py | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4674f28744f7e..fe5802a1453e8 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,6 +279,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) +- :meth:`Series.str.get` now raises when ``i`` is not a integer (:issue:`47911`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index d50daad9a22b1..a91d66383fda9 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1045,6 +1045,9 @@ def get(self, i): 5 None dtype: object """ + if not is_integer(i): + msg = f"i must be of integer type, not {type(i).__name__}" + raise TypeError(msg) result = self._data.array._str_get(i) return self._wrap_result(result) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 0e55676699c21..f38d82f3cc25f 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -828,3 +828,21 @@ def test_zfill_with_leading_sign(): value = Series(["-cat", "-1", "+dog"]) expected = Series(["-0cat", "-0001", "+0dog"]) tm.assert_series_equal(value.str.zfill(5), expected) + + +@pytest.mark.parametrize("arg", [{1: 1}, [1, 2], (1, 2), "0"]) +def test_get_with_non_integer_argument(arg): + # GH47911 + s = Series( + [ + "String", + (1, 2, 3), + ["a", "b", "c"], + 123, + -456, + {1: "Hello", "2": "World"} + ] + ) + msg = f"i must be of integer type, not {type(arg).__name__}" + with pytest.raises(TypeError, match=msg): + s.str.get(arg) From 9a40008c8234b80085ec6e6c027e4e31557ee2dd Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 1 Aug 2022 15:57:36 -0500 Subject: [PATCH 2/6] pre-commit issue --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/strings/test_strings.py | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index fe5802a1453e8..5b796456f7eec 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,7 +279,7 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) -- :meth:`Series.str.get` now raises when ``i`` is not a integer (:issue:`47911`) +- :meth:`Series.str.get` now raises ``TypeError`` when positional argument ``i`` is not an integer (:issue:`47911`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index f38d82f3cc25f..e08f80c6ffde5 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -834,14 +834,7 @@ def test_zfill_with_leading_sign(): def test_get_with_non_integer_argument(arg): # GH47911 s = Series( - [ - "String", - (1, 2, 3), - ["a", "b", "c"], - 123, - -456, - {1: "Hello", "2": "World"} - ] + ["String", (1, 2, 3), ["a", "b", "c"], 123, -456, {1: "Hello", "2": "World"}] ) msg = f"i must be of integer type, not {type(arg).__name__}" with pytest.raises(TypeError, match=msg): From e74e1e20974a57bbac1385f455eeab1c3f5e0106 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Wed, 3 Aug 2022 12:12:38 -0500 Subject: [PATCH 3/6] add test and fix doc --- doc/source/whatsnew/v1.5.0.rst | 1 - pandas/core/strings/accessor.py | 20 +++++++++++++------- pandas/tests/strings/test_strings.py | 11 +++++------ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5b796456f7eec..4674f28744f7e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -279,7 +279,6 @@ Other enhancements - :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`) - :meth:`RangeIndex.union` now can return a :class:`RangeIndex` instead of a :class:`Int64Index` if the resulting values are equally spaced (:issue:`47557`, :issue:`43885`) - :meth:`DataFrame.compare` now accepts an argument ``result_names`` to allow the user to specify the result's names of both left and right DataFrame which are being compared. This is by default ``'self'`` and ``'other'`` (:issue:`44354`) -- :meth:`Series.str.get` now raises ``TypeError`` when positional argument ``i`` is not an integer (:issue:`47911`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index a91d66383fda9..e7a564c8f3f90 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -996,15 +996,15 @@ def rpartition(self, sep=" ", expand=True): def get(self, i): """ - Extract element from each component at specified position. + Extract element from each component at specified position or with specified key. - Extract element from lists, tuples, or strings in each element in the + Extract element from lists, tuples, dict, or strings in each element in the Series/Index. Parameters ---------- - i : int - Position of element to extract. + i : int or hashable dict label + Position or key of element to extract. Returns ------- @@ -1044,10 +1044,16 @@ def get(self, i): 4 NaN 5 None dtype: object + + Return element with given key + + >>> s = pd.Series([{"name": "Hello", "value": "World"}, + ... {"name": "Goodbye", "value": "Planet"}]) + >>> s.str.get('name') + 0 Hello + 1 Goodbye + dtype: object """ - if not is_integer(i): - msg = f"i must be of integer type, not {type(i).__name__}" - raise TypeError(msg) result = self._data.array._str_get(i) return self._wrap_result(result) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index e08f80c6ffde5..1dbfc22700b7e 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -830,12 +830,11 @@ def test_zfill_with_leading_sign(): tm.assert_series_equal(value.str.zfill(5), expected) -@pytest.mark.parametrize("arg", [{1: 1}, [1, 2], (1, 2), "0"]) -def test_get_with_non_integer_argument(arg): +def test_get_with_dict_label(): # GH47911 s = Series( - ["String", (1, 2, 3), ["a", "b", "c"], 123, -456, {1: "Hello", "2": "World"}] + [{"name": "Hello", "value": "World"}, {"name": "Goodbye", "value": "Planet"}] ) - msg = f"i must be of integer type, not {type(arg).__name__}" - with pytest.raises(TypeError, match=msg): - s.str.get(arg) + result = s.str.get("name") + expected = Series(["Hello", "Goodbye"]) + tm.assert_series_equal(result, expected) From 02c14367a9d6f50b6ae703396b0497a1bca31866 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 8 Aug 2022 15:57:49 -0500 Subject: [PATCH 4/6] modified comment --- pandas/core/strings/accessor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index e7a564c8f3f90..738891888f671 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1047,8 +1047,9 @@ def get(self, i): Return element with given key - >>> s = pd.Series([{"name": "Hello", "value": "World"}, - ... {"name": "Goodbye", "value": "Planet"}]) + >>> s = pd.Series( + ... [{"name": "Hello", "value": "World"}, {"name": "Goodbye", "value": "Planet"}] + ... ) >>> s.str.get('name') 0 Hello 1 Goodbye From 39fafc6dd7351cf64f6ef5548db0e37895d1f8dc Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 8 Aug 2022 16:00:29 -0500 Subject: [PATCH 5/6] pep 8 --- pandas/core/strings/accessor.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 738891888f671..e7a564c8f3f90 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1047,9 +1047,8 @@ def get(self, i): Return element with given key - >>> s = pd.Series( - ... [{"name": "Hello", "value": "World"}, {"name": "Goodbye", "value": "Planet"}] - ... ) + >>> s = pd.Series([{"name": "Hello", "value": "World"}, + ... {"name": "Goodbye", "value": "Planet"}]) >>> s.str.get('name') 0 Hello 1 Goodbye From ab627670b1ad2426b813e432c2173fe749f72837 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Mon, 8 Aug 2022 20:03:05 -0500 Subject: [PATCH 6/6] add more elements --- pandas/tests/strings/test_strings.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 1dbfc22700b7e..ffa8b557d2379 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -833,8 +833,15 @@ def test_zfill_with_leading_sign(): def test_get_with_dict_label(): # GH47911 s = Series( - [{"name": "Hello", "value": "World"}, {"name": "Goodbye", "value": "Planet"}] + [ + {"name": "Hello", "value": "World"}, + {"name": "Goodbye", "value": "Planet"}, + {"value": "Sea"}, + ] ) result = s.str.get("name") - expected = Series(["Hello", "Goodbye"]) + expected = Series(["Hello", "Goodbye", None]) + tm.assert_series_equal(result, expected) + result = s.str.get("value") + expected = Series(["World", "Planet", "Sea"]) tm.assert_series_equal(result, expected)