From e743be4b86b398e3de7fd316605016eafc5fff6e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jul 2023 17:34:24 -0700 Subject: [PATCH 1/4] BUG: iter(ser.str) did not raise TypeError --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/strings/accessor.py | 3 +++ pandas/tests/strings/test_strings.py | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 31293a42a3977..e9d22efcdd58a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -484,7 +484,7 @@ Conversion Strings ^^^^^^^ -- +- Bug in :meth:`Series.str` that did not raise a ``TypeError`` when iterated (:issue:`54173`) - Interval diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index abd0dceb6e35d..d72a6416e4092 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -243,6 +243,9 @@ def __getitem__(self, key): result = self._data.array._str_getitem(key) return self._wrap_result(result) + def __iter__(self): + raise TypeError(f"'{type(self).__name__}' object is not iterable") + def _wrap_result( self, result, diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index d4807a8eedaaa..a5c4f8f7c8a4f 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -27,6 +27,13 @@ def test_startswith_endswith_non_str_patterns(pattern): ser.str.endswith(pattern) +def test_iter_raises(): + # GH 54173 + ser = Series(["foo", "bar"]) + with pytest.raises(TypeError, match="'StringMethods' object is not iterable"): + iter(ser.str) + + # test integer/float dtypes (inferred by constructor) and mixed From 2db079aab839372c8e27d6620a7d2e4f3a8b4d47 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jul 2023 18:46:29 -0700 Subject: [PATCH 2/4] pass through --- pandas/core/strings/accessor.py | 36 ++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index d72a6416e4092..c0eedf8222cbd 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -441,22 +441,26 @@ def _get_series_list(self, others): others = DataFrame(others, index=idx) return [others[x] for x in others] elif is_list_like(others, allow_sets=False): - others = list(others) # ensure iterators do not get read twice etc - - # in case of list-like `others`, all elements must be - # either Series/Index/np.ndarray (1-dim)... - if all( - isinstance(x, (ABCSeries, ABCIndex)) - or (isinstance(x, np.ndarray) and x.ndim == 1) - for x in others - ): - los: list[Series] = [] - while others: # iterate through list and append each element - los = los + self._get_series_list(others.pop(0)) - return los - # ... or just strings - elif all(not is_list_like(x) for x in others): - return [Series(others, index=idx)] + try: + others = list(others) # ensure iterators do not get read twice etc + except TypeError: + # e.g. ser.str, raise below + pass + else: + # in case of list-like `others`, all elements must be + # either Series/Index/np.ndarray (1-dim)... + if all( + isinstance(x, (ABCSeries, ABCIndex)) + or (isinstance(x, np.ndarray) and x.ndim == 1) + for x in others + ): + los: list[Series] = [] + while others: # iterate through list and append each element + los = los + self._get_series_list(others.pop(0)) + return los + # ... or just strings + elif all(not is_list_like(x) for x in others): + return [Series(others, index=idx)] raise TypeError( "others must be Series, Index, DataFrame, np.ndarray " "or list-like (either containing only strings or " From 53767d662706c3aaebbb22a397fb473597b91ea4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 13:37:00 -0700 Subject: [PATCH 3/4] Change test --- pandas/tests/dtypes/test_inference.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 9931e71c16254..78f960f4d46d5 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -150,8 +150,9 @@ def shape(self): ((_ for _ in []), True, "generator-empty"), (Series([1]), True, "Series"), (Series([], dtype=object), True, "Series-empty"), - (Series(["a"]).str, False, "StringMethods"), - (Series([], dtype="O").str, False, "StringMethods-empty"), + # Series.str will still raise a TypeError if iterated + (Series(["a"]).str, True, "StringMethods"), + (Series([], dtype="O").str, True, "StringMethods-empty"), (Index([1]), True, "Index"), (Index([]), True, "Index-empty"), (DataFrame([[1]]), True, "DataFrame"), From 56714b5f4a6468f89f110dcae4812c54b7d9a064 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 18 Jul 2023 16:05:05 -0700 Subject: [PATCH 4/4] Typing --- pandas/core/strings/accessor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index c0eedf8222cbd..e59369db776da 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -48,7 +48,10 @@ from pandas.core.construction import extract_array if TYPE_CHECKING: - from collections.abc import Hashable + from collections.abc import ( + Hashable, + Iterator, + ) from pandas import ( DataFrame, @@ -243,7 +246,7 @@ def __getitem__(self, key): result = self._data.array._str_getitem(key) return self._wrap_result(result) - def __iter__(self): + def __iter__(self) -> Iterator: raise TypeError(f"'{type(self).__name__}' object is not iterable") def _wrap_result(