Skip to content

Commit bd163c3

Browse files
committed
ENH: add Series.str iterator
add release notes py26 TestCase does not support assertIsInstance print out test value add superfluous print statements change tests forgot gh issue target
1 parent 9d0a26a commit bd163c3

File tree

4 files changed

+110
-1
lines changed

4 files changed

+110
-1
lines changed

RELEASE.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ pandas 0.11.1
4343
multi-index column.
4444
Note: The default value will change in 0.12 to make the default *to* write and
4545
read multi-index columns in the new format. (GH3571_, GH1651_, GH3141_)
46+
- Add iterator to ``Series.str`` (GH3638_)
4647

4748
**Improvements to existing features**
4849

@@ -199,7 +200,7 @@ pandas 0.11.1
199200
.. _GH3571: https://github.com/pydata/pandas/issues/3571
200201
.. _GH1651: https://github.com/pydata/pandas/issues/1651
201202
.. _GH3141: https://github.com/pydata/pandas/issues/3141
202-
203+
.. _GH3638: https://github.com/pydata/pandas/issues/3638
203204

204205
pandas 0.11.0
205206
=============

doc/source/v0.11.1.txt

+22
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,27 @@ Enhancements
8080
- ``DataFrame.replace()`` now allows regular expressions on contained
8181
``Series`` with object dtype. See the examples section in the regular docs
8282
:ref:`Replacing via String Expression <missing_data.replace_expression>`
83+
- ``Series.str`` now supports iteration (GH3638_). You can iterate over the
84+
individual elements of each string in the ``Series``. Each iteration yields
85+
a ``Series`` with either a single character at each index of the
86+
original ``Series`` or ``NaN``. For example,
87+
88+
.. ipython:: python
89+
90+
strs = 'go', 'bow', 'joe', 'slow'
91+
ds = Series(strs)
92+
93+
for s in ds.str:
94+
print s
95+
96+
s
97+
s.dropna().values.item() == 'w'
98+
99+
The last element yielded by the iterator will be a ``Series`` containing
100+
the last element of the longest string in the ``Series`` with all other
101+
elements being ``NaN``. Here, since ``'slow'`` is the longest string
102+
and there are no other strings with the same length, ``'w'`` is the only
103+
non-null string in the yielded ``Series``.
83104

84105
- Multi-index column support for reading and writing csvs
85106

@@ -133,3 +154,4 @@ on GitHub for a complete list.
133154
.. _GH3571: https://github.com/pydata/pandas/issues/3571
134155
.. _GH1651: https://github.com/pydata/pandas/issues/1651
135156
.. _GH3141: https://github.com/pydata/pandas/issues/3141
157+
.. _GH3638: https://github.com/pydata/pandas/issues/3638

pandas/core/strings.py

+8
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,14 @@ def __getitem__(self, key):
661661
else:
662662
return self.get(key)
663663

664+
def __iter__(self):
    """Iterate position by position over the strings of the Series.

    Each step yields ``self.get(position)`` for position 0, 1, 2, ...
    (per the release notes: a ``Series`` holding the character at that
    position for every row, or ``NaN`` where there is none).  Iteration
    ends at the first position whose result contains no non-null value,
    so an empty or non-string ``Series`` yields nothing.
    """
    position = 0
    while True:
        chars = self.get(position)
        # An all-null result means every row is exhausted (or never had
        # string data to begin with): stop without yielding it.
        if not chars.notnull().any():
            return
        yield chars
        position += 1
671+
664672
def _wrap_result(self, result):
665673
return Series(result, index=self.series.index,
666674
name=self.series.name)

pandas/tests/test_strings.py

+78
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
from numpy import nan as NA
1212
import numpy as np
13+
from numpy.testing import assert_array_equal
14+
from numpy.random import randint
1315

1416
from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull,
1517
bdate_range, date_range)
@@ -25,6 +27,82 @@ class TestStringMethods(unittest.TestCase):
2527

2628
_multiprocess_can_split_ = True
2729

30+
def test_iter(self):
    """Iterating ``Series.str`` yields one Series per position."""
    # GH3638
    strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
    ds = Series(strs)

    for s in ds.str:
        # iter must yield a Series (assertIsInstance is unavailable on
        # py26, and assertTrue replaces the deprecated ``assert_`` alias)
        self.assertTrue(isinstance(s, Series))

        # indices of each yielded Series should be equal to the index of
        # the original Series
        assert_array_equal(s.index, ds.index)

        for el in s:
            # each element of the series is either a basestring or nan
            self.assertTrue(isinstance(el, basestring) or isnull(el))

    # desired behavior is to iterate until everything would be nan on the
    # next iter so make sure the last element of the iterator was 'l' in
    # this case since 'wikitravel' is the longest string
    self.assertEqual(s.dropna().values.item(), 'l')
51+
52+
def test_iter_empty(self):
    """Iterating ``.str`` of an empty Series must yield nothing."""
    empty = Series([], dtype=object)

    # Sentinels: the loop below would rebind both names if it ran even
    # once, so they must still hold these values afterwards.
    position, chars = 100, 1

    for position, chars in enumerate(empty.str):
        pass

    self.assertEqual(position, 100)
    self.assertEqual(chars, 1)
64+
65+
def test_iter_single_element(self):
    """A single one-character string is yielded back exactly once."""
    single = Series(['a'])

    for position, chars in enumerate(single.str):
        pass

    # The last (and only) position produced must be 0, and the Series it
    # produced must equal the original one.
    self.assertFalse(position)
    assert_series_equal(single, chars)
73+
74+
def test_iter_numeric_try_string(self):
    """Integer and float Series behave like an empty one: no iterations."""
    # Sentinels shared by both cases; any iteration would overwrite them.
    position, chars = 100, 'h'

    for values in (range(4), np.arange(4.)):
        numeric = Series(values)

        for position, chars in enumerate(numeric.str):
            pass

        self.assertEqual(position, 100)
        self.assertEqual(chars, 'h')
93+
94+
def test_iter_object_try_string(self):
    """A Series of non-string objects (slices) yields no iterations."""
    slices = [slice(None, randint(10), randint(10, 20))
              for _ in xrange(4)]
    objs = Series(slices)

    # Sentinels: untouched unless the iterator produces something.
    position, chars = 100, 'h'

    for position, chars in enumerate(objs.str):
        pass

    self.assertEqual(position, 100)
    self.assertEqual(chars, 'h')
105+
28106
def test_cat(self):
29107
one = ['a', 'a', 'b', 'b', 'c', NA]
30108
two = ['a', NA, 'b', 'd', 'foo', NA]

0 commit comments

Comments
 (0)