Skip to content

Commit b7a6d1b

Browse files
committed
BUG: ensure Series.str raises TypeError for inappropriate dtype
Fixes GH9184. Also includes a fix for Series.apply to ensure that it propagates metadata and dtypes properly for empty Series (this was necessary to fix a Stata test).
1 parent 9b5a5ea commit b7a6d1b

File tree

5 files changed

+24
-22
lines changed

5 files changed

+24
-22
lines changed

doc/source/whatsnew/v0.16.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ Bug Fixes
197197
- Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`)
198198
- Bug in ``DataFrame.where`` and ``Series.where`` coerce numerics to string incorrectly (:issue:`9280`)
199199
- Bug in ``DataFrame.where`` and ``Series.where`` raise ``ValueError`` when string list-like is passed. (:issue:`9280`)
200+
- Accessing ``Series.str`` methods on non-string values now raises ``TypeError`` instead of producing incorrect results (:issue:`9184`)
200201

201202
- Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`)
202203

pandas/core/series.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -2045,7 +2045,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds):
20452045
y : Series or DataFrame if func returns a Series
20462046
"""
20472047
if len(self) == 0:
2048-
return Series()
2048+
return self._constructor(dtype=self.dtype,
2049+
index=self.index).__finalize__(self)
20492050

20502051
if kwds or args and not isinstance(func, np.ufunc):
20512052
f = lambda x: func(x, *args, **kwds)
@@ -2504,6 +2505,12 @@ def to_period(self, freq=None, copy=True):
25042505
# string methods
25052506

25062507
def _make_str_accessor(self):
2508+
if not com.is_object_dtype(self.dtype):
2509+
# this really should exclude all series with any non-string values,
2510+
# but that isn't practical for performance reasons until we have a
2511+
# str dtype (GH 9343)
2512+
raise TypeError("Can only use .str accessor with string values, "
2513+
"which use np.object_ dtype in pandas")
25072514
return StringMethods(self)
25082515

25092516
str = base.AccessorProperty(StringMethods, _make_str_accessor)

pandas/tests/test_categorical.py

+2
Original file line numberDiff line numberDiff line change
@@ -2522,6 +2522,8 @@ def test_cat_accessor_api(self):
25222522
self.assertIs(Series.cat, CategoricalAccessor)
25232523
s = Series(list('aabbcde')).astype('category')
25242524
self.assertIsInstance(s.cat, CategoricalAccessor)
2525+
with tm.assertRaisesRegexp(TypeError, "only use .cat accessor"):
2526+
Series([1]).cat
25252527

25262528
def test_pickle_v0_14_1(self):
25272529
cat = pd.Categorical(values=['a', 'b', 'c'],

pandas/tests/test_series.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@ def test_dt_accessor_api(self):
240240
s = Series(date_range('2000-01-01', periods=3))
241241
self.assertIsInstance(s.dt, DatetimeProperties)
242242

243+
with tm.assertRaisesRegexp(TypeError, "only use .dt accessor"):
244+
Series([1]).dt
245+
243246
def test_binop_maybe_preserve_name(self):
244247

245248
# names match, preserve
@@ -5411,9 +5414,14 @@ def test_apply(self):
54115414
tm.assert_frame_equal(result, expected)
54125415

54135416
# empty series
5414-
s = Series()
5417+
s = Series(dtype=object, name='foo', index=pd.Index([], name='bar'))
54155418
rs = s.apply(lambda x: x)
54165419
tm.assert_series_equal(s, rs)
5420+
# check all metadata (GH 9322)
5421+
self.assertIsNot(s, rs)
5422+
self.assertIs(s.index, rs.index)
5423+
self.assertEqual(s.dtype, rs.dtype)
5424+
self.assertEqual(s.name, rs.name)
54175425

54185426
# index but no data
54195427
s = Series(index=[1, 2, 3])

pandas/tests/test_strings.py

+4-20
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ def test_api(self):
3636
self.assertIs(Series.str, strings.StringMethods)
3737
self.assertIsInstance(Series(['']).str, strings.StringMethods)
3838

39+
# GH 9184
40+
with tm.assertRaisesRegexp(TypeError, "only use .str accessor"):
41+
Series([1]).str
42+
3943
def test_iter(self):
4044
# GH3638
4145
strs = 'google', 'wikimedia', 'wikipedia', 'wikitravel'
@@ -80,26 +84,6 @@ def test_iter_single_element(self):
8084
self.assertFalse(i)
8185
assert_series_equal(ds, s)
8286

83-
def test_iter_numeric_try_string(self):
84-
# behavior identical to empty series
85-
dsi = Series(lrange(4))
86-
87-
i, s = 100, 'h'
88-
89-
for i, s in enumerate(dsi.str):
90-
pass
91-
92-
self.assertEqual(i, 100)
93-
self.assertEqual(s, 'h')
94-
95-
dsf = Series(np.arange(4.))
96-
97-
for i, s in enumerate(dsf.str):
98-
pass
99-
100-
self.assertEqual(i, 100)
101-
self.assertEqual(s, 'h')
102-
10387
def test_iter_object_try_string(self):
10488
ds = Series([slice(None, randint(10), randint(10, 20))
10589
for _ in range(4)])

0 commit comments

Comments
 (0)