Skip to content

Commit 9bab07a

Browse files
committed
ENH: add StringMethods (.str accessor) to Index, fixes pandas-dev#9068
1 parent 9772a5b commit 9bab07a

File tree

5 files changed

+48
-19
lines changed

5 files changed

+48
-19
lines changed

pandas/core/base.py

+14
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
import pandas.tslib as tslib
1111
import pandas.lib as lib
1212
from pandas.util.decorators import Appender, cache_readonly
13+
from pandas.core.strings import StringMethods
1314

1415

1516
_shared_docs = dict()
@@ -497,6 +498,19 @@ def searchsorted(self, key, side='left'):
497498
#### needs tests/doc-string
498499
return self.values.searchsorted(key, side=side)
499500

501+
# string methods
502+
def _make_str_accessor(self):
503+
if not com.is_object_dtype(self.dtype):
504+
# this really should exclude all series with any non-string values,
505+
# but that isn't practical for performance reasons until we have a
506+
# str dtype (GH 9343)
507+
raise AttributeError("Can only use .str accessor with string "
508+
"values, which use np.object_ dtype in "
509+
"pandas")
510+
return StringMethods(self)
511+
512+
str = AccessorProperty(StringMethods, _make_str_accessor)
513+
500514
_shared_docs['drop_duplicates'] = (
501515
"""Return %(klass)s with duplicate values removed
502516

pandas/core/series.py

-16
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@
2828
from pandas.core import generic, base
2929
from pandas.core.internals import SingleBlockManager
3030
from pandas.core.categorical import Categorical, CategoricalAccessor
31-
from pandas.core.strings import StringMethods
3231
from pandas.tseries.common import (maybe_to_datetimelike,
3332
CombinedDatetimelikeProperties)
3433
from pandas.tseries.index import DatetimeIndex
@@ -2514,21 +2513,6 @@ def to_period(self, freq=None, copy=True):
25142513
return self._constructor(new_values,
25152514
index=new_index).__finalize__(self)
25162515

2517-
#------------------------------------------------------------------------------
2518-
# string methods
2519-
2520-
def _make_str_accessor(self):
2521-
if not com.is_object_dtype(self.dtype):
2522-
# this really should exclude all series with any non-string values,
2523-
# but that isn't practical for performance reasons until we have a
2524-
# str dtype (GH 9343)
2525-
raise AttributeError("Can only use .str accessor with string "
2526-
"values, which use np.object_ dtype in "
2527-
"pandas")
2528-
return StringMethods(self)
2529-
2530-
str = base.AccessorProperty(StringMethods, _make_str_accessor)
2531-
25322516
#------------------------------------------------------------------------------
25332517
# Datetimelike delegation methods
25342518

pandas/core/strings.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -926,9 +926,9 @@ def do_copy(target):
926926
class StringMethods(object):
927927

928928
"""
929-
Vectorized string functions for Series. NAs stay NA unless handled
930-
otherwise by a particular method. Patterned after Python's string methods,
931-
with some inspiration from R's stringr package.
929+
Vectorized string functions for Series and Index. NAs stay NA unless
930+
handled otherwise by a particular method. Patterned after Python's string
931+
methods, with some inspiration from R's stringr package.
932932
933933
Examples
934934
--------
@@ -957,11 +957,14 @@ def __iter__(self):
957957
def _wrap_result(self, result):
958958
from pandas.core.series import Series
959959
from pandas.core.frame import DataFrame
960+
from pandas.core.index import Index
960961

961962
if not hasattr(result, 'ndim'):
962963
return result
963964
elif result.ndim == 1:
964965
name = getattr(result, 'name', None)
966+
if isinstance(self.series, Index):
967+
return Index(result, name=name or self.series.name)
965968
return Series(result, index=self.series.index,
966969
name=name or self.series.name)
967970
else:

pandas/tests/test_index.py

+15
Original file line number | Diff line number | Diff line change
@@ -1174,6 +1174,21 @@ def test_join_self(self):
11741174
for kind in kinds:
11751175
joined = res.join(res, how=kind)
11761176
self.assertIs(res, joined)
1177+
def test_str_attribute(self):
1178+
# GH9068
1179+
methods = ['strip', 'rstrip', 'lstrip']
1180+
idx = Index([' jack', 'jill ', ' jesse ', 'frank'])
1181+
for method in methods:
1182+
expected = Index([getattr(str, method)(x) for x in idx.values])
1183+
tm.assert_index_equal(getattr(Index.str, method)(idx.str), expected)
1184+
1185+
# create a few instances that are not able to use .str accessor
1186+
indices = [Index(range(5)),
1187+
tm.makeDateIndex(10),
1188+
PeriodIndex(start='2000', end='2010', freq='A')]
1189+
for idx in indices:
1190+
with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'):
1191+
idx.str.repeat(2)
11771192

11781193
def test_indexing_doesnt_change_class(self):
11791194
idx = Index([1, 2, 3, 'a', 'b', 'c'])

pandas/tests/test_series.py

+13
Original file line number | Diff line number | Diff line change
@@ -4855,6 +4855,19 @@ def test_to_csv_path_is_none(self):
48554855
csv_str = s.to_csv(path=None)
48564856
self.assertIsInstance(csv_str, str)
48574857

4858+
def test_str_attribute(self):
4859+
# GH9068
4860+
methods = ['strip', 'rstrip', 'lstrip']
4861+
s = Series([' jack', 'jill ', ' jesse ', 'frank'])
4862+
for method in methods:
4863+
expected = Series([getattr(str, method)(x) for x in s.values])
4864+
assert_series_equal(getattr(Series.str, method)(s.str), expected)
4865+
4866+
# str accessor only valid with string values
4867+
s = Series(range(5))
4868+
with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'):
4869+
s.str.repeat(2)
4870+
48584871
def test_clip(self):
48594872
val = self.ts.median()
48604873

0 commit comments

Comments
 (0)