Skip to content

Commit 01e0ff3

Browse files
committed
TST: add testing module for string methods #620
1 parent dfb5343 commit 01e0ff3

File tree

1 file changed

+217
-0
lines changed

1 file changed

+217
-0
lines changed

pandas/tests/test_strings.py

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
# pylint: disable-msg=E1101,W0612
2+
3+
from datetime import datetime, timedelta, date
4+
import os
5+
import operator
6+
import unittest
7+
8+
import nose
9+
10+
from numpy import nan as NA
11+
import numpy as np
12+
13+
from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull,
14+
bdate_range, date_range)
15+
import pandas.core.common as com
16+
17+
from pandas.util.testing import assert_series_equal, assert_almost_equal
18+
import pandas.util.testing as tm
19+
20+
import pandas.core.strings as strings
21+
22+
class TestStringMethods(unittest.TestCase):
    """Exercise the functions in ``pandas.core.strings`` and ``Series.str``.

    Most fixtures contain at least one missing value (``NA`` / ``np.nan``) so
    that each test also pins what is expected at the missing positions.

    Per-test notes are ``#`` comments rather than docstrings so that verbose
    test runners keep printing the test method names.
    """

    def test_cat(self):
        # str_cat is called both on a single array and together with a
        # second array; na_rep / sep control the rendering.
        one = ['a', 'a', 'b', 'b', 'c', NA]
        two = ['a', NA, 'b', 'd', 'foo', NA]

        # single array
        # Without na_rep, the missing entry makes the whole result missing.
        result = strings.str_cat(one)
        self.assertTrue(isnull(result))

        result = strings.str_cat(one, na_rep='NA')
        exp = 'aabbcNA'
        self.assertEqual(result, exp)

        result = strings.str_cat(one, na_rep='-')
        exp = 'aabbc-'
        self.assertEqual(result, exp)

        result = strings.str_cat(one, sep='_', na_rep='NA')
        exp = 'a_a_b_b_c_NA'
        self.assertEqual(result, exp)

        # Multiple arrays
        result = strings.str_cat(one, [two], na_rep='NA')
        exp = ['aa', 'aNA', 'bb', 'bd', 'cfoo', 'NANA']
        self.assertTrue(np.array_equal(result, exp))

        # Without na_rep, a missing value on either side yields NA.
        result = strings.str_cat(one, two)
        exp = ['aa', NA, 'bb', 'bd', 'cfoo', NA]
        tm.assert_almost_equal(result, exp)

    def test_count(self):
        # Counts regex matches per element; NA stays NA.
        values = ['foo', 'foofoo', NA, 'foooofooofommmfoo']

        result = strings.str_count(values, 'f[o]+')
        exp = [1, 2, NA, 4]
        tm.assert_almost_equal(result, exp)

        # The Series.str accessor must give the same answer as a Series.
        result = Series(values).str.count('f[o]+')
        self.assertTrue(isinstance(result, Series))
        tm.assert_almost_equal(result, exp)

    def test_contains(self):
        values = ['foo', NA, 'fooommm__foo', 'mmm_']
        pat = 'mmm[_]+'

        # With a missing value present, the NA position is expected as nan.
        result = strings.str_contains(values, pat)
        expected = [False, np.nan, True, True]
        tm.assert_almost_equal(result, expected)

        # With no missing values the result is expected to be boolean dtype.
        values = ['foo', 'xyz', 'fooommm__foo', 'mmm_']
        result = strings.str_contains(values, pat)
        expected = [False, False, True, True]
        self.assertTrue(result.dtype == np.bool_)
        tm.assert_almost_equal(result, expected)

    def test_startswith(self):
        values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])

        result = values.str.startswith('foo')
        exp = Series([False, NA, True, False, False, NA, True])
        tm.assert_series_equal(result, exp)

    def test_endswith(self):
        values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])

        result = values.str.endswith('foo')
        exp = Series([False, NA, False, False, True, NA, True])
        tm.assert_series_equal(result, exp)

    def test_lower_upper(self):
        values = Series(['om', NA, 'nom', 'nom'])

        result = values.str.upper()
        exp = Series(['OM', NA, 'NOM', 'NOM'])
        tm.assert_series_equal(result, exp)

        # Round trip: lower-casing the upper-cased data gives the input back.
        result = result.str.lower()
        tm.assert_series_equal(result, values)

    def test_replace(self):
        values = Series(['fooBAD__barBAD', NA])

        # Default: every match of the pattern is replaced.
        result = values.str.replace('BAD[_]*', '')
        exp = Series(['foobar', NA])
        tm.assert_series_equal(result, exp)

        # n=1: only the first match is replaced.
        result = values.str.replace('BAD[_]*', '', n=1)
        exp = Series(['foobarBAD', NA])
        tm.assert_series_equal(result, exp)

    def test_repeat(self):
        values = Series(['a', 'b', NA, 'c', NA, 'd'])

        # Scalar repeat count applied to every element.
        result = values.str.repeat(3)
        exp = Series(['aaa', 'bbb', NA, 'ccc', NA, 'ddd'])
        tm.assert_series_equal(result, exp)

        # One repeat count per element.
        result = values.str.repeat([1, 2, 3, 4, 5, 6])
        exp = Series(['a', 'bb', NA, 'cccc', NA, 'dddddd'])
        tm.assert_series_equal(result, exp)

    def test_match(self):
        values = Series(['fooBAD__barBAD', NA, 'foo'])

        # Expected: a tuple of groups on a match, [] when nothing matches.
        result = values.str.match('.*(BAD[_]+).*(BAD)')
        exp = Series([('BAD__', 'BAD'), NA, []])
        tm.assert_series_equal(result, exp)

    def test_join(self):
        # split followed by join on the same separator is a round trip.
        values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
        result = values.str.split('_').str.join('_')
        tm.assert_series_equal(values, result)

    def test_len(self):
        values = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo'])

        result = values.str.len()
        # Reference result: built-in len per element, NA where missing.
        exp = values.map(lambda x: len(x) if com.notnull(x) else NA)
        tm.assert_series_equal(result, exp)

    def test_findall(self):
        values = Series(['fooBAD__barBAD', NA, 'foo', 'BAD'])

        result = values.str.findall('BAD[_]*')
        exp = Series([['BAD__', 'BAD'], NA, [], ['BAD']])
        tm.assert_almost_equal(result, exp)

    def test_pad(self):
        # Padding to width 5: one-character strings become exactly five
        # characters wide; 'eeeeee' is already longer and is left unchanged.
        values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

        result = values.str.pad(5, side='left')
        exp = Series(['    a', '    b', NA, '    c', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

        result = values.str.pad(5, side='right')
        exp = Series(['a    ', 'b    ', NA, 'c    ', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

        result = values.str.pad(5, side='both')
        exp = Series(['  a  ', '  b  ', NA, '  c  ', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

    def test_center(self):
        # Same expectation as pad(5, side='both') in test_pad.
        values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

        result = values.str.center(5)
        exp = Series(['  a  ', '  b  ', NA, '  c  ', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

    def test_split(self):
        values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])

        result = values.str.split('_')
        exp = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']])
        tm.assert_series_equal(result, exp)

    def test_slice(self):
        values = Series(['aafootwo', 'aabartwo', NA, 'aabazqux'])

        # slice(2, 5) is expected to behave like s[2:5] on each element.
        result = values.str.slice(2, 5)
        exp = Series(['foo', 'bar', NA, 'baz'])
        tm.assert_series_equal(result, exp)

    def test_slice_replace(self):
        # TODO: placeholder -- no assertions yet.
        pass

    def test_strip_lstrip_rstrip(self):
        values = Series([' aa ', ' bb \n', NA, 'cc '])

        result = values.str.strip()
        exp = Series(['aa', 'bb', NA, 'cc'])
        tm.assert_series_equal(result, exp)

        result = values.str.lstrip()
        exp = Series(['aa ', 'bb \n', NA, 'cc '])
        tm.assert_series_equal(result, exp)

        result = values.str.rstrip()
        exp = Series([' aa', ' bb', NA, 'cc'])
        tm.assert_series_equal(result, exp)

    def test_wrap(self):
        # TODO: placeholder -- no assertions yet.
        pass

    def test_get(self):
        values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])

        # get(1) picks the second piece of each split list.
        result = values.str.split('_').str.get(1)
        expected = Series(['b', 'd', np.nan, 'g'])
        tm.assert_series_equal(result, expected)
213+
214+
215+
if __name__ == '__main__':
    # When executed as a script, hand this module to nose: verbose output
    # without capture (-vvs), stop at the first failure (-x), and drop into
    # pdb on errors and failures.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

0 commit comments

Comments
 (0)