@@ -71,6 +71,17 @@ def test_count(self):
71
71
self .assert_ (isinstance (rs , Series ))
72
72
tm .assert_almost_equal (rs , xp )
73
73
74
+ # unicode: count regex matches via strings.str_count and via the Series.str accessor
75
+ values = [u'foo' , u'foofoo' , NA , u'foooofooofommmfoo' ]
76
+
77
+ result = strings .str_count (values , 'f[o]+' )
78
+ exp = [1 , 2 , NA , 4 ]
79
+ tm .assert_almost_equal (result , exp )
80
+
81
+ result = Series (values ).str .count ('f[o]+' )
82
+ self .assert_ (isinstance (result , Series ))
83
+ tm .assert_almost_equal (result , exp )
84
+
74
85
def test_contains (self ):
75
86
values = ['foo' , NA , 'fooommm__foo' , 'mmm_' ]
76
87
pat = 'mmm[_]+'
@@ -95,6 +106,20 @@ def test_contains(self):
95
106
self .assert_ (isinstance (rs , Series ))
96
107
tm .assert_almost_equal (rs , xp )
97
108
109
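+ # unicode input. NOTE(review): the second values list in this section uses plain str literals, not u'' - confirm that is intended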
110
+ values = [u'foo' , NA , u'fooommm__foo' , u'mmm_' ]
111
+ pat = 'mmm[_]+'
112
+
113
+ result = strings .str_contains (values , pat )
114
+ expected = [False , np .nan , True , True ]
115
+ tm .assert_almost_equal (result , expected )
116
+
117
+ values = ['foo' , 'xyz' , 'fooommm__foo' , 'mmm_' ]
118
+ result = strings .str_contains (values , pat )
119
+ expected = [False , False , True , True ]
120
+ self .assert_ (result .dtype == np .bool_ )
121
+ tm .assert_almost_equal (result , expected )
122
+
98
123
def test_startswith (self ):
99
124
values = Series (['om' , NA , 'foo_nom' , 'nom' , 'bar_foo' , NA , 'foo' ])
100
125
@@ -112,6 +137,14 @@ def test_startswith(self):
112
137
self .assert_ (isinstance (rs , Series ))
113
138
tm .assert_almost_equal (rs , xp )
114
139
140
+ # unicode: startswith on a Series of unicode strings that contains NA
141
+ values = Series ([u'om' , NA , u'foo_nom' , u'nom' , u'bar_foo' , NA ,
142
+ u'foo' ])
143
+
144
+ result = values .str .startswith ('foo' )
145
+ exp = Series ([False , NA , True , False , False , NA , True ])
146
+ tm .assert_series_equal (result , exp )
147
+
115
148
def test_endswith (self ):
116
149
values = Series (['om' , NA , 'foo_nom' , 'nom' , 'bar_foo' , NA , 'foo' ])
117
150
@@ -129,6 +162,14 @@ def test_endswith(self):
129
162
self .assert_ (isinstance (rs , Series ))
130
163
tm .assert_almost_equal (rs , xp )
131
164
165
+ # unicode: endswith on a Series of unicode strings that contains NA
166
+ values = Series ([u'om' , NA , u'foo_nom' , u'nom' , u'bar_foo' , NA ,
167
+ u'foo' ])
168
+
169
+ result = values .str .endswith ('foo' )
170
+ exp = Series ([False , NA , False , False , True , NA , True ])
171
+ tm .assert_series_equal (result , exp )
172
+
132
173
def test_lower_upper (self ):
133
174
values = Series (['om' , NA , 'nom' , 'nom' ])
134
175
@@ -148,6 +189,16 @@ def test_lower_upper(self):
148
189
self .assert_ (isinstance (rs , Series ))
149
190
tm .assert_almost_equal (rs , xp )
150
191
192
+ # unicode: upper() followed by lower() should give back the original Series
193
+ values = Series ([u'om' , NA , u'nom' , u'nom' ])
194
+
195
+ result = values .str .upper ()
196
+ exp = Series ([u'OM' , NA , u'NOM' , u'NOM' ])
197
+ tm .assert_series_equal (result , exp )
198
+
199
+ result = result .str .lower ()
200
+ tm .assert_series_equal (result , values )
201
+
151
202
def test_replace (self ):
152
203
values = Series (['fooBAD__barBAD' , NA ])
153
204
@@ -168,6 +219,17 @@ def test_replace(self):
168
219
self .assert_ (isinstance (rs , Series ))
169
220
tm .assert_almost_equal (rs , xp )
170
221
222
+ # unicode: regex replace, first unlimited and then limited with n=1
223
+ values = Series ([u'fooBAD__barBAD' , NA ])
224
+
225
+ result = values .str .replace ('BAD[_]*' , '' )
226
+ exp = Series ([u'foobar' , NA ])
227
+ tm .assert_series_equal (result , exp )
228
+
229
+ result = values .str .replace ('BAD[_]*' , '' , n = 1 )
230
+ exp = Series ([u'foobarBAD' , NA ])
231
+ tm .assert_series_equal (result , exp )
232
+
171
233
def test_repeat (self ):
172
234
values = Series (['a' , 'b' , NA , 'c' , NA , 'd' ])
173
235
@@ -188,6 +250,18 @@ def test_repeat(self):
188
250
self .assert_ (isinstance (rs , Series ))
189
251
tm .assert_almost_equal (rs , xp )
190
252
253
+ # unicode: repeat with a scalar count and with a per-element list of counts
254
+ values = Series ([u'a' , u'b' , NA , u'c' , NA , u'd' ])
255
+
256
+ result = values .str .repeat (3 )
257
+ exp = Series ([u'aaa' , u'bbb' , NA , u'ccc' , NA , u'ddd' ])
258
+ tm .assert_series_equal (result , exp )
259
+
260
+ result = values .str .repeat ([1 , 2 , 3 , 4 , 5 , 6 ])
261
+ exp = Series ([u'a' , u'bb' , NA , u'cccc' , NA , u'dddddd' ])
262
+ tm .assert_series_equal (result , exp )
263
+
264
+
191
265
def test_match (self ):
192
266
values = Series (['fooBAD__barBAD' , NA , 'foo' ])
193
267
@@ -204,6 +278,13 @@ def test_match(self):
204
278
self .assert_ (isinstance (rs , Series ))
205
279
tm .assert_almost_equal (rs , xp )
206
280
281
+ # unicode: match is expected to yield the captured groups as unicode strings
282
+ values = Series ([u'fooBAD__barBAD' , NA , u'foo' ])
283
+
284
+ result = values .str .match ('.*(BAD[_]+).*(BAD)' )
285
+ exp = Series ([(u'BAD__' , u'BAD' ), NA , []])
286
+ tm .assert_series_equal (result , exp )
287
+
207
288
def test_join (self ):
208
289
values = Series (['a_b_c' , 'c_d_e' , np .nan , 'f_g_h' ])
209
290
result = values .str .split ('_' ).str .join ('_' )
@@ -219,6 +300,11 @@ def test_join(self):
219
300
self .assert_ (isinstance (rs , Series ))
220
301
tm .assert_almost_equal (rs , xp )
221
302
303
+ # unicode: split on '_' followed by join on '_' should reproduce the input
304
+ values = Series ([u'a_b_c' , u'c_d_e' , np .nan , u'f_g_h' ])
305
+ result = values .str .split ('_' ).str .join ('_' )
306
+ tm .assert_series_equal (values , result )
307
+
222
308
def test_len (self ):
223
309
values = Series (['foo' , 'fooo' , 'fooooo' , np .nan , 'fooooooo' ])
224
310
@@ -236,6 +322,13 @@ def test_len(self):
236
322
self .assert_ (isinstance (rs , Series ))
237
323
tm .assert_almost_equal (rs , xp )
238
324
325
+ # unicode: len() compared against a per-element len mapping that keeps NA
326
+ values = Series ([u'foo' , u'fooo' , u'fooooo' , np .nan , u'fooooooo' ])
327
+
328
+ result = values .str .len ()
329
+ exp = values .map (lambda x : len (x ) if com .notnull (x ) else NA )
330
+ tm .assert_series_equal (result , exp )
331
+
239
332
def test_findall (self ):
240
333
values = Series (['fooBAD__barBAD' , NA , 'foo' , 'BAD' ])
241
334
@@ -253,6 +346,13 @@ def test_findall(self):
253
346
self .assert_ (isinstance (rs , Series ))
254
347
tm .assert_almost_equal (rs , xp )
255
348
349
+ # unicode: findall on a Series of unicode strings that contains NA
350
+ values = Series ([u'fooBAD__barBAD' , NA , u'foo' , u'BAD' ])
351
+
352
+ result = values .str .findall ('BAD[_]*' )
353
+ exp = Series ([[u'BAD__' , u'BAD' ], NA , [], [u'BAD' ]])
354
+ tm .assert_almost_equal (result , exp )
355
+
256
356
def test_pad (self ):
257
357
values = Series (['a' , 'b' , NA , 'c' , NA , 'eeeeee' ])
258
358
@@ -296,6 +396,21 @@ def test_pad(self):
296
396
self .assert_ (isinstance (rs , Series ))
297
397
tm .assert_almost_equal (rs , xp )
298
398
399
+ # unicode: pad to width 5 on the left, on the right and on both sides
400
+ values = Series ([u'a' , u'b' , NA , u'c' , NA , u'eeeeee' ])
401
+
402
+ result = values .str .pad (5 , side = 'left' )
403
+ exp = Series ([u' a' , u' b' , NA , u' c' , NA , u'eeeeee' ])
404
+ tm .assert_almost_equal (result , exp )
405
+
406
+ result = values .str .pad (5 , side = 'right' )
407
+ exp = Series ([u'a ' , u'b ' , NA , u'c ' , NA , u'eeeeee' ])
408
+ tm .assert_almost_equal (result , exp )
409
+
410
+ result = values .str .pad (5 , side = 'both' )
411
+ exp = Series ([u' a ' , u' b ' , NA , u' c ' , NA , u'eeeeee' ])
412
+ tm .assert_almost_equal (result , exp )
413
+
299
414
def test_center (self ):
300
415
values = Series (['a' , 'b' , NA , 'c' , NA , 'eeeeee' ])
301
416
@@ -314,6 +429,13 @@ def test_center(self):
314
429
self .assert_ (isinstance (rs , Series ))
315
430
tm .assert_almost_equal (rs , xp )
316
431
432
+ # unicode: center to width 5
433
+ values = Series ([u'a' , u'b' , NA , u'c' , NA , u'eeeeee' ])
434
+
435
+ result = values .str .center (5 )
436
+ exp = Series ([u' a ' , u' b ' , NA , u' c ' , NA , u'eeeeee' ])
437
+ tm .assert_almost_equal (result , exp )
438
+
317
439
def test_split (self ):
318
440
values = Series (['a_b_c' , 'c_d_e' , NA , 'f_g_h' ])
319
441
@@ -332,6 +454,14 @@ def test_split(self):
332
454
self .assert_ (isinstance (rs , Series ))
333
455
tm .assert_almost_equal (rs , xp )
334
456
457
+ # unicode: split on '_' should return lists of unicode pieces
458
+ values = Series ([u'a_b_c' , u'c_d_e' , NA , u'f_g_h' ])
459
+
460
+ result = values .str .split ('_' )
461
+ exp = Series ([[u'a' , u'b' , u'c' ], [u'c' , u'd' , u'e' ], NA ,
462
+ [u'f' , u'g' , u'h' ]])
463
+ tm .assert_series_equal (result , exp )
464
+
335
465
def test_slice (self ):
336
466
values = Series (['aafootwo' ,'aabartwo' , NA , 'aabazqux' ])
337
467
@@ -350,6 +480,13 @@ def test_slice(self):
350
480
self .assert_ (isinstance (rs , Series ))
351
481
tm .assert_almost_equal (rs , xp )
352
482
483
+ # unicode: slice(2, 5) on a Series of unicode strings that contains NA
484
+ values = Series ([u'aafootwo' , u'aabartwo' , NA , u'aabazqux' ])
485
+
486
+ result = values .str .slice (2 , 5 )
487
+ exp = Series ([u'foo' , u'bar' , NA , u'baz' ])
488
+ tm .assert_series_equal (result , exp )
489
+
353
490
def test_slice_replace (self ):
354
491
pass
355
492
@@ -393,6 +530,21 @@ def test_strip_lstrip_rstrip(self):
393
530
self .assert_ (isinstance (rs , Series ))
394
531
tm .assert_almost_equal (rs , xp )
395
532
533
+ # unicode: strip, lstrip and rstrip on whitespace-padded unicode strings
534
+ values = Series ([u' aa ' , u' bb \n ' , NA , u'cc ' ])
535
+
536
+ result = values .str .strip ()
537
+ exp = Series ([u'aa' , u'bb' , NA , u'cc' ])
538
+ tm .assert_series_equal (result , exp )
539
+
540
+ result = values .str .lstrip ()
541
+ exp = Series ([u'aa ' , u'bb \n ' , NA , u'cc ' ])
542
+ tm .assert_series_equal (result , exp )
543
+
544
+ result = values .str .rstrip ()
545
+ exp = Series ([u' aa' , u' bb' , NA , u'cc' ])
546
+ tm .assert_series_equal (result , exp )
547
+
396
548
def test_wrap (self ):
397
549
pass
398
550
@@ -414,6 +566,13 @@ def test_get(self):
414
566
self .assert_ (isinstance (rs , Series ))
415
567
tm .assert_almost_equal (rs , xp )
416
568
569
+ # unicode: get(1) on the lists produced by split on '_'
570
+ values = Series ([u'a_b_c' , u'c_d_e' , np .nan , u'f_g_h' ])
571
+
572
+ result = values .str .split ('_' ).str .get (1 )
573
+ expected = Series ([u'b' , u'd' , np .nan , u'g' ])
574
+ tm .assert_series_equal (result , expected )
575
+
417
576
if __name__ == '__main__' :
418
577
nose .runmodule (argv = [__file__ ,'-vvs' ,'-x' ,'--pdb' , '--pdb-failure' ],
419
578
exit = False )
0 commit comments