|
| 1 | +# pylint: disable-msg=E1101,W0612 |
| 2 | + |
| 3 | +from datetime import datetime, timedelta, date |
| 4 | +import os |
| 5 | +import operator |
| 6 | +import unittest |
| 7 | + |
| 8 | +import nose |
| 9 | + |
| 10 | +from numpy import nan as NA |
| 11 | +import numpy as np |
| 12 | + |
| 13 | +from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull, |
| 14 | + bdate_range, date_range) |
| 15 | +import pandas.core.common as com |
| 16 | + |
| 17 | +from pandas.util.testing import assert_series_equal, assert_almost_equal |
| 18 | +import pandas.util.testing as tm |
| 19 | + |
| 20 | +import pandas.core.strings as strings |
| 21 | + |
class TestStringMethods(unittest.TestCase):
    """Tests for the string helpers in ``pandas.core.strings`` and the
    ``Series.str`` accessor.

    Most cases include NA entries and check that they come back as NA.
    Per-test notes are written as ``#`` comments rather than docstrings so
    that verbose test runners keep reporting the method names.
    """

    def test_cat(self):
        # str_cat: one array is joined into a single string; two arrays are
        # joined element-wise.  NA is expected in the result unless na_rep
        # is supplied.
        one = ['a', 'a', 'b', 'b', 'c', NA]
        two = ['a', NA, 'b', 'd', 'foo', NA]

        # single array
        result = strings.str_cat(one)
        self.assertTrue(isnull(result))

        result = strings.str_cat(one, na_rep='NA')
        exp = 'aabbcNA'
        self.assertEqual(result, exp)

        result = strings.str_cat(one, na_rep='-')
        exp = 'aabbc-'
        self.assertEqual(result, exp)

        result = strings.str_cat(one, sep='_', na_rep='NA')
        exp = 'a_a_b_b_c_NA'
        self.assertEqual(result, exp)

        # Multiple arrays
        result = strings.str_cat(one, [two], na_rep='NA')
        exp = ['aa', 'aNA', 'bb', 'bd', 'cfoo', 'NANA']
        self.assertTrue(np.array_equal(result, exp))

        result = strings.str_cat(one, two)
        exp = ['aa', NA, 'bb', 'bd', 'cfoo', NA]
        tm.assert_almost_equal(result, exp)

    def test_count(self):
        # Number of regex matches per element, via the module-level function
        # and via the Series.str accessor.
        values = ['foo', 'foofoo', NA, 'foooofooofommmfoo']

        result = strings.str_count(values, 'f[o]+')
        exp = [1, 2, NA, 4]
        tm.assert_almost_equal(result, exp)

        result = Series(values).str.count('f[o]+')
        self.assertTrue(isinstance(result, Series))
        tm.assert_almost_equal(result, exp)

    def test_contains(self):
        # Regex containment test per element.
        values = ['foo', NA, 'fooommm__foo', 'mmm_']
        pat = 'mmm[_]+'

        result = strings.str_contains(values, pat)
        expected = [False, np.nan, True, True]
        tm.assert_almost_equal(result, expected)

        # Without any NA in the input the result is expected to be a plain
        # boolean array.
        values = ['foo', 'xyz', 'fooommm__foo', 'mmm_']
        result = strings.str_contains(values, pat)
        expected = [False, False, True, True]
        self.assertTrue(result.dtype == np.bool_)
        tm.assert_almost_equal(result, expected)

    def test_startswith(self):
        values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])

        result = values.str.startswith('foo')
        exp = Series([False, NA, True, False, False, NA, True])
        tm.assert_series_equal(result, exp)

    def test_endswith(self):
        values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo'])

        result = values.str.endswith('foo')
        exp = Series([False, NA, False, False, True, NA, True])
        tm.assert_series_equal(result, exp)

    def test_lower_upper(self):
        # upper() followed by lower() should round-trip the (already
        # lower-case) input.
        values = Series(['om', NA, 'nom', 'nom'])

        result = values.str.upper()
        exp = Series(['OM', NA, 'NOM', 'NOM'])
        tm.assert_series_equal(result, exp)

        result = result.str.lower()
        tm.assert_series_equal(result, values)

    def test_replace(self):
        # Regex replacement; n=1 is expected to replace only the first match.
        values = Series(['fooBAD__barBAD', NA])

        result = values.str.replace('BAD[_]*', '')
        exp = Series(['foobar', NA])
        tm.assert_series_equal(result, exp)

        result = values.str.replace('BAD[_]*', '', n=1)
        exp = Series(['foobarBAD', NA])
        tm.assert_series_equal(result, exp)

    def test_repeat(self):
        # Repeat with one count for all elements, then with one count per
        # element.
        values = Series(['a', 'b', NA, 'c', NA, 'd'])

        result = values.str.repeat(3)
        exp = Series(['aaa', 'bbb', NA, 'ccc', NA, 'ddd'])
        tm.assert_series_equal(result, exp)

        result = values.str.repeat([1, 2, 3, 4, 5, 6])
        exp = Series(['a', 'bb', NA, 'cccc', NA, 'dddddd'])
        tm.assert_series_equal(result, exp)

    def test_match(self):
        # Expected: a tuple of captured groups where the pattern matches, an
        # empty list where it does not, and NA for missing values.
        values = Series(['fooBAD__barBAD', NA, 'foo'])

        result = values.str.match('.*(BAD[_]+).*(BAD)')
        exp = Series([('BAD__', 'BAD'), NA, []])
        tm.assert_series_equal(result, exp)

    def test_join(self):
        # split('_') followed by join('_') should reproduce the input.
        values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
        result = values.str.split('_').str.join('_')
        tm.assert_series_equal(values, result)

    def test_len(self):
        values = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo'])

        result = values.str.len()
        # Reference result computed element-wise with the builtin len().
        exp = values.map(lambda x: len(x) if com.notnull(x) else NA)
        tm.assert_series_equal(result, exp)

    def test_findall(self):
        # Expected: list of all matches per element (empty list for none).
        values = Series(['fooBAD__barBAD', NA, 'foo', 'BAD'])

        result = values.str.findall('BAD[_]*')
        exp = Series([['BAD__', 'BAD'], NA, [], ['BAD']])
        tm.assert_almost_equal(result, exp)

    def test_pad(self):
        # Pad to width 5: the one-character values gain four pad spaces,
        # while 'eeeeee' already exceeds the width and is expected unchanged.
        values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

        result = values.str.pad(5, side='left')
        exp = Series(['    a', '    b', NA, '    c', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

        result = values.str.pad(5, side='right')
        exp = Series(['a    ', 'b    ', NA, 'c    ', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

        result = values.str.pad(5, side='both')
        exp = Series(['  a  ', '  b  ', NA, '  c  ', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

    def test_center(self):
        # Same expectation as pad(5, side='both').
        values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

        result = values.str.center(5)
        exp = Series(['  a  ', '  b  ', NA, '  c  ', NA, 'eeeeee'])
        tm.assert_almost_equal(result, exp)

    def test_split(self):
        values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])

        result = values.str.split('_')
        exp = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']])
        tm.assert_series_equal(result, exp)

    def test_slice(self):
        values = Series(['aafootwo', 'aabartwo', NA, 'aabazqux'])

        result = values.str.slice(2, 5)
        exp = Series(['foo', 'bar', NA, 'baz'])
        tm.assert_series_equal(result, exp)

    def test_slice_replace(self):
        # TODO: placeholder -- no assertions written yet.
        pass

    def test_strip_lstrip_rstrip(self):
        values = Series([' aa ', ' bb \n', NA, 'cc '])

        result = values.str.strip()
        exp = Series(['aa', 'bb', NA, 'cc'])
        tm.assert_series_equal(result, exp)

        result = values.str.lstrip()
        exp = Series(['aa ', 'bb \n', NA, 'cc '])
        tm.assert_series_equal(result, exp)

        result = values.str.rstrip()
        exp = Series([' aa', ' bb', NA, 'cc'])
        tm.assert_series_equal(result, exp)

    def test_wrap(self):
        # TODO: placeholder -- no assertions written yet.
        pass

    def test_get(self):
        # After splitting, get(1) should pick the second piece of each list.
        values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])

        result = values.str.split('_').str.get(1)
        expected = Series(['b', 'd', np.nan, 'g'])
        tm.assert_series_equal(result, expected)
| 214 | + |
if __name__ == '__main__':
    # Script entry point: hand this module to nose with a fixed set of
    # command-line flags, and do not let nose call sys.exit() afterwards.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
0 commit comments