Skip to content

Commit 5bf4ff2

Browse files
committed
Merge pull request pandas-dev#10405 from sinhrks/test_unicode
TST: Use unicode literals in string test
2 parents 30197b5 + 0518e63 commit 5bf4ff2

File tree

1 file changed

+9
-32
lines changed

1 file changed

+9
-32
lines changed

pandas/tests/test_strings.py

+9-32
Original file line number | Diff line number | Diff line change
@@ -747,20 +747,18 @@ def test_isnumeric(self):
747747
# 0x2605: ★ not number
748748
# 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
749749
# 0xFF13: ３ Em 3
750-
values = ['A', '3', unichr(0x00bc), unichr(0x2605),
751-
unichr(0x1378), unichr(0xFF13), 'four']
750+
values = ['A', '3', u'¼', u'★', u'፸', u'３', 'four']
752751
s = Series(values)
753752
numeric_e = [False, True, True, False, True, True, False]
754753
decimal_e = [False, True, False, False, False, True, False]
755754
tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e))
756755
tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e))
757-
unicodes = [u('A'), u('3'), unichr(0x00bc), unichr(0x2605),
758-
unichr(0x1378), unichr(0xFF13), u('four')]
756+
757+
unicodes = [u'A', u'3', u'¼', u'★', u'፸', u'３', u'four']
759758
self.assertEqual(s.str.isnumeric().tolist(), [v.isnumeric() for v in unicodes])
760759
self.assertEqual(s.str.isdecimal().tolist(), [v.isdecimal() for v in unicodes])
761760

762-
values = ['A', np.nan, unichr(0x00bc), unichr(0x2605),
763-
np.nan, unichr(0xFF13), 'four']
761+
values = ['A', np.nan, u'¼', u'★', np.nan, u'３', 'four']
764762
s = Series(values)
765763
numeric_e = [False, np.nan, True, False, np.nan, True, False]
766764
decimal_e = [False, np.nan, False, False, np.nan, True, False]
@@ -1950,33 +1948,16 @@ def test_encode_decode_errors(self):
19501948
tm.assert_series_equal(result, exp)
19511949

19521950
def test_normalize(self):
1953-
def unistr(codes):
1954-
# build unicode string from unichr
1955-
# we cannot use six.u() here because it escapes unicode
1956-
return ''.join([unichr(c) for c in codes])
1957-
1958-
values = ['ABC', # ASCII
1959-
unistr([0xFF21, 0xFF22, 0xFF23]), # ＡＢＣ
1960-
unistr([0xFF11, 0xFF12, 0xFF13]), # １２３
1961-
np.nan,
1962-
unistr([0xFF71, 0xFF72, 0xFF74])] # ｱｲｴ
1951+
values = ['ABC', u'ＡＢＣ', u'１２３', np.nan, u'ｱｲｴ']
19631952
s = Series(values, index=['a', 'b', 'c', 'd', 'e'])
19641953

1965-
normed = [compat.u_safe('ABC'),
1966-
compat.u_safe('ABC'),
1967-
compat.u_safe('123'),
1968-
np.nan,
1969-
unistr([0x30A2, 0x30A4, 0x30A8])] # アイエ
1954+
normed = [u'ABC', u'ABC', u'123', np.nan, u'アイエ']
19701955
expected = Series(normed, index=['a', 'b', 'c', 'd', 'e'])
19711956

19721957
result = s.str.normalize('NFKC')
19731958
tm.assert_series_equal(result, expected)
19741959

1975-
expected = Series([compat.u_safe('ABC'),
1976-
unistr([0xFF21, 0xFF22, 0xFF23]), # ＡＢＣ
1977-
unistr([0xFF11, 0xFF12, 0xFF13]), # １２３
1978-
np.nan,
1979-
unistr([0xFF71, 0xFF72, 0xFF74])], # ｱｲｴ
1960+
expected = Series([u'ABC', u'ＡＢＣ', u'１２３', np.nan, u'ｱｲｴ'],
19801961
index=['a', 'b', 'c', 'd', 'e'])
19811962

19821963
result = s.str.normalize('NFC')
@@ -1985,12 +1966,8 @@ def unistr(codes):
19851966
with tm.assertRaisesRegexp(ValueError, "invalid normalization form"):
19861967
s.str.normalize('xxx')
19871968

1988-
s = Index([unistr([0xFF21, 0xFF22, 0xFF23]), # ＡＢＣ
1989-
unistr([0xFF11, 0xFF12, 0xFF13]), # １２３
1990-
unistr([0xFF71, 0xFF72, 0xFF74])]) # ｱｲｴ
1991-
expected = Index([compat.u_safe('ABC'),
1992-
compat.u_safe('123'),
1993-
unistr([0x30A2, 0x30A4, 0x30A8])])
1969+
s = Index([u'ＡＢＣ', u'１２３', u'ｱｲｴ'])
1970+
expected = Index([u'ABC', u'123', u'アイエ'])
19941971
result = s.str.normalize('NFKC')
19951972
tm.assert_index_equal(result, expected)
19961973

0 commit comments

Comments
 (0)