|
19 | 19 | import pandas.io.parsers as parsers
|
20 | 20 | from pandas.io.parsers import (read_csv, read_table, read_fwf,
|
21 | 21 | TextFileReader, TextParser)
|
22 |
| -from pandas.util.testing import (assert_almost_equal, |
| 22 | +from pandas.util.testing import (assert_equal, |
| 23 | + assert_almost_equal, |
23 | 24 | assert_series_equal,
|
24 | 25 | makeCustomDataframe as mkdf,
|
25 | 26 | network,
|
@@ -67,6 +68,35 @@ def setUp(self):
|
67 | 68 | self.csv2 = os.path.join(self.dirpath, 'test2.csv')
|
68 | 69 | self.xls1 = os.path.join(self.dirpath, 'test.xls')
|
69 | 70 |
|
| 71 | + def test_multi_character_decimal_marker(self): |
| 72 | + data = """A|B|C |
| 73 | +1|2,334|5 |
| 74 | +10|13|10. |
| 75 | +""" |
| 76 | + self.assertRaises(ValueError, read_csv, StringIO(data), decimal=',,') |
| 77 | + |
| 78 | + def test_empty_decimal_marker(self): |
| 79 | + data = """A|B|C |
| 80 | +1|2,334|5 |
| 81 | +10|13|10. |
| 82 | +""" |
| 83 | + self.assertRaises(ValueError, read_csv, StringIO(data), decimal='') |
| 84 | + |
| 85 | + def test_empty_thousands_marker(self): |
| 86 | + data = """A|B|C |
| 87 | +1|2,334|5 |
| 88 | +10|13|10. |
| 89 | +""" |
| 90 | + self.assertRaises(ValueError, read_csv, StringIO(data), thousands='') |
| 91 | + |
| 92 | + |
| 93 | + def test_multi_character_decimal_marker(self): |
| 94 | + data = """A|B|C |
| 95 | +1|2,334|5 |
| 96 | +10|13|10. |
| 97 | +""" |
| 98 | + self.assertRaises(ValueError, read_csv, StringIO(data), thousands=',,') |
| 99 | + |
70 | 100 | def test_empty_string(self):
|
71 | 101 | data = """\
|
72 | 102 | One,Two,Three
|
@@ -164,14 +194,48 @@ def test_1000_sep(self):
|
164 | 194 | 1|2,334|5
|
165 | 195 | 10|13|10.
|
166 | 196 | """
|
167 |
| - expected = [[1, 2334., 5], |
168 |
| - [10, 13, 10]] |
| 197 | + expected = DataFrame({ |
| 198 | + 'A': [1, 10], |
| 199 | + 'B': [2334, 13], |
| 200 | + 'C': [5, 10.] |
| 201 | + }) |
169 | 202 |
|
170 | 203 | df = self.read_csv(StringIO(data), sep='|', thousands=',')
|
171 |
| - assert_almost_equal(df.values, expected) |
| 204 | + tm.assert_frame_equal(df, expected) |
172 | 205 |
|
173 | 206 | df = self.read_table(StringIO(data), sep='|', thousands=',')
|
174 |
| - assert_almost_equal(df.values, expected) |
| 207 | + tm.assert_frame_equal(df, expected) |
| 208 | + |
def test_1000_sep_with_decimal(self):
    # The same table written two ways: ',' thousands with '.' decimal,
    # and '.' thousands with ',' decimal.  Both must parse to `expected`
    # through self.read_csv and self.read_table.
    comma_thousands = "A|B|C\n1|2,334.01|5\n10|13|10.\n"
    dot_thousands = "A|B|C\n1|2.334,01|5\n10|13|10,\n"

    columns = {
        'A': [1, 10],
        'B': [2334.01, 13],
        'C': [5, 10.]
    }
    expected = DataFrame(columns)

    # Check the dtypes of the hand-built reference frame itself.
    for column, dtype_name in (('A', 'int64'), ('B', 'float'), ('C', 'float')):
        assert_equal(expected[column].dtype, dtype_name)

    cases = (
        (comma_thousands, ',', '.'),
        (dot_thousands, '.', ','),
    )
    for text, thousands_marker, decimal_marker in cases:
        for reader_name in ('read_csv', 'read_table'):
            reader = getattr(self, reader_name)
            result = reader(StringIO(text), sep='|',
                            thousands=thousands_marker,
                            decimal=decimal_marker)
            tm.assert_frame_equal(result, expected)
175 | 239 |
|
176 | 240 | def test_squeeze(self):
|
177 | 241 | data = """\
|
@@ -1862,6 +1926,24 @@ def test_1000_fwf(self):
|
1862 | 1926 | thousands=',')
|
1863 | 1927 | assert_almost_equal(df.values, expected)
|
1864 | 1928 |
|
| 1929 | + def test_1000_sep_with_decimal(self): |
| 1930 | + data = """A|B|C |
| 1931 | +1|2,334.01|5 |
| 1932 | +10|13|10. |
| 1933 | +""" |
| 1934 | + |
| 1935 | + expected = DataFrame({ |
| 1936 | + 'A': [1, 10], |
| 1937 | + 'B': [2334.01, 13], |
| 1938 | + 'C': [5, 10.] |
| 1939 | + }) |
| 1940 | + |
| 1941 | + df = self.read_csv(StringIO(data), sep='|', thousands=',') |
| 1942 | + tm.assert_frame_equal(df, expected) |
| 1943 | + |
| 1944 | + df = self.read_table(StringIO(data), sep='|', thousands=',') |
| 1945 | + tm.assert_frame_equal(df, expected) |
| 1946 | + |
1865 | 1947 | def test_comment_fwf(self):
|
1866 | 1948 | data = """
|
1867 | 1949 | 1 2. 4 #hello world
|
|
0 commit comments