|
11 | 11 | import pandas.io.parsers as parsers
|
12 | 12 | import pandas.util.testing as tm
|
13 | 13 |
|
14 |
| -from pandas import DataFrame, MultiIndex, read_csv |
| 14 | +from pandas import DataFrame, MultiIndex |
15 | 15 | from pandas.compat import StringIO, range
|
16 | 16 |
|
17 | 17 |
|
@@ -43,57 +43,30 @@ def test_detect_string_na(self):
|
43 | 43 | tm.assert_numpy_array_equal(df.values, expected)
|
44 | 44 |
|
45 | 45 | def test_non_string_na_values(self):
|
46 |
| - # see gh-3611, na_values that are not a string are an issue |
47 |
| - with tm.ensure_clean('__non_string_na_values__.csv') as path: |
48 |
| - df = DataFrame({'A': [-999, 2, 3], 'B': [1.2, -999, 4.5]}) |
49 |
| - df.to_csv(path, sep=' ', index=False) |
50 |
| - result1 = self.read_csv(path, sep=' ', header=0, |
51 |
| - na_values=['-999.0', '-999']) |
52 |
| - result2 = self.read_csv(path, sep=' ', header=0, |
53 |
| - na_values=[-999, -999.0]) |
54 |
| - result3 = self.read_csv(path, sep=' ', header=0, |
55 |
| - na_values=[-999.0, -999]) |
56 |
| - tm.assert_frame_equal(result1, result2) |
57 |
| - tm.assert_frame_equal(result2, result3) |
58 |
| - |
59 |
| - result4 = self.read_csv( |
60 |
| - path, sep=' ', header=0, na_values=['-999.0']) |
61 |
| - result5 = self.read_csv( |
62 |
| - path, sep=' ', header=0, na_values=['-999']) |
63 |
| - result6 = self.read_csv( |
64 |
| - path, sep=' ', header=0, na_values=[-999.0]) |
65 |
| - result7 = self.read_csv( |
66 |
| - path, sep=' ', header=0, na_values=[-999]) |
67 |
| - tm.assert_frame_equal(result4, result3) |
68 |
| - tm.assert_frame_equal(result5, result3) |
69 |
| - tm.assert_frame_equal(result6, result3) |
70 |
| - tm.assert_frame_equal(result7, result3) |
71 |
| - |
72 |
| - good_compare = result3 |
73 |
| - |
74 |
| - # with an odd float format, so we can't match the string 999.0 |
75 |
| - # exactly, but need float matching |
76 |
| - # TODO: change these to self.read_csv when Python bug is squashed |
77 |
| - df.to_csv(path, sep=' ', index=False, float_format='%.3f') |
78 |
| - result1 = read_csv(path, sep=' ', header=0, |
79 |
| - na_values=['-999.0', '-999']) |
80 |
| - result2 = read_csv(path, sep=' ', header=0, |
81 |
| - na_values=[-999.0, -999]) |
82 |
| - tm.assert_frame_equal(result1, good_compare) |
83 |
| - tm.assert_frame_equal(result2, good_compare) |
84 |
| - |
85 |
| - result3 = read_csv(path, sep=' ', |
86 |
| - header=0, na_values=['-999.0']) |
87 |
| - result4 = read_csv(path, sep=' ', |
88 |
| - header=0, na_values=['-999']) |
89 |
| - result5 = read_csv(path, sep=' ', |
90 |
| - header=0, na_values=[-999.0]) |
91 |
| - result6 = read_csv(path, sep=' ', |
92 |
| - header=0, na_values=[-999]) |
93 |
| - tm.assert_frame_equal(result3, good_compare) |
94 |
| - tm.assert_frame_equal(result4, good_compare) |
95 |
| - tm.assert_frame_equal(result5, good_compare) |
96 |
| - tm.assert_frame_equal(result6, good_compare) |
| 46 | + # see gh-3611: with an odd float format, we can't match |
| 47 | + # the string '999.0' exactly but still need float matching |
| 48 | + nice = """A,B |
| 49 | +-999,1.2 |
| 50 | +2,-999 |
| 51 | +3,4.5 |
| 52 | +""" |
| 53 | + ugly = """A,B |
| 54 | +-999,1.200 |
| 55 | +2,-999.000 |
| 56 | +3,4.500 |
| 57 | +""" |
| 58 | + na_values_param = [['-999.0', '-999'], |
| 59 | + [-999, -999.0], |
| 60 | + [-999.0, -999], |
| 61 | + ['-999.0'], ['-999'], |
| 62 | + [-999.0], [-999]] |
| 63 | + expected = DataFrame([[np.nan, 1.2], [2.0, np.nan], |
| 64 | + [3.0, 4.5]], columns=['A', 'B']) |
| 65 | + |
| 66 | + for data in (nice, ugly): |
| 67 | + for na_values in na_values_param: |
| 68 | + out = self.read_csv(StringIO(data), na_values=na_values) |
| 69 | + tm.assert_frame_equal(out, expected) |
97 | 70 |
|
98 | 71 | def test_default_na_values(self):
|
99 | 72 | _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
|
|
0 commit comments