|
3 | 3 | import os
|
4 | 4 | import pandas.util.testing as tm
|
5 | 5 |
|
6 |
| -from pandas import read_csv, read_table |
| 6 | +from pandas import read_csv, read_table, DataFrame, Index |
7 | 7 | from pandas.core.common import AbstractMethodError
|
| 8 | +from pandas._libs.lib import Timestamp |
| 9 | +from pandas.compat import StringIO |
8 | 10 |
|
9 | 11 | from .common import ParserTests
|
10 | 12 | from .header import HeaderTests
|
@@ -100,3 +102,51 @@ def read_table(self, *args, **kwds):
|
100 | 102 | kwds = kwds.copy()
|
101 | 103 | kwds['engine'] = self.engine
|
102 | 104 | return read_table(*args, **kwds)
|
| 105 | + |
class TestUnsortedUsecols(object):
    """Regression test for integer ``usecols`` supplied in unsorted order."""

    def test_override__set_noconvert_columns(self):
        """Date parsing must hit the right columns when usecols is unsorted.

        GH 17351: ``_set_noconvert_columns`` has to sort ``usecols`` itself.
        Modelled on ``test_usecols_with_parse_dates`` from usecols.py.
        """
        from pandas.io.parsers import CParserWrapper, TextFileReader

        class ReversedUsecolsWrapper(CParserWrapper):
            def _set_noconvert_columns(self):
                # ``self.usecols`` is a set, which is documented as
                # unordered; a CPython set of integers merely happens to
                # iterate in sorted order. Reverse it to imitate an
                # implementation without that property -- before GH 17351
                # this made parse_dates convert the wrong columns.
                if self.usecols_dtype == 'integer':
                    self.usecols = list(self.usecols)[::-1]
                return CParserWrapper._set_noconvert_columns(self)

        class BareTextFileReader(TextFileReader):
            def __init__(self):
                # Deliberately does not call the parent constructor; only
                # these two attributes are set up by hand.
                self.squeeze = False
                self._currow = 0

        csv_text = """a,b,c,d,e
        0,1,20140101,0900,4
        0,1,20140102,1000,4"""

        expected = DataFrame(
            {
                'a': [0, 0],
                'c_d': [Timestamp('2014-01-01 09:00:00'),
                        Timestamp('2014-01-02 10:00:00')],
            },
            columns=['c_d', 'a'],
        )

        reader = BareTextFileReader()
        reader.options = dict(usecols=[0, 2, 3],
                              parse_dates=[[1, 2]],
                              delimiter=',')
        # Attach the engine manually so the overridden wrapper is the one
        # that ``read()`` ends up using.
        reader._engine = ReversedUsecolsWrapper(StringIO(csv_text),
                                                **reader.options)
        result = reader.read()

        tm.assert_frame_equal(result, expected)
0 commit comments