Skip to content

Commit 8f54770

Browse files
committed
add test that fails before sorting usecols
1 parent 2886a0e commit 8f54770

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

pandas/tests/io/parser/test_parsers.py

+51-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
import os
44
import pandas.util.testing as tm
55

6-
from pandas import read_csv, read_table
6+
from pandas import read_csv, read_table, DataFrame, Index
77
from pandas.core.common import AbstractMethodError
8+
from pandas._libs.lib import Timestamp
9+
from pandas.compat import StringIO
810

911
from .common import ParserTests
1012
from .header import HeaderTests
@@ -100,3 +102,51 @@ def read_table(self, *args, **kwds):
100102
kwds = kwds.copy()
101103
kwds['engine'] = self.engine
102104
return read_table(*args, **kwds)
105+
106+
class TestUnsortedUsecols(object):
107+
def test_override__set_noconvert_columns(self):
108+
# GH 17351 - usecols needs to be sorted in _set_noconvert_columns
109+
# based on the test_usecols_with_parse_dates test from usecols.py
110+
from pandas.io.parsers import CParserWrapper, TextFileReader
111+
112+
s = """a,b,c,d,e
113+
0,1,20140101,0900,4
114+
0,1,20140102,1000,4"""
115+
116+
parse_dates = [[1, 2]]
117+
cols = {
118+
'a': [0, 0],
119+
'c_d': [
120+
Timestamp('2014-01-01 09:00:00'),
121+
Timestamp('2014-01-02 10:00:00')
122+
]
123+
}
124+
expected = DataFrame(cols, columns=['c_d', 'a'])
125+
126+
class MyTextFileReader(TextFileReader):
127+
def __init__(self):
128+
self._currow = 0
129+
self.squeeze = False
130+
131+
class MyCParserWrapper(CParserWrapper):
132+
def _set_noconvert_columns(self):
133+
if self.usecols_dtype == 'integer':
134+
# self.usecols is a set, which is documented as unordered
135+
# but in practice, a CPython set of small non-negative integers
# usually iterates in ascending order.
136+
# In other implementations this assumption does not hold.
137+
# The following code simulates a different order, which
138+
# before GH 17351 would cause the wrong columns to be
139+
# converted via the parse_dates parameter
140+
self.usecols = list(self.usecols)
141+
self.usecols.reverse()
142+
return CParserWrapper._set_noconvert_columns(self)
143+
144+
parser = MyTextFileReader()
145+
parser.options = {'usecols': [0, 2, 3],
146+
'parse_dates': parse_dates,
147+
'delimiter': ','}
148+
parser._engine = MyCParserWrapper(StringIO(s), **parser.options)
149+
df = parser.read()
150+
151+
tm.assert_frame_equal(df, expected)
152+

0 commit comments

Comments
 (0)