Skip to content

Commit bb7ef63

Browse files
committed
add test that fails before sorting usecols
1 parent 2886a0e commit bb7ef63

File tree

1 file changed

+51
-1
lines changed

1 file changed

+51
-1
lines changed

pandas/tests/io/parser/test_parsers.py

+51-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
import os
44
import pandas.util.testing as tm
55

6-
from pandas import read_csv, read_table
6+
from pandas import read_csv, read_table, DataFrame
77
from pandas.core.common import AbstractMethodError
8+
from pandas._libs.lib import Timestamp
9+
from pandas.compat import StringIO
810

911
from .common import ParserTests
1012
from .header import HeaderTests
@@ -100,3 +102,51 @@ def read_table(self, *args, **kwds):
100102
kwds = kwds.copy()
101103
kwds['engine'] = self.engine
102104
return read_table(*args, **kwds)
105+
106+
107+
class TestUnsortedUsecols(object):
    """Regression test for the ordering of ``usecols`` in the C parser."""

    def test_override__set_noconvert_columns(self):
        """Check date parsing when ``usecols`` is iterated out of order.

        GH 17351 - usecols needs to be sorted in _set_noconvert_columns.
        Based on the test_usecols_with_parse_dates test from usecols.py.
        """
        from pandas.io.parsers import CParserWrapper, TextFileReader

        class _BareTextFileReader(TextFileReader):
            def __init__(self):
                # The parent initialiser is intentionally not called; only
                # these two attributes are set here, while ``options`` and
                # ``_engine`` are attached by the test body below.
                self.squeeze = False
                self._currow = 0

        class _ReversedUsecolsWrapper(CParserWrapper):
            def _set_noconvert_columns(self):
                # self.usecols is a set, which is documented as unordered,
                # but in practice a CPython set of integers is sorted.
                # Other implementations do not guarantee that, so the
                # order is flipped here to simulate them.  Before GH 17351
                # this made parse_dates convert the wrong columns.
                if self.usecols_dtype == 'integer':
                    self.usecols = list(self.usecols)[::-1]
                return CParserWrapper._set_noconvert_columns(self)

        csv_text = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""

        parse_dates = [[1, 2]]
        expected = DataFrame(
            {
                'a': [0, 0],
                'c_d': [
                    Timestamp('2014-01-01 09:00:00'),
                    Timestamp('2014-01-02 10:00:00'),
                ],
            },
            columns=['c_d', 'a'],
        )

        options = dict(usecols=[0, 2, 3],
                       parse_dates=parse_dates,
                       delimiter=',')

        reader = _BareTextFileReader()
        reader.options = options
        reader._engine = _ReversedUsecolsWrapper(StringIO(csv_text),
                                                 **reader.options)
        result = reader.read()

        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)