Skip to content

Commit d076438

Browse files
committed
BUG: csv reader bugfixes and test coverage
1 parent 92dea5e commit d076438

File tree

2 files changed

+92
-19
lines changed

2 files changed

+92
-19
lines changed

pandas/io/parsers.py

+11-18
Original file line number | Diff line number | Diff line change
@@ -201,7 +201,8 @@ def _infer_columns(self):
201201
line = self.buf[0]
202202
else:
203203
line = self._next_line()
204-
while self.header > self.pos:
204+
205+
while self.pos <= self.header:
205206
line = self._next_line()
206207

207208
columns = []
@@ -227,7 +228,6 @@ def _infer_columns(self):
227228
else:
228229
columns = names
229230

230-
231231
return columns
232232

233233
def _next_line(self):
@@ -258,38 +258,31 @@ def __iter__(self):
258258

259259
def _get_index_name(self):
260260
columns = self.columns
261+
passed_names = self.names is not None
261262

262263
try:
263264
line = self._next_line()
264265
except StopIteration:
265266
line = None
266267

267268
# implicitly index_col=0 b/c 1 fewer column names
268-
index_name = None
269269
implicit_first_col = (line is not None and
270270
len(line) == len(columns) + 1)
271271

272-
passed_names = self.names is not None
273-
272+
index_name = None
274273
if implicit_first_col:
275274
if self.index_col is None:
276275
self.index_col = 0
277276
index_name = None
278277
elif np.isscalar(self.index_col):
279-
if passed_names:
280-
index_name = None
281-
else:
282-
index_name = columns.pop(self.index_col)
278+
index_name = columns.pop(self.index_col)
283279
elif self.index_col is not None:
284-
if not passed_names:
285-
cp_cols = list(columns)
286-
index_name = []
287-
for i in self.index_col:
288-
name = cp_cols[i]
289-
columns.remove(name)
290-
index_name.append(name)
291-
else:
292-
index_name=None
280+
cp_cols = list(columns)
281+
index_name = []
282+
for i in self.index_col:
283+
name = cp_cols[i]
284+
columns.remove(name)
285+
index_name.append(name)
293286

294287
return index_name
295288

pandas/io/tests/test_parsers.py

+81-1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,14 @@
1414
import pandas._tseries as lib
1515

1616
class TestParsers(unittest.TestCase):
17+
data1 = """index,A,B,C,D
18+
foo,2,3,4,5
19+
bar,7,8,9,10
20+
baz,12,13,14,15
21+
qux,12,13,14,15
22+
foo2,12,13,14,15
23+
bar2,12,13,14,15
24+
"""
1725

1826
def setUp(self):
1927
self.dirpath = curpath()
@@ -181,7 +189,9 @@ def test_read_table_duplicate_index(self):
181189
foo,12,13,14,15
182190
bar,12,13,14,15
183191
"""
184-
self.assertRaises(Exception, read_csv, StringIO(data), index_col=0)
192+
193+
self.assertRaises(Exception, read_csv, StringIO(data),
194+
index_col=0)
185195

186196
def test_parse_bools(self):
187197
data = """A,B
@@ -211,6 +221,76 @@ def test_infer_index_col(self):
211221
data = read_csv(StringIO(data))
212222
self.assert_(data.index.equals(Index(['foo', 'bar', 'baz'])))
213223

224+
def test_read_nrows(self):
225+
df = read_csv(StringIO(self.data1), nrows=3)
226+
expected = read_csv(StringIO(self.data1))[:3]
227+
assert_frame_equal(df, expected)
228+
229+
def test_read_chunksize(self):
230+
reader = read_csv(StringIO(self.data1), index_col=0, chunksize=2)
231+
df = read_csv(StringIO(self.data1), index_col=0)
232+
233+
chunks = list(reader)
234+
235+
assert_frame_equal(chunks[0], df[:2])
236+
assert_frame_equal(chunks[1], df[2:4])
237+
assert_frame_equal(chunks[2], df[4:])
238+
239+
def test_iterator(self):
240+
reader = read_csv(StringIO(self.data1), index_col=0, iterator=True)
241+
df = read_csv(StringIO(self.data1), index_col=0)
242+
243+
chunk = reader.get_chunk(3)
244+
assert_frame_equal(chunk, df[:3])
245+
246+
last_chunk = reader.get_chunk(5)
247+
assert_frame_equal(last_chunk, df[3:])
248+
249+
def test_header_not_first_line(self):
250+
data = """got,to,ignore,this,line
251+
got,to,ignore,this,line
252+
index,A,B,C,D
253+
foo,2,3,4,5
254+
bar,7,8,9,10
255+
baz,12,13,14,15
256+
"""
257+
data2 = """index,A,B,C,D
258+
foo,2,3,4,5
259+
bar,7,8,9,10
260+
baz,12,13,14,15
261+
"""
262+
263+
df = read_csv(StringIO(data), header=2, index_col=0)
264+
expected = read_csv(StringIO(data2), header=0, index_col=0)
265+
assert_frame_equal(df, expected)
266+
267+
def test_pass_names_with_index(self):
268+
lines = self.data1.split('\n')
269+
no_header = '\n'.join(lines[1:])
270+
271+
# regular index
272+
names = ['index', 'A', 'B', 'C', 'D']
273+
df = read_csv(StringIO(no_header), index_col=0, names=names)
274+
expected = read_csv(StringIO(self.data1), index_col=0)
275+
assert_frame_equal(df, expected)
276+
277+
# multi index
278+
data = """index1,index2,A,B,C,D
279+
foo,one,2,3,4,5
280+
foo,two,7,8,9,10
281+
foo,three,12,13,14,15
282+
bar,one,12,13,14,15
283+
bar,two,12,13,14,15
284+
"""
285+
lines = data.split('\n')
286+
no_header = '\n'.join(lines[1:])
287+
names = ['index1', 'index2', 'A', 'B', 'C', 'D']
288+
df = read_csv(StringIO(no_header), index_col=[0, 1], names=names)
289+
expected = read_csv(StringIO(data), index_col=[0, 1])
290+
assert_frame_equal(df, expected)
291+
292+
def test_multi_index_no_level_names(self):
293+
pass
214294

215295
class TestParseSQL(unittest.TestCase):
216296

0 commit comments

Comments
 (0)