Skip to content

Commit faf4d53

Browse files
committed
TST: test for tupleize_cols=True,index=False
TST: better error messages on multi_index column read failure
1 parent a9a89f8 commit faf4d53

File tree

3 files changed

+28
-5
lines changed

3 files changed

+28
-5
lines changed

pandas/io/parsers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -809,8 +809,8 @@ def extract(r):
809809
# if every column name at a single level contains 'Unnamed', the passed header spans too many rows
810810
for n in range(len(columns[0])):
811811
if all([ 'Unnamed' in c[n] for c in columns ]):
812-
raise Exception("Passed header=[%s] are too many rows for this "
813-
"multi_index of columns" % ','.join([ str(x) for x in self.header ]))
812+
raise _parser.CParserError("Passed header=[%s] are too many rows for this "
813+
"multi_index of columns" % ','.join([ str(x) for x in self.header ]))
814814

815815
# clean the column names (if we have an index_col)
816816
if len(ic):

pandas/src/parser.pyx

+7-3
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ cdef class TextReader:
244244
object na_values, true_values, false_values
245245
object memory_map
246246
object as_recarray
247-
object header, names, header_start, header_end
247+
object header, orig_header, names, header_start, header_end
248248
object low_memory
249249
object skiprows
250250
object compact_ints, use_unsigned
@@ -441,6 +441,7 @@ cdef class TextReader:
441441

442442
# TODO: no header vs. header is not the first row
443443
self.has_mi_columns = 0
444+
self.orig_header = header
444445
if header is None:
445446
# sentinel value
446447
self.parser.header_start = -1
@@ -585,8 +586,11 @@ cdef class TextReader:
585586

586587
# e.g., if header=3 and file only has 2 lines
587588
if self.parser.lines < hr + 1:
588-
raise CParserError('Passed header=%d but only %d lines in file'
589-
% (self.parser.header, self.parser.lines))
589+
msg = self.orig_header
590+
if isinstance(msg,list):
591+
msg = "[%s], len of %d," % (','.join([ str(m) for m in msg ]),len(msg))
592+
raise CParserError('Passed header=%s but only %d lines in file'
593+
% (msg, self.parser.lines))
590594

591595
field_count = self.parser.line_fields[hr]
592596
start = self.parser.line_start[hr]

pandas/tests/test_frame.py

+19
Original file line numberDiff line numberDiff line change
@@ -5037,6 +5037,13 @@ def _make_frame(names=None):
50375037
result.columns.names = df.columns.names
50385038
assert_frame_equal(df,result)
50395039

5040+
# tupleize_cols=True and index=False
5041+
df = _make_frame(True)
5042+
df.to_csv(path,tupleize_cols=True,index=False)
5043+
result = read_csv(path,header=0,tupleize_cols=True,index_col=None)
5044+
result.columns = df.columns
5045+
assert_frame_equal(df,result)
5046+
50405047
# whatsnew example
50415048
df = _make_frame()
50425049
df.to_csv(path,tupleize_cols=False)
@@ -5060,6 +5067,18 @@ def _make_frame(names=None):
50605067
df.to_csv(path,tupleize_cols=False)
50615068

50625069
# catch invalid headers
5070+
try:
5071+
read_csv(path,tupleize_cols=False,header=range(3),index_col=0)
5072+
except (Exception), detail:
5073+
if not str(detail).startswith('Passed header=[0,1,2] are too many rows for this multi_index of columns'):
5074+
raise AssertionError("failure in read_csv header=range(3)")
5075+
5076+
try:
5077+
read_csv(path,tupleize_cols=False,header=range(7),index_col=0)
5078+
except (Exception), detail:
5079+
if not str(detail).startswith('Passed header=[0,1,2,3,4,5,6], len of 7, but only 6 lines in file'):
5080+
raise AssertionError("failure in read_csv header=range(7)")
5081+
50635082
for i in [3,4,5,6,7]:
50645083
self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=range(i), index_col=0)
50655084
self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0)

0 commit comments

Comments
 (0)