Skip to content

Commit d2ac001

Browse files
committed
ENH: add skip_footer option to read_csv/read_table, GH #291
1 parent 6a0452b commit d2ac001

File tree

2 files changed

+40
-6
lines changed

2 files changed

+40
-6
lines changed

pandas/io/parsers.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Module contains tools for processing files into DataFrames or other objects
33
"""
4+
from __future__ import print_function
45

56
from StringIO import StringIO
67
import zipfile
@@ -13,7 +14,8 @@
1314

1415
def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
1516
skiprows=None, na_values=None, parse_dates=False,
16-
date_parser=None, nrows=None, iterator=False, chunksize=None):
17+
date_parser=None, nrows=None, iterator=False, chunksize=None,
18+
skip_footer=0):
1719
import csv
1820

1921
if hasattr(filepath_or_buffer, 'read'):
@@ -50,7 +52,8 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
5052
parse_dates=parse_dates,
5153
date_parser=date_parser,
5254
skiprows=skiprows,
53-
chunksize=chunksize, buf=buf)
55+
chunksize=chunksize, buf=buf,
56+
skip_footer=skip_footer)
5457

5558
if nrows is not None:
5659
return parser.get_chunk(nrows)
@@ -62,12 +65,14 @@ def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None,
6265

6366
def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
6467
names=None, skiprows=None, na_values=None, parse_dates=False,
65-
date_parser=None, nrows=None, iterator=False, chunksize=None):
68+
date_parser=None, nrows=None, iterator=False, chunksize=None,
69+
skip_footer=0):
6670
return read_csv(filepath_or_buffer, sep=sep, header=header,
6771
skiprows=skiprows, index_col=index_col,
6872
na_values=na_values, date_parser=date_parser,
6973
names=names, parse_dates=parse_dates,
70-
nrows=nrows, iterator=iterator, chunksize=chunksize)
74+
nrows=nrows, iterator=iterator, chunksize=chunksize,
75+
skip_footer=skip_footer)
7176

7277
_parser_params = """Also supports optionally iterating or breaking of the file
7378
into chunks.
@@ -98,6 +103,8 @@ def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None,
98103
Return TextParser object
99104
chunksize : int, default None
100105
Return TextParser object for iteration
106+
skip_footer : int, default 0
107+
Number of lines at bottom of file to skip
101108
102109
Returns
103110
-------
@@ -163,7 +170,10 @@ class TextParser(object):
163170
Custom NA values
164171
parse_dates : boolean, default False
165172
date_parser : function, default None
166-
skiprows
173+
skiprows : list of integers
174+
Row numbers to skip
175+
skip_footer : int
176+
Number of lines at bottom of file to skip
167177
"""
168178

169179
# common NA values
@@ -175,7 +185,7 @@ class TextParser(object):
175185

176186
def __init__(self, data, names=None, header=0, index_col=None,
177187
na_values=None, parse_dates=False, date_parser=None,
178-
chunksize=None, skiprows=None, buf=None):
188+
chunksize=None, skiprows=None, skip_footer=0, buf=None):
179189
"""
180190
Workhorse function for processing nested list into DataFrame
181191
@@ -195,6 +205,9 @@ def __init__(self, data, names=None, header=0, index_col=None,
195205
self.chunksize = chunksize
196206
self.passed_names = names is not None
197207
self.skiprows = set() if skiprows is None else set(skiprows)
208+
self.skip_footer = skip_footer
209+
210+
assert(self.skip_footer >= 0)
198211

199212
if na_values is None:
200213
self.na_values = self.NA_VALUES
@@ -306,6 +319,9 @@ def _get_index_name(self):
306319
return index_name
307320

308321
def get_chunk(self, rows=None):
322+
if rows is not None and self.skip_footer:
323+
print('skip_footer not supported for iteration')
324+
309325
try:
310326
content = self._get_lines(rows)
311327
except StopIteration:
@@ -401,6 +417,9 @@ def _get_lines(self, rows=None):
401417

402418
self.buf = []
403419

420+
if self.skip_footer:
421+
lines = lines[:-self.skip_footer]
422+
404423
return lines
405424

406425
def _maybe_convert_int_mindex(index, parse_dates, date_parser):

pandas/io/tests/test_parsers.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,21 @@ def test_multi_index_parse_dates(self):
382382
df = read_csv(StringIO(data), index_col=[0, 1], parse_dates=True)
383383
self.assert_(isinstance(df.index.levels[0][0], datetime))
384384

385+
def test_skip_footer(self):
386+
data = """A,B,C
387+
1,2,3
388+
4,5,6
389+
7,8,9
390+
want to skip this
391+
also also skip this
392+
and this
393+
"""
394+
result = read_csv(StringIO(data), skip_footer=3)
395+
no_footer = '\n'.join(data.split('\n')[:-4])
396+
expected = read_csv(StringIO(no_footer))
397+
398+
assert_frame_equal(result, expected)
399+
385400
class TestParseSQL(unittest.TestCase):
386401

387402
def test_convert_sql_column_floats(self):

0 commit comments

Comments
 (0)