Skip to content

Commit 36f24af

Browse files
changhiskhanwesm
authored and committed
ENH: allow pos and neg for skip_footer #1843
1 parent 89a61e5 commit 36f24af

File tree

3 files changed

+18
-8
lines changed

3 files changed

+18
-8
lines changed

doc/source/io.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ data into a DataFrame object. They can take a number of arguments:
7676
Defaults to 0 (first row); specify None if there is no header row.
7777
- ``skiprows``: A collection of numbers for rows in the file to skip. Can
7878
also be an integer to skip the first ``n`` rows
79+
- ``skip_footer``: Lines at bottom of file to skip. If >0 then indicates the
80+
row to start skipping. If <0 then skips the specified number of rows from
81+
the end.
7982
- ``index_col``: column number, column name, or list of column numbers/names,
8083
to use as the ``index`` (row labels) of the resulting DataFrame. By default,
8184
it will number the rows without using any column, unless there is one more
@@ -116,7 +119,6 @@ data into a DataFrame object. They can take a number of arguments:
116119
- ``chunksize``: A number of rows to be used to "chunk" a file into
117120
pieces. Will cause a ``TextParser`` object to be returned. More on this
118121
below in the section on :ref:`iterating and chunking <io.chunking>`
119-
- ``skip_footer``: number of lines to skip at bottom of file (default 0)
120122
- ``converters``: a dictionary of functions for converting values in certain
121123
columns, where keys are either integers or column labels
122124
- ``encoding``: a string representing the encoding to use if the contents are

pandas/io/parsers.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ class DateConversionError(Exception):
4646
Row to use for the column labels of the parsed DataFrame
4747
skiprows : list-like or integer
4848
Row numbers to skip (0-indexed) or number of rows to skip (int)
49+
at the start of the file
50+
skip_footer : int, default 0
51+
Lines at bottom of file to skip. If >0 then indicates the row to start
52+
skipping. If <0 then skips the specified number of rows from the end.
4953
index_col : int or sequence, default None
5054
Column to use as the row labels of the DataFrame. If a sequence is
5155
given, a MultiIndex is used.
@@ -81,8 +85,6 @@ class DateConversionError(Exception):
8185
Return TextParser object
8286
chunksize : int, default None
8387
Return TextParser object for iteration
84-
skip_footer : int, default 0
85-
Number of line at bottom of file to skip
8688
converters : dict, optional
8789
Dict of functions for converting values in certain columns. Keys can either
8890
be integers or column labels
@@ -476,7 +478,7 @@ def __init__(self, f, delimiter=None, dialect=None, names=None, header=0,
476478
else:
477479
self.converters = {}
478480

479-
assert(self.skip_footer >= 0)
481+
#assert(self.skip_footer >= 0)
480482

481483
self.keep_default_na = keep_default_na
482484
if na_values is None and keep_default_na:
@@ -771,14 +773,17 @@ def _rows_to_cols(self, content):
771773

772774
if col_len != zip_len:
773775
row_num = -1
776+
i = 0
774777
for (i, l) in enumerate(content):
775778
if len(l) != col_len:
776779
break
777780

778781
footers = 0
779782
if self.skip_footer:
780783
footers = self.skip_footer
781-
row_num = self.pos - (len(content) - i + footers)
784+
if footers > 0:
785+
footers = footers - self.pos
786+
row_num = self.pos - (len(content) - i - footers)
782787

783788
msg = ('Expecting %d columns, got %d in row %d' %
784789
(col_len, zip_len, row_num))
@@ -1101,7 +1106,7 @@ def _get_lines(self, rows=None):
11011106
self.buf = []
11021107

11031108
if self.skip_footer:
1104-
lines = lines[:-self.skip_footer]
1109+
lines = lines[:self.skip_footer]
11051110

11061111
lines = self._check_comments(lines)
11071112
return self._check_thousands(lines)

pandas/io/tests/test_parsers.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def test_malformed(self):
437437

438438
try:
439439
df = read_table(StringIO(data), sep=',', header=1, comment='#',
440-
skip_footer=1)
440+
skip_footer=-1)
441441
self.assert_(False)
442442
except ValueError, inst:
443443
self.assert_('Expecting 3 columns, got 5 in row 3' in str(inst))
@@ -1112,12 +1112,15 @@ def test_skip_footer(self):
11121112
also also skip this
11131113
and this
11141114
"""
1115-
result = read_csv(StringIO(data), skip_footer=3)
1115+
result = read_csv(StringIO(data), skip_footer=-3)
11161116
no_footer = '\n'.join(data.split('\n')[:-4])
11171117
expected = read_csv(StringIO(no_footer))
11181118

11191119
assert_frame_equal(result, expected)
11201120

1121+
result = read_csv(StringIO(data), skip_footer=3)
1122+
assert_frame_equal(result, expected)
1123+
11211124
def test_no_unnamed_index(self):
11221125
data = """ id c0 c1 c2
11231126
0 1 0 a b

0 commit comments

Comments
 (0)