ENH: allow positive and negative values for skip_footer #1843

changhiskhan · wesm · commit 36f24afc9039 · 2012-09-18T09:52:04.000-04:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -76,6 +76,9 @@ data into a DataFrame object. They can take a number of arguments:
     Defaults to 0 (first row); specify None if there is no header row.
   - ``skiprows``: A collection of numbers for rows in the file to skip. Can
     also be an integer to skip the first ``n`` rows
+  - ``skip_footer``: Lines at the bottom of the file to skip. If positive,
+    the row at which to start skipping; if negative, the number of rows to
+    skip from the end.
   - ``index_col``: column number, column name, or list of column numbers/names,
     to use as the ``index`` (row labels) of the resulting DataFrame. By default,
     it will number the rows without using any column, unless there is one more
@@ -116,7 +119,6 @@ data into a DataFrame object. They can take a number of arguments:
   - ``chunksize``: An number of rows to be used to "chunk" a file into
     pieces. Will cause an ``TextParser`` object to be returned. More on this
     below in the section on :ref:`iterating and chunking <io.chunking>`
-  - ``skip_footer``: number of lines to skip at bottom of file (default 0)
   - ``converters``: a dictionary of functions for converting values in certain
     columns, where keys are either integers or column labels
   - ``encoding``: a string representing the encoding to use if the contents are
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -46,6 +46,10 @@ class DateConversionError(Exception):
     Row to use for the column labels of the parsed DataFrame
 skiprows : list-like or integer
     Row numbers to skip (0-indexed) or number of rows to skip (int)
+    at the start of the file
+skip_footer : int, default 0
+    Lines at the bottom of the file to skip. If positive, the row at which
+    to start skipping; if negative, the number of rows to skip from the end.
 index_col : int or sequence, default None
     Column to use as the row labels of the DataFrame. If a sequence is
     given, a MultiIndex is used.
@@ -81,8 +85,6 @@ class DateConversionError(Exception):
     Return TextParser object
 chunksize : int, default None
     Return TextParser object for iteration
-skip_footer : int, default 0
-    Number of line at bottom of file to skip
 converters : dict. optional
     Dict of functions for converting values in certain columns. Keys can either
     be integers or column labels
@@ -476,7 +478,7 @@ def __init__(self, f, delimiter=None, dialect=None, names=None, header=0,
         else:
             self.converters = {}
 
-        assert(self.skip_footer >= 0)
+        #assert(self.skip_footer >= 0)
 
         self.keep_default_na = keep_default_na
         if na_values is None and keep_default_na:
@@ -771,14 +773,17 @@ def _rows_to_cols(self, content):
 
         if col_len != zip_len:
             row_num = -1
+            i = 0
             for (i, l) in enumerate(content):
                 if len(l) != col_len:
                     break
 
             footers = 0
             if self.skip_footer:
                 footers = self.skip_footer
-            row_num = self.pos - (len(content) - i + footers)
+                if footers > 0:
+                    footers = footers - self.pos
+            row_num = self.pos - (len(content) - i - footers)
 
             msg = ('Expecting %d columns, got %d in row %d' %
                    (col_len, zip_len, row_num))
@@ -1101,7 +1106,7 @@ def _get_lines(self, rows=None):
         self.buf = []
 
         if self.skip_footer:
-            lines = lines[:-self.skip_footer]
+            lines = lines[:self.skip_footer]
 
         lines = self._check_comments(lines)
         return self._check_thousands(lines)
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -437,7 +437,7 @@ def test_malformed(self):
 
         try:
             df = read_table(StringIO(data), sep=',', header=1, comment='#',
-                            skip_footer=1)
+                            skip_footer=-1)
             self.assert_(False)
         except ValueError, inst:
             self.assert_('Expecting 3 columns, got 5 in row 3' in str(inst))
@@ -1112,12 +1112,15 @@ def test_skip_footer(self):
 also also skip this
 and this
 """
-        result = read_csv(StringIO(data), skip_footer=3)
+        result = read_csv(StringIO(data), skip_footer=-3)
         no_footer = '\n'.join(data.split('\n')[:-4])
         expected = read_csv(StringIO(no_footer))
 
         assert_frame_equal(result, expected)
 
+        result = read_csv(StringIO(data), skip_footer=3)
+        assert_frame_equal(result, expected)
+
     def test_no_unnamed_index(self):
         data = """ id c0 c1 c2
 0 1 0 a b