API: Deprecate skip_footer in read_csv

gfyoung · gfyoung · commit d21345f35364 · 2016-07-27T22:38:29.000-04:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -175,6 +175,8 @@ skiprows : list-like or integer, default ``None``
   of the file.
 skipfooter : int, default ``0``
   Number of lines at bottom of file to skip (unsupported with engine='c').
+skip_footer : int, default ``0``
+  DEPRECATED: use the ``skipfooter`` parameter instead; the two are identical.
 nrows : int, default ``None``
   Number of rows of file to read. Useful for reading pieces of large files.
 low_memory : boolean, default ``True``
@@ -1411,7 +1413,7 @@ back to python if C-unsupported options are specified. Currently, C-unsupported
 options include:
 
 - ``sep`` other than a single character (e.g. regex separators)
-- ``skip_footer``
+- ``skipfooter``
 - ``sep=None`` with ``delim_whitespace=False``
 
 Specifying any of the above options will produce a ``ParserWarning`` unless the
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -612,6 +612,7 @@ Deprecations
 - ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`)
 - ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
 - ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`)
+- ``skip_footer`` has been deprecated in ``pd.read_csv()`` in favor of ``skipfooter`` and will be removed in a future version (:issue:`13349`)
 - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
 - ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`)
 - ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as alternative, an example is :ref:`here <cookbook.pivot>` (:issue:`736`)
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -473,7 +473,7 @@ def _parse_cell(cell_contents, cell_typ):
                                     parse_dates=parse_dates,
                                     date_parser=date_parser,
                                     skiprows=skiprows,
-                                    skip_footer=skip_footer,
+                                    skipfooter=skip_footer,
                                     squeeze=squeeze,
                                     **kwds)
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -125,6 +125,8 @@
     at the start of the file
 skipfooter : int, default 0
     Number of lines at bottom of file to skip (Unsupported with engine='c')
+skip_footer : int, default 0
+    DEPRECATED: use the `skipfooter` parameter instead, as they are identical
 nrows : int, default None
     Number of rows of file to read. Useful for reading pieces of large files
 na_values : str or list-like or dict, default None
@@ -341,9 +343,6 @@ def _validate_nrows(nrows):
 def _read(filepath_or_buffer, kwds):
     "Generic reader of line files."
     encoding = kwds.get('encoding', None)
-    skipfooter = kwds.pop('skipfooter', None)
-    if skipfooter is not None:
-        kwds['skip_footer'] = skipfooter
 
     # If the input could be a filename, check for a recognizable compression
     # extension.  If we're reading from a URL, the `get_filepath_or_buffer`
@@ -411,8 +410,8 @@ def _read(filepath_or_buffer, kwds):
     'na_values': None,
     'true_values': None,
     'false_values': None,
-    'skip_footer': 0,
     'converters': None,
+    'skipfooter': 0,
 
     'keep_default_na': True,
     'thousands': None,
@@ -461,7 +460,7 @@ def _read(filepath_or_buffer, kwds):
     'widths': None,
 }
 
-_c_unsupported = set(['skip_footer'])
+_c_unsupported = set(['skipfooter'])
 _python_unsupported = set([
     'low_memory',
     'buffer_lines',
@@ -503,7 +502,6 @@ def parser_f(filepath_or_buffer,
                  false_values=None,
                  skipinitialspace=False,
                  skiprows=None,
-                 skipfooter=None,
                  nrows=None,
 
                  # NA and Missing Data Handling
@@ -541,8 +539,8 @@ def parser_f(filepath_or_buffer,
                  error_bad_lines=True,
                  warn_bad_lines=True,
 
-                 # Deprecated
-                 skip_footer=0,
+                 skipfooter=0,
+                 skip_footer=0,  # deprecated
 
                  # Internal
                  doublequote=True,
@@ -570,6 +568,13 @@ def parser_f(filepath_or_buffer,
             engine = 'c'
             engine_specified = False
 
+        if skip_footer != 0:
+            warnings.warn("The 'skip_footer' argument has "
+                          "been deprecated and will be removed "
+                          "in a future version. Please use the "
+                          "'skipfooter' argument instead.",
+                          FutureWarning, stacklevel=2)
+
         kwds = dict(delimiter=delimiter,
                     engine=engine,
                     dialect=dialect,
@@ -767,9 +772,9 @@ def _clean_options(self, options, engine):
 
         # C engine not supported yet
         if engine == 'c':
-            if options['skip_footer'] > 0:
+            if options['skipfooter'] > 0:
                 fallback_reason = "the 'c' engine does not support"\
-                                  " skip_footer"
+                                  " skipfooter"
                 engine = 'python'
 
         if sep is None and not delim_whitespace:
@@ -902,8 +907,8 @@ def _failover_to_python(self):
 
     def read(self, nrows=None):
         if nrows is not None:
-            if self.options.get('skip_footer'):
-                raise ValueError('skip_footer not supported for iteration')
+            if self.options.get('skipfooter'):
+                raise ValueError('skipfooter not supported for iteration')
 
         ret = self._engine.read(nrows)
 
@@ -1578,7 +1583,7 @@ def TextParser(*args, **kwds):
     date_parser : function, default None
     skiprows : list of integers
         Row numbers to skip
-    skip_footer : int
+    skipfooter : int
         Number of line at bottom of file to skip
     converters : dict, default None
         Dict of functions for converting values in certain columns. Keys can
@@ -1691,7 +1696,7 @@ def __init__(self, f, **kwds):
         self.memory_map = kwds['memory_map']
         self.skiprows = kwds['skiprows']
 
-        self.skip_footer = kwds['skip_footer']
+        self.skipfooter = kwds['skipfooter']
         self.delimiter = kwds['delimiter']
 
         self.quotechar = kwds['quotechar']
@@ -2323,7 +2328,7 @@ def _rows_to_cols(self, content):
             content, min_width=col_len).T)
         zip_len = len(zipped_content)
 
-        if self.skip_footer < 0:
+        if self.skipfooter < 0:
             raise ValueError('skip footer cannot be negative')
 
         # Loop through rows to verify lengths are correct.
@@ -2336,8 +2341,8 @@ def _rows_to_cols(self, content):
                     break
 
             footers = 0
-            if self.skip_footer:
-                footers = self.skip_footer
+            if self.skipfooter:
+                footers = self.skipfooter
 
             row_num = self.pos - (len(content) - i + footers)
 
@@ -2423,8 +2428,8 @@ def _get_lines(self, rows=None):
         else:
             lines = new_rows
 
-        if self.skip_footer:
-            lines = lines[:-self.skip_footer]
+        if self.skipfooter:
+            lines = lines[:-self.skipfooter]
 
         lines = self._check_comments(lines)
         if self.skip_blank_lines:
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
@@ -218,9 +218,9 @@ def test_malformed(self):
                                  skiprows=[2])
             it.read()
 
-        # skip_footer is not supported with the C parser yet
+        # skipfooter is not supported with the C parser yet
         if self.engine == 'python':
-            # skip_footer
+            # skipfooter
             data = """ignore
 A,B,C
 1,2,3 # comment
@@ -232,7 +232,7 @@ def test_malformed(self):
             with tm.assertRaisesRegexp(Exception, msg):
                 self.read_table(StringIO(data), sep=',',
                                 header=1, comment='#',
-                                skip_footer=1)
+                                skipfooter=1)
 
     def test_quoting(self):
         bad_line_small = """printer\tresult\tvariant_name
@@ -524,11 +524,11 @@ def test_iterator(self):
         self.assertEqual(len(result), 3)
         tm.assert_frame_equal(pd.concat(result), expected)
 
-        # skip_footer is not supported with the C parser yet
+        # skipfooter is not supported with the C parser yet
         if self.engine == 'python':
-            # test bad parameter (skip_footer)
+            # test bad parameter (skipfooter)
             reader = self.read_csv(StringIO(self.data1), index_col=0,
-                                   iterator=True, skip_footer=True)
+                                   iterator=True, skipfooter=True)
             self.assertRaises(ValueError, reader.read, 3)
 
     def test_pass_names_with_index(self):
diff --git a/pandas/io/tests/parser/python_parser_only.py b/pandas/io/tests/parser/python_parser_only.py
@@ -98,7 +98,7 @@ def test_single_line(self):
         finally:
             sys.stdout = sys.__stdout__
 
-    def test_skip_footer(self):
+    def test_skipfooter(self):
         # see gh-6607
         data = """A,B,C
 1,2,3
@@ -107,7 +107,7 @@ def test_skip_footer(self):
 want to skip this
 also also skip this
 """
-        result = self.read_csv(StringIO(data), skip_footer=2)
+        result = self.read_csv(StringIO(data), skipfooter=2)
         no_footer = '\n'.join(data.split('\n')[:-3])
         expected = self.read_csv(StringIO(no_footer))
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
@@ -52,7 +52,7 @@ def test_c_engine(self):
         with tm.assertRaisesRegexp(ValueError, msg):
             read_table(StringIO(data), sep='\s', dtype={'a': float})
         with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), skip_footer=1, dtype={'a': float})
+            read_table(StringIO(data), skipfooter=1, dtype={'a': float})
 
         # specify C engine with unsupported options (raise)
         with tm.assertRaisesRegexp(ValueError, msg):
@@ -61,15 +61,15 @@ def test_c_engine(self):
         with tm.assertRaisesRegexp(ValueError, msg):
             read_table(StringIO(data), engine='c', sep='\s')
         with tm.assertRaisesRegexp(ValueError, msg):
-            read_table(StringIO(data), engine='c', skip_footer=1)
+            read_table(StringIO(data), engine='c', skipfooter=1)
 
         # specify C-unsupported options without python-unsupported options
         with tm.assert_produces_warning(parsers.ParserWarning):
             read_table(StringIO(data), sep=None, delim_whitespace=False)
         with tm.assert_produces_warning(parsers.ParserWarning):
             read_table(StringIO(data), sep='\s')
         with tm.assert_produces_warning(parsers.ParserWarning):
-            read_table(StringIO(data), skip_footer=1)
+            read_table(StringIO(data), skipfooter=1)
 
         text = """                      A       B       C       D        E
 one two three   four
@@ -127,15 +127,20 @@ def test_deprecated_args(self):
             'as_recarray': True,
             'buffer_lines': True,
             'compact_ints': True,
+            'skip_footer': True,
             'use_unsigned': True,
         }
 
         engines = 'c', 'python'
 
         for engine in engines:
             for arg, non_default_val in deprecated.items():
+                if engine == 'c' and arg == 'skip_footer':
+                    # unsupported --> exception is raised
+                    continue
+
                 if engine == 'python' and arg == 'buffer_lines':
-                    # unsupported --> exception is raised first
+                    # unsupported --> exception is raised
                     continue
 
                 with tm.assert_produces_warning(
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -165,7 +165,7 @@ cdef extern from "parser/tokenizer.h":
 
         void *skipset
         int64_t skip_first_N_rows
-        int skip_footer
+        int skipfooter
         double (*converter)(const char *, char **, char, char, char, int) nogil
 
         #  error handling
@@ -270,7 +270,7 @@ cdef class TextReader:
         kh_str_t *true_set
 
     cdef public:
-        int leading_cols, table_width, skip_footer, buffer_lines
+        int leading_cols, table_width, skipfooter, buffer_lines
         object allow_leading_cols
         object delimiter, converters, delim_whitespace
         object na_values
@@ -338,7 +338,7 @@ cdef class TextReader:
                   low_memory=False,
                   buffer_lines=None,
                   skiprows=None,
-                  skip_footer=0,
+                  skipfooter=0,
                   verbose=False,
                   mangle_dupe_cols=True,
                   tupleize_cols=False,
@@ -418,15 +418,15 @@ cdef class TextReader:
         if skiprows is not None:
             self._make_skiprow_set()
 
-        self.skip_footer = skip_footer
+        self.skipfooter = skipfooter
 
         # suboptimal
         if usecols is not None:
             self.has_usecols = 1
             self.usecols = set(usecols)
 
         # XXX
-        if skip_footer > 0:
+        if skipfooter > 0:
             self.parser.error_bad_lines = 0
             self.parser.warn_bad_lines = 0
 
@@ -912,8 +912,8 @@ cdef class TextReader:
             if buffered_lines < irows:
                 self._tokenize_rows(irows - buffered_lines)
 
-            if self.skip_footer > 0:
-                raise ValueError('skip_footer can only be used to read '
+            if self.skipfooter > 0:
+                raise ValueError('skipfooter can only be used to read '
                                  'the whole file')
         else:
             with nogil:
@@ -926,7 +926,7 @@ cdef class TextReader:
 
             if status < 0:
                 raise_parser_error('Error tokenizing data', self.parser)
-            footer = self.skip_footer
+            footer = self.skipfooter
 
         if self.parser_start == self.parser.lines:
             raise StopIteration