DEPR, DOC: Deprecate buffer_lines in read_csv

gfyoung · jreback · commit 863cbc571b17 · 2016-06-05T09:57:26.000-04:00
`buffer_lines` is not respected, as it is determined internally via a heuristic involving `table_width` (see <a href="https://github.com/pydata/pandas/blob/master/pandas/parser.pyx#L527">here</a> for how it is computed). Author: gfyoung <gfyoung17@gmail.com> Closes #13360 from gfyoung/buffer-lines-depr-doc and squashes the following commits: a72ecbe [gfyoung] DEPR, DOC: Deprecate buffer_lines in read_csv
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -176,6 +176,12 @@ low_memory : boolean, default ``True``
   Note that the entire file is read into a single DataFrame regardless,
   use the ``chunksize`` or ``iterator`` parameter to return the data in chunks.
   (Only valid with C parser)
+buffer_lines : int, default None
+    DEPRECATED: this argument will be removed in a future version because its
+    value is not respected by the parser
+
+    If ``low_memory`` is ``True``, specify the number of rows to be read for
+    each chunk. (Only valid with C parser)
 compact_ints : boolean, default False
   DEPRECATED: this argument will be removed in a future version
 
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -293,7 +293,8 @@ Other API changes
 Deprecations
 ^^^^^^^^^^^^
 
-- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv`` and will be removed in a future version (:issue:`13320`)
+- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`)
+- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
 
 .. _whatsnew_0182.performance:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -231,14 +231,19 @@
     Note that the entire file is read into a single DataFrame regardless,
     use the `chunksize` or `iterator` parameter to return the data in chunks.
     (Only valid with C parser)
+buffer_lines : int, default None
+    DEPRECATED: this argument will be removed in a future version because its
+    value is not respected by the parser
+
+    If low_memory is True, specify the number of rows to be read for each
+    chunk. (Only valid with C parser)
 compact_ints : boolean, default False
     DEPRECATED: this argument will be removed in a future version
 
     If compact_ints is True, then for any column that is of integer dtype,
     the parser will attempt to cast it as the smallest integer dtype possible,
     either signed or unsigned depending on the specification from the
     `use_unsigned` parameter.
-
 use_unsigned : boolean, default False
     DEPRECATED: this argument will be removed in a future version
 
@@ -452,6 +457,7 @@ def _read(filepath_or_buffer, kwds):
     'float_precision',
 ])
 _deprecated_args = set([
+    'buffer_lines',
     'compact_ints',
     'use_unsigned',
 ])
@@ -810,7 +816,8 @@ def _clean_options(self, options, engine):
         _validate_header_arg(options['header'])
 
         for arg in _deprecated_args:
-            if result[arg] != _c_parser_defaults[arg]:
+            parser_default = _c_parser_defaults[arg]
+            if result.get(arg, parser_default) != parser_default:
                 warnings.warn("The '{arg}' argument has been deprecated "
                               "and will be removed in a future version"
                               .format(arg=arg), FutureWarning, stacklevel=2)
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py
@@ -72,14 +72,12 @@ def read_csv(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         kwds['low_memory'] = self.low_memory
-        kwds['buffer_lines'] = 2
         return read_csv(*args, **kwds)
 
     def read_table(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         kwds['low_memory'] = True
-        kwds['buffer_lines'] = 2
         return read_table(*args, **kwds)
 
 
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
@@ -124,6 +124,7 @@ def test_deprecated_args(self):
 
         # deprecated arguments with non-default values
         deprecated = {
+            'buffer_lines': True,
             'compact_ints': True,
             'use_unsigned': True,
         }
@@ -132,6 +133,10 @@ def test_deprecated_args(self):
 
         for engine in engines:
             for arg, non_default_val in deprecated.items():
+                if engine == 'python' and arg == 'buffer_lines':
+                    # unsupported --> exception is raised first
+                    continue
+
                 with tm.assert_produces_warning(
                         FutureWarning, check_stacklevel=False):
                     kwargs = {arg: non_default_val}