Skip to content

Commit 863cbc5

Browse files
gfyoung authored and jreback committed
DEPR, DOC: Deprecate buffer_lines in read_csv
`buffer_lines` is not respected, as it is determined internally via a heuristic involving `table_width` (see <a href="https://github.com/pydata/pandas/blob/master/pandas/parser.pyx#L527">here</a> for how it is computed). Author: gfyoung <[email protected]> Closes #13360 from gfyoung/buffer-lines-depr-doc and squashes the following commits: a72ecbe [gfyoung] DEPR, DOC: Deprecate buffer_lines in read_csv
1 parent eca7891 commit 863cbc5

File tree

5 files changed

+22
-5
lines changed

5 files changed

+22
-5
lines changed

doc/source/io.rst

+6
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,12 @@ low_memory : boolean, default ``True``
176176
Note that the entire file is read into a single DataFrame regardless,
177177
use the ``chunksize`` or ``iterator`` parameter to return the data in chunks.
178178
(Only valid with C parser)
179+
buffer_lines : int, default None
180+
DEPRECATED: this argument will be removed in a future version because its
181+
value is not respected by the parser
182+
183+
If ``low_memory`` is ``True``, specify the number of rows to be read for
184+
each chunk. (Only valid with C parser)
179185
compact_ints : boolean, default False
180186
DEPRECATED: this argument will be removed in a future version
181187

doc/source/whatsnew/v0.18.2.txt

+2-1
Original file line number | Diff line number | Diff line change
@@ -293,7 +293,8 @@ Other API changes
293293
Deprecations
294294
^^^^^^^^^^^^
295295

296-
- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv`` and will be removed in a future version (:issue:`13320`)
296+
- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`)
297+
- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
297298

298299
.. _whatsnew_0182.performance:
299300

pandas/io/parsers.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -231,14 +231,19 @@
231231
Note that the entire file is read into a single DataFrame regardless,
232232
use the `chunksize` or `iterator` parameter to return the data in chunks.
233233
(Only valid with C parser)
234+
buffer_lines : int, default None
235+
DEPRECATED: this argument will be removed in a future version because its
236+
value is not respected by the parser
237+
238+
If low_memory is True, specify the number of rows to be read for each
239+
chunk. (Only valid with C parser)
234240
compact_ints : boolean, default False
235241
DEPRECATED: this argument will be removed in a future version
236242
237243
If compact_ints is True, then for any column that is of integer dtype,
238244
the parser will attempt to cast it as the smallest integer dtype possible,
239245
either signed or unsigned depending on the specification from the
240246
`use_unsigned` parameter.
241-
242247
use_unsigned : boolean, default False
243248
DEPRECATED: this argument will be removed in a future version
244249
@@ -452,6 +457,7 @@ def _read(filepath_or_buffer, kwds):
452457
'float_precision',
453458
])
454459
_deprecated_args = set([
460+
'buffer_lines',
455461
'compact_ints',
456462
'use_unsigned',
457463
])
@@ -810,7 +816,8 @@ def _clean_options(self, options, engine):
810816
_validate_header_arg(options['header'])
811817

812818
for arg in _deprecated_args:
813-
if result[arg] != _c_parser_defaults[arg]:
819+
parser_default = _c_parser_defaults[arg]
820+
if result.get(arg, parser_default) != parser_default:
814821
warnings.warn("The '{arg}' argument has been deprecated "
815822
"and will be removed in a future version"
816823
.format(arg=arg), FutureWarning, stacklevel=2)

pandas/io/tests/parser/test_parsers.py

-2
Original file line number | Diff line number | Diff line change
@@ -72,14 +72,12 @@ def read_csv(self, *args, **kwds):
7272
kwds = kwds.copy()
7373
kwds['engine'] = self.engine
7474
kwds['low_memory'] = self.low_memory
75-
kwds['buffer_lines'] = 2
7675
return read_csv(*args, **kwds)
7776

7877
def read_table(self, *args, **kwds):
7978
kwds = kwds.copy()
8079
kwds['engine'] = self.engine
8180
kwds['low_memory'] = True
82-
kwds['buffer_lines'] = 2
8381
return read_table(*args, **kwds)
8482

8583

pandas/io/tests/parser/test_unsupported.py

+5
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ def test_deprecated_args(self):
124124

125125
# deprecated arguments with non-default values
126126
deprecated = {
127+
'buffer_lines': True,
127128
'compact_ints': True,
128129
'use_unsigned': True,
129130
}
@@ -132,6 +133,10 @@ def test_deprecated_args(self):
132133

133134
for engine in engines:
134135
for arg, non_default_val in deprecated.items():
136+
if engine == 'python' and arg == 'buffer_lines':
137+
# unsupported --> exception is raised first
138+
continue
139+
135140
with tm.assert_produces_warning(
136141
FutureWarning, check_stacklevel=False):
137142
kwargs = {arg: non_default_val}

0 commit comments

Comments
 (0)