Skip to content

Commit f8c385b

Browse files
gfyoung and No-Stream
authored and committed
DEPR: Deprecate parse_cols in read_excel (pandas-dev#17774)
closes pandas-dev#4988
1 parent 7dbc05b commit f8c385b

File tree

4 files changed

+62
-38
lines changed

4 files changed

+62
-38
lines changed

doc/source/io.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -2800,21 +2800,21 @@ Parsing Specific Columns
28002800

28012801
It is often the case that users will insert columns to do temporary computations
28022802
in Excel and you may not want to read in those columns. `read_excel` takes
2803-
a `parse_cols` keyword to allow you to specify a subset of columns to parse.
2803+
a `usecols` keyword to allow you to specify a subset of columns to parse.
28042804

2805-
If `parse_cols` is an integer, then it is assumed to indicate the last column
2805+
If `usecols` is an integer, then it is assumed to indicate the last column
28062806
to be parsed.
28072807

28082808
.. code-block:: python
28092809
2810-
read_excel('path_to_file.xls', 'Sheet1', parse_cols=2)
2810+
read_excel('path_to_file.xls', 'Sheet1', usecols=2)
28112811
2812-
If `parse_cols` is a list of integers, then it is assumed to be the file column
2812+
If `usecols` is a list of integers, then it is assumed to be the file column
28132813
indices to be parsed.
28142814

28152815
.. code-block:: python
28162816
2817-
read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3])
2817+
read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3])
28182818
28192819
28202820
Parsing Dates

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,7 @@ Deprecations
658658
~~~~~~~~~~~~
659659

660660
- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
661+
- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`).
661662
- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`)
662663
- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
663664
- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).

pandas/io/excel.py

+21-16
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import pandas.compat.openpyxl_compat as openpyxl_compat
3232
from warnings import warn
3333
from distutils.version import LooseVersion
34-
from pandas.util._decorators import Appender
34+
from pandas.util._decorators import Appender, deprecate_kwarg
3535
from textwrap import fill
3636

3737
__all__ = ["read_excel", "ExcelWriter", "ExcelFile"]
@@ -86,7 +86,7 @@
8686
Column (0-indexed) to use as the row labels of the DataFrame.
8787
Pass None if there is no such column. If a list is passed,
8888
those columns will be combined into a ``MultiIndex``. If a
89-
subset of data is selected with ``parse_cols``, index_col
89+
subset of data is selected with ``usecols``, index_col
9090
is based on the subset.
9191
names : array-like, default None
9292
List of column names to use. If file contains no header row,
@@ -115,6 +115,10 @@
115115
.. versionadded:: 0.19.0
116116
117117
parse_cols : int or list, default None
118+
.. deprecated:: 0.21.0
119+
Pass in `usecols` instead.
120+
121+
usecols : int, str or list, default None
118122
* If None then parse all columns,
119123
* If int then indicates last column to be parsed
120124
* If list of ints then indicates list of column numbers to be parsed
@@ -205,8 +209,9 @@ def get_writer(engine_name):
205209

206210

207211
@Appender(_read_excel_doc)
212+
@deprecate_kwarg("parse_cols", "usecols")
208213
def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
209-
index_col=None, names=None, parse_cols=None, parse_dates=False,
214+
index_col=None, names=None, usecols=None, parse_dates=False,
210215
date_parser=None, na_values=None, thousands=None,
211216
convert_float=True, converters=None, dtype=None,
212217
true_values=None, false_values=None, engine=None,
@@ -226,7 +231,7 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
226231

227232
return io._parse_excel(
228233
sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
229-
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
234+
index_col=index_col, usecols=usecols, parse_dates=parse_dates,
230235
date_parser=date_parser, na_values=na_values, thousands=thousands,
231236
convert_float=convert_float, skip_footer=skip_footer,
232237
converters=converters, dtype=dtype, true_values=true_values,
@@ -295,7 +300,7 @@ def __fspath__(self):
295300
return self._io
296301

297302
def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
298-
names=None, index_col=None, parse_cols=None, parse_dates=False,
303+
names=None, index_col=None, usecols=None, parse_dates=False,
299304
date_parser=None, na_values=None, thousands=None,
300305
convert_float=True, converters=None, true_values=None,
301306
false_values=None, squeeze=False, **kwds):
@@ -309,7 +314,7 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
309314
return self._parse_excel(sheetname=sheet_name, header=header,
310315
skiprows=skiprows, names=names,
311316
index_col=index_col,
312-
parse_cols=parse_cols,
317+
usecols=usecols,
313318
parse_dates=parse_dates,
314319
date_parser=date_parser, na_values=na_values,
315320
thousands=thousands,
@@ -321,7 +326,7 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
321326
squeeze=squeeze,
322327
**kwds)
323328

324-
def _should_parse(self, i, parse_cols):
329+
def _should_parse(self, i, usecols):
325330

326331
def _range2cols(areas):
327332
"""
@@ -347,15 +352,15 @@ def _excel2num(x):
347352
cols.append(_excel2num(rng))
348353
return cols
349354

350-
if isinstance(parse_cols, int):
351-
return i <= parse_cols
352-
elif isinstance(parse_cols, compat.string_types):
353-
return i in _range2cols(parse_cols)
355+
if isinstance(usecols, int):
356+
return i <= usecols
357+
elif isinstance(usecols, compat.string_types):
358+
return i in _range2cols(usecols)
354359
else:
355-
return i in parse_cols
360+
return i in usecols
356361

357362
def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
358-
skip_footer=0, index_col=None, parse_cols=None,
363+
skip_footer=0, index_col=None, usecols=None,
359364
parse_dates=False, date_parser=None, na_values=None,
360365
thousands=None, convert_float=True, true_values=None,
361366
false_values=None, verbose=False, dtype=None,
@@ -470,10 +475,10 @@ def _parse_cell(cell_contents, cell_typ):
470475
row = []
471476
for j, (value, typ) in enumerate(zip(sheet.row_values(i),
472477
sheet.row_types(i))):
473-
if parse_cols is not None and j not in should_parse:
474-
should_parse[j] = self._should_parse(j, parse_cols)
478+
if usecols is not None and j not in should_parse:
479+
should_parse[j] = self._should_parse(j, usecols)
475480

476-
if parse_cols is None or should_parse[j]:
481+
if usecols is None or should_parse[j]:
477482
row.append(_parse_cell(value, typ))
478483
data.append(row)
479484

pandas/tests/io/test_excel.py

+35-17
Original file line numberDiff line numberDiff line change
@@ -158,56 +158,74 @@ def setup_method(self, method):
158158
self.check_skip()
159159
super(ReadingTestsBase, self).setup_method(method)
160160

161-
def test_parse_cols_int(self):
161+
def test_usecols_int(self):
162162

163163
dfref = self.get_csv_refdf('test1')
164164
dfref = dfref.reindex(columns=['A', 'B', 'C'])
165-
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3)
165+
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, usecols=3)
166166
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
167-
parse_cols=3)
167+
usecols=3)
168+
169+
with tm.assert_produces_warning(FutureWarning):
170+
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
171+
index_col=0, parse_cols=3)
172+
168173
# TODO add index to xls file
169174
tm.assert_frame_equal(df1, dfref, check_names=False)
170175
tm.assert_frame_equal(df2, dfref, check_names=False)
176+
tm.assert_frame_equal(df3, dfref, check_names=False)
171177

172-
def test_parse_cols_list(self):
178+
def test_usecols_list(self):
173179

174180
dfref = self.get_csv_refdf('test1')
175181
dfref = dfref.reindex(columns=['B', 'C'])
176182
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0,
177-
parse_cols=[0, 2, 3])
183+
usecols=[0, 2, 3])
178184
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
179-
parse_cols=[0, 2, 3])
185+
usecols=[0, 2, 3])
186+
187+
with tm.assert_produces_warning(FutureWarning):
188+
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
189+
index_col=0, parse_cols=[0, 2, 3])
190+
180191
# TODO add index to xls file
181192
tm.assert_frame_equal(df1, dfref, check_names=False)
182193
tm.assert_frame_equal(df2, dfref, check_names=False)
194+
tm.assert_frame_equal(df3, dfref, check_names=False)
183195

184-
def test_parse_cols_str(self):
196+
def test_usecols_str(self):
185197

186198
dfref = self.get_csv_refdf('test1')
187199

188200
df1 = dfref.reindex(columns=['A', 'B', 'C'])
189201
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
190-
parse_cols='A:D')
202+
usecols='A:D')
191203
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
192-
parse_cols='A:D')
204+
usecols='A:D')
205+
206+
with tm.assert_produces_warning(FutureWarning):
207+
df4 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
208+
index_col=0, parse_cols='A:D')
209+
193210
# TODO add index to xls, read xls ignores index name ?
194211
tm.assert_frame_equal(df2, df1, check_names=False)
195212
tm.assert_frame_equal(df3, df1, check_names=False)
213+
tm.assert_frame_equal(df4, df1, check_names=False)
196214

197215
df1 = dfref.reindex(columns=['B', 'C'])
198216
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
199-
parse_cols='A,C,D')
217+
usecols='A,C,D')
200218
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
201-
parse_cols='A,C,D')
219+
usecols='A,C,D')
202220
# TODO add index to xls file
203221
tm.assert_frame_equal(df2, df1, check_names=False)
204222
tm.assert_frame_equal(df3, df1, check_names=False)
205223

206224
df1 = dfref.reindex(columns=['B', 'C'])
207225
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
208-
parse_cols='A,C:D')
226+
usecols='A,C:D')
209227
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
210-
parse_cols='A,C:D')
228+
usecols='A,C:D')
211229
tm.assert_frame_equal(df2, df1, check_names=False)
212230
tm.assert_frame_equal(df3, df1, check_names=False)
213231

@@ -457,14 +475,14 @@ def test_read_one_empty_col_no_header(self):
457475
actual_header_none = read_excel(
458476
path,
459477
'no_header',
460-
parse_cols=[0],
478+
usecols=[0],
461479
header=None
462480
)
463481

464482
actual_header_zero = read_excel(
465483
path,
466484
'no_header',
467-
parse_cols=[0],
485+
usecols=[0],
468486
header=0
469487
)
470488
expected = DataFrame()
@@ -486,14 +504,14 @@ def test_read_one_empty_col_with_header(self):
486504
actual_header_none = read_excel(
487505
path,
488506
'with_header',
489-
parse_cols=[0],
507+
usecols=[0],
490508
header=None
491509
)
492510

493511
actual_header_zero = read_excel(
494512
path,
495513
'with_header',
496-
parse_cols=[0],
514+
usecols=[0],
497515
header=0
498516
)
499517
expected_header_none = DataFrame(pd.Series([0], dtype='int64'))

0 commit comments

Comments
 (0)