Skip to content

Commit 517b726

Browse files
committed
DEPR: Deprecate parse_cols in read_excel
Will now use "usecols" just like in read_csv. xref pandas-dev/pandas#4988.
1 parent 81694dc commit 517b726

File tree

4 files changed

+49
-25
lines changed

4 files changed

+49
-25
lines changed

doc/source/io.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -2800,21 +2800,21 @@ Parsing Specific Columns
28002800

28012801
It is often the case that users will insert columns to do temporary computations
28022802
in Excel and you may not want to read in those columns. `read_excel` takes
2803-
a `parse_cols` keyword to allow you to specify a subset of columns to parse.
2803+
a `usecols` keyword to allow you to specify a subset of columns to parse.
28042804

2805-
If `parse_cols` is an integer, then it is assumed to indicate the last column
2805+
If `usecols` is an integer, then it is assumed to indicate the last column
28062806
to be parsed.
28072807

28082808
.. code-block:: python
28092809
2810-
read_excel('path_to_file.xls', 'Sheet1', parse_cols=2)
2810+
read_excel('path_to_file.xls', 'Sheet1', usecols=2)
28112811
2812-
If `parse_cols` is a list of integers, then it is assumed to be the file column
2812+
If `usecols` is a list of integers, then it is assumed to be the file column
28132813
indices to be parsed.
28142814

28152815
.. code-block:: python
28162816
2817-
read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3])
2817+
read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3])
28182818
28192819
28202820
Parsing Dates

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,7 @@ Deprecations
658658
~~~~~~~~~~~~
659659

660660
- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`).
661+
- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`).
661662
- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`)
662663
- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`).
663664
- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`).

pandas/io/excel.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import pandas.compat.openpyxl_compat as openpyxl_compat
3232
from warnings import warn
3333
from distutils.version import LooseVersion
34-
from pandas.util._decorators import Appender
34+
from pandas.util._decorators import Appender, deprecate_kwarg
3535
from textwrap import fill
3636

3737
__all__ = ["read_excel", "ExcelWriter", "ExcelFile"]
@@ -115,6 +115,10 @@
115115
.. versionadded:: 0.19.0
116116
117117
parse_cols : int or list, default None
118+
.. deprecated:: 0.21.0
119+
Pass in `usecols` instead.
120+
121+
usecols : int or list, default None
118122
* If None then parse all columns,
119123
* If int then indicates last column to be parsed
120124
* If list of ints then indicates list of column numbers to be parsed
@@ -205,8 +209,9 @@ def get_writer(engine_name):
205209

206210

207211
@Appender(_read_excel_doc)
212+
@deprecate_kwarg("parse_cols", "usecols")
208213
def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
209-
index_col=None, names=None, parse_cols=None, parse_dates=False,
214+
index_col=None, names=None, usecols=None, parse_dates=False,
210215
date_parser=None, na_values=None, thousands=None,
211216
convert_float=True, converters=None, dtype=None,
212217
true_values=None, false_values=None, engine=None,
@@ -226,7 +231,7 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
226231

227232
return io._parse_excel(
228233
sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
229-
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
234+
index_col=index_col, parse_cols=usecols, parse_dates=parse_dates,
230235
date_parser=date_parser, na_values=na_values, thousands=thousands,
231236
convert_float=convert_float, skip_footer=skip_footer,
232237
converters=converters, dtype=dtype, true_values=true_values,

pandas/tests/io/test_excel.py

+35-17
Original file line numberDiff line numberDiff line change
@@ -158,56 +158,74 @@ def setup_method(self, method):
158158
self.check_skip()
159159
super(ReadingTestsBase, self).setup_method(method)
160160

161-
def test_parse_cols_int(self):
161+
def test_usecols_int(self):
162162

163163
dfref = self.get_csv_refdf('test1')
164164
dfref = dfref.reindex(columns=['A', 'B', 'C'])
165-
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3)
165+
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, usecols=3)
166166
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
167-
parse_cols=3)
167+
usecols=3)
168+
169+
with tm.assert_produces_warning(FutureWarning):
170+
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
171+
index_col=0, parse_cols=3)
172+
168173
# TODO add index to xls file
169174
tm.assert_frame_equal(df1, dfref, check_names=False)
170175
tm.assert_frame_equal(df2, dfref, check_names=False)
176+
tm.assert_frame_equal(df3, dfref, check_names=False)
171177

172-
def test_parse_cols_list(self):
178+
def test_usecols_list(self):
173179

174180
dfref = self.get_csv_refdf('test1')
175181
dfref = dfref.reindex(columns=['B', 'C'])
176182
df1 = self.get_exceldf('test1', 'Sheet1', index_col=0,
177-
parse_cols=[0, 2, 3])
183+
usecols=[0, 2, 3])
178184
df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
179-
parse_cols=[0, 2, 3])
185+
usecols=[0, 2, 3])
186+
187+
with tm.assert_produces_warning(FutureWarning):
188+
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
189+
index_col=0, parse_cols=[0, 2, 3])
190+
180191
# TODO add index to xls file
181192
tm.assert_frame_equal(df1, dfref, check_names=False)
182193
tm.assert_frame_equal(df2, dfref, check_names=False)
194+
tm.assert_frame_equal(df3, dfref, check_names=False)
183195

184-
def test_parse_cols_str(self):
196+
def test_usecols_str(self):
185197

186198
dfref = self.get_csv_refdf('test1')
187199

188200
df1 = dfref.reindex(columns=['A', 'B', 'C'])
189201
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
190-
parse_cols='A:D')
202+
usecols='A:D')
191203
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
192-
parse_cols='A:D')
204+
usecols='A:D')
205+
206+
with tm.assert_produces_warning(FutureWarning):
207+
df4 = self.get_exceldf('test1', 'Sheet2', skiprows=[1],
208+
index_col=0, parse_cols='A:D')
209+
193210
# TODO add index to xls; read xls ignores index name?
194211
tm.assert_frame_equal(df2, df1, check_names=False)
195212
tm.assert_frame_equal(df3, df1, check_names=False)
213+
tm.assert_frame_equal(df4, df1, check_names=False)
196214

197215
df1 = dfref.reindex(columns=['B', 'C'])
198216
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
199-
parse_cols='A,C,D')
217+
usecols='A,C,D')
200218
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
201-
parse_cols='A,C,D')
219+
usecols='A,C,D')
202220
# TODO add index to xls file
203221
tm.assert_frame_equal(df2, df1, check_names=False)
204222
tm.assert_frame_equal(df3, df1, check_names=False)
205223

206224
df1 = dfref.reindex(columns=['B', 'C'])
207225
df2 = self.get_exceldf('test1', 'Sheet1', index_col=0,
208-
parse_cols='A,C:D')
226+
usecols='A,C:D')
209227
df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0,
210-
parse_cols='A,C:D')
228+
usecols='A,C:D')
211229
tm.assert_frame_equal(df2, df1, check_names=False)
212230
tm.assert_frame_equal(df3, df1, check_names=False)
213231

@@ -457,14 +475,14 @@ def test_read_one_empty_col_no_header(self):
457475
actual_header_none = read_excel(
458476
path,
459477
'no_header',
460-
parse_cols=[0],
478+
usecols=[0],
461479
header=None
462480
)
463481

464482
actual_header_zero = read_excel(
465483
path,
466484
'no_header',
467-
parse_cols=[0],
485+
usecols=[0],
468486
header=0
469487
)
470488
expected = DataFrame()
@@ -486,14 +504,14 @@ def test_read_one_empty_col_with_header(self):
486504
actual_header_none = read_excel(
487505
path,
488506
'with_header',
489-
parse_cols=[0],
507+
usecols=[0],
490508
header=None
491509
)
492510

493511
actual_header_zero = read_excel(
494512
path,
495513
'with_header',
496-
parse_cols=[0],
514+
usecols=[0],
497515
header=0
498516
)
499517
expected_header_none = DataFrame(pd.Series([0], dtype='int64'))

0 commit comments

Comments
 (0)