Skip to content

Commit 171c9b6

Browse files
committed
BUG: read_excel returns an empty DataFrame when usecols is a list of strings
Closes pandas-dev#18273. Checklist: tests added/passed; passes git diff master --name-only -- "*.py" | grep "pandas/" | xargs -r flake8; whatsnew entry added. As mentioned in the issue, read_excel returned an empty DataFrame when the usecols argument was a list of strings. Lists of strings are now correctly interpreted by the read_excel function.
1 parent 4fb963b commit 171c9b6

File tree

3 files changed

+45
-2
lines changed

3 files changed

+45
-2
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,7 @@ I/O
983983
- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
984984
- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`)
985985
- Bug in :meth:`pandas.io.json.json_normalize` where subrecords are not properly normalized if any subrecords values are NoneType (:issue:`20030`)
986+
- Bug in :func:`read_excel` that returned an empty ``DataFrame`` when the ``usecols`` argument was passed as a list of strings (:issue:`18273`)
986987

987988
Plotting
988989
^^^^^^^^

pandas/io/excel.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,11 @@
9696
* If int then indicates last column to be parsed
9797
* If list of ints then indicates list of column numbers to be parsed
9898
* If string then indicates comma separated list of Excel column letters and
99-
column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of
100-
both sides.
99+
column ranges (e.g. "A:E" or "A,C,E:F") to be parsed. Ranges are
100+
inclusive of both sides.
101+
* If list of strings then indicates list of Excel column letters and
102+
column ranges (e.g. ["A:E"] or ["A", "C", "E:F"]) to be parsed. Ranges are
103+
inclusive of both sides.
101104
squeeze : boolean, default False
102105
If the parsed data only contains one column then return a Series
103106
dtype : Type name or dict of column -> type, default None
@@ -479,6 +482,9 @@ def _excel2num(x):
479482
return i <= usecols
480483
elif isinstance(usecols, compat.string_types):
481484
return i in _range2cols(usecols)
485+
elif all(isinstance(x, compat.string_types) for x in usecols) is True:
486+
usecols_str = ",".join(usecols)
487+
return i in _range2cols(usecols_str)
482488
else:
483489
return i in usecols
484490

pandas/tests/io/test_excel.py

+36
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,42 @@ def test_usecols_str(self, ext):
179179
tm.assert_frame_equal(df2, df1, check_names=False)
180180
tm.assert_frame_equal(df3, df1, check_names=False)
181181

182+
def test_usecols_str_list(self, ext):
183+
184+
dfref = self.get_csv_refdf('test1')
185+
186+
df1 = dfref.reindex(columns=['A', 'B', 'C'])
187+
df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
188+
usecols=['A:D'])
189+
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
190+
index_col=0, usecols=['A:D'])
191+
192+
with tm.assert_produces_warning(FutureWarning):
193+
df4 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
194+
index_col=0, parse_cols=['A:D'])
195+
196+
# TODO add index to xls, read xls ignores index name ?
197+
tm.assert_frame_equal(df2, df1, check_names=False)
198+
tm.assert_frame_equal(df3, df1, check_names=False)
199+
tm.assert_frame_equal(df4, df1, check_names=False)
200+
201+
df1 = dfref.reindex(columns=['B', 'C'])
202+
df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
203+
usecols=['A', 'C', 'D'])
204+
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
205+
index_col=0, usecols=['A', 'C', 'D'])
206+
# TODO add index to xls file
207+
tm.assert_frame_equal(df2, df1, check_names=False)
208+
tm.assert_frame_equal(df3, df1, check_names=False)
209+
210+
df1 = dfref.reindex(columns=['B', 'C'])
211+
df2 = self.get_exceldf('test1', ext, 'Sheet1', index_col=0,
212+
usecols=['A', 'C:D'])
213+
df3 = self.get_exceldf('test1', ext, 'Sheet2', skiprows=[1],
214+
index_col=0, usecols=['A', 'C:D'])
215+
tm.assert_frame_equal(df2, df1, check_names=False)
216+
tm.assert_frame_equal(df3, df1, check_names=False)
217+
182218
def test_excel_stop_iterator(self, ext):
183219

184220
parsed = self.get_exceldf('test2', ext, 'Sheet1')

0 commit comments

Comments
 (0)