Skip to content

Commit 121c4dc

Browse files
committed
ENH: add integer sheetname support
1 parent ebf9147 commit 121c4dc

File tree

5 files changed

+93
-28
lines changed

5 files changed

+93
-28
lines changed

doc/source/io.rst

+20
Original file line numberDiff line numberDiff line change
@@ -1532,6 +1532,26 @@ advanced strategies
15321532
15331533
read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA'])
15341534
1535+
.. versionadded:: 0.13
1536+
1537+
There are now two ways to read in sheets from an Excel file. You can provide
1538+
either the index of a sheet or its name. If the value provided is an integer,
1539+
it is treated as the zero-based index of a sheet (``0`` is the first sheet);
1540+
if a string is passed, it is treated as the name of
1541+
a particular sheet in the file.
1542+
1543+
Using the sheet name:
1544+
1545+
.. code-block:: python
1546+
1547+
read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA'])
1548+
1549+
Using the sheet index:
1550+
1551+
.. code-block:: python
1552+
1553+
read_excel('path_to_file.xls', 0, index_col=None, na_values=['NA'])
1554+
15351555
It is often the case that users will insert columns to do temporary computations
15361556
in Excel and you may not want to read in those columns. `read_excel` takes
15371557
a `parse_cols` keyword to allow you to specify a subset of columns to parse.

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ pandas 0.13
3737

3838
- ``read_html`` now raises a ``URLError`` instead of catching and raising a
3939
``ValueError`` (:issue:`4303`, :issue:`4305`)
40+
- ``read_excel`` now supports an integer in its ``sheetname`` argument giving
41+
the index of the sheet to read in (:issue:`4301`).
4042

4143
**API Changes**
4244

doc/source/v0.13.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ enhancements along with a large number of bug fixes.
99
API changes
1010
~~~~~~~~~~~
1111

12+
- ``read_excel`` now supports an integer in its ``sheetname`` argument giving
13+
the index of the sheet to read in (:issue:`4301`).
14+
1215
Enhancements
1316
~~~~~~~~~~~~
1417

pandas/io/excel.py

+23-27
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ def read_excel(path_or_buf, sheetname, kind=None, **kwds):
4848
parsed : DataFrame
4949
DataFrame from the passed in Excel file
5050
"""
51-
return ExcelFile(path_or_buf,kind=kind).parse(sheetname=sheetname,
52-
kind=kind, **kwds)
51+
return ExcelFile(path_or_buf, kind=kind).parse(sheetname=sheetname,
52+
kind=kind, **kwds)
53+
5354

5455
class ExcelFile(object):
5556
"""
@@ -86,8 +87,8 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
8687
8788
Parameters
8889
----------
89-
sheetname : string
90-
Name of Excel sheet
90+
sheetname : string or integer
91+
Name of Excel sheet or the zero-based index (position) of the sheet
9192
header : int, default 0
9293
Row to use for the column labels of the parsed DataFrame
9394
skiprows : list-like
@@ -117,27 +118,20 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0,
117118
parsed : DataFrame
118119
DataFrame parsed from the Excel file
119120
"""
120-
121-
# has_index_names: boolean, default False
122-
# True if the cols defined in index_col have an index name and are
123-
# not in the header
124121
has_index_names = False # removed as new argument of API function
125122

126123
skipfooter = kwds.pop('skipfooter', None)
127124
if skipfooter is not None:
128125
skip_footer = skipfooter
129126

130-
return self._parse_excel(sheetname, header=header,
131-
skiprows=skiprows, index_col=index_col,
132-
has_index_names=has_index_names,
133-
parse_cols=parse_cols,
134-
parse_dates=parse_dates,
135-
date_parser=date_parser,
136-
na_values=na_values,
137-
thousands=thousands,
138-
chunksize=chunksize,
139-
skip_footer=skip_footer,
140-
**kwds)
127+
return self._parse_excel(sheetname, header=header, skiprows=skiprows,
128+
index_col=index_col,
129+
has_index_names=has_index_names,
130+
parse_cols=parse_cols,
131+
parse_dates=parse_dates,
132+
date_parser=date_parser, na_values=na_values,
133+
thousands=thousands, chunksize=chunksize,
134+
skip_footer=skip_footer, **kwds)
141135

142136
def _should_parse(self, i, parse_cols):
143137

@@ -171,20 +165,22 @@ def _excel2num(x):
171165
else:
172166
return i in parse_cols
173167

174-
def _parse_excel(self, sheetname, header=0, skiprows=None,
175-
skip_footer=0, index_col=None, has_index_names=None,
176-
parse_cols=None, parse_dates=False, date_parser=None,
177-
na_values=None, thousands=None, chunksize=None,
178-
**kwds):
168+
def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
169+
index_col=None, has_index_names=None, parse_cols=None,
170+
parse_dates=False, date_parser=None, na_values=None,
171+
thousands=None, chunksize=None, **kwds):
179172
from xlrd import (xldate_as_tuple, XL_CELL_DATE,
180173
XL_CELL_ERROR, XL_CELL_BOOLEAN)
181174

182175
datemode = self.book.datemode
183-
sheet = self.book.sheet_by_name(sheetname)
176+
if isinstance(sheetname, basestring):
177+
sheet = self.book.sheet_by_name(sheetname)
178+
else: # assume an integer if not a string
179+
sheet = self.book.sheet_by_index(sheetname)
184180

185181
data = []
186182
should_parse = {}
187-
for i in range(sheet.nrows):
183+
for i in xrange(sheet.nrows):
188184
row = []
189185
for j, (value, typ) in enumerate(izip(sheet.row_values(i),
190186
sheet.row_types(i))):
@@ -225,7 +221,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None,
225221

226222
@property
227223
def sheet_names(self):
228-
return self.book.sheet_names()
224+
return self.book.sheet_names()
229225

230226

231227
def _trim_excel_header(row):

pandas/io/tests/test_excel.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def setUp(self):
8282
self.csv1 = os.path.join(self.dirpath, 'test1.csv')
8383
self.csv2 = os.path.join(self.dirpath, 'test2.csv')
8484
self.xls1 = os.path.join(self.dirpath, 'test.xls')
85+
self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
8586
self.frame = _frame.copy()
8687
self.frame2 = _frame2.copy()
8788
self.tsframe = _tsframe.copy()
@@ -198,6 +199,49 @@ def test_excel_passes_na(self):
198199
columns=['Test'])
199200
tm.assert_frame_equal(parsed, expected)
200201

202+
def check_excel_table_sheet_by_index(self, filename, csvfile):
203+
import xlrd
204+
205+
pth = os.path.join(self.dirpath, filename)
206+
xls = ExcelFile(pth)
207+
df = xls.parse(0, index_col=0, parse_dates=True)
208+
df2 = self.read_csv(csvfile, index_col=0, parse_dates=True)
209+
df3 = xls.parse(1, skiprows=[1], index_col=0, parse_dates=True)
210+
tm.assert_frame_equal(df, df2, check_names=False)
211+
tm.assert_frame_equal(df3, df2, check_names=False)
212+
213+
df4 = xls.parse(0, index_col=0, parse_dates=True, skipfooter=1)
214+
df5 = xls.parse(0, index_col=0, parse_dates=True, skip_footer=1)
215+
tm.assert_frame_equal(df4, df.ix[:-1])
216+
tm.assert_frame_equal(df4, df5)
217+
218+
self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf')
219+
220+
def test_excel_table_sheet_by_index(self):
221+
_skip_if_no_xlrd()
222+
for filename, csvfile in [(self.xls1, self.csv1),
223+
(self.xlsx1, self.csv1)]:
224+
self.check_excel_table_sheet_by_index(filename, csvfile)
225+
226+
def check_excel_sheet_by_name_raise(self, ext):
227+
import xlrd
228+
pth = os.path.join(self.dirpath, 'testit.{0}'.format(ext))
229+
230+
with ensure_clean(pth) as pth:
231+
gt = DataFrame(np.random.randn(10, 2))
232+
gt.to_excel(pth)
233+
xl = ExcelFile(pth)
234+
df = xl.parse(0)
235+
tm.assert_frame_equal(gt, df)
236+
237+
self.assertRaises(xlrd.XLRDError, xl.parse, '0')
238+
239+
def test_excel_sheet_by_name_raise(self):
240+
_skip_if_no_xlrd()
241+
_skip_if_no_xlwt()
242+
for ext in ('xls', 'xlsx'):
243+
self.check_excel_sheet_by_name_raise(ext)
244+
201245
def test_excel_table(self):
202246
_skip_if_no_xlrd()
203247

@@ -438,7 +482,6 @@ def _check_extension_sheets(self, ext):
438482
np.testing.assert_equal('test1', reader.sheet_names[0])
439483
np.testing.assert_equal('test2', reader.sheet_names[1])
440484

441-
442485
def test_excel_roundtrip_xls_colaliases(self):
443486
_skip_if_no_excelsuite()
444487
self._check_extension_colaliases('xls')
@@ -892,6 +935,7 @@ def test_deprecated_from_parsers(self):
892935
from pandas.io.parsers import ExcelWriter as xw
893936
xw(path)
894937

938+
895939
if __name__ == '__main__':
896940
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
897941
exit=False)

0 commit comments

Comments
 (0)