From 5be379f013f275b58c9ee6c04669ab7919ff728f Mon Sep 17 00:00:00 2001 From: Clark Fitzgerald Date: Tue, 11 Mar 2014 21:48:14 -0700 Subject: [PATCH] API: default value for read_excel sheet (GH6573) --- doc/source/io.rst | 19 ++++++++++++++----- doc/source/release.rst | 1 + doc/source/v0.14.0.txt | 1 + pandas/io/excel.py | 15 ++++++++------- pandas/io/tests/test_excel.py | 9 +++++++++ 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index ac2cabe009694..f43582ded4473 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1820,7 +1820,7 @@ a DataFrame. See the :ref:`cookbook` for some advanced strategies Besides ``read_excel`` you can also read Excel files using the ``ExcelFile`` -class. The following two command are equivalent: +class. The following two commands are equivalent: .. code-block:: python @@ -1843,10 +1843,13 @@ the sheet names using the ``sheet_names`` attribute. .. versionadded:: 0.13 There are now two ways to read in sheets from an Excel file. You can provide -either the index of a sheet or its name. If the value provided is an integer -then it is assumed that the integer refers to the index of a sheet, otherwise -if a string is passed then it is assumed that the string refers to the name of -a particular sheet in the file. +either the index of a sheet or its name by passing different values for +``sheetname``. + +- Pass a string to refer to the name of a particular sheet in the workbook. +- Pass an integer to refer to the index of a sheet. Indices follow Python + convention, beginning at 0. +- The default value is ``sheetname=0``. This reads the first sheet. Using the sheet name: @@ -1860,6 +1863,12 @@ Using the sheet index: read_excel('path_to_file.xls', 0, index_col=None, na_values=['NA']) +Using all default values: + +.. 
code-block:: python + + read_excel('path_to_file.xls') + It is often the case that users will insert columns to do temporary computations in Excel and you may not want to read in those columns. `read_excel` takes a `parse_cols` keyword to allow you to specify a subset of columns to parse. diff --git a/doc/source/release.rst b/doc/source/release.rst index 434477d071c4b..d6c319cc0be01 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -63,6 +63,7 @@ New features API Changes ~~~~~~~~~~~ +- ``read_excel`` uses 0 as the default sheet (:issue:`6573`) - ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being indexed. These will be excluded. This will make pandas conform more with pandas/numpy indexing of out-of-bounds values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index d773f3e7df799..fe9e291c02d10 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -15,6 +15,7 @@ Highlights include: API changes ~~~~~~~~~~~ +- ``read_excel`` uses 0 as the default sheet (:issue:`6573`) - ``iloc`` will now accept out-of-bounds indexers for slices, e.g. a value that exceeds the length of the object being indexed. These will be excluded. This will make pandas conform more with pandas/numpy indexing of out-of-bounds values. 
A single indexer / list of indexers that is out-of-bounds will still raise diff --git a/pandas/io/excel.py b/pandas/io/excel.py index e1c956c625091..0abb2a789f1ab 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -49,17 +49,17 @@ def get_writer(engine_name): raise ValueError("No Excel writer '%s'" % engine_name) -def read_excel(io, sheetname, **kwds): +def read_excel(io, sheetname=0, **kwds): """Read an Excel table into a pandas DataFrame Parameters ---------- io : string, file-like object or xlrd workbook If a string, expected to be a path to xls or xlsx file - sheetname : string - Name of Excel sheet + sheetname : string or int, default 0 + Name of Excel sheet or the zero-based index of the sheet header : int, default 0 - Row to use for the column labels of the parsed DataFrame + Row to use for the column labels of the parsed DataFrame skiprows : list-like Rows to skip at the beginning (0-indexed) skip_footer : int, default 0 @@ -147,7 +147,7 @@ def __init__(self, io, **kwds): raise ValueError('Must explicitly set engine if not passing in' ' buffer or path for io.') - def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, + def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, convert_float=True, has_index_names=False, **kwds): @@ -200,7 +200,8 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, if skipfooter is not None: skip_footer = skipfooter - return self._parse_excel(sheetname, header=header, skiprows=skiprows, + return self._parse_excel(sheetname=sheetname, header=header, + skiprows=skiprows, index_col=index_col, has_index_names=has_index_names, parse_cols=parse_cols, @@ -244,7 +245,7 @@ def _excel2num(x): else: return i in parse_cols - def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, + def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0, 
index_col=None, has_index_names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, convert_float=True, diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 46e5fd74fbbfd..55a0bc0dfd879 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -445,6 +445,15 @@ def test_roundtrip(self): na_values=[88, 88.0]) tm.assert_frame_equal(self.frame, recons) + # GH 6573 + self.frame.to_excel(path, 'Sheet1') + recons = read_excel(path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path, '0') + recons = read_excel(path, index_col=0) + tm.assert_frame_equal(self.frame, recons) + def test_mixed(self): _skip_if_no_xlrd()