diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7fa9991138fba..660300e1814e8 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -30,7 +30,7 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ - +- ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) .. _whatsnew_0200.api_breaking: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 6662d106ad85d..d3171ceedfc03 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -21,7 +21,7 @@ from pandas.tseries.period import Period from pandas import json from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass, - string_types) + string_types, OrderedDict) from pandas.core import config from pandas.formats.printing import pprint_thing import pandas.compat as compat @@ -418,9 +418,9 @@ def _parse_cell(cell_contents, cell_typ): sheets = [sheetname] # handle same-type duplicates. - sheets = list(set(sheets)) + sheets = list(OrderedDict.fromkeys(sheets).keys()) - output = {} + output = OrderedDict() for asheetname in sheets: if verbose: diff --git a/pandas/io/tests/data/test_multisheet.xls b/pandas/io/tests/data/test_multisheet.xls index fa37723fcdefb..7b4b9759a1a94 100644 Binary files a/pandas/io/tests/data/test_multisheet.xls and b/pandas/io/tests/data/test_multisheet.xls differ diff --git a/pandas/io/tests/data/test_multisheet.xlsm b/pandas/io/tests/data/test_multisheet.xlsm index 694f8e07d5e29..c6191bc61bc49 100644 Binary files a/pandas/io/tests/data/test_multisheet.xlsm and b/pandas/io/tests/data/test_multisheet.xlsm differ diff --git a/pandas/io/tests/data/test_multisheet.xlsx b/pandas/io/tests/data/test_multisheet.xlsx index 5de07772b276a..dc424a9963253 100644 Binary files a/pandas/io/tests/data/test_multisheet.xlsx and b/pandas/io/tests/data/test_multisheet.xlsx differ diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 998e71076b7c0..a4132cd69141a 100644 
--- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -379,8 +379,12 @@ def test_reading_all_sheets(self): # See PR #9450 basename = 'test_multisheet' dfs = self.get_exceldf(basename, sheetname=None) - expected_keys = ['Alpha', 'Beta', 'Charlie'] + # Deliberately not alphabetical, so that order preservation is tested + expected_keys = ['Charlie', 'Alpha', 'Beta'] tm.assert_contains_all(expected_keys, dfs.keys()) + # Issue 9930 + # Ensure sheet order is preserved + tm.assert_equal(expected_keys, list(dfs.keys())) def test_reading_multiple_specific_sheets(self): # Test reading specific sheetnames by specifying a mixed list