From f882497afba5baa1d3470e850888f3616555edd0 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 28 May 2017 04:23:27 -0400 Subject: [PATCH] MAINT: Drop has_index_names input from read_excel --- doc/source/io.rst | 5 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/excel.py | 40 +++++++-------------- pandas/tests/io/test_excel.py | 63 ++++++++++++++++++++------------- 4 files changed, 52 insertions(+), 57 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index bca23dd18a0e3..af337d770c7f7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header`` import os os.remove('path_to_file.xlsx') -.. warning:: - - Excel files saved in version 0.16.2 or prior that had index names will still able to be read in, - but the ``has_index_names`` argument must specified to ``True``. - Parsing Specific Columns ++++++++++++++++++++++++ diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a6b6d704737bd..4b6abca50cd20 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -70,6 +70,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`) .. _whatsnew_0210.performance: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index aa08e5fd378f0..a4d2fabf76a41 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -141,10 +141,6 @@ convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric data will be read in as floats: Excel stores all numbers as floats internally -has_index_names : boolean, default None - DEPRECATED: for version 0.17+ index names will be automatically - inferred based on index_col. To read Excel output from 0.16.2 and - prior that had saved index names, use True. 
Returns ------- @@ -198,8 +194,8 @@ def get_writer(engine_name): def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, - convert_float=True, has_index_names=None, converters=None, - dtype=None, true_values=None, false_values=None, engine=None, + convert_float=True, converters=None, dtype=None, + true_values=None, false_values=None, engine=None, squeeze=False, **kwds): # Can't use _deprecate_kwarg since sheetname=None has a special meaning @@ -218,10 +214,9 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0, sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, thousands=thousands, - convert_float=convert_float, has_index_names=has_index_names, - skip_footer=skip_footer, converters=converters, dtype=dtype, - true_values=true_values, false_values=false_values, squeeze=squeeze, - **kwds) + convert_float=convert_float, skip_footer=skip_footer, + converters=converters, dtype=dtype, true_values=true_values, + false_values=false_values, squeeze=squeeze, **kwds) class ExcelFile(object): @@ -283,9 +278,8 @@ def __fspath__(self): def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, - convert_float=True, has_index_names=None, - converters=None, true_values=None, false_values=None, - squeeze=False, **kwds): + convert_float=True, converters=None, true_values=None, + false_values=None, squeeze=False, **kwds): """ Parse specified sheet(s) into a DataFrame @@ -296,7 +290,6 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0, return self._parse_excel(sheetname=sheet_name, header=header, skiprows=skiprows, names=names, index_col=index_col, - 
has_index_names=has_index_names, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, @@ -343,23 +336,17 @@ def _excel2num(x): return i in parse_cols def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, - skip_footer=0, index_col=None, has_index_names=None, - parse_cols=None, parse_dates=False, date_parser=None, - na_values=None, thousands=None, convert_float=True, - true_values=None, false_values=None, verbose=False, - dtype=None, squeeze=False, **kwds): + skip_footer=0, index_col=None, parse_cols=None, + parse_dates=False, date_parser=None, na_values=None, + thousands=None, convert_float=True, true_values=None, + false_values=None, verbose=False, dtype=None, + squeeze=False, **kwds): skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: skip_footer = skipfooter _validate_header_arg(header) - if has_index_names is not None: - warn("\nThe has_index_names argument is deprecated; index names " - "will be automatically inferred based on index_col.\n" - "This argmument is still necessary if reading Excel output " - "from 0.16.2 or prior with index names.", FutureWarning, - stacklevel=3) if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " @@ -511,8 +498,7 @@ def _parse_cell(cell_contents, cell_typ): else: last = data[row][col] - if is_list_like(header) and len(header) > 1: - has_index_names = True + has_index_names = is_list_like(header) and len(header) > 1 # GH 12292 : error when read one empty column from excel file try: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4441ed815370b..abe3757ec64f3 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -881,8 +881,42 @@ def test_excel_multindex_roundtrip(self): tm.assert_frame_equal( df, act, check_names=check_names) - def test_excel_oldindex_format(self): - # GH 4679 + def test_excel_old_index_format(self): + # see gh-4679 + filename = 
'test_index_name_pre17' + self.ext + in_file = os.path.join(self.dirpath, filename) + + # Index names are only inferred for multi-row headers, so the + # index-name row in the "names" sheets is now read as a data + # row whose values are all null. + data = np.array([[None, None, None, None, None], + ['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], + ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], + ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], + ['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'], + ['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']]) + columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4'] + mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1', + 'R_l0_g2', 'R_l0_g3', 'R_l0_g4'], + ['R1', 'R_l1_g0', 'R_l1_g1', + 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], + labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + names=[None, None]) + si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2', + 'R_l0_g3', 'R_l0_g4'], name=None) + + expected = pd.DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(in_file, 'single_names') + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(in_file, 'multi_names') + tm.assert_frame_equal(actual, expected) + + # The "no_names" counterparts of the sheets read above, + # whose indices explicitly carry no names. 
data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'], ['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'], ['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'], @@ -894,40 +928,19 @@ def test_excel_oldindex_format(self): ['R_l1_g0', 'R_l1_g1', 'R_l1_g2', 'R_l1_g3', 'R_l1_g4']], labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], - names=['R0', 'R1']) + names=[None, None]) si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2', - 'R_l0_g3', 'R_l0_g4'], name='R0') - - in_file = os.path.join( - self.dirpath, 'test_index_name_pre17' + self.ext) + 'R_l0_g3', 'R_l0_g4'], name=None) expected = pd.DataFrame(data, index=si, columns=columns) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'single_names', has_index_names=True) - tm.assert_frame_equal(actual, expected) - expected.index.name = None actual = pd.read_excel(in_file, 'single_no_names') tm.assert_frame_equal(actual, expected) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'single_no_names', has_index_names=False) - tm.assert_frame_equal(actual, expected) expected.index = mi - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel( - in_file, 'multi_names', has_index_names=True) - tm.assert_frame_equal(actual, expected) - expected.index.names = [None, None] actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1]) tm.assert_frame_equal(actual, expected, check_names=False) - with tm.assert_produces_warning(FutureWarning): - actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1], - has_index_names=False) - tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self): # GH 6114