Skip to content

MAINT: Drop has_index_names input from read_excel #16522

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header``
import os
os.remove('path_to_file.xlsx')

.. warning::

Excel files saved in version 0.16.2 or prior that had index names will still be able to be read in,
but the ``has_index_names`` argument must be specified as ``True``.


Parsing Specific Columns
++++++++++++++++++++++++
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Deprecations
Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`)


.. _whatsnew_0210.performance:
Expand Down
40 changes: 13 additions & 27 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
data will be read in as floats: Excel stores all numbers as floats
internally
has_index_names : boolean, default None
DEPRECATED: for version 0.17+ index names will be automatically
inferred based on index_col. To read Excel output from 0.16.2 and
prior that had saved index names, use True.

Returns
-------
Expand Down Expand Up @@ -198,8 +194,8 @@ def get_writer(engine_name):
def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
index_col=None, names=None, parse_cols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None,
convert_float=True, has_index_names=None, converters=None,
dtype=None, true_values=None, false_values=None, engine=None,
convert_float=True, converters=None, dtype=None,
true_values=None, false_values=None, engine=None,
squeeze=False, **kwds):

# Can't use _deprecate_kwarg since sheetname=None has a special meaning
Expand All @@ -218,10 +214,9 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
date_parser=date_parser, na_values=na_values, thousands=thousands,
convert_float=convert_float, has_index_names=has_index_names,
skip_footer=skip_footer, converters=converters, dtype=dtype,
true_values=true_values, false_values=false_values, squeeze=squeeze,
**kwds)
convert_float=convert_float, skip_footer=skip_footer,
converters=converters, dtype=dtype, true_values=true_values,
false_values=false_values, squeeze=squeeze, **kwds)


class ExcelFile(object):
Expand Down Expand Up @@ -283,9 +278,8 @@ def __fspath__(self):
def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
names=None, index_col=None, parse_cols=None, parse_dates=False,
date_parser=None, na_values=None, thousands=None,
convert_float=True, has_index_names=None,
converters=None, true_values=None, false_values=None,
squeeze=False, **kwds):
convert_float=True, converters=None, true_values=None,
false_values=None, squeeze=False, **kwds):
"""
Parse specified sheet(s) into a DataFrame

Expand All @@ -296,7 +290,6 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
return self._parse_excel(sheetname=sheet_name, header=header,
skiprows=skiprows, names=names,
index_col=index_col,
has_index_names=has_index_names,
parse_cols=parse_cols,
parse_dates=parse_dates,
date_parser=date_parser, na_values=na_values,
Expand Down Expand Up @@ -343,23 +336,17 @@ def _excel2num(x):
return i in parse_cols

def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
skip_footer=0, index_col=None, has_index_names=None,
parse_cols=None, parse_dates=False, date_parser=None,
na_values=None, thousands=None, convert_float=True,
true_values=None, false_values=None, verbose=False,
dtype=None, squeeze=False, **kwds):
skip_footer=0, index_col=None, parse_cols=None,
parse_dates=False, date_parser=None, na_values=None,
thousands=None, convert_float=True, true_values=None,
false_values=None, verbose=False, dtype=None,
squeeze=False, **kwds):

skipfooter = kwds.pop('skipfooter', None)
if skipfooter is not None:
skip_footer = skipfooter

_validate_header_arg(header)
if has_index_names is not None:
warn("\nThe has_index_names argument is deprecated; index names "
"will be automatically inferred based on index_col.\n"
"This argmument is still necessary if reading Excel output "
"from 0.16.2 or prior with index names.", FutureWarning,
stacklevel=3)

if 'chunksize' in kwds:
raise NotImplementedError("chunksize keyword of read_excel "
Expand Down Expand Up @@ -511,8 +498,7 @@ def _parse_cell(cell_contents, cell_typ):
else:
last = data[row][col]

if is_list_like(header) and len(header) > 1:
has_index_names = True
has_index_names = is_list_like(header) and len(header) > 1

# GH 12292 : error when read one empty column from excel file
try:
Expand Down
63 changes: 38 additions & 25 deletions pandas/tests/io/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,8 +881,42 @@ def test_excel_multindex_roundtrip(self):
tm.assert_frame_equal(
df, act, check_names=check_names)

def test_excel_oldindex_format(self):
# GH 4679
def test_excel_old_index_format(self):
# see gh-4679
filename = 'test_index_name_pre17' + self.ext
in_file = os.path.join(self.dirpath, filename)

# We detect headers to determine if index names exist, so
# that "index" name in the "names" version of the data will
# now be interpreted as rows that include null data.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did this test change?

Copy link
Member Author

@gfyoung gfyoung May 30, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, because has_index_names got dropped (the test fails in its original form). That's because when we passed in has_index_names, the value of the parameter would only change to True if index names were detected.

data = np.array([[None, None, None, None, None],
['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'],
['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']])
columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4']
mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1',
'R_l0_g2', 'R_l0_g3', 'R_l0_g4'],
['R1', 'R_l1_g0', 'R_l1_g1',
'R_l1_g2', 'R_l1_g3', 'R_l1_g4']],
labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]],
names=[None, None])
si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2',
'R_l0_g3', 'R_l0_g4'], name=None)

expected = pd.DataFrame(data, index=si, columns=columns)

actual = pd.read_excel(in_file, 'single_names')
tm.assert_frame_equal(actual, expected)

expected.index = mi

actual = pd.read_excel(in_file, 'multi_names')
tm.assert_frame_equal(actual, expected)

# The analogous versions of the "names" version data
# where there are explicitly no names for the indices.
data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
Expand All @@ -894,40 +928,19 @@ def test_excel_oldindex_format(self):
['R_l1_g0', 'R_l1_g1', 'R_l1_g2',
'R_l1_g3', 'R_l1_g4']],
labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
names=['R0', 'R1'])
names=[None, None])
si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2',
'R_l0_g3', 'R_l0_g4'], name='R0')

in_file = os.path.join(
self.dirpath, 'test_index_name_pre17' + self.ext)
'R_l0_g3', 'R_l0_g4'], name=None)

expected = pd.DataFrame(data, index=si, columns=columns)
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(
in_file, 'single_names', has_index_names=True)
tm.assert_frame_equal(actual, expected)

expected.index.name = None
actual = pd.read_excel(in_file, 'single_no_names')
tm.assert_frame_equal(actual, expected)
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(
in_file, 'single_no_names', has_index_names=False)
tm.assert_frame_equal(actual, expected)

expected.index = mi
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(
in_file, 'multi_names', has_index_names=True)
tm.assert_frame_equal(actual, expected)

expected.index.names = [None, None]
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1])
tm.assert_frame_equal(actual, expected, check_names=False)
with tm.assert_produces_warning(FutureWarning):
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1],
has_index_names=False)
tm.assert_frame_equal(actual, expected, check_names=False)

def test_read_excel_bool_header_arg(self):
# GH 6114
Expand Down