Skip to content

Commit 38ee8c7

Browse files
committed
Merge pull request #11182 from chris-b1/header-bool-readers
API: raise on header=bool in parsers
2 parents 5039d22 + 5697abb commit 38ee8c7

File tree

8 files changed

+59
-4
lines changed

8 files changed

+59
-4
lines changed

doc/source/whatsnew/v0.17.0.txt

+17
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,23 @@ Changes to ``Categorical.unique``
907907
cat
908908
cat.unique()
909909

910+
Changes to ``bool`` passed as ``header`` in Parsers
911+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
912+
913+
In earlier versions of pandas, if a bool was passed as the ``header`` argument of
914+
``read_csv``, ``read_excel``, or ``read_html`` it was implicitly converted to
915+
an integer, resulting in ``header=0`` for ``False`` and ``header=1`` for ``True``
916+
(:issue:`6113`)
917+
918+
A ``bool`` input to ``header`` will now raise a ``TypeError``:
919+
920+
.. code-block :: python
921+
922+
In [29]: df = pd.read_csv('data.csv', header=False)
923+
TypeError: Passing a bool to header is invalid. Use header=None for no header or
924+
header=int or list-like of ints to specify the row(s) making up the column names
925+
926+
910927
.. _whatsnew_0170.api_breaking.other:
911928

912929
Other API Changes

pandas/io/common.py

+6
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ def _expand_user(filepath_or_buffer):
194194
return os.path.expanduser(filepath_or_buffer)
195195
return filepath_or_buffer
196196

197+
def _validate_header_arg(header):
    """
    Reject boolean values supplied as the ``header`` argument of a reader.

    Parameters
    ----------
    header : object
        The value the caller passed as ``header``.

    Raises
    ------
    TypeError
        If ``header`` is a Python ``bool`` or a ``numpy.bool_`` scalar.
        Any other value is accepted here and the function returns None.
    """
    # Local import: keeps this check self-contained regardless of what the
    # module imports at the top level.
    import numpy as np

    # ``numpy.bool_`` is not a subclass of ``bool``, so it has to be listed
    # explicitly; otherwise a NumPy boolean (e.g. the result of an array
    # comparison) would slip past this validation.
    if isinstance(header, (bool, np.bool_)):
        raise TypeError("Passing a bool to header is invalid. "
                        "Use header=None for no header or "
                        "header=int or list-like of ints to specify "
                        "the row(s) making up the column names")
197203

198204
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
199205
compression=None):

pandas/io/excel.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from pandas.core.frame import DataFrame
1313
from pandas.io.parsers import TextParser
14-
from pandas.io.common import _is_url, _urlopen
14+
from pandas.io.common import _is_url, _urlopen, _validate_header_arg
1515
from pandas.tseries.period import Period
1616
from pandas import json
1717
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -217,6 +217,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
217217
if skipfooter is not None:
218218
skip_footer = skipfooter
219219

220+
_validate_header_arg(header)
220221
if has_index_names is not None:
221222
warn("\nThe has_index_names argument is deprecated; index names "
222223
"will be automatically inferred based on index_col.\n"

pandas/io/html.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
import numpy as np
1515

16-
from pandas.io.common import _is_url, urlopen, parse_url
16+
from pandas.io.common import _is_url, urlopen, parse_url, _validate_header_arg
1717
from pandas.io.parsers import TextParser
1818
from pandas.compat import (lrange, lmap, u, string_types, iteritems,
1919
raise_with_traceback, binary_type)
@@ -861,5 +861,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
861861
if isinstance(skiprows, numbers.Integral) and skiprows < 0:
862862
raise ValueError('cannot skip rows starting from the end of the '
863863
'data (you passed a negative value)')
864+
_validate_header_arg(header)
864865
return _parse(flavor, io, match, header, index_col, skiprows,
865866
parse_dates, tupleize_cols, thousands, attrs, encoding)

pandas/io/parsers.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from pandas.core.common import AbstractMethodError
1818
from pandas.core.config import get_option
1919
from pandas.io.date_converters import generic_parser
20-
from pandas.io.common import get_filepath_or_buffer
20+
from pandas.io.common import get_filepath_or_buffer, _validate_header_arg
2121
from pandas.tseries import tools
2222

2323
from pandas.util.decorators import Appender
@@ -673,6 +673,8 @@ def _clean_options(self, options, engine):
673673
# really delete this one
674674
keep_default_na = result.pop('keep_default_na')
675675

676+
_validate_header_arg(options['header'])
677+
676678
if index_col is True:
677679
raise ValueError("The value of index_col couldn't be 'True'")
678680
if _is_index_col(index_col):

pandas/io/tests/test_excel.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ def test_read_excel_blank_with_header(self):
384384
tm.assert_frame_equal(actual, expected)
385385

386386

387+
388+
387389
class XlrdTests(ReadingTestsBase):
388390
"""
389391
This is the base class for the xlrd tests, and 3 different file formats
@@ -641,7 +643,12 @@ def test_excel_oldindex_format(self):
641643
has_index_names=False)
642644
tm.assert_frame_equal(actual, expected, check_names=False)
643645

644-
646+
def test_read_excel_bool_header_arg(self):
    """Check that ``read_excel`` raises TypeError for a bool ``header``."""
    # GH 6114
    for flag in (True, False):
        with tm.assertRaises(TypeError):
            # The path is built inside the context manager, as in the
            # original single-expression call.
            workbook = os.path.join(self.dirpath, 'test1' + self.ext)
            pd.read_excel(workbook, header=flag)
645652

646653
class XlsReaderTests(XlrdTests, tm.TestCase):
647654
ext = '.xls'

pandas/io/tests/test_html.py

+5
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,11 @@ def test_wikipedia_states_table(self):
637637
result = self.read_html(data, 'Arizona', header=1)[0]
638638
nose.tools.assert_equal(result['sq mi'].dtype, np.dtype('float64'))
639639

640+
def test_bool_header_arg(self):
    """Check that ``read_html`` raises TypeError for a bool ``header``."""
    # GH 6114
    for flag in (True, False):
        with tm.assertRaises(TypeError):
            read_html(self.spam_data, header=flag)
640645

641646
def _lang_enc(filename):
642647
return os.path.splitext(os.path.basename(filename))[0].split('_')

pandas/io/tests/test_parsers.py

+16
Original file line numberDiff line numberDiff line change
@@ -4117,6 +4117,22 @@ def test_single_char_leading_whitespace(self):
41174117
skipinitialspace=True)
41184118
tm.assert_frame_equal(result, expected)
41194119

4120+
def test_bool_header_arg(self):
    """Check that the text readers raise TypeError for a bool ``header``."""
    # GH 6114
    data = "MyColumn\na\nb\na\nb"
    for flag in (True, False):
        # Same call order as before: read_csv, read_table, read_fwf.
        for reader_name in ('read_csv', 'read_table', 'read_fwf'):
            with tm.assertRaises(TypeError):
                getattr(pd, reader_name)(StringIO(data), header=flag)
4135+
41204136
class TestMiscellaneous(tm.TestCase):
41214137

41224138
# for tests that don't fit into any of the other classes, e.g. those that

0 commit comments

Comments
 (0)