Skip to content

Commit 53a61db

Browse files
committed
read_excel - added `comment` as a named argument, plus test_comment_* tests
1 parent 1157d72 commit 53a61db

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

pandas/io/excel.py

+8
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@
158158
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
159159
data will be read in as floats: Excel stores all numbers as floats
160160
internally
161+
comment : str, default None
162+
Comments out the remainder of a line. Character or characters that mark
163+
a comment in the input file. Any data between the comment marker and the end of the line is ignored.
161164
162165
Returns
163166
-------
@@ -299,6 +302,7 @@ def read_excel(io,
299302
thousands=None,
300303
skipfooter=0,
301304
convert_float=True,
305+
comment=None,
302306
**kwds):
303307

304308
# Can't use _deprecate_kwarg since sheetname=None has a special meaning
@@ -332,6 +336,7 @@ def read_excel(io,
332336
thousands=thousands,
333337
skipfooter=skipfooter,
334338
convert_float=convert_float,
339+
comment=comment,
335340
**kwds)
336341

337342

@@ -414,6 +419,7 @@ def parse(self,
414419
thousands=None,
415420
skipfooter=0,
416421
convert_float=True,
422+
comment=None,
417423
**kwds):
418424
"""
419425
Parse specified sheet(s) into a DataFrame
@@ -439,6 +445,7 @@ def parse(self,
439445
thousands=thousands,
440446
skipfooter=skipfooter,
441447
convert_float=convert_float,
448+
comment=comment,
442449
**kwds)
443450

444451
def _should_parse(self, i, usecols):
@@ -493,6 +500,7 @@ def _parse_excel(self,
493500
thousands=None,
494501
skipfooter=0,
495502
convert_float=True,
503+
comment=None,
496504
**kwds):
497505

498506
_validate_header_arg(header)

pandas/tests/io/test_excel.py

+56
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,62 @@ def test_invalid_columns(self):
18581858
with pytest.raises(KeyError):
18591859
write_frame.to_excel(path, 'test1', columns=['C', 'D'])
18601860

1861+
def test_comment_arg(self):
    # Test the comment argument functionality to read_excel: reading with
    # comment='#' must actually drop the commented cells, not merely
    # return an object of the same class.
    with ensure_clean(self.ext) as path:

        # Create file to read in; two cells start with the '#' marker
        write_frame = DataFrame({'A': ['one', '#one', 'one'],
                                 'B': ['two', 'two', '#two']})
        write_frame.to_excel(path, 'test_c')

        # Read file without comment arg, then blank out by hand the cells
        # that the '#' marker is expected to remove
        read_frame = read_excel(path, 'test_c')
        read_frame.iloc[1, 0] = None
        read_frame.iloc[1, 1] = None
        read_frame.iloc[2, 1] = None

        # Read file with comment arg and compare the frame contents
        # (a class-only comparison would pass even if comment were ignored)
        read_frame_commented = read_excel(path, 'test_c', comment='#')
        tm.assert_frame_equal(read_frame, read_frame_commented)
1874+
1875+
def test_comment_default(self):
    # Omitting ``comment`` and passing ``comment=None`` explicitly must
    # give the same frame from read_excel.
    cells = {'A': ['one', '#one', 'one'],
             'B': ['two', 'two', '#two']}

    with ensure_clean(self.ext) as path:
        # Write the fixture sheet that both reads below consume
        DataFrame(cells).to_excel(path, 'test_c')

        implicit_default = read_excel(path, 'test_c')
        explicit_none = read_excel(path, 'test_c', comment=None)

        tm.assert_frame_equal(implicit_default, explicit_none)
1888+
1889+
def test_comment_used(self):
    # Compare the frame read with comment='#' against an expectation built
    # by hand from an uncommented read of the same sheet.
    with ensure_clean(self.ext) as path:

        # Fixture sheet: two cells start with the '#' marker
        source = DataFrame({'A': ['one', '#one', 'one'],
                            'B': ['two', 'two', '#two']})
        source.to_excel(path, 'test_c')

        result = read_excel(path, 'test_c', comment='#')

        # Start from the plain read and blank the (row, column) positions
        # the test expects the comment marker to remove
        expected = read_excel(path, 'test_c')
        for row, col in [(1, 0), (1, 1), (2, 1)]:
            expected.iloc[row, col] = None

        tm.assert_frame_equal(result, expected)
1905+
1906+
def test_comment_emptyline(self):
    # The last written row begins with the comment marker; the frame read
    # back with comment='#' should have the shape of the first row only.
    with ensure_clean(self.ext) as path:

        source = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']})
        source.to_excel(path, index=False)

        # Shape of the written frame with its trailing commented row removed
        expected_shape = source.iloc[0:1, :].shape

        result = read_excel(path, comment='#')
        assert result.shape == expected_shape
1916+
18611917
def test_datetimes(self):
18621918

18631919
# Test writing and reading datetimes. For issue #9139. (xref #9185)

0 commit comments

Comments
 (0)