Skip to content

Commit 21b154c

Browse files
JanLauGe authored and hexgnu committed
DOC: read_excel doc - fixed formatting and added examples (pandas-dev#18753)
1 parent d9da78a commit 21b154c

File tree

3 files changed

+146
-1
lines changed

3 files changed

+146
-1
lines changed

doc/source/whatsnew/v0.23.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ Other API Changes
205205
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
206206
- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`)
207207
- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`)
208+
- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`)
209+
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
208210

209211
.. _whatsnew_0230.deprecations:
210212

pandas/io/excel.py

+82-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
na_values : scalar, str, list-like, or dict, default None
138138
Additional strings to recognize as NA/NaN. If dict passed, specific
139139
per-column NA values. By default the following values are interpreted
140-
as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), 70) + """'.
140+
as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), 70, subsequent_indent=" ") + """'.
141141
keep_default_na : bool, default True
142142
If na_values are specified and keep_default_na is False the default NaN
143143
values are overridden, otherwise they're appended to.
@@ -148,6 +148,10 @@
148148
this parameter is only necessary for columns stored as TEXT in Excel,
149149
any numeric columns will automatically be parsed, regardless of display
150150
format.
151+
comment : str, default None
152+
Comments out remainder of line. Pass a character or characters to this
153+
argument to indicate comments in the input file. Any data between the
154+
comment string and the end of the current line is ignored.
151155
skip_footer : int, default 0
152156
153157
.. deprecated:: 0.23.0
@@ -164,6 +168,77 @@
164168
parsed : DataFrame or Dict of DataFrames
165169
DataFrame from the passed in Excel file. See notes in sheet_name
166170
argument for more information on when a Dict of Dataframes is returned.
171+
172+
Examples
173+
--------
174+
175+
An example DataFrame written to a local file
176+
177+
>>> df_out = pd.DataFrame([('string1', 1),
178+
... ('string2', 2),
179+
... ('string3', 3)],
180+
... columns=['Name', 'Value'])
181+
>>> df_out
182+
Name Value
183+
0 string1 1
184+
1 string2 2
185+
2 string3 3
186+
>>> df_out.to_excel('tmp.xlsx')
187+
188+
The file can be read using the file name as a string or an open file object:
189+
190+
>>> pd.read_excel('tmp.xlsx')
191+
Name Value
192+
0 string1 1
193+
1 string2 2
194+
2 string3 3
195+
196+
>>> pd.read_excel(open('tmp.xlsx','rb'))
197+
Name Value
198+
0 string1 1
199+
1 string2 2
200+
2 string3 3
201+
202+
Index and header can be specified via the `index_col` and `header` arguments
203+
204+
>>> pd.read_excel('tmp.xlsx', index_col=None, header=None)
205+
0 1 2
206+
0 NaN Name Value
207+
1 0.0 string1 1
208+
2 1.0 string2 2
209+
3 2.0 string3 3
210+
211+
Column types are inferred but can be explicitly specified
212+
213+
>>> pd.read_excel('tmp.xlsx', dtype={'Name':str, 'Value':float})
214+
Name Value
215+
0 string1 1.0
216+
1 string2 2.0
217+
2 string3 3.0
218+
219+
True, False, and NA values, as well as thousands separators, have
220+
defaults, but can also be specified explicitly. Supply the values you
221+
would like as strings or lists of strings.
222+
223+
>>> pd.read_excel('tmp.xlsx',
224+
... na_values=['string1', 'string2'])
225+
Name Value
226+
0 NaN 1
227+
1 NaN 2
228+
2 string3 3
229+
230+
Comment lines in the Excel input file can be skipped using the `comment` kwarg
231+
232+
>>> df = pd.DataFrame({'a': ['1', '#2'], 'b': ['2', '3']})
233+
>>> df.to_excel('tmp.xlsx', index=False)
234+
>>> pd.read_excel('tmp.xlsx')
235+
a b
236+
0 1 2
237+
1 #2 3
238+
239+
>>> pd.read_excel('tmp.xlsx', comment='#')
240+
a b
241+
0 1 2
167242
"""
168243

169244

@@ -223,6 +298,7 @@ def read_excel(io,
223298
parse_dates=False,
224299
date_parser=None,
225300
thousands=None,
301+
comment=None,
226302
skipfooter=0,
227303
convert_float=True,
228304
**kwds):
@@ -256,6 +332,7 @@ def read_excel(io,
256332
parse_dates=parse_dates,
257333
date_parser=date_parser,
258334
thousands=thousands,
335+
comment=comment,
259336
skipfooter=skipfooter,
260337
convert_float=convert_float,
261338
**kwds)
@@ -338,6 +415,7 @@ def parse(self,
338415
parse_dates=False,
339416
date_parser=None,
340417
thousands=None,
418+
comment=None,
341419
skipfooter=0,
342420
convert_float=True,
343421
**kwds):
@@ -363,6 +441,7 @@ def parse(self,
363441
parse_dates=parse_dates,
364442
date_parser=date_parser,
365443
thousands=thousands,
444+
comment=comment,
366445
skipfooter=skipfooter,
367446
convert_float=convert_float,
368447
**kwds)
@@ -417,6 +496,7 @@ def _parse_excel(self,
417496
parse_dates=False,
418497
date_parser=None,
419498
thousands=None,
499+
comment=None,
420500
skipfooter=0,
421501
convert_float=True,
422502
**kwds):
@@ -591,6 +671,7 @@ def _parse_cell(cell_contents, cell_typ):
591671
parse_dates=parse_dates,
592672
date_parser=date_parser,
593673
thousands=thousands,
674+
comment=comment,
594675
skipfooter=skipfooter,
595676
**kwds)
596677

pandas/tests/io/test_excel.py

+62
Original file line numberDiff line numberDiff line change
@@ -1858,6 +1858,68 @@ def test_invalid_columns(self):
18581858
with pytest.raises(KeyError):
18591859
write_frame.to_excel(path, 'test1', columns=['C', 'D'])
18601860

1861+
def test_comment_arg(self):
1862+
# Re issue #18735
1863+
# Test the comment argument functionality to read_excel
1864+
with ensure_clean(self.ext) as path:
1865+
1866+
# Create file to read in
1867+
df = DataFrame({'A': ['one', '#one', 'one'],
1868+
'B': ['two', 'two', '#two']})
1869+
df.to_excel(path, 'test_c')
1870+
1871+
# Read file without comment arg
1872+
result1 = read_excel(path, 'test_c')
1873+
result1.iloc[1, 0] = None
1874+
result1.iloc[1, 1] = None
1875+
result1.iloc[2, 1] = None
1876+
result2 = read_excel(path, 'test_c', comment='#')
1877+
tm.assert_frame_equal(result1, result2)
1878+
1879+
def test_comment_default(self):
1880+
# Re issue #18735
1881+
# Test the comment argument default to read_excel
1882+
with ensure_clean(self.ext) as path:
1883+
1884+
# Create file to read in
1885+
df = DataFrame({'A': ['one', '#one', 'one'],
1886+
'B': ['two', 'two', '#two']})
1887+
df.to_excel(path, 'test_c')
1888+
1889+
# Read file with default and explicit comment=None
1890+
result1 = read_excel(path, 'test_c')
1891+
result2 = read_excel(path, 'test_c', comment=None)
1892+
tm.assert_frame_equal(result1, result2)
1893+
1894+
def test_comment_used(self):
1895+
# Re issue #18735
1896+
# Test the comment argument is working as expected when used
1897+
with ensure_clean(self.ext) as path:
1898+
1899+
# Create file to read in
1900+
df = DataFrame({'A': ['one', '#one', 'one'],
1901+
'B': ['two', 'two', '#two']})
1902+
df.to_excel(path, 'test_c')
1903+
1904+
# Test read_frame_comment against manually produced expected output
1905+
expected = DataFrame({'A': ['one', None, 'one'],
1906+
'B': ['two', None, None]})
1907+
result = read_excel(path, 'test_c', comment='#')
1908+
tm.assert_frame_equal(result, expected)
1909+
1910+
def test_comment_emptyline(self):
1911+
# Re issue #18735
1912+
# Test that read_excel ignores commented lines at the end of file
1913+
with ensure_clean(self.ext) as path:
1914+
1915+
df = DataFrame({'a': ['1', '#2'], 'b': ['2', '3']})
1916+
df.to_excel(path, index=False)
1917+
1918+
# Test that all-comment lines at EoF are ignored
1919+
expected = DataFrame({'a': [1], 'b': [2]})
1920+
result = read_excel(path, comment='#')
1921+
tm.assert_frame_equal(result, expected)
1922+
18611923
def test_datetimes(self):
18621924

18631925
# Test writing and reading datetimes. For issue #9139. (xref #9185)

0 commit comments

Comments
 (0)