Skip to content

Commit cebc70c

Browse files
WillAyd authored and jorisvandenbossche committed
ENH: Allow true_values and false_values options in read_excel (#14002)
closes #13347. Author: willayd <[email protected]>. Squashes the following commits: 242a6a6 ENH: Allow true_values and false_values options in read_excel
1 parent 5e665b3 commit cebc70c

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

doc/source/whatsnew/v0.19.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ Other enhancements
514514
- ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`)
515515
- ``astype()`` will now accept a dict of column name to data types mapping as the ``dtype`` argument. (:issue:`12086`)
516516
- ``pd.read_json`` and ``DataFrame.to_json`` have gained support for reading and writing json lines with the ``lines`` option; see :ref:`Line delimited json <io.jsonl>` (:issue:`9180`)
517+
- :func:`read_excel` now supports the ``true_values`` and ``false_values`` keyword arguments (:issue:`13347`)
517518

518519
.. _whatsnew_0190.api:
519520

pandas/io/excel.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@
8686
either be integers or column labels, values are functions that take one
8787
input argument, the Excel cell content, and return the transformed
8888
content.
89+
true_values : list, default None
90+
Values to consider as True
91+
92+
.. versionadded:: 0.19.0
93+
94+
false_values : list, default None
95+
Values to consider as False
96+
97+
.. versionadded:: 0.19.0
98+
8999
parse_cols : int or list, default None
90100
* If None then parse all columns,
91101
* If int then indicates last column to be parsed
@@ -173,7 +183,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
173183
index_col=None, names=None, parse_cols=None, parse_dates=False,
174184
date_parser=None, na_values=None, thousands=None,
175185
convert_float=True, has_index_names=None, converters=None,
176-
engine=None, squeeze=False, **kwds):
186+
true_values=None, false_values=None, engine=None, squeeze=False,
187+
**kwds):
177188

178189
if not isinstance(io, ExcelFile):
179190
io = ExcelFile(io, engine=engine)
@@ -184,7 +195,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
184195
date_parser=date_parser, na_values=na_values, thousands=thousands,
185196
convert_float=convert_float, has_index_names=has_index_names,
186197
skip_footer=skip_footer, converters=converters,
187-
squeeze=squeeze, **kwds)
198+
true_values=true_values, false_values=false_values, squeeze=squeeze,
199+
**kwds)
188200

189201

190202
class ExcelFile(object):
@@ -242,7 +254,8 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
242254
names=None, index_col=None, parse_cols=None, parse_dates=False,
243255
date_parser=None, na_values=None, thousands=None,
244256
convert_float=True, has_index_names=None,
245-
converters=None, squeeze=False, **kwds):
257+
converters=None, true_values=None, false_values=None,
258+
squeeze=False, **kwds):
246259
"""
247260
Parse specified sheet(s) into a DataFrame
248261
@@ -261,6 +274,8 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
261274
skip_footer=skip_footer,
262275
convert_float=convert_float,
263276
converters=converters,
277+
true_values=true_values,
278+
false_values=false_values,
264279
squeeze=squeeze,
265280
**kwds)
266281

@@ -301,7 +316,8 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
301316
skip_footer=0, index_col=None, has_index_names=None,
302317
parse_cols=None, parse_dates=False, date_parser=None,
303318
na_values=None, thousands=None, convert_float=True,
304-
verbose=False, squeeze=False, **kwds):
319+
true_values=None, false_values=None, verbose=False,
320+
squeeze=False, **kwds):
305321

306322
skipfooter = kwds.pop('skipfooter', None)
307323
if skipfooter is not None:
@@ -479,6 +495,8 @@ def _parse_cell(cell_contents, cell_typ):
479495
thousands=thousands,
480496
parse_dates=parse_dates,
481497
date_parser=date_parser,
498+
true_values=true_values,
499+
false_values=false_values,
482500
skiprows=skiprows,
483501
skipfooter=skip_footer,
484502
squeeze=squeeze,

pandas/io/tests/test_excel.py

+11
Original file line numberDiff line numberDiff line change
@@ -1781,6 +1781,17 @@ def test_write_lists_dict(self):
17811781
read = read_excel(path, 'Sheet1', header=0)
17821782
tm.assert_frame_equal(read, expected)
17831783

1784+
# GH13347
1785+
def test_true_and_false_value_options(self):
1786+
df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2'])
1787+
expected = df.replace({'foo': True,
1788+
'bar': False})
1789+
with ensure_clean(self.ext) as path:
1790+
df.to_excel(path)
1791+
read_frame = read_excel(path, true_values=['foo'],
1792+
false_values=['bar'])
1793+
tm.assert_frame_equal(read_frame, expected)
1794+
17841795

17851796
def raise_wrapper(major_ver):
17861797
def versioned_raise_wrapper(orig_method):

0 commit comments

Comments
 (0)