From 15ec67f1b2841dc51ba712a64f5c8c6b7f4b0f6d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sun, 14 Aug 2016 20:38:54 -0400 Subject: [PATCH] ENH: Allow true_values and false_values options in read_excel closes #13347 Author: willayd Squashes the following commits: 242a6a6 ENH: Allow true_values and false_values options in read_excel --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/io/excel.py | 26 ++++++++++++++++++++++---- pandas/io/tests/test_excel.py | 11 +++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f3a6736ff9920..ed2278c3116f0 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -513,6 +513,7 @@ Other enhancements - ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`) - ``astype()`` will now accept a dict of column name to data types mapping as the ``dtype`` argument. (:issue:`12086`) - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json <io.jsonl>` (:issue:`9180`) +- :func:`read_excel` now supports the ``true_values`` and ``false_values`` keyword arguments (:issue:`13347`) .. _whatsnew_0190.api: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 5e4dd4379a8e3..11aeb4962a610 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -86,6 +86,16 @@ either be integers or column labels, values are functions that take one input argument, the Excel cell content, and return the transformed content. +true_values : list, default None + Values to consider as True + + .. versionadded:: 0.19.0 + +false_values : list, default None + Values to consider as False + + .. 
versionadded:: 0.19.0 + parse_cols : int or list, default None * If None then parse all columns, * If int then indicates last column to be parsed @@ -173,7 +183,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, index_col=None, names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, converters=None, - engine=None, squeeze=False, **kwds): + true_values=None, false_values=None, engine=None, squeeze=False, + **kwds): if not isinstance(io, ExcelFile): io = ExcelFile(io, engine=engine) @@ -184,7 +195,8 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0, date_parser=date_parser, na_values=na_values, thousands=thousands, convert_float=convert_float, has_index_names=has_index_names, skip_footer=skip_footer, converters=converters, - squeeze=squeeze, **kwds) + true_values=true_values, false_values=false_values, squeeze=squeeze, + **kwds) class ExcelFile(object): @@ -242,7 +254,8 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, names=None, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, has_index_names=None, - converters=None, squeeze=False, **kwds): + converters=None, true_values=None, false_values=None, + squeeze=False, **kwds): """ Parse specified sheet(s) into a DataFrame @@ -261,6 +274,8 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0, skip_footer=skip_footer, convert_float=convert_float, converters=converters, + true_values=true_values, + false_values=false_values, squeeze=squeeze, **kwds) @@ -301,7 +316,8 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, skip_footer=0, index_col=None, has_index_names=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, convert_float=True, - verbose=False, squeeze=False, **kwds): + true_values=None, false_values=None, 
verbose=False, + squeeze=False, **kwds): skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: @@ -479,6 +495,8 @@ def _parse_cell(cell_contents, cell_typ): thousands=thousands, parse_dates=parse_dates, date_parser=date_parser, + true_values=true_values, + false_values=false_values, skiprows=skiprows, skipfooter=skip_footer, squeeze=squeeze, diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 449c27482e0a5..d163b05aa01d4 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1781,6 +1781,17 @@ def test_write_lists_dict(self): read = read_excel(path, 'Sheet1', header=0) tm.assert_frame_equal(read, expected) + # GH13347 + def test_true_and_false_value_options(self): + df = pd.DataFrame([['foo', 'bar']], columns=['col1', 'col2']) + expected = df.replace({'foo': True, + 'bar': False}) + with ensure_clean(self.ext) as path: + df.to_excel(path) + read_frame = read_excel(path, true_values=['foo'], + false_values=['bar']) + tm.assert_frame_equal(read_frame, expected) + def raise_wrapper(major_ver): def versioned_raise_wrapper(orig_method):