diff --git a/RELEASE.rst b/RELEASE.rst index b5dd3eef68dea..12d2389a8a59b 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -120,6 +120,8 @@ pandas 0.11.1 - Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_) - ``as_matrix`` with mixed signed and unsigned dtypes will result in 2 x the lcd of the unsigned as an int, maxing with ``int64``, to avoid precision issues (GH3733_) + - ``na_values`` in a list provided to ``read_csv/read_excel`` will match string and numeric versions + e.g. ``na_values=['99']`` will match 99 whether the column ends up being int, float, or string (GH3611_) **Bug Fixes** @@ -174,7 +176,7 @@ pandas 0.11.1 - Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return ``np.nan`` or ``np.inf`` as appropriate (GH3590_) - Fix incorrect dtype on groupby with ``as_index=False`` (GH3610_) - - Fix ``read_csv`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]`` + - Fix ``read_csv/read_excel`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]`` was failing (GH3611_) - Disable HTML output in qtconsole again. (GH3657_) - Reworked the new repr display logic, which users found confusing. 
(GH3663_)
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index c23056ce76a62..5b7d13acd99ec 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -13,10 +13,7 @@
 from pandas.tseries.period import Period
 import json
 
-def read_excel(path_or_buf, sheetname, header=0, skiprows=None, skip_footer=0,
-               index_col=None, parse_cols=None, parse_dates=False,
-               date_parser=None, na_values=None, thousands=None, chunksize=None,
-               kind=None, **kwds):
+def read_excel(path_or_buf, sheetname, kind=None, **kwds):
     """Read an Excel table into a pandas DataFrame
 
     Parameters
@@ -47,16 +44,7 @@ def read_excel(path_or_buf, sheetname, header=0, skiprows=None, skip_footer=0,
         DataFrame from the passed in Excel file
     """
     return ExcelFile(path_or_buf,kind=kind).parse(sheetname=sheetname,
-                                                  header=0, skiprows=None,
-                                                  skip_footer=0,
-                                                  index_col=None,
-                                                  parse_cols=None,
-                                                  parse_dates=False,
-                                                  date_parser=None,
-                                                  na_values=None,
-                                                  thousands=None,
-                                                  chunksize=None, kind=None,
-                                                  **kwds)
+                                                  kind=kind, **kwds)
 
 class ExcelFile(object):
     """
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 60028d3f3f831..556d1ab1976b4 100644
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1853,7 +1853,25 @@ def _clean_na_values(na_values, keep_default_na=True):
     return na_values
 
 def _stringify_na_values(na_values):
-    return [ str(x) for x in na_values ]
+    """
+    Expand each NA marker into its string and numeric spellings.
+
+    For every ``x`` in ``na_values`` the result holds ``str(x)``, ``x``
+    itself and, when the conversions succeed, ``float(x)`` and ``int(x)``,
+    so that ``'99'`` also matches ``99`` and ``99.0``. Duplicates are kept.
+    """
+    result = []
+    for x in na_values:
+        result.append(str(x))
+        result.append(x)
+        # Best effort: swallow only the errors a failed numeric conversion
+        # raises, so that e.g. KeyboardInterrupt still propagates.
+        for convert in (float, int):
+            try:
+                result.append(convert(x))
+            except (TypeError, ValueError, OverflowError):
+                pass
+    return result
 
 def _clean_index_names(columns, index_col):
     if not _is_index_col(index_col):
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
index 28242cda0b46b..39e1042d125a2 100644
--- a/pandas/io/tests/test_excel.py
+++ b/pandas/io/tests/test_excel.py
@@ -285,6 +285,15 @@ def _check_extension(self, ext):
             recons = read_excel(path, 'test1', index_col=0, na_values=['NA'])
tm.assert_frame_equal(self.frame, recons) + # GH 3611 + self.frame.to_excel(path, 'test1', na_rep='88') + recons = read_excel(path, 'test1', index_col=0, na_values=['88']) + tm.assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path, 'test1', na_rep='88') + recons = read_excel(path, 'test1', index_col=0, na_values=[88,88.0]) + tm.assert_frame_equal(self.frame, recons) + def test_excel_roundtrip_xls_mixed(self): _skip_if_no_xlrd() _skip_if_no_xlwt() diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 7d13aa8ce6765..5343819b9fbfe 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -373,12 +373,12 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, for i from 0 <= i < n: val = values[i] - if util.is_float_object(val): - floats[i] = complexes[i] = val - seen_float = 1 - elif val in na_values: + if val in na_values: floats[i] = complexes[i] = nan seen_float = 1 + elif util.is_float_object(val): + floats[i] = complexes[i] = val + seen_float = 1 elif val is None: floats[i] = complexes[i] = nan seen_float = 1