Skip to content

Commit eef04cc

Browse files
committed
Merge pull request #3758 from jreback/parse_na
BUG: (GH3611) revisited; read_excel not passing thru options to ExcelFile.parse
2 parents cf47a42 + 3416180 commit eef04cc

File tree

5 files changed

+32
-20
lines changed

5 files changed

+32
-20
lines changed

RELEASE.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ pandas 0.11.1
120120
- Implement ``__nonzero__`` for ``NDFrame`` objects (GH3691_, GH3696_)
121121
- ``as_matrix`` with mixed signed and unsigned dtypes will result in 2 x the lcd of the unsigned
122122
as an int, maxing with ``int64``, to avoid precision issues (GH3733_)
123+
- ``na_values`` in a list provided to ``read_csv/read_excel`` will match string and numeric versions
124+
e.g. ``na_values=['99']`` will match 99 whether the column ends up being int, float, or string (GH3611_)
123125

124126
**Bug Fixes**
125127

@@ -174,7 +176,7 @@ pandas 0.11.1
174176
- Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return
175177
``np.nan`` or ``np.inf`` as appropriate (GH3590_)
176178
- Fix incorrect dtype on groupby with ``as_index=False`` (GH3610_)
177-
- Fix ``read_csv`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]``
179+
- Fix ``read_csv/read_excel`` to correctly encode identical na_values, e.g. ``na_values=[-999.0,-999]``
178180
was failing (GH3611_)
179181
- Disable HTML output in qtconsole again. (GH3657_)
180182
- Reworked the new repr display logic, which users found confusing. (GH3663_)

pandas/io/excel.py

+2-14
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,7 @@
1313
from pandas.tseries.period import Period
1414
import json
1515

16-
def read_excel(path_or_buf, sheetname, header=0, skiprows=None, skip_footer=0,
17-
index_col=None, parse_cols=None, parse_dates=False,
18-
date_parser=None, na_values=None, thousands=None, chunksize=None,
19-
kind=None, **kwds):
16+
def read_excel(path_or_buf, sheetname, kind=None, **kwds):
2017
"""Read an Excel table into a pandas DataFrame
2118
2219
Parameters
@@ -47,16 +44,7 @@ def read_excel(path_or_buf, sheetname, header=0, skiprows=None, skip_footer=0,
4744
DataFrame from the passed in Excel file
4845
"""
4946
return ExcelFile(path_or_buf,kind=kind).parse(sheetname=sheetname,
50-
header=0, skiprows=None,
51-
skip_footer=0,
52-
index_col=None,
53-
parse_cols=None,
54-
parse_dates=False,
55-
date_parser=None,
56-
na_values=None,
57-
thousands=None,
58-
chunksize=None, kind=None,
59-
**kwds)
47+
kind=kind, **kwds)
6048

6149
class ExcelFile(object):
6250
"""

pandas/io/parsers.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -1853,7 +1853,20 @@ def _clean_na_values(na_values, keep_default_na=True):
18531853
return na_values
18541854

18551855
def _stringify_na_values(na_values):
1856-
return [ str(x) for x in na_values ]
1856+
""" return a stringified and numeric for these values """
1857+
result = []
1858+
for x in na_values:
1859+
result.append(str(x))
1860+
result.append(x)
1861+
try:
1862+
result.append(float(x))
1863+
except:
1864+
pass
1865+
try:
1866+
result.append(int(x))
1867+
except:
1868+
pass
1869+
return result
18571870

18581871
def _clean_index_names(columns, index_col):
18591872
if not _is_index_col(index_col):

pandas/io/tests/test_excel.py

+9
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,15 @@ def _check_extension(self, ext):
285285
recons = read_excel(path, 'test1', index_col=0, na_values=['NA'])
286286
tm.assert_frame_equal(self.frame, recons)
287287

288+
# GH 3611
289+
self.frame.to_excel(path, 'test1', na_rep='88')
290+
recons = read_excel(path, 'test1', index_col=0, na_values=['88'])
291+
tm.assert_frame_equal(self.frame, recons)
292+
293+
self.frame.to_excel(path, 'test1', na_rep='88')
294+
recons = read_excel(path, 'test1', index_col=0, na_values=[88,88.0])
295+
tm.assert_frame_equal(self.frame, recons)
296+
288297
def test_excel_roundtrip_xls_mixed(self):
289298
_skip_if_no_xlrd()
290299
_skip_if_no_xlwt()

pandas/src/inference.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -373,12 +373,12 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
373373
for i from 0 <= i < n:
374374
val = values[i]
375375

376-
if util.is_float_object(val):
377-
floats[i] = complexes[i] = val
378-
seen_float = 1
379-
elif val in na_values:
376+
if val in na_values:
380377
floats[i] = complexes[i] = nan
381378
seen_float = 1
379+
elif util.is_float_object(val):
380+
floats[i] = complexes[i] = val
381+
seen_float = 1
382382
elif val is None:
383383
floats[i] = complexes[i] = nan
384384
seen_float = 1

0 commit comments

Comments
 (0)