Skip to content

Commit cfb27c4

Browse files
committed
Merge branch 'master' into pandas-dev#17778
2 parents 5896f93 + c176a3c commit cfb27c4

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

doc/source/whatsnew/v0.21.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ I/O
7979

8080
- Bug in :class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects.
8181
- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`)
82+
- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`)
8283
- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
8384

8485
Plotting

pandas/io/parsers.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,8 @@ def __init__(self, kwds):
12491249

12501250
self.na_values = kwds.get('na_values')
12511251
self.na_fvalues = kwds.get('na_fvalues')
1252+
self.na_filter = kwds.get('na_filter', False)
1253+
12521254
self.true_values = kwds.get('true_values')
12531255
self.false_values = kwds.get('false_values')
12541256
self.as_recarray = kwds.get('as_recarray', False)
@@ -1424,7 +1426,6 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
14241426
elif not self._has_complex_date_col:
14251427
index = self._get_simple_index(alldata, columns)
14261428
index = self._agg_index(index)
1427-
14281429
elif self._has_complex_date_col:
14291430
if not self._name_processed:
14301431
(self.index_names, _,
@@ -1504,8 +1505,12 @@ def _agg_index(self, index, try_parse_dates=True):
15041505
if try_parse_dates and self._should_parse_dates(i):
15051506
arr = self._date_conv(arr)
15061507

1507-
col_na_values = self.na_values
1508-
col_na_fvalues = self.na_fvalues
1508+
if self.na_filter:
1509+
col_na_values = self.na_values
1510+
col_na_fvalues = self.na_fvalues
1511+
else:
1512+
col_na_values = set()
1513+
col_na_fvalues = set()
15091514

15101515
if isinstance(self.na_values, dict):
15111516
col_name = self.index_names[i]
@@ -2060,8 +2065,6 @@ def __init__(self, f, **kwds):
20602065

20612066
self.names_passed = kwds['names'] or None
20622067

2063-
self.na_filter = kwds['na_filter']
2064-
20652068
self.has_index_names = False
20662069
if 'has_index_names' in kwds:
20672070
self.has_index_names = kwds['has_index_names']

pandas/tests/io/parser/na_values.py

+18
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,21 @@ def test_empty_na_values_no_default_with_index(self):
312312
out = self.read_csv(StringIO(data), keep_default_na=False, index_col=0)
313313

314314
tm.assert_frame_equal(out, expected)
315+
316+
def test_no_na_filter_on_index(self):
317+
# see gh-5239
318+
data = "a,b,c\n1,,3\n4,5,6"
319+
320+
# Don't parse NA-values in index when na_filter=False.
321+
out = self.read_csv(StringIO(data), index_col=[1], na_filter=False)
322+
323+
expected = DataFrame({"a": [1, 4], "c": [3, 6]},
324+
index=Index(["", "5"], name="b"))
325+
tm.assert_frame_equal(out, expected)
326+
327+
# Parse NA-values in index when na_filter=True.
328+
out = self.read_csv(StringIO(data), index_col=[1], na_filter=True)
329+
330+
expected = DataFrame({"a": [1, 4], "c": [3, 6]},
331+
index=Index([np.nan, 5.0], name="b"))
332+
tm.assert_frame_equal(out, expected)

0 commit comments

Comments
 (0)