Skip to content

Commit a2a5b63

Browse files
gfyoung, U-GFWY2\Greg
authored and
U-GFWY2\Greg
committed
BUG: Respect column indices for dict-like na_values
Closes pandas-dev issue gh-14203.
1 parent a20ab74 commit a2a5b63

File tree

4 files changed

+47
-14
lines changed

4 files changed

+47
-14
lines changed

doc/source/whatsnew/v0.19.2.txt

+1
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ Bug Fixes
3030
- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
3131
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
3232
- Bug in ``pd.read_csv`` in which aliasing was being done for ``na_values`` when passed in as a dictionary (:issue:`14203`)
33+
- Bug in ``pd.read_csv`` in which column indices for a dict-like ``na_values`` were not being respected (:issue:`14203`)
3334
- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`)
3435
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
3536

pandas/io/parsers.py

+21-3
Original file line number | Diff line number | Diff line change
@@ -2123,9 +2123,27 @@ def _clean_mapping(mapping):
21232123
else:
21242124
clean_dtypes = _clean_mapping(self.dtype)
21252125

2126-
return self._convert_to_ndarrays(data, self.na_values, self.na_fvalues,
2127-
self.verbose, clean_conv,
2128-
clean_dtypes)
2126+
# Apply NA values.
2127+
clean_na_values = {}
2128+
clean_na_fvalues = {}
2129+
2130+
if isinstance(self.na_values, dict):
2131+
for col in self.na_values:
2132+
na_value = self.na_values[col]
2133+
na_fvalue = self.na_fvalues[col]
2134+
2135+
if isinstance(col, int) and col not in self.orig_names:
2136+
col = self.orig_names[col]
2137+
2138+
clean_na_values[col] = na_value
2139+
clean_na_fvalues[col] = na_fvalue
2140+
else:
2141+
clean_na_values = self.na_values
2142+
clean_na_fvalues = self.na_fvalues
2143+
2144+
return self._convert_to_ndarrays(data, clean_na_values,
2145+
clean_na_fvalues, self.verbose,
2146+
clean_conv, clean_dtypes)
21292147

21302148
def _to_recarray(self, data, columns):
21312149
dtypes = []

pandas/io/tests/parser/na_values.py

+10
Original file line number | Diff line number | Diff line change
@@ -279,3 +279,13 @@ def test_na_values_dict_aliasing(self):
279279

280280
tm.assert_frame_equal(out, expected)
281281
tm.assert_dict_equal(na_values, na_values_copy)
282+
283+
def test_na_values_dict_col_index(self):
284+
# see gh-14203
285+
286+
data = 'a\nfoo\n1'
287+
na_values = {0: 'foo'}
288+
289+
out = self.read_csv(StringIO(data), na_values=na_values)
290+
expected = DataFrame({'a': [np.nan, 1]})
291+
tm.assert_frame_equal(out, expected)

pandas/parser.pyx

+15-11
Original file line numberDiff line numberDiff line change
@@ -1251,19 +1251,23 @@ cdef class TextReader:
12511251
return None, set()
12521252

12531253
if isinstance(self.na_values, dict):
1254+
key = None
12541255
values = None
1256+
12551257
if name is not None and name in self.na_values:
1256-
values = self.na_values[name]
1257-
if values is not None and not isinstance(values, list):
1258-
values = list(values)
1259-
fvalues = self.na_fvalues[name]
1260-
if fvalues is not None and not isinstance(fvalues, set):
1261-
fvalues = set(fvalues)
1262-
else:
1263-
if i in self.na_values:
1264-
return self.na_values[i], self.na_fvalues[i]
1265-
else:
1266-
return _NA_VALUES, set()
1258+
key = name
1259+
elif i in self.na_values:
1260+
key = i
1261+
else: # No na_values provided for this column.
1262+
return _NA_VALUES, set()
1263+
1264+
values = self.na_values[key]
1265+
if values is not None and not isinstance(values, list):
1266+
values = list(values)
1267+
1268+
fvalues = self.na_fvalues[key]
1269+
if fvalues is not None and not isinstance(fvalues, set):
1270+
fvalues = set(fvalues)
12671271

12681272
return _ensure_encoded(values), fvalues
12691273
else:

0 commit comments

Comments
 (0)