Skip to content

Commit a20ab74

Browse files
committed
BUG: Prevent aliasing of dict na_values
1 parent 837db72 commit a20ab74

File tree

3 files changed

+15
-0
lines changed

3 files changed

+15
-0
lines changed

doc/source/whatsnew/v0.19.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Bug Fixes
2929

3030
- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
3131
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
32+
- Bug in ``pd.read_csv`` in which a dictionary passed as ``na_values`` was aliased internally, so the caller's dictionary was modified in place (:issue:`14203`)
3233
- Bug in ``pd.read_csv`` where reading a file fails if the number of headers equals the number of lines in the file (:issue:`14515`)
3334
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
3435

pandas/io/parsers.py

+1
Original file line numberDiff line numberDiff line change
@@ -2824,6 +2824,7 @@ def _clean_na_values(na_values, keep_default_na=True):
28242824
na_values = []
28252825
na_fvalues = set()
28262826
elif isinstance(na_values, dict):
2827+
na_values = na_values.copy() # Prevent aliasing.
28272828
if keep_default_na:
28282829
for k, v in compat.iteritems(na_values):
28292830
if not is_list_like(v):

pandas/io/tests/parser/na_values.py

+13
Original file line numberDiff line numberDiff line change
@@ -266,3 +266,16 @@ def test_na_values_scalar(self):
266266
out = self.read_csv(StringIO(data), names=names,
267267
na_values={'a': 2, 'b': 1})
268268
tm.assert_frame_equal(out, expected)
269+
270+
def test_na_values_dict_aliasing(self):
271+
na_values = {'a': 2, 'b': 1}
272+
na_values_copy = na_values.copy()
273+
274+
names = ['a', 'b']
275+
data = '1,2\n2,1'
276+
277+
expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names)
278+
out = self.read_csv(StringIO(data), names=names, na_values=na_values)
279+
280+
tm.assert_frame_equal(out, expected)
281+
tm.assert_dict_equal(na_values, na_values_copy)

0 commit comments

Comments
 (0)