diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 63aea96ef3369..fd7744158829f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -995,6 +995,7 @@ I/O - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) - Bug in ``pd.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Bug in ``pd.read_csv()`` when ``index_col`` was specified with ``keep_default_na=False`` and no ``na_values`` (:issue:`15835`) - Added checks in ``pd.read_csv()`` ensuring that values for ``nrows`` and ``chunksize`` are valid (:issue:`15767`) - Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 30b88de91ef76..0080ded1ac03d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2890,7 +2890,7 @@ def _clean_na_values(na_values, keep_default_na=True): if keep_default_na: na_values = _NA_VALUES else: - na_values = [] + na_values = set() na_fvalues = set() elif isinstance(na_values, dict): na_values = na_values.copy() # Prevent aliasing.
diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index 2cbd7cdedf2ab..cf29dbdfef49d 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -11,7 +11,7 @@ import pandas.io.parsers as parsers import pandas.util.testing as tm -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, Index, MultiIndex from pandas.compat import StringIO, range @@ -303,3 +303,12 @@ def test_na_values_uint64(self): expected = DataFrame([[str(2**63), 1], ['', 2]]) out = self.read_csv(StringIO(data), header=None) tm.assert_frame_equal(out, expected) + + def test_empty_na_values_no_default_with_index(self): + # see gh-15835: index_col with keep_default_na=False and no na_values + data = "a,1\nb,2" + + expected = DataFrame({'1': [2]}, index=Index(["b"], name="a")) + out = self.read_csv(StringIO(data), keep_default_na=False, index_col=0) + + tm.assert_frame_equal(out, expected)