diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index da16734dc873b..6b29854144456 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -134,3 +134,5 @@ Bug Fixes
 - Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`)
 
 - Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`)
+
+- Bug in ``pandas.read_csv`` with ``index_col=False`` or with ``index_col=['a', 'b']`` when the input contains only a header row (:issue:`10413`, :issue:`10467`)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index dd972150de0fe..7d4c9df64c0bb 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2223,13 +2223,23 @@ def _clean_index_names(columns, index_col):
 def _get_empty_meta(columns, index_col, index_names):
+    """
+    Build the ``(index, columns, col_dict)`` triple for an empty result.
+
+    ``index_col`` is None or False for no index; otherwise it is iterated
+    as integer positions, which are removed from a copy of ``columns``
+    and yield one empty index level each, named by ``index_names``.
+    """
     columns = list(columns)
 
-    if index_col is not None:
-        index = MultiIndex.from_arrays([[]] * len(index_col),
-                                       names=index_names)
-        for n in index_col:
-            columns.pop(n)
-    else:
+    if index_col is None or index_col is False:
         index = Index([])
+    else:
+        # Remove index columns from the highest position down so that each
+        # ``pop`` leaves the positions of the remaining ones unchanged.
+        positions = sorted(index_col, reverse=True)
+        index = MultiIndex.from_arrays([[]] * len(positions),
+                                       names=index_names)
+        for n in positions:
+            columns.pop(n)
 
     return index, columns, {}
 
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index 52430bb6e0999..3dae9f383db8f 100755
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -2301,6 +2301,81 @@ def test_empty_with_index(self):
         expected = DataFrame([], columns=['y'], index=Index([], name='x'))
         tm.assert_frame_equal(result, expected)
 
+    def test_empty_with_multiindex(self):
+        # GH 10467
+        data = 'x,y,z'
+        result = self.read_csv(StringIO(data), index_col=['x', 'y'])
+        expected = DataFrame([], columns=['z'],
+                             index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y']))
+        tm.assert_frame_equal(result, expected)
+
+    def test_empty_with_reversed_multiindex(self):
+        data = 'x,y,z'
+        result = self.read_csv(StringIO(data), index_col=[1, 0])
+        expected = DataFrame([], columns=['z'],
+                             index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x']))
+        tm.assert_frame_equal(result, expected)
+
+    def test_empty_index_col_scenarios(self):
+        data = 'x,y,z'
+
+        # None, no index
+        index_col, expected = None, DataFrame([], columns=list('xyz'))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # False, no index
+        index_col, expected = False, DataFrame([], columns=list('xyz'))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # int, first column
+        index_col, expected = 0, DataFrame([], columns=['y', 'z'], index=Index([], name='x'))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # int, not first column
+        index_col, expected = 1, DataFrame([], columns=['x', 'z'], index=Index([], name='y'))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # str, first column
+        index_col, expected = 'x', DataFrame([], columns=['y', 'z'], index=Index([], name='x'))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # str, not the first column
+        index_col, expected = 'y', DataFrame([], columns=['x', 'z'], index=Index([], name='y'))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # list of int
+        index_col, expected = [0, 1], DataFrame([], columns=['z'],
+                                                index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y']))
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # list of str
+        index_col, expected = (
+            ['x', 'y'],
+            DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y']))
+        )
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # list of int, reversed sequence
+        index_col, expected = (
+            [1, 0],
+            DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x']))
+        )
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+        # list of str, reversed sequence
+        index_col, expected = (
+            ['y', 'x'],
+            DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x']))
+        )
+        tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
+
+    def test_empty_with_index_col_false(self):
+        # GH 10413
+        data = 'x,y'
+        result = self.read_csv(StringIO(data), index_col=False)
+        expected = DataFrame([], columns=['x', 'y'])
+        tm.assert_frame_equal(result, expected)
+
     def test_float_parser(self):
         # GH 9565
         data = '45e-1,4.5,45.,inf,-inf'