Skip to content

Commit fbe8c0b

Browse files
committed
Merge pull request #10469 from santegoeds/bugfix/fix-csv_reader-multiindex-empty-data
BUG: Fix read_csv bugs when using empty input. GH10467 & GH10413
2 parents 9fef291 + 8523105 commit fbe8c0b

File tree

3 files changed

+84
-6
lines changed

3 files changed

+84
-6
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,5 @@ Bug Fixes
136136

137137
- Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`)
138138
- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`)
139+
140+
- Bug in ``pandas.read_csv`` when reading empty (header-only) input with ``index_col=False`` or with a list such as ``index_col=['a', 'b']`` (:issue:`10413`, :issue:`10467`)

pandas/io/parsers.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -2223,13 +2223,14 @@ def _clean_index_names(columns, index_col):
22232223
def _get_empty_meta(columns, index_col, index_names):
22242224
columns = list(columns)
22252225

2226-
if index_col is not None:
2227-
index = MultiIndex.from_arrays([[]] * len(index_col),
2228-
names=index_names)
2229-
for n in index_col:
2230-
columns.pop(n)
2231-
else:
2226+
if index_col is None or index_col is False:
22322227
index = Index([])
2228+
else:
2229+
index_col = list(index_col)
2230+
index = MultiIndex.from_arrays([[]] * len(index_col), names=index_names)
2231+
index_col.sort()
2232+
for i, n in enumerate(index_col):
2233+
columns.pop(n-i)
22332234

22342235
return index, columns, {}
22352236

pandas/io/tests/test_parsers.py

+75
Original file line numberDiff line numberDiff line change
@@ -2301,6 +2301,81 @@ def test_empty_with_index(self):
23012301
expected = DataFrame([], columns=['y'], index=Index([], name='x'))
23022302
tm.assert_frame_equal(result, expected)
23032303

2304+
def test_emtpy_with_multiindex(self):
2305+
# GH 10467
2306+
data = 'x,y,z'
2307+
result = self.read_csv(StringIO(data), index_col=['x', 'y'])
2308+
expected = DataFrame([], columns=['z'],
2309+
index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y']))
2310+
tm.assert_frame_equal(result, expected)
2311+
2312+
def test_empty_with_reversed_multiindex(self):
2313+
data = 'x,y,z'
2314+
result = self.read_csv(StringIO(data), index_col=[1, 0])
2315+
expected = DataFrame([], columns=['z'],
2316+
index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x']))
2317+
tm.assert_frame_equal(result, expected)
2318+
2319+
def test_empty_index_col_scenarios(self):
2320+
data = 'x,y,z'
2321+
2322+
# None, no index
2323+
index_col, expected = None, DataFrame([], columns=list('xyz')),
2324+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2325+
2326+
# False, no index
2327+
index_col, expected = False, DataFrame([], columns=list('xyz')),
2328+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2329+
2330+
# int, first column
2331+
index_col, expected = 0, DataFrame([], columns=['y', 'z'], index=Index([], name='x'))
2332+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2333+
2334+
# int, not first column
2335+
index_col, expected = 1, DataFrame([], columns=['x', 'z'], index=Index([], name='y'))
2336+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2337+
2338+
# str, first column
2339+
index_col, expected = 'x', DataFrame([], columns=['y', 'z'], index=Index([], name='x'))
2340+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2341+
2342+
# str, not the first column
2343+
index_col, expected = 'y', DataFrame([], columns=['x', 'z'], index=Index([], name='y'))
2344+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2345+
2346+
# list of int
2347+
index_col, expected = [0, 1], DataFrame([], columns=['z'],
2348+
index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y']))
2349+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2350+
2351+
# list of str
2352+
index_col, expected = (
2353+
['x', 'y'],
2354+
DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['x', 'y']))
2355+
)
2356+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2357+
2358+
# list of int, reversed sequence
2359+
index_col, expected = (
2360+
[1, 0],
2361+
DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x']))
2362+
)
2363+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2364+
2365+
# list of str, reversed sequence
2366+
index_col, expected = (
2367+
['y', 'x'],
2368+
DataFrame([], columns=['z'], index=MultiIndex.from_arrays([[]] * 2, names=['y', 'x']))
2369+
)
2370+
tm.assert_frame_equal(self.read_csv(StringIO(data), index_col=index_col), expected)
2371+
2372+
def test_empty_with_index_col_false(self):
2373+
# GH 10413
2374+
data = 'x,y'
2375+
result = self.read_csv(StringIO(data), index_col=False)
2376+
expected = DataFrame([], columns=['x', 'y'])
2377+
tm.assert_frame_equal(result, expected)
2378+
23042379
def test_float_parser(self):
23052380
# GH 9565
23062381
data = '45e-1,4.5,45.,inf,-inf'

0 commit comments

Comments
 (0)