Skip to content

Commit 20a430a

Browse files
r00tavictor
authored and
victor
committed
BUG: Handle read_csv corner case (pandas-dev#21176)
Closes pandas-devgh-21141
1 parent 935cbe6 commit 20a430a

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

doc/source/whatsnew/v0.23.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ Bug Fixes
6464

6565
**I/O**
6666

67+
- Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`)
6768
-
6869
-
6970

pandas/io/parsers.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -3209,12 +3209,22 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
32093209
col = columns[k] if is_integer(k) else k
32103210
dtype[col] = v
32113211

3212-
if index_col is None or index_col is False:
3212+
# Even though we have no data, the "index" of the empty DataFrame
3213+
# could for example still be an empty MultiIndex. Thus, we need to
3214+
# check whether we have any index columns specified, via either:
3215+
#
3216+
# 1) index_col (column indices)
3217+
# 2) index_names (column names)
3218+
#
3219+
# Both must be non-null to ensure a successful construction. Otherwise,
3220+
# we have to create a generic empty Index.
3221+
if (index_col is None or index_col is False) or index_names is None:
32133222
index = Index([])
32143223
else:
32153224
data = [Series([], dtype=dtype[name]) for name in index_names]
32163225
index = _ensure_index_from_sequences(data, names=index_names)
32173226
index_col.sort()
3227+
32183228
for i, n in enumerate(index_col):
32193229
columns.pop(n - i)
32203230

pandas/tests/io/parser/common.py

+15
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,21 @@ def test_csv_mixed_type(self):
238238
out = self.read_csv(StringIO(data))
239239
tm.assert_frame_equal(out, expected)
240240

241+
def test_read_csv_low_memory_no_rows_with_index(self):
242+
if self.engine == "c" and not self.low_memory:
243+
pytest.skip("This is a low-memory specific test")
244+
245+
# see gh-21141
246+
data = """A,B,C
247+
1,1,1,2
248+
2,2,3,4
249+
3,3,4,5
250+
"""
251+
out = self.read_csv(StringIO(data), low_memory=True,
252+
index_col=0, nrows=0)
253+
expected = DataFrame(columns=["A", "B", "C"])
254+
tm.assert_frame_equal(out, expected)
255+
241256
def test_read_csv_dataframe(self):
242257
df = self.read_csv(self.csv1, index_col=0, parse_dates=True)
243258
df2 = self.read_table(self.csv1, sep=',', index_col=0,

0 commit comments

Comments
 (0)