|
11 | 11 | import pytest
|
12 | 12 |
|
13 | 13 | from pandas.compat import StringIO, lrange, u
|
| 14 | +from pandas.errors import ParserError |
14 | 15 |
|
15 | 16 | from pandas import DataFrame, Index, MultiIndex
|
16 | 17 | import pandas.util.testing as tm
|
@@ -360,3 +361,47 @@ def test_mangles_multi_index(self):
|
360 | 361 | ('A', 'one.1.1'), ('B', 'two'),
|
361 | 362 | ('B', 'two.1')]))
|
362 | 363 | tm.assert_frame_equal(df, expected)
|
| 364 | + |
@pytest.mark.parametrize("index_col", [None, [0]])
@pytest.mark.parametrize("columns", [None,
                                     ["", "Unnamed"],
                                     ["Unnamed", ""],
                                     ["Unnamed", "NotUnnamed"]])
def test_multi_index_unnamed(self, index_col, columns):
    """Check multi-index header parsing when header cells say "Unnamed".

    See gh-23687.

    A two-row header must not be rejected merely because every name in
    one of its rows contains the string "Unnamed". The condition that
    matters is whether every column in the row actually lacked a name
    (and was therefore handed a placeholder one).

    Parameters
    ----------
    index_col : None or list of int
        Forwarded to ``read_csv``. When it is not None, the CSV text is
        built with an extra leading column.
    columns : None or list of str
        Cells of the first header row. ``None`` stands for a row of
        empty cells, for which ``read_csv`` is expected to raise
        ``ParserError``.
    """
    read_kwargs = dict(header=[0, 1], index_col=index_col)
    with_index = index_col is not None
    first_row = columns or ["", ""]

    # Build the CSV text: first header row, second header row, two
    # data rows. The indexed variant carries one extra leading column.
    if with_index:
        data = (",".join([""] + first_row) +
                "\n,0,1\n0,2,3\n1,4,5\n")
    else:
        data = ",".join(first_row) + "\n0,1\n2,3\n4,5\n"

    if columns is None:
        # Every cell of the first header row is empty, so the parser
        # is expected to refuse the two-row header.
        msg = (r"Passed header=\[0,1\] are too "
               r"many rows for this multi_index of columns")
        with pytest.raises(ParserError, match=msg):
            self.read_csv(StringIO(data), **read_kwargs)
        return

    result = self.read_csv(StringIO(data), **read_kwargs)

    # An empty header cell is expected to come back as a placeholder
    # numbered by its position in the file, which is shifted by one
    # when the leading index column is present.
    shift = 1 if with_index else 0
    top_level = [name or "Unnamed: {i}_level_0".format(i=pos + shift)
                 for pos, name in enumerate(columns)]

    expected_columns = MultiIndex.from_tuples(zip(top_level, ["0", "1"]))
    expected = DataFrame([[2, 3], [4, 5]], columns=expected_columns)
    tm.assert_frame_equal(result, expected)
0 commit comments