|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +""" |
| 4 | +Tests that duplicate columns are handled appropriately when parsed by the |
| 5 | +CSV engine. In general, the expected result is that they are either thoroughly |
| 6 | +de-duplicated (if mangling requested) or ignored otherwise. |
| 7 | +""" |
| 8 | + |
| 9 | +from pandas.compat import StringIO |
| 10 | + |
| 11 | + |
class DupeColumnTests(object):
    """Checks on how duplicated header names come out of the CSV readers.

    This class defines neither ``read_csv`` nor ``read_table``; both are
    looked up on ``self``, so whichever class runs these tests has to
    provide them.
    """

    def test_basic(self):
        """Repeated header names get numeric suffixes from both readers."""
        # TODO: add test for condition "mangle_dupe_cols=False"
        # once it is actually supported (gh-12935)
        csv_text = "a,a,b,b,b\n1,2,3,4,5"
        mangled = ["a", "a.1", "b", "b.1", "b.2"]

        for reader_name in ("read_csv", "read_table"):
            reader = getattr(self, reader_name)

            # Check default behavior.
            result = reader(StringIO(csv_text), sep=",")
            assert list(result.columns) == mangled

            # Asking for mangling explicitly must give the same names.
            result = reader(StringIO(csv_text), sep=",", mangle_dupe_cols=True)
            assert list(result.columns) == mangled

    def test_thorough_mangle(self):
        """A mangled name must not clash with a name already in the header."""
        # see gh-17060
        cases = (
            ("a,a,a.1\n1,2,3",
             ["a", "a.1", "a.1.1"]),
            ("a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6",
             ["a", "a.1", "a.1.1", "a.1.1.1",
              "a.1.1.1.1", "a.1.1.1.1.1"]),
            ("a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7",
             ["a", "a.1", "a.3", "a.1.1",
              "a.2", "a.2.1", "a.3.1"]),
        )

        for csv_text, mangled in cases:
            result = self.read_csv(StringIO(csv_text), sep=",",
                                   mangle_dupe_cols=True)
            assert list(result.columns) == mangled
0 commit comments