From d1a600f5fbc25a732ce8859db7e59d2936f49c8d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 1 Sep 2016 01:13:57 -0400 Subject: [PATCH] BUG: Don't print stray newline with MultiIndex Closes gh-6618. --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/formats/format.py | 18 +++++++++--------- pandas/tests/formats/test_format.py | 27 +++++++++++++++++++++++++++ pandas/tests/frame/test_to_csv.py | 18 +++--------------- 4 files changed, 40 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 4365c66237752..a02e6ac200e42 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1355,6 +1355,7 @@ Bug Fixes - Bug in using NumPy ufunc with ``PeriodIndex`` to add or subtract integer raise ``IncompatibleFrequency``. Note that using standard operator like ``+`` or ``-`` is recommended, because standard operators use more efficient path (:issue:`13980`) - Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`) - Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`) +- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) diff --git a/pandas/formats/format.py b/pandas/formats/format.py index dd9a852bd8713..4740dd25c419d 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1524,9 +1524,9 @@ def _save_header(self): if not has_mi_columns: encoded_labels += list(write_cols) - - # write out the mi - if has_mi_columns: + writer.writerow(encoded_labels) + else: + # write out the mi columns = obj.columns # write out the names for each level, then ALL of the values for @@ -1547,12 +1547,12 @@ def _save_header(self): writer.writerow(col_line) - # add blanks for the columns, so that we - # have consistent seps - encoded_labels.extend([''] * len(columns)) - - # write out the index label line - writer.writerow(encoded_labels) + # Write out the index line if it's not empty. + # Otherwise, we will print out an extraneous + # blank line between the mi and the data rows. + if encoded_labels and set(encoded_labels) != set(['']): + encoded_labels.extend([''] * len(columns)) + writer.writerow(encoded_labels) def _save(self): diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index 0a2e63a018799..7e55c04fec7cc 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3327,6 +3327,33 @@ def test_to_csv_date_format(self): self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) + def test_to_csv_multi_index(self): + # see gh-6618 + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]])) + + exp = ",1\n,2\n0,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "1\n2\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1],[2]]), + index=pd.MultiIndex.from_arrays([[1],[2]])) + + exp = ",,1\n,,2\n1,2,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "1\n2\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([['foo'],['bar']])) + + exp = ",foo\n,bar\n0,1\n" + self.assertEqual(df.to_csv(), exp) + + exp = "foo\nbar\n1\n" + self.assertEqual(df.to_csv(index=False), exp) + def test_period(self): # GH 12615 df = pd.DataFrame({'A': pd.period_range('2013-01', diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 54bcb670caaef..6d09378ca864e 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -587,21 +587,9 @@ def _make_frame(names=None): df = _make_frame(True) df.to_csv(path, tupleize_cols=False) - # catch invalid headers - with assertRaisesRegexp(CParserError, - 'Passed header=\[0,1,2\] are too many ' - 'rows for this multi_index of columns'): - read_csv(path, tupleize_cols=False, - header=lrange(3), index_col=0) - - with assertRaisesRegexp(CParserError, - 'Passed header=\[0,1,2,3,4,5,6\], len of ' - '7, but only 6 lines in file'): - read_csv(path, tupleize_cols=False, - header=lrange(7), index_col=0) - - for i in [4, 5, 6]: - with tm.assertRaises(CParserError): + for i in [5, 6, 7]: + msg = 'len of {i}, but only 5 lines in file'.format(i=i) + with assertRaisesRegexp(CParserError, msg): read_csv(path, tupleize_cols=False, header=lrange(i), index_col=0)