diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 1717b00664f92..efc21c2d91f84 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2956,12 +2956,19 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
 
         index : bool, default True
             Write row names (index).
-        index_label : str or sequence, or False, default None
-            Column label for index column(s) if desired. If None is given, and
-            `header` and `index` are True, then the index names are used. A
-            sequence should be given if the object uses MultiIndex. If
-            False do not print fields for index names. Use index_label=False
-            for easier importing in R.
+        index_label : bool or str or sequence, default None
+            Column label for index column(s) if desired. If None is given,
+            it defaults to True when both the header and the index are
+            written and to False otherwise. If True, the index names are
+            used. A sequence should be given if the object uses MultiIndex.
+            If False, do not print fields for index names. Use
+            index_label=False for easier importing in R.
+
+            .. versionchanged:: 0.25.0
+
+               True is now accepted. An explicitly passed `index_label` is
+               also written when `header` is False.
+
         mode : str
             Python write mode, default 'w'.
         encoding : str, optional
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 71b7ef32bebc3..7b7e43c814a36 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -50,7 +50,18 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
 
         self.header = header
         self.index = index
-        self.index_label = index_label
+
+        # When index_label is not given, default to writing the index names
+        # only if both the header and the index are written.
+        if index_label is None:
+            if self.header is False or self.header is None or not self.index:
+                self.index_label = False
+            else:
+                self.index_label = True
+        else:
+            # an explicitly passed index_label is used as is
+            self.index_label = index_label
+
         self.mode = mode
         if encoding is None:
             encoding = 'ascii' if compat.PY2 else 'utf-8'
@@ -188,6 +199,38 @@ def save(self):
             for _fh in handles:
                 _fh.close()
 
+    def _index_label_encoder(self):
+        """
+        Build the index labels to write in the header row.
+
+        Must not be called when ``self.index_label`` is False.
+
+        Returns
+        -------
+        index_label : list
+            Index labels; the index names when ``self.index_label`` is True.
+        encoded_labels : list
+            A copy of `index_label` that the caller may extend.
+        """
+        index_label = self.index_label
+        obj = self.obj
+
+        if index_label is True:
+            if isinstance(obj.index, ABCMultiIndex):
+                names = obj.index.names
+            else:
+                names = [obj.index.name]
+            # only unnamed levels get an empty label; falsy names such
+            # as 0 must be kept
+            index_label = ['' if name is None else name for name in names]
+        elif not isinstance(index_label,
+                            (list, tuple, np.ndarray, ABCIndexClass)):
+            # a scalar label for a single index
+            index_label = [index_label]
+
+        encoded_labels = list(index_label)
+        return index_label, encoded_labels
+
     def _save_header(self):
 
         writer = self.writer
@@ -200,8 +243,16 @@ def _save_header(self):
         has_aliases = isinstance(header, (tuple, list, np.ndarray,
                                           ABCIndexClass))
 
-        if not (has_aliases or self.header):
-            return
+        if not (has_aliases or header):
+            # nothing to write when index labels are disabled or the
+            # index itself is not written
+            if index_label is False or not self.index:
+                return
+            # only index labels, padded so the row is as wide as the data
+            index_label, encoded_labels = self._index_label_encoder()
+            encoded_labels.extend([''] * len(cols))
+            writer.writerow(encoded_labels)
+            return
         if has_aliases:
             if len(header) != len(cols):
                 raise ValueError(('Writing {ncols} cols but got {nalias} '
@@ -215,25 +266,7 @@ def _save_header(self):
         if self.index:
             # should write something for index label
             if index_label is not False:
-                if index_label is None:
-                    if isinstance(obj.index, ABCMultiIndex):
-                        index_label = []
-                        for i, name in enumerate(obj.index.names):
-                            if name is None:
-                                name = ''
-                            index_label.append(name)
-                    else:
-                        index_label = obj.index.name
-                        if index_label is None:
-                            index_label = ['']
-                        else:
-                            index_label = [index_label]
-                elif not isinstance(index_label,
-                                    (list, tuple, np.ndarray, ABCIndexClass)):
-                    # given a string for a DF with Index
-                    index_label = [index_label]
-
-                encoded_labels = list(index_label)
+                index_label, encoded_labels = self._index_label_encoder()
             else:
                 encoded_labels = []
 
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index 917cd9a04efaf..bfc5ca0b9640b 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -862,6 +862,20 @@ def test_to_csv_quote_none(self):
         expected = tm.convert_rows_list_to_csv_str(expected_rows)
         assert result == expected
 
+    def test_to_csv_index_leading_comma(self):
+        df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
+                       index=['one', 'two', 'three'])
+
+        buf = StringIO()
+        df.to_csv(buf)
+
+        expected_rows = [',A,B',
+                         'one,1,4',
+                         'two,2,5',
+                         'three,3,6']
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        assert buf.getvalue() == expected
+
     def test_to_csv_index_no_leading_comma(self):
         df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                        index=['one', 'two', 'three'])
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 1a28cafa2b520..f89dafeef65b5 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -537,3 +537,64 @@ def test_to_csv_compression(self, compression_only,
             result = pd.read_csv(path, index_col=0,
                                  compression=read_compression)
             tm.assert_frame_equal(result, df)
+
+    @pytest.mark.parametrize("header, index_label, expected_rows", [
+        (False, True, ['index.name,,', '0,0,0', '1,0,0']),
+        (True, True, ['index.name,0,1', '0,0,0', '1,0,0']),
+        (False, False, ['0,0,0', '1,0,0']),
+        (True, False, ['0,1', '0,0,0', '1,0,0']),
+        (False, None, ['0,0,0', '1,0,0']),
+        (True, None, ['index.name,0,1', '0,0,0', '1,0,0']),
+        (True, "new_index", ['new_index,0,1', '0,0,0', '1,0,0']),
+        (True, ["new_index"], ['new_index,0,1', '0,0,0', '1,0,0'])
+    ])
+    def test_to_csv_header_single_index(self, header, index_label,
+                                        expected_rows):
+        # GH 24546
+        df = pd.DataFrame(np.zeros((2, 2), dtype=int))
+        df.index.name = 'index.name'
+        df.columns.name = 'columns.name'
+
+        result = df.to_csv(header=header, index_label=index_label)
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        assert result == expected
+
+    @pytest.mark.parametrize("header, index_label, expected_rows", [
+        (False, True, ['index.name.0,index.name.1,,', 'a,b,0,0', 'a,c,0,0']),
+        (True, True, ['index.name.0,index.name.1,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (False, False, ['a,b,0,0', 'a,c,0,0']),
+        (True, False, ['0,1', 'a,b,0,0', 'a,c,0,0']),
+        (False, None, ['a,b,0,0', 'a,c,0,0']),
+        (True, None, ['index.name.0,index.name.1,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (True, ("index1", "index2"),
+         ['index1,index2,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (True, ["index1", "index2"],
+         ['index1,index2,0,1', 'a,b,0,0', 'a,c,0,0'])
+    ])
+    def test_to_csv_header_multi_index(self, header, index_label,
+                                       expected_rows):
+        # GH 24546
+        df = pd.DataFrame(np.zeros((2, 2), dtype=int))
+        df.index = pd.MultiIndex.from_product([['a'], ['b', 'c']], names=[
+            'index.name.0', 'index.name.1'])
+
+        result = df.to_csv(header=header, index_label=index_label)
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        assert result == expected
+
+    def test_to_csv_index_label_falsy_level_name(self):
+        # a level named 0 must not be written as an empty label
+        index = pd.MultiIndex.from_tuples([('a', 'b')], names=[0, 1])
+        df = pd.DataFrame({'A': [2]}, index=index)
+
+        result = df.to_csv()
+        expected = tm.convert_rows_list_to_csv_str(['0,1,A', 'a,b,2'])
+        assert result == expected
+
+    def test_to_csv_index_label_without_index(self):
+        # no index labels may be written when the index is not written
+        df = pd.DataFrame({'A': [1]}, index=pd.Index(['x'], name='idx'))
+
+        result = df.to_csv(header=False, index=False, index_label='idx')
+        expected = tm.convert_rows_list_to_csv_str(['1'])
+        assert result == expected