merge master

charlesdong1991 · charlesdong1991 · commit 1d5237e87828 · 2019-03-03T11:15:41.000+01:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2922,12 +2922,12 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
 
         index : bool, default True
             Write row names (index).
-        index_label : str or sequence, or False, default None
-            Column label for index column(s) if desired. If None is given, and
-            `header` and `index` are True, then the index names are used. A
-            sequence should be given if the object uses MultiIndex. If
-            False do not print fields for index names. Use index_label=False
-            for easier importing in R.
+        index_label : bool or str or sequence, default None
+            Column label(s) for the index column(s). If not passed, it is
+            treated as False when either header or index is False, and as
+            True (the index names are used) otherwise. A str or sequence
+            passed explicitly is used as given. If False, do not print fields
+            for index names. Use index_label=False for easier importing in R.
         mode : str
             Python write mode, default 'w'.
         encoding : str, optional
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
@@ -50,7 +50,17 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
 
         self.header = header
         self.index = index
-        self.index_label = index_label
+        # if index_label is not explicitly passed, it is True only when both
+        # header and index are enabled; otherwise, it is set to False
+        if index_label is None:
+            if self.header is False or self.header is None or not self.index:
+                self.index_label = False
+            else:
+                self.index_label = True
+        else:
+            # if index_label is explicitly passed, use the given value.
+            self.index_label = index_label
+
         self.mode = mode
         if encoding is None:
             encoding = 'ascii' if compat.PY2 else 'utf-8'
@@ -188,6 +198,42 @@ def save(self):
                 for _fh in handles:
                     _fh.close()
 
+    def _index_label_encoder(self):
+        """
+        Encode index label if it is not False.
+
+        Returns
+        -------
+        index_label : list
+            Index label(s), taken from the index names if index_label is True
+        encoded_labels : list
+            Copy of index_label as a new list
+        """
+        index_label = self.index_label
+        obj = self.obj
+
+        if index_label is True:
+            index_label = []
+            # append index label based on index type
+            if isinstance(obj.index, ABCMultiIndex):
+                for name in obj.index.names:
+                    # add empty string if name is None
+                    if name is None:
+                        name = ''
+                    index_label.append(name)
+            else:
+                # if no name, use empty string
+                if obj.index.name is None:
+                    index_label.append('')
+                else:
+                    index_label.append(obj.index.name)
+        elif not isinstance(index_label,
+                            (list, tuple, np.ndarray, ABCIndexClass)):
+            index_label = [index_label]
+
+        encoded_labels = list(index_label)
+        return index_label, encoded_labels
+
     def _save_header(self):
 
         writer = self.writer
@@ -200,8 +246,16 @@ def _save_header(self):
 
         has_aliases = isinstance(header, (tuple, list, np.ndarray,
                                           ABCIndexClass))
-        if not (has_aliases or self.header):
-            return
+        if not (has_aliases or header):
+            # if index_label is False, nothing will display.
+            if index_label is False:
+                return
+            else:
+                # based on index_label value, encoded labels are given
+                index_label, encoded_labels = self._index_label_encoder()
+                encoded_labels.extend([''] * len(obj.columns))
+                writer.writerow(encoded_labels)
+                return
         if has_aliases:
             if len(header) != len(cols):
                 raise ValueError(('Writing {ncols} cols but got {nalias} '
@@ -215,27 +269,16 @@ def _save_header(self):
         if self.index:
             # should write something for index label
             if index_label is not False:
-                if index_label is None:
-                    if isinstance(obj.index, ABCMultiIndex):
-                        index_label = []
-                        for i, name in enumerate(obj.index.names):
-                            if name is None:
-                                name = ''
-                            index_label.append(name)
-                    else:
-                        index_label = obj.index.name
-                        if index_label is None:
-                            index_label = ['']
-                        else:
-                            index_label = [index_label]
-                elif not isinstance(index_label,
-                                    (list, tuple, np.ndarray, ABCIndexClass)):
-                    # given a string for a DF with Index
-                    index_label = [index_label]
-
-                encoded_labels = list(index_label)
+                index_label, encoded_labels = self._index_label_encoder()
             else:
-                encoded_labels = []
+                # if index is multiindex, multiple empty labels are provided
+                if isinstance(obj.index, ABCMultiIndex):
+                    index_label = []
+                    index_label.extend([''] * len(obj.index.names))
+                # if index is single index, list of empty string is provided
+                else:
+                    index_label = ['']
+                encoded_labels = list(index_label)
 
         if not has_mi_columns or has_aliases:
             encoded_labels += list(write_cols)
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -861,14 +861,14 @@ def test_to_csv_quote_none(self):
             expected = tm.convert_rows_list_to_csv_str(expected_rows)
             assert result == expected
 
-    def test_to_csv_index_no_leading_comma(self):
+    def test_to_csv_index_leading_comma(self):
         df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
                        index=['one', 'two', 'three'])
 
         buf = StringIO()
-        df.to_csv(buf, index_label=False)
+        df.to_csv(buf)
 
-        expected_rows = ['A,B',
+        expected_rows = [',A,B',
                          'one,1,4',
                          'two,2,5',
                          'three,3,6']
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -561,3 +561,47 @@ def test_to_csv_compression(self, compression_only,
             result = pd.read_csv(path, index_col=0,
                                  compression=read_compression)
             tm.assert_frame_equal(result, df)
+
+    @pytest.mark.parametrize("header, index_label, expected_rows", [
+        (False, True, ['index.name,,', '0,0,0', '1,0,0']),
+        (True, True, ['index.name,0,1', '0,0,0', '1,0,0']),
+        (False, False, ['0,0,0', '1,0,0']),
+        (True, False, [',0,1', '0,0,0', '1,0,0']),
+        (False, None, ['0,0,0', '1,0,0']),
+        (True, None, ['index.name,0,1', '0,0,0', '1,0,0']),
+        (True, "new_index", ['new_index,0,1', '0,0,0', '1,0,0']),
+        (True, ["new_index"], ['new_index,0,1', '0,0,0', '1,0,0'])
+    ])
+    def test_to_csv_header_single_index(self, header, index_label,
+                                        expected_rows):
+        # issue 24546
+        df = pd.DataFrame(np.zeros((2, 2), dtype=int))
+        df.index.name = 'index.name'
+        df.columns.name = 'columns.name'
+
+        result = df.to_csv(header=header, index_label=index_label)
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        assert result == expected
+
+    @pytest.mark.parametrize("header, index_label, expected_rows", [
+        (False, True, ['index.name.0,index.name.1,,', 'a,b,0,0', 'a,c,0,0']),
+        (True, True, ['index.name.0,index.name.1,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (False, False, ['a,b,0,0', 'a,c,0,0']),
+        (True, False, [',,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (False, None, ['a,b,0,0', 'a,c,0,0']),
+        (True, None, ['index.name.0,index.name.1,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (True, ("index1", "index2"),
+         ['index1,index2,0,1', 'a,b,0,0', 'a,c,0,0']),
+        (True, ["index1", "index2"],
+         ['index1,index2,0,1', 'a,b,0,0', 'a,c,0,0'])
+    ])
+    def test_to_csv_header_multi_index(self, header, index_label,
+                                       expected_rows):
+        # issue 24546
+        df = pd.DataFrame(np.zeros((2, 2), dtype=int))
+        df.index = pd.MultiIndex.from_product([['a'], ['b', 'c']], names=[
+            'index.name.0', 'index.name.1'])
+
+        result = df.to_csv(header=header, index_label=index_label)
+        expected = tm.convert_rows_list_to_csv_str(expected_rows)
+        assert result == expected