Skip to content

BUG: Index Name is not displayed with header=False in to_csv #24840

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2956,12 +2956,17 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,

index : bool, default True
Write row names (index).
index_label : str or sequence, or False, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the object uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R.
index_label : bool or str or sequence, default None
Column label for index column(s) if desired. If not passed
explicitly (None), it is treated as False when either `header`
or `index` is False, and as True otherwise, in which case the
index names are used. If a str or sequence is passed explicitly,
that value is used as the index label(s). If False, do not print
fields for index names. Use index_label=False for easier importing in R.

.. versionchanged:: 0.25.0

Previously, passing True was not allowed.

mode : str
Python write mode, default 'w'.
encoding : str, optional
Expand Down
88 changes: 65 additions & 23 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,18 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',

self.header = header
self.index = index
self.index_label = index_label

# if index label is not explicitly called, index label is True if
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blank line here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

# header or index is not False; otherwise, index label is set to False
if index_label is None:
if self.header is False or self.header is None or not self.index:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if self.header is False or self.header is None or not self.index:
if not (self.header or self.index):

Copy link
Member Author

@charlesdong1991 charlesdong1991 Mar 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, thanks @WillAyd. However, it doesn't seem equivalent in my test: by default, self.index is True, so with `if not (self.header or self.index)` the result will always be False whenever self.index keeps its default value, and that doesn't match my intent.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So not self.header or not self.index then?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update this

self.index_label = self.header or self.index

seems equivalent

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it does seem equivalent though,

        header : bool or list of str, default True
            Write out the column names. If a list of strings is given it is
            assumed to be aliases for the column names.
            .. versionchanged:: 0.24.0

For instance, since version 0.24.0 it looks like header can be a list, and if a list is given, we don't want to assign that list to index_label.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try bool(self.header) or self.index then

self.index_label = False
else:
self.index_label = True
else:
# if index label is explicitly called, then use the caller.
self.index_label = index_label

self.mode = mode
if encoding is None:
encoding = 'ascii' if compat.PY2 else 'utf-8'
Expand Down Expand Up @@ -188,6 +199,40 @@ def save(self):
for _fh in handles:
_fh.close()

def _index_label_encoder(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a doc-string

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is _get_column_name_list in format.py, could this be moved into a base class/mixin and reused?

def _get_column_name_list(self):
names = []
columns = self.frame.columns
if isinstance(columns, ABCMultiIndex):
names.extend('' if name is None else name
for name in columns.names)
else:
names.append('' if columns.name is None else columns.name)
return names

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially. Let's revisit once tests are passing again.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

future PR though.

"""
Encode index label if it is not False.

Returns
-------
index_label: list
New index_label given index types
encode_labels: list
List of index labels
"""
index_label = self.index_label
obj = self.obj

if index_label is True:
index_label = []
# append index label based on index type
if isinstance(obj.index, ABCMultiIndex):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we not simply do
index_label = list(map(lambda name: name or '', obj.index.names))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed!

# add empty string if name is None
index_label = list(map(lambda name: name or '',
obj.index.names))
else:
# if no name, use empty string
if obj.index.name is None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need this branch at all

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's needed; it looks like lots of tests will fail if the branch is removed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above: the cases for MultiIndex and Index are the same.

index_label.append('')
else:
index_label.append(obj.index.name)
elif not isinstance(index_label,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When does the code go down this branch?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, should be if index_label is True above. @WillAyd

(list, tuple, np.ndarray, ABCIndexClass)):
index_label = [index_label]

encoded_labels = list(index_label)
return index_label, encoded_labels

def _save_header(self):

writer = self.writer
Expand All @@ -200,8 +245,16 @@ def _save_header(self):

has_aliases = isinstance(header, (tuple, list, np.ndarray,
ABCIndexClass))
if not (has_aliases or self.header):
return
if not (has_aliases or header):
# if index_label is False, nothing will display.
if index_label is False:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it matter if this is None?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It depends: if index_label is not passed explicitly (i.e. it is None), it is set to True when neither header nor index is False; otherwise, it is set to False.

return
else:
# based on index_label value, encoded labels are given
index_label, encoded_labels = self._index_label_encoder()
encoded_labels.extend([''] * len(obj.columns))
writer.writerow(encoded_labels)
return
if has_aliases:
if len(header) != len(cols):
raise ValueError(('Writing {ncols} cols but got {nalias} '
Expand All @@ -215,27 +268,16 @@ def _save_header(self):
if self.index:
# should write something for index label
if index_label is not False:
if index_label is None:
if isinstance(obj.index, ABCMultiIndex):
index_label = []
for i, name in enumerate(obj.index.names):
if name is None:
name = ''
index_label.append(name)
else:
index_label = obj.index.name
if index_label is None:
index_label = ['']
else:
index_label = [index_label]
elif not isinstance(index_label,
(list, tuple, np.ndarray, ABCIndexClass)):
# given a string for a DF with Index
index_label = [index_label]

encoded_labels = list(index_label)
index_label, encoded_labels = self._index_label_encoder()
else:
encoded_labels = []
# if index is multiindex, multiple empty labels are provided
if isinstance(obj.index, ABCMultiIndex):
index_label = []
index_label.extend([''] * len(obj.index.names))
# if index is single index, list of empty string is provided
else:
index_label = ['']
encoded_labels = list(index_label)

if not has_mi_columns or has_aliases:
encoded_labels += list(write_cols)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,14 +862,14 @@ def test_to_csv_quote_none(self):
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected

def test_to_csv_index_no_leading_comma(self):
def test_to_csv_index_leading_comma(self):
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
index=['one', 'two', 'three'])

buf = StringIO()
df.to_csv(buf, index_label=False)
df.to_csv(buf)

expected_rows = ['A,B',
expected_rows = [',A,B',
'one,1,4',
'two,2,5',
'three,3,6']
Expand Down
44 changes: 44 additions & 0 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,3 +537,47 @@ def test_to_csv_compression(self, compression_only,
result = pd.read_csv(path, index_col=0,
compression=read_compression)
tm.assert_frame_equal(result, df)

@pytest.mark.parametrize("header, index_label, expected_rows", [
(False, True, ['index.name,,', '0,0,0', '1,0,0']),
(True, True, ['index.name,0,1', '0,0,0', '1,0,0']),
(False, False, ['0,0,0', '1,0,0']),
(True, False, [',0,1', '0,0,0', '1,0,0']),
(False, None, ['0,0,0', '1,0,0']),
(True, None, ['index.name,0,1', '0,0,0', '1,0,0']),
(True, "new_index", ['new_index,0,1', '0,0,0', '1,0,0']),
(True, ["new_index"], ['new_index,0,1', '0,0,0', '1,0,0'])
])
def test_to_csv_header_single_index(self, header, index_label,
expected_rows):
# GH 24546
df = pd.DataFrame(np.zeros((2, 2), dtype=int))
df.index.name = 'index.name'
df.columns.name = 'columns.name'

result = df.to_csv(header=header, index_label=index_label)
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected

@pytest.mark.parametrize("header, index_label, expected_rows", [
(False, True, ['index.name.0,index.name.1,,', 'a,b,0,0', 'a,c,0,0']),
(True, True, ['index.name.0,index.name.1,0,1', 'a,b,0,0', 'a,c,0,0']),
(False, False, ['a,b,0,0', 'a,c,0,0']),
(True, False, [',,0,1', 'a,b,0,0', 'a,c,0,0']),
(False, None, ['a,b,0,0', 'a,c,0,0']),
(True, None, ['index.name.0,index.name.1,0,1', 'a,b,0,0', 'a,c,0,0']),
(True, ("index1", "index2"),
['index1,index2,0,1', 'a,b,0,0', 'a,c,0,0']),
(True, ["index1", "index2"],
['index1,index2,0,1', 'a,b,0,0', 'a,c,0,0'])
])
def test_to_csv_header_multi_index(self, header, index_label,
expected_rows):
# GH 24546
df = pd.DataFrame(np.zeros((2, 2), dtype=int))
df.index = pd.MultiIndex.from_product([['a'], ['b', 'c']], names=[
'index.name.0', 'index.name.1'])

result = df.to_csv(header=header, index_label=index_label)
expected = tm.convert_rows_list_to_csv_str(expected_rows)
assert result == expected