Skip to content

ENH: Allow comment lines to be written to CSV outputs. Complements read_csv's comment param #53569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
19 changes: 19 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Callable,
ClassVar,
Hashable,
Iterable,
Iterator,
Literal,
Mapping,
Expand Down Expand Up @@ -3604,6 +3605,8 @@ def to_csv(
decimal: str = ...,
errors: OpenFileErrors = ...,
storage_options: StorageOptions = ...,
comment: str | None = ...,
comment_lines: Iterable[str] | None = ...,
) -> str:
...

Expand Down Expand Up @@ -3631,6 +3634,8 @@ def to_csv(
decimal: str = ...,
errors: OpenFileErrors = ...,
storage_options: StorageOptions = ...,
comment: str | None = ...,
comment_lines: Iterable[str] | None = ...,
) -> None:
...

Expand Down Expand Up @@ -3662,6 +3667,8 @@ def to_csv(
decimal: str = ".",
errors: OpenFileErrors = "strict",
storage_options: StorageOptions = None,
comment: str | None = None,
comment_lines: Iterable[str] | None = None,
) -> str | None:
r"""
Write object to a comma-separated values (csv) file.
Expand Down Expand Up @@ -3767,6 +3774,16 @@ def to_csv(

.. versionadded:: 1.2.0

comment : str, default None
Prefix written at the start of each comment line placed before the
header and body of the output csv. Such lines can carry comments or
metadata which are not part of the csv data itself. Complements the
``comment`` parameter of :func:`read_csv`.
comment_lines : Iterable[str], default None
Comment or metadata lines to write to the beginning of the csv
file. Each item is written as its own row, prefixed with the string
given in the ``comment`` param. Ignored if ``comment`` is not set.

Returns
-------
None or str
Expand Down Expand Up @@ -3833,6 +3850,8 @@ def to_csv(
doublequote=doublequote,
escapechar=escapechar,
storage_options=storage_options,
comment=comment,
comment_lines=comment_lines,
)

# ----------------------------------------------------------------------
Expand Down
12 changes: 12 additions & 0 deletions pandas/io/formats/csvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
TYPE_CHECKING,
Any,
Hashable,
Iterable,
Iterator,
Sequence,
cast,
Expand Down Expand Up @@ -67,6 +68,8 @@ def __init__(
doublequote: bool = True,
escapechar: str | None = None,
storage_options: StorageOptions = None,
comment: str | None = None,
comment_lines: Iterable[str] | None = None,
) -> None:
self.fmt = formatter

Expand All @@ -89,6 +92,8 @@ def __init__(
self.date_format = date_format
self.cols = self._initialize_columns(cols)
self.chunksize = self._initialize_chunksize(chunksize)
self.comment = comment
self.comment_lines = comment_lines

@property
def na_rep(self) -> str:
Expand Down Expand Up @@ -260,6 +265,8 @@ def save(self) -> None:
self._save()

def _save(self) -> None:
if self.comment:
self._save_comment_lines()
if self._need_to_save_header:
self._save_header()
self._save_body()
Expand Down Expand Up @@ -318,3 +325,8 @@ def _save_chunk(self, start_i: int, end_i: int) -> None:
self.cols,
self.writer,
)

def _save_comment_lines(self) -> None:
if self.comment_lines:
for line in self.comment_lines:
self.writer.writerow([f"{self.comment}" + line])
4 changes: 4 additions & 0 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,8 @@ def to_csv(
escapechar: str | None = None,
errors: str = "strict",
storage_options: StorageOptions = None,
comment: str | None = None,
comment_lines: Iterable[str] | None = None,
) -> str | None:
"""
Render dataframe as comma-separated file.
Expand Down Expand Up @@ -1147,6 +1149,8 @@ def to_csv(
escapechar=escapechar,
storage_options=storage_options,
formatter=self.fmt,
comment=comment,
comment_lines=comment_lines,
)
csv_formatter.save()

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/io/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ def salaries_table(datapath):
return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t")


@pytest.fixture
def salaries_table_comments(datapath):
    """
    DataFrame read from ``salaries_comments.csv`` (tab-separated), parsed
    with ``comment="#"``.
    """
    csv_path = datapath("io", "parser", "data", "salaries_comments.csv")
    return read_csv(csv_path, sep="\t", comment="#")


@pytest.fixture
def feather_file(datapath):
    """
    Location of the ``feather-0_3_1.feather`` test file, as resolved by
    the ``datapath`` fixture.
    """
    path_parts = ("io", "data", "feather", "feather-0_3_1.feather")
    return datapath(*path_parts)
Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/io/parser/data/salaries_comments.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#line one
#line_two
#three lines_hello_world
S X E M
13876 1 1 1
11608 1 3 0
18701 1 3 1
11283 1 2 0
11767 1 3 0
20872 2 2 1
11772 2 2 0
10535 2 1 0
12195 2 3 0
12313 3 2 0
14975 3 1 1
21371 3 2 1
19800 3 3 1
11417 4 1 0
20263 4 3 1
13231 4 3 0
12884 4 2 0
13245 5 2 0
13677 5 3 0
15965 5 1 1
12336 6 1 0
21352 6 3 1
13839 6 2 0
22884 6 2 1
16978 7 1 1
14803 8 2 0
17404 8 1 1
22184 8 3 1
13548 8 1 0
14467 10 1 0
15942 10 2 0
23174 10 3 1
23780 10 2 1
25410 11 2 1
14861 11 1 0
16882 12 2 0
24170 12 3 1
15990 13 1 0
26330 13 2 1
17949 14 2 0
25685 15 3 1
27837 16 2 1
18838 16 2 0
17483 16 1 0
19207 17 2 0
19346 20 1 0
21 changes: 21 additions & 0 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import codecs
import errno
import filecmp
from functools import partial
from io import (
BytesIO,
Expand Down Expand Up @@ -620,3 +621,23 @@ def test_pickle_reader(reader):
# GH 22265
with BytesIO() as buffer:
pickle.dump(reader, buffer)


def test_comment_writer(salaries_table, salaries_table_comments, datapath):
    """
    Check ``to_csv`` with ``comment``/``comment_lines`` against the
    checked-in ``salaries_comments.csv`` file.

    Two things are verified: the commented file parses to the same frame
    as the uncommented one, and writing the uncommented frame with the
    same comment lines reproduces the commented file byte for byte.
    """
    comment = "#"
    comment_lines = ["line one", "line_two", "three lines_hello_world"]
    expected_path = datapath("io", "parser", "data", "salaries_comments.csv")

    # Reading: the commented table must match the non-commented version.
    # This needs no temporary file, so it is asserted once, up front.
    tm.assert_frame_equal(salaries_table, salaries_table_comments)

    # Writing: add the comments to the uncommented table, then compare the
    # generated file's contents with the expected file.
    with tm.ensure_clean() as path:
        salaries_table.to_csv(
            path, sep="\t", comment=comment, comment_lines=comment_lines, index=False
        )

        assert filecmp.cmp(
            path,
            expected_path,
            shallow=False,
        ), "Generated csv file with comments does not match expectation"