diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 91083f4018c06..b1e4e114c1833 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -15,6 +15,7 @@ Callable, ClassVar, Hashable, + Iterable, Iterator, Literal, Mapping, @@ -3604,6 +3605,8 @@ def to_csv( decimal: str = ..., errors: OpenFileErrors = ..., storage_options: StorageOptions = ..., + comment: str | None = ..., + comment_lines: Iterable[str] | None = ..., ) -> str: ... @@ -3631,6 +3634,8 @@ def to_csv( decimal: str = ..., errors: OpenFileErrors = ..., storage_options: StorageOptions = ..., + comment: str | None = ..., + comment_lines: Iterable[str] | None = ..., ) -> None: ... @@ -3662,6 +3667,8 @@ def to_csv( decimal: str = ".", errors: OpenFileErrors = "strict", storage_options: StorageOptions = None, + comment: str | None = None, + comment_lines: Iterable[str] | None = None, ) -> str | None: r""" Write object to a comma-separated values (csv) file. @@ -3767,6 +3774,16 @@ def to_csv( .. versionadded:: 1.2.0 + comment : str, default None + Prefix which is written at the start of each line preceding the + body of the output csv. These lines can be used for comments or + metadata which are not part of the csv data itself. Complement + of the pd.read_csv 'comment' param. + comment_lines : Iterable[str], default None + Comment or metadata lines to write to the beginning of the csv + file. Each item is written as its own line, prefixed with the + string given in the 'comment' param. Ignored if 'comment' is not set. 
+ Returns ------- None or str @@ -3833,6 +3850,8 @@ def to_csv( doublequote=doublequote, escapechar=escapechar, storage_options=storage_options, + comment=comment, + comment_lines=comment_lines, ) # ---------------------------------------------------------------------- diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 672f7c1f71b15..f1c05bbe76327 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -10,6 +10,7 @@ TYPE_CHECKING, Any, Hashable, + Iterable, Iterator, Sequence, cast, @@ -67,6 +68,8 @@ def __init__( doublequote: bool = True, escapechar: str | None = None, storage_options: StorageOptions = None, + comment: str | None = None, + comment_lines: Iterable[str] | None = None, ) -> None: self.fmt = formatter @@ -89,6 +92,8 @@ def __init__( self.date_format = date_format self.cols = self._initialize_columns(cols) self.chunksize = self._initialize_chunksize(chunksize) + self.comment = comment + self.comment_lines = comment_lines @property def na_rep(self) -> str: @@ -260,6 +265,8 @@ def save(self) -> None: self._save() def _save(self) -> None: + if self.comment: + self._save_comment_lines() if self._need_to_save_header: self._save_header() self._save_body() @@ -318,3 +325,8 @@ def _save_chunk(self, start_i: int, end_i: int) -> None: self.cols, self.writer, ) + + def _save_comment_lines(self) -> None: + if self.comment_lines: + for line in self.comment_lines: + self.writer.writerow([f"{self.comment}" + line]) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a425944647b5c..96e40304ace39 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1117,6 +1117,8 @@ def to_csv( escapechar: str | None = None, errors: str = "strict", storage_options: StorageOptions = None, + comment: str | None = None, + comment_lines: Iterable[str] | None = None, ) -> str | None: """ Render dataframe as comma-separated file. 
@@ -1147,6 +1149,8 @@ def to_csv( escapechar=escapechar, storage_options=storage_options, formatter=self.fmt, + comment=comment, + comment_lines=comment_lines, ) csv_formatter.save() diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index b863e85cae457..b84a18538308f 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -36,6 +36,13 @@ def salaries_table(datapath): return read_csv(datapath("io", "parser", "data", "salaries.csv"), sep="\t") +@pytest.fixture +def salaries_table_comments(datapath): + return read_csv( + datapath("io", "parser", "data", "salaries_comments.csv"), sep="\t", comment="#" + ) + + @pytest.fixture def feather_file(datapath): return datapath("io", "data", "feather", "feather-0_3_1.feather") diff --git a/pandas/tests/io/parser/data/salaries_comments.csv b/pandas/tests/io/parser/data/salaries_comments.csv new file mode 100644 index 0000000000000..3384ce64c4dd0 --- /dev/null +++ b/pandas/tests/io/parser/data/salaries_comments.csv @@ -0,0 +1,50 @@ +#line one +#line_two +#three lines_hello_world +S X E M +13876 1 1 1 +11608 1 3 0 +18701 1 3 1 +11283 1 2 0 +11767 1 3 0 +20872 2 2 1 +11772 2 2 0 +10535 2 1 0 +12195 2 3 0 +12313 3 2 0 +14975 3 1 1 +21371 3 2 1 +19800 3 3 1 +11417 4 1 0 +20263 4 3 1 +13231 4 3 0 +12884 4 2 0 +13245 5 2 0 +13677 5 3 0 +15965 5 1 1 +12336 6 1 0 +21352 6 3 1 +13839 6 2 0 +22884 6 2 1 +16978 7 1 1 +14803 8 2 0 +17404 8 1 1 +22184 8 3 1 +13548 8 1 0 +14467 10 1 0 +15942 10 2 0 +23174 10 3 1 +23780 10 2 1 +25410 11 2 1 +14861 11 1 0 +16882 12 2 0 +24170 12 3 1 +15990 13 1 0 +26330 13 2 1 +17949 14 2 0 +25685 15 3 1 +27837 16 2 1 +18838 16 2 0 +17483 16 1 0 +19207 17 2 0 +19346 20 1 0 diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 435b9bdade944..2136a0275dd00 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -3,6 +3,7 @@ """ import codecs import errno +import filecmp from functools import partial from io import ( 
BytesIO, @@ -620,3 +621,23 @@ def test_pickle_reader(reader): # GH 22265 with BytesIO() as buffer: pickle.dump(reader, buffer) + + +def test_comment_writer(salaries_table, salaries_table_comments, datapath): + comment = "#" + comment_lines = ["line one", "line_two", "three lines_hello_world"] + tm.assert_frame_equal(salaries_table, salaries_table_comments) + with tm.ensure_clean() as path: + # Check commented table can be read and matches non-commented version + tm.assert_frame_equal(salaries_table, salaries_table_comments) + + # Write comments on uncommented table then validate + salaries_table.to_csv( + path, sep="\t", comment=comment, comment_lines=comment_lines, index=False + ) + + assert filecmp.cmp( + path, + datapath("io", "parser", "data", "salaries_comments.csv"), + shallow=False, + ), "Generated csv file with comments does not match expectation"