From 4a7c69486d3bec755a703215312c007c0e771e3c Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Wed, 22 Aug 2018 21:28:14 -0400 Subject: [PATCH 1/3] DOC: Fix DataFrame.to_csv docstring and add an example. GH22459 --- pandas/core/generic.py | 86 +++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0e5204fcd6524..da489fc25ab5c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9360,7 +9360,11 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.'): - r"""Write object to a comma-separated values (csv) file + r""" + Write object to a comma-separated values (csv) file. + + .. versionchanged:: 0.24.0 + The order of arguments for Series was changed. Parameters ---------- @@ -9372,68 +9376,80 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, sep : character, default ',' Field delimiter for the output file. na_rep : string, default '' - Missing data representation + Missing data representation. float_format : string, default None - Format string for floating point numbers + Format string for floating point numbers. columns : sequence, optional - Columns to write + Columns to write. header : boolean or list of string, default True Write out the column names. If a list of strings is given it is - assumed to be aliases for the column names + assumed to be aliases for the column names. .. versionchanged:: 0.24.0 Previously defaulted to False for Series. index : boolean, default True - Write row names (index) + Write row names (index). index_label : string or sequence, or False, default None Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. 
A sequence should be given if the object uses MultiIndex. If False do not print fields for index names. Use index_label=False - for easier importing in R + for easier importing in R. mode : str - Python write mode, default 'w' + Python write mode, default 'w'. encoding : string, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, - default 'infer' - If 'infer' and `path_or_buf` is path-like, then detect compression - from the following extensions: '.gz', '.bz2', '.zip' or '.xz' - (otherwise no compression). - + compression : string, default 'infer' + Compression mode among the following possible values: {'infer', + 'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf` + is path-like, then detect compression from the following + extensions: '.gz', '.bz2', '.zip' or '.xz' (otherwise no + compression). .. versionchanged:: 0.24.0 - 'infer' option added and set to default - line_terminator : string, default ``'\n'`` - The newline character or character sequence to use in the output - file + 'infer' option added and set to default. quoting : optional constant from csv module - defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` + Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` then floats are converted to strings and thus csv.QUOTE_NONNUMERIC - will treat them as non-numeric + will treat them as non-numeric. quotechar : string (length 1), default '\"' - character used to quote fields - doublequote : boolean, default True - Control quoting of `quotechar` inside a field - escapechar : string (length 1), default None - character used to escape `sep` and `quotechar` when appropriate + Character used to quote fields. + line_terminator : string, default ``'\n'`` + The newline character or character sequence to use in the output + file. 
chunksize : int or None - rows to write at a time + Rows to write at a time. tupleize_cols : boolean, default False - .. deprecated:: 0.21.0 - This argument will be removed and will always write each row - of the multi-index as a separate row in the CSV file. - Write MultiIndex columns as a list of tuples (if True) or in the new, expanded format, where each MultiIndex column is a row in the CSV (if False). + .. deprecated:: 0.21.0 + This argument will be removed and will always write each row + of the multi-index as a separate row in the CSV file. date_format : string, default None - Format string for datetime objects - decimal: string, default '.' + Format string for datetime objects. + doublequote : boolean, default True + Control quoting of `quotechar` inside a field. + escapechar : string (length 1), default None + Character used to escape `sep` and `quotechar` when appropriate. + decimal : string, default '.' Character recognized as decimal separator. E.g. use ',' for - European data + European data. - .. versionchanged:: 0.24.0 - The order of arguments for Series was changed. + Returns + ------- + If path_or_buf is None, returns the resulting csv format as a string. + Otherwise returns None. 
+ + See Also + -------- + pandas.read_csv : load a CSV file into a DataFrame + + Examples + -------- + + >>> df = pd.DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) + >>> df.to_csv(decimal=',', sep=';', float_format='%.2f', index=False) + 'col1;col2;col3\n1;a;10,10\n' """ df = self if isinstance(self, ABCDataFrame) else self.to_frame() From b939239e706b7e815b41e6f47653b98e267e27ca Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Thu, 23 Aug 2018 11:48:40 -0400 Subject: [PATCH 2/3] Use standard Python types, add a better example and further See Also section to DataFrame.to_csv docstring --- pandas/core/generic.py | 50 ++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index da489fc25ab5c..5bfdde47971bf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9368,38 +9368,38 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Parameters ---------- - path_or_buf : string or file handle, default None + path_or_buf : str or file handle, default None File path or object, if None is provided the result is returned as a string. .. versionchanged:: 0.24.0 Was previously named "path" for Series. - sep : character, default ',' - Field delimiter for the output file. - na_rep : string, default '' + sep : str, default ',' + String of length 1. Field delimiter for the output file. + na_rep : str, default '' Missing data representation. - float_format : string, default None + float_format : str, default None Format string for floating point numbers. columns : sequence, optional Columns to write. - header : boolean or list of string, default True + header : bool or list of str, default True Write out the column names. If a list of strings is given it is assumed to be aliases for the column names. .. versionchanged:: 0.24.0 Previously defaulted to False for Series. 
- index : boolean, default True + index : bool, default True Write row names (index). - index_label : string or sequence, or False, default None + index_label : str or sequence, or False, default None Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A - sequence should be given if the object uses MultiIndex. If + sequence should be given if the object uses MultiIndex. If False do not print fields for index names. Use index_label=False for easier importing in R. mode : str Python write mode, default 'w'. - encoding : string, optional + encoding : str, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - compression : string, default 'infer' + compression : str, default 'infer' Compression mode among the following possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf` is path-like, then detect compression from the following @@ -9411,27 +9411,28 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` then floats are converted to strings and thus csv.QUOTE_NONNUMERIC will treat them as non-numeric. - quotechar : string (length 1), default '\"' - Character used to quote fields. + quotechar : str, default '\"' + String of length 1. Character used to quote fields. line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file. chunksize : int or None Rows to write at a time. - tupleize_cols : boolean, default False + tupleize_cols : bool, default False Write MultiIndex columns as a list of tuples (if True) or in the new, expanded format, where each MultiIndex column is a row in the CSV (if False). .. deprecated:: 0.21.0 This argument will be removed and will always write each row of the multi-index as a separate row in the CSV file. 
- date_format : string, default None + date_format : str, default None Format string for datetime objects. - doublequote : boolean, default True + doublequote : bool, default True Control quoting of `quotechar` inside a field. - escapechar : string (length 1), default None - Character used to escape `sep` and `quotechar` when appropriate. - decimal : string, default '.' + escapechar : str, default None + String of length 1. Character used to escape `sep` and `quotechar` + when appropriate. + decimal : str, default '.' Character recognized as decimal separator. E.g. use ',' for European data. @@ -9442,14 +9443,15 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, See Also -------- - pandas.read_csv : load a CSV file into a DataFrame + pandas.read_csv : Load a CSV file into a DataFrame. + pandas.DataFrame.to_excel : Write DataFrame to an Excel file. Examples -------- - - >>> df = pd.DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) - >>> df.to_csv(decimal=',', sep=';', float_format='%.2f', index=False) - 'col1;col2;col3\n1;a;10,10\n' + >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], + ... 'mask': ['red', 'purple'], 'weapon': ['sai', 'bo staff']}) + >>> df.to_csv(index=False) + 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' """ df = self if isinstance(self, ABCDataFrame) else self.to_frame() From b33d42f6c2686ccca78c8ae3745ac7a90d43f89a Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Thu, 23 Aug 2018 12:15:11 -0400 Subject: [PATCH 3/3] Add return type and indent example of DataFrame.to_csv docstring. 
--- pandas/core/generic.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5bfdde47971bf..a789c80fc38da 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9438,8 +9438,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Returns ------- - If path_or_buf is None, returns the resulting csv format as a string. - Otherwise returns None. + None or str + If path_or_buf is None, returns the resulting csv format as a + string. Otherwise returns None. See Also -------- @@ -9449,7 +9450,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Examples -------- >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], - ... 'mask': ['red', 'purple'], 'weapon': ['sai', 'bo staff']}) + ... 'mask': ['red', 'purple'], + ... 'weapon': ['sai', 'bo staff']}) >>> df.to_csv(index=False) 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' """