From 5b303ec657208e0d23335b8f9c78e9fcddc76c4e Mon Sep 17 00:00:00 2001 From: y-p Date: Tue, 2 Oct 2012 03:54:37 +0200 Subject: [PATCH] ENH: always use UnicodeWriter in Series/DF to_csv() maybe closes #1966 If the input is NOT pure ascii and no encoding is specified, the python stdlib csv module will die. If the input IS pure ascii, then using UnicodeWriter with utf-8 as encoding will produce the same end result as a pure ascii writer. This change will "just work" for more cases. Also, presumably, internal representations of all text in pandas will eventually be unicode, so this meshes with that program too. There might be a performance issue for large files (is the python csv native?). If so, I think this is still the way to go, with the stdlib csv module becoming the optional path. A lot of issues have touched on csv and unicode, see #206, #300, #680, #705, #1966, and probably more. --- pandas/core/frame.py | 13 +++++-------- pandas/core/series.py | 6 +++--- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7be9f67826497..3150cdf30e560 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1118,7 +1118,7 @@ def _helper_csvexcel(self, writer, na_rep=None, cols=None, def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, - mode='w', nanRep=None, encoding=None, quoting=None): + mode='w', nanRep=None, encoding='utf-8', quoting=None): """ Write DataFrame to a comma-separated values (csv) file @@ -1168,13 +1168,10 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, quoting = csv.QUOTE_MINIMAL try: - if encoding is not None: - csvout = com.UnicodeWriter(f, lineterminator='\n', - delimiter=sep, encoding=encoding, - quoting=quoting) - else: - csvout = csv.writer(f, lineterminator='\n', delimiter=sep, - quoting=quoting) + csvout = com.UnicodeWriter(f, lineterminator='\n', + delimiter=sep, encoding=encoding, + quoting=quoting) + 
self._helper_csvexcel(csvout, na_rep=na_rep, float_format=float_format, cols=cols, header=header, index=index, diff --git a/pandas/core/series.py b/pandas/core/series.py index 7400aa5bde2e7..6eac2679d5dc7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1151,7 +1151,7 @@ def max(self, axis=None, out=None, skipna=True, level=None): @Substitution(name='standard deviation', shortname='stdev', na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc + + @Appender(_stat_doc + """ Normalized by N-1 (unbiased estimator). """) @@ -1164,7 +1164,7 @@ def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, @Substitution(name='variance', shortname='var', na_action=_doc_exclude_na, extras='') - @Appender(_stat_doc + + @Appender(_stat_doc + """ Normalized by N-1 (unbiased estimator). """) @@ -2419,7 +2419,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, def to_csv(self, path, index=True, sep=",", na_rep='', float_format=None, header=False, - index_label=None, mode='w', nanRep=None, encoding=None): + index_label=None, mode='w', nanRep=None, encoding='utf-8'): """ Write Series to a comma-separated values (csv) file