Skip to content

Add support for the 'decimal' option to Series.to_csv and DataFrame.to_csv #8448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
mode='w', nanRep=None, encoding=None, quoting=None,
line_terminator='\n', chunksize=None, engine=None,
tupleize_cols=False, quotechar='"', date_format=None,
doublequote=True, escapechar=None):
doublequote=True, escapechar=None, decimal='.'):

self.engine = engine # remove for 0.13
self.obj = obj
Expand All @@ -1181,6 +1181,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
self.sep = sep
self.na_rep = na_rep
self.float_format = float_format
self.decimal = decimal

self.header = header
self.index = index
Expand Down Expand Up @@ -1509,6 +1510,7 @@ def _save_chunk(self, start_i, end_i):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
float_format=self.float_format,
decimal=self.decimal,
date_format=self.date_format)

for col_loc, col in zip(b.mgr_locs, d):
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
mode='w', encoding=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=False, date_format=None, doublequote=True,
escapechar=None, **kwds):
escapechar=None, decimal='.', **kwds):
r"""Write DataFrame to a comma-separated values (csv) file

Parameters
Expand Down Expand Up @@ -1126,6 +1126,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
date_format : string, default None
Format string for datetime objects
cols : kwarg only alias of columns [deprecated]
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for European data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use standard single quotes here? ' instead of ‘ and ’

"""

formatter = fmt.CSVFormatter(self, path_or_buf,
Expand All @@ -1140,7 +1142,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar)
escapechar=escapechar,
decimal=decimal)
formatter.save()

if path_or_buf is None:
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ def _try_cast(self, element):
except: # pragma: no cover
return element

def to_native_types(self, slicer=None, na_rep='', float_format=None,
def to_native_types(self, slicer=None, na_rep='', float_format=None, decimal='.',
**kwargs):
""" convert to our native types format, slicing if desired """

Expand All @@ -1171,10 +1171,17 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
values = np.array(values, dtype=object)
mask = isnull(values)
values[mask] = na_rep
if not float_format and decimal != '.':
float_format = '%f'
if float_format:
imask = (~mask).ravel()
values.flat[imask] = np.array(
[float_format % val for val in values.ravel()[imask]])
if decimal != '.':
imask = (~mask).ravel()
values.flat[imask] = np.array(
[val.replace('.',',',1) for val in values.ravel()[imask]])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you write this in a way that avoids redundant code? e.g., define formatter = lambda x: x.replace('.', decimal, 1) depending on the desired formatting and then use the function

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New proposal submitted


return values.tolist()

def should_store(self, value):
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,7 +2239,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
def to_csv(self, path, index=True, sep=",", na_rep='',
float_format=None, header=False,
index_label=None, mode='w', nanRep=None, encoding=None,
date_format=None):
date_format=None, decimal='.'):
"""
Write Series to a comma-separated values (csv) file

Expand Down Expand Up @@ -2267,14 +2267,16 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
non-ascii, for python versions prior to 3
date_format: string, default None
Format string for datetime objects.
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for European data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same remark here

"""
from pandas.core.frame import DataFrame
df = DataFrame(self)
# result is only a string if no path provided, otherwise None
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
float_format=float_format, header=header,
index_label=index_label, mode=mode, nanRep=nanRep,
encoding=encoding, date_format=date_format)
encoding=encoding, date_format=date_format, decimal=decimal)
if path is None:
return result

Expand Down