Skip to content

Add support for 'decimal' option to Series.to_csv and DataFrame.to_csv #8448

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
mode='w', nanRep=None, encoding=None, quoting=None,
line_terminator='\n', chunksize=None, engine=None,
tupleize_cols=False, quotechar='"', date_format=None,
doublequote=True, escapechar=None):
doublequote=True, escapechar=None, decimal='.'):

self.engine = engine # remove for 0.13
self.obj = obj
Expand All @@ -1181,6 +1181,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
self.sep = sep
self.na_rep = na_rep
self.float_format = float_format
self.decimal = decimal

self.header = header
self.index = index
Expand Down Expand Up @@ -1509,6 +1510,7 @@ def _save_chunk(self, start_i, end_i):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
float_format=self.float_format,
decimal=self.decimal,
date_format=self.date_format)

for col_loc, col in zip(b.mgr_locs, d):
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
mode='w', encoding=None, quoting=None,
quotechar='"', line_terminator='\n', chunksize=None,
tupleize_cols=False, date_format=None, doublequote=True,
escapechar=None, **kwds):
escapechar=None, decimal='.', **kwds):
r"""Write DataFrame to a comma-separated values (csv) file

Parameters
Expand Down Expand Up @@ -1126,6 +1126,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
date_format : string, default None
Format string for datetime objects
cols : kwarg only alias of columns [deprecated]
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for European data
"""

formatter = fmt.CSVFormatter(self, path_or_buf,
Expand All @@ -1140,7 +1142,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
tupleize_cols=tupleize_cols,
date_format=date_format,
doublequote=doublequote,
escapechar=escapechar)
escapechar=escapechar,
decimal=decimal)
formatter.save()

if path_or_buf is None:
Expand Down
18 changes: 15 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1161,7 +1161,7 @@ def _try_cast(self, element):
except: # pragma: no cover
return element

def to_native_types(self, slicer=None, na_rep='', float_format=None,
def to_native_types(self, slicer=None, na_rep='', float_format=None, decimal='.',
**kwargs):
""" convert to our native types format, slicing if desired """

Expand All @@ -1171,10 +1171,22 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
values = np.array(values, dtype=object)
mask = isnull(values)
values[mask] = na_rep
if float_format:


if float_format and decimal != '.':
formatter = lambda v : (float_format % v).replace('.',decimal,1)
elif decimal != '.':
formatter = lambda v : ('%g' % v).replace('.',decimal,1)
elif float_format:
formatter = lambda v : float_format % v
else:
formatter = None

if formatter:
imask = (~mask).ravel()
values.flat[imask] = np.array(
[float_format % val for val in values.ravel()[imask]])
[formatter(val) for val in values.ravel()[imask]])

return values.tolist()

def should_store(self, value):
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2239,7 +2239,7 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
def to_csv(self, path, index=True, sep=",", na_rep='',
float_format=None, header=False,
index_label=None, mode='w', nanRep=None, encoding=None,
date_format=None):
date_format=None, decimal='.'):
"""
Write Series to a comma-separated values (csv) file

Expand Down Expand Up @@ -2267,14 +2267,16 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
non-ascii, for python versions prior to 3
date_format: string, default None
Format string for datetime objects.
decimal: string, default '.'
Character recognized as decimal separator. E.g. use ',' for European data
"""
from pandas.core.frame import DataFrame
df = DataFrame(self)
# result is only a string if no path provided, otherwise None
result = df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
float_format=float_format, header=header,
index_label=index_label, mode=mode, nanRep=nanRep,
encoding=encoding, date_format=date_format)
encoding=encoding, date_format=date_format, decimal=decimal)
if path is None:
return result

Expand Down
17 changes: 16 additions & 1 deletion pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2343,7 +2343,22 @@ def test_csv_to_string(self):
df = DataFrame({'col' : [1,2]})
expected = ',col\n0,1\n1,2\n'
self.assertEqual(df.to_csv(), expected)


def test_to_csv_decimal(self):
# GH 8448
df = DataFrame({'col1' : [1], 'col2' : ['a'], 'col3' : [10.1] })
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number as a comment here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to squash the commits as described, but I got the following error:
$git rebase pandas/master
fatal: Needed a single revision
invalid upstream pandas/master

Any idea?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try this:

git checkout master
git pull pandas/master master
git checkout your-branch
git rebase master

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There must be something unusual about how I created my local copy...

When I run the command "git checkout master" in my top-level directory, I get
error: pathspec 'master' did not match any file(s) known to git.

If I do it in the "pandas" directory (the one with the setup.py file) I get:
Already on 'master'
Your branch is up-to-date with 'origin/master'.
but then the second command, git pull pandas/master master, leads to
fatal: 'pandas/master' does not appear to be a git repository
fatal : Could not read from remote repository

By the way, in my repository, I don't think I've created any branch. I've done
everything in master. Was that maybe not a good idea?

On Tue, Mar 3, 2015 at 9:31 AM, Stephan Hoyer [email protected]
wrote:

In pandas/tests/test_format.py
#8448 (comment):

@@ -2343,7 +2343,21 @@ def test_csv_to_string(self):
df = DataFrame({'col' : [1,2]})
expected = ',col\n0,1\n1,2\n'

self.assertEqual(df.to_csv(), expected)

  • def test_to_csv_decimal(self):
  •    df = DataFrame({'col1' : [1], 'col2' : ['a'], 'col3' : [10.1] })
    

Try this:

git checkout master
git pull pandas/master master
git checkout your-branch
git rebase master


Reply to this email directly or view it on GitHub
https://github.com/pydata/pandas/pull/8448/files#r25670074.

Bertrand Haut


expected_default = ',col1,col2,col3\n0,1,a,10.1\n'
self.assertEqual(df.to_csv(), expected_default)

expected_european_excel = ';col1;col2;col3\n0;1;a;10,1\n'
self.assertEqual(df.to_csv(decimal=',',sep=';'), expected_european_excel)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a test for the case where float_format is also specified?


expected_float_format_default = ',col1,col2,col3\n0,1,a,10.10\n'
self.assertEqual(df.to_csv(float_format = '%.2f'), expected_float_format_default)

expected_float_format = ';col1;col2;col3\n0;1;a;10,10\n'
self.assertEqual(df.to_csv(decimal=',',sep=';', float_format = '%.2f'), expected_float_format)

class TestSeriesFormatting(tm.TestCase):
_multiprocess_can_split_ = True
Expand Down