
Commit 0bd5e77

Author: y-p

Merge pull request #3458 from y-p/GH3454
BUG: to_csv handles cols= reordering, dupe cols GH3454
2 parents: 67ad556 + d5d9534

3 files changed: +77 -6 lines


RELEASE.rst (+4 -1)
@@ -48,7 +48,8 @@ pandas 0.12.0
   - Fixed an esoteric excel reading bug, xlrd >= 0.9.0 now required for excel
     support. Should provide python3 support (for reading) which has been
     lacking. (GH3164_)
-  - Fix to_csv issue when having a large number of rows and ``NaT`` in some
+  - Addressed handling of dupe columns in df.to_csv, in both the new and legacy engines (GH3454_, GH3457_)
+  - Fix to_csv issue when having a large number of rows and ``NaT`` in some
     columns (GH3437_)
   - ``.loc`` was not raising when passed an integer list (GH3449_)
   - Unordered time series selection was misbehaving when using label slicing (GH3448_)
@@ -57,6 +58,8 @@ pandas 0.12.0
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
 .. _GH3379: https://github.com/pydata/pandas/issues/3379
+.. _GH3454: https://github.com/pydata/pandas/issues/3454
+.. _GH3457: https://github.com/pydata/pandas/issues/3457
 .. _GH3038: https://github.com/pydata/pandas/issues/3038
 .. _GH3437: https://github.com/pydata/pandas/issues/3437
 .. _GH3455: https://github.com/pydata/pandas/issues/3455
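
For context, a minimal sketch of what the GH3454 entry refers to. The file name is illustrative, and cols= is the 0.12-era keyword (later pandas versions renamed it to columns=):

import pandas as pd

df = pd.DataFrame({'A': [1, 2], 'B': [3, 4], 'C': [5, 6]})

# Before the fix, a cols= selection in a different order than df.columns
# was not reliably honored; after it, the requested selection and
# ordering are what lands in the file.
df.to_csv('reordered.csv', cols=['C', 'A'])

# expected header: ",C,A" rather than ",A,B,C"
print(open('reordered.csv').readline())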

pandas/core/format.py (+21 -4)
@@ -772,6 +772,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
 
         self.engine = engine  # remove for 0.12
         self.obj = obj
+
         self.path_or_buf = path_or_buf
         self.sep = sep
         self.na_rep = na_rep
@@ -789,13 +790,27 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
 
         self.line_terminator = line_terminator
 
-        if cols is None:
-            cols = obj.columns
+        # GH3457
+        if not self.obj.columns.is_unique and engine == 'python':
+            msg = "columns.is_unique == False not supported with engine='python'"
+            raise NotImplementedError(msg)
 
+        if cols is not None:
+            if isinstance(cols, Index):
+                cols = cols.to_native_types(na_rep=na_rep, float_format=float_format)
+            else:
+                cols = list(cols)
+            self.obj = self.obj.loc[:, cols]
+
+        # update columns to include possible multiplicity of dupes
+        # and make sure cols is just a list of labels
+        cols = self.obj.columns
         if isinstance(cols, Index):
             cols = cols.to_native_types(na_rep=na_rep, float_format=float_format)
         else:
             cols = list(cols)
+
+        # save it
         self.cols = cols
@@ -804,7 +819,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
         self.data = [None] * ncols
 
         if self.obj.columns.is_unique:
-            self.colname_map = dict((k, i) for i, k in enumerate(obj.columns))
+            self.colname_map = dict((k, i) for i, k in enumerate(self.obj.columns))
         else:
             ks = [set(x.items) for x in self.blocks]
             u = len(reduce(lambda a, x: a.union(x), ks, set()))
@@ -1024,7 +1039,9 @@ def _save_chunk(self, start_i, end_i):
                 # self.data is a preallocated list
                 self.data[self.colname_map[k]] = d[j]
         else:
-            for i in range(len(self.cols)):
+            # self.obj should contain a proper view of the dataframe,
+            # with the specified ordering of cols if cols was specified
+            for i in range(len(self.obj.columns)):
                 self.data[i] = self.obj.icol(i).values[slicer].tolist()
 
         ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
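
The net effect of the format.py changes, as a usage sketch (the engine= keyword is the transitional option flagged by the "remove for 0.12" comment above, and the file name is illustrative):

import pandas as pd

# Duplicate column labels now round-trip through the default (new) engine.
df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
df.to_csv('dupes.csv')  # header written as ",a,a,b"

# The legacy python engine raises instead of writing ambiguous output (GH3457).
try:
    df.to_csv('dupes.csv', engine='python')
except NotImplementedError as exc:
    print(exc)  # columns.is_unique == False not supported with engine='python'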

pandas/tests/test_frame.py (+52 -1)
@@ -28,6 +28,7 @@
 from pandas.util.testing import (assert_almost_equal,
                                  assert_series_equal,
                                  assert_frame_equal,
+                                 makeCustomDataframe as mkdf,
                                  ensure_clean)
 from pandas.util import py3compat
 from pandas.util.compat import OrderedDict
@@ -4621,9 +4622,59 @@ def test_to_csv_from_csv(self):
         xp.columns = map(int, xp.columns)
         assert_frame_equal(xp, rs)
 
+    def test_to_csv_cols_reordering(self):
+        # GH3454
+        import pandas as pd
+
+        def _check_df(df, cols=None):
+            with ensure_clean() as path:
+                df.to_csv(path, cols=cols, engine='python')
+                rs_p = pd.read_csv(path, index_col=0)
+                df.to_csv(path, cols=cols, chunksize=chunksize)
+                rs_c = pd.read_csv(path, index_col=0)
+
+            if cols:
+                df = df[cols]
+            assert (rs_c.columns == rs_p.columns).all()
+            assert_frame_equal(df, rs_c, check_names=False)
+
+        chunksize = 5
+        N = int(chunksize * 2.5)
+
+        df = mkdf(N, 3)
+        cs = df.columns
+        cols = [cs[2], cs[0]]
+        _check_df(df, cols)
+
+    def test_to_csv_legacy_raises_on_dupe_cols(self):
+        df = mkdf(10, 3)
+        df.columns = ['a', 'a', 'b']
+        with ensure_clean() as path:
+            self.assertRaises(NotImplementedError, df.to_csv, path, engine='python')
+
+    def test_to_csv_new_dupe_cols(self):
+        import pandas as pd
+        def _check_df(df, cols=None):
+            with ensure_clean() as path:
+                df.to_csv(path, cols=cols, chunksize=chunksize)
+                rs_c = pd.read_csv(path, index_col=0)
+                rs_c.columns = df.columns
+                assert_frame_equal(df, rs_c, check_names=False)
+
+        chunksize = 5
+        N = int(chunksize * 2.5)
+
+        # dupe cols
+        df = mkdf(N, 3)
+        df.columns = ['a', 'a', 'b']
+        _check_df(df, None)
+
+        # dupe cols with selection
+        cols = ['b', 'a']
+        _check_df(df, cols)
+
     @slow
     def test_to_csv_moar(self):
-        from pandas.util.testing import makeCustomDataframe as mkdf
         path = '__tmp_to_csv_moar__'
 
         def _do_test(df, path, r_dtype=None, c_dtype=None, rnlvl=None, cnlvl=None,
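
The new tests lean on makeCustomDataframe (mkdf) from pandas.util.testing, which builds frames with synthetic, position-encoded labels. Roughly (a sketch; the exact label format is the helper's internal convention):

from pandas.util.testing import makeCustomDataframe as mkdf

df = mkdf(5, 3)      # 5 rows, 3 columns
print(df.columns)    # synthetic labels along the lines of C_l0_g0, C_l0_g1, C_l0_g2
print(df.index[:2])  # row labels in the same style, e.g. R_l0_g0, R_l0_g1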
