Skip to content

Commit 1f138a4

Browse files
author
y-p
committed
BUG: to_csv should allow writing of dupe cols if within same block GH3095
closes #3095
1 parent ad082bc commit 1f138a4

File tree

2 files changed

+128
-108
lines changed

2 files changed

+128
-108
lines changed

pandas/core/format.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -803,11 +803,20 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
803803
ncols = sum(len(b.items) for b in self.blocks)
804804
self.data =[None] * ncols
805805

806-
# fail early if we have duplicate columns
807-
if len(set(self.cols)) != len(self.cols):
808-
raise Exception("duplicate columns are not permitted in to_csv")
806+
if self.obj.columns.is_unique:
807+
self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
808+
else:
809+
ks = [set(x.items) for x in self.blocks]
810+
u = len(reduce(lambda a,x: a.union(x),ks,set()))
811+
t = sum(map(len,ks))
812+
if u != t:
813+
if len(set(self.cols)) != len(self.cols):
814+
raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
815+
else:
816+
# if columns are not unique and we access this,
817+
# we're doing it wrong
818+
pass
809819

810-
self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
811820

812821
if chunksize is None:
813822
chunksize = (100000/ (len(self.cols) or 1)) or 1
@@ -1002,17 +1011,20 @@ def _save(self):
10021011

10031012
def _save_chunk(self, start_i, end_i):
10041013

1005-
colname_map = self.colname_map
10061014
data_index = self.data_index
10071015

10081016
# create the data for a chunk
10091017
slicer = slice(start_i,end_i)
1010-
for i in range(len(self.blocks)):
1011-
b = self.blocks[i]
1012-
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1013-
for j, k in enumerate(b.items):
1014-
# self.data is a preallocated list
1015-
self.data[colname_map[k]] = d[j]
1018+
if self.obj.columns.is_unique:
1019+
for i in range(len(self.blocks)):
1020+
b = self.blocks[i]
1021+
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1022+
for j, k in enumerate(b.items):
1023+
# self.data is a preallocated list
1024+
self.data[self.colname_map[k]] = d[j]
1025+
else:
1026+
for i in range(len(self.cols)):
1027+
self.data[i] = self.obj.icol(i).values[slicer].tolist()
10161028

10171029
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
10181030

0 commit comments

Comments
 (0)