Skip to content

Commit b8382a3

Browse files
committed
BUG: GH3495 change core/format/CSVFormatter.save to handle columns in a
generic way, whether or not they are duplicated
1 parent b4677c1 commit b8382a3

File tree

5 files changed

+198
-119
lines changed

5 files changed

+198
-119
lines changed

RELEASE.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,15 @@ pandas 0.11.1
6161
- Fix regression in a DataFrame apply with axis=1, objects were not being converted back
6262
to base dtypes correctly (GH3480_)
6363
- Fix issue when storing uint dtypes in an HDFStore. (GH3493_)
64-
- Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
65-
- ref_locs support to allow duplicative indices across dtypes (GH3468_)
6664
- Non-unique index support clarified (GH3468_)
6765

68-
- Fix assigning a new index to a duplicate index in a DataFrame would fail
66+
- Fix assigning a new index to a duplicate index in a DataFrame would fail (GH3468_)
6967
- Fix construction of a DataFrame with a duplicate index
70-
- ref_locs support to allow duplicative indices across dtypes
71-
(GH2194_)
68+
- ref_locs support to allow duplicative indices across dtypes;
69+
this allows iget to always find the index (even across dtypes) (GH2194_)
7270
- applymap on a DataFrame with a non-unique index now works
7371
(removed warning) (GH2786_), and fix (GH3230_)
72+
- Fix to_csv to handle non-unique columns (GH3495_)
7473

7574
.. _GH3164: https://github.com/pydata/pandas/issues/3164
7675
.. _GH2786: https://github.com/pydata/pandas/issues/2786
@@ -91,6 +90,7 @@ pandas 0.11.1
9190
.. _GH3468: https://github.com/pydata/pandas/issues/3468
9291
.. _GH3448: https://github.com/pydata/pandas/issues/3448
9392
.. _GH3449: https://github.com/pydata/pandas/issues/3449
93+
.. _GH3495: https://github.com/pydata/pandas/issues/3495
9494
.. _GH3493: https://github.com/pydata/pandas/issues/3493
9595

9696

pandas/core/format.py

+8-27
Original file line numberDiff line numberDiff line change
@@ -820,21 +820,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
820820
self.blocks = self.obj._data.blocks
821821
ncols = sum(len(b.items) for b in self.blocks)
822822
self.data =[None] * ncols
823-
824-
if self.obj.columns.is_unique:
825-
self.colname_map = dict((k,i) for i,k in enumerate(self.obj.columns))
826-
else:
827-
ks = [set(x.items) for x in self.blocks]
828-
u = len(reduce(lambda a,x: a.union(x),ks,set()))
829-
t = sum(map(len,ks))
830-
if u != t:
831-
if len(set(self.cols)) != len(self.cols):
832-
raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
833-
else:
834-
# if columns are not unique and we access this,
835-
# we're doing it wrong
836-
pass
837-
823+
self.column_map = self.obj._data.get_items_map()
838824

839825
if chunksize is None:
840826
chunksize = (100000/ (len(self.cols) or 1)) or 1
@@ -1034,18 +1020,13 @@ def _save_chunk(self, start_i, end_i):
10341020

10351021
# create the data for a chunk
10361022
slicer = slice(start_i,end_i)
1037-
if self.obj.columns.is_unique:
1038-
for i in range(len(self.blocks)):
1039-
b = self.blocks[i]
1040-
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1041-
for j, k in enumerate(b.items):
1042-
# self.data is a preallocated list
1043-
self.data[self.colname_map[k]] = d[j]
1044-
else:
1045-
# self.obj should contain a proper view of the dataframes
1046-
# with the specified ordering of cols if cols was specified
1047-
for i in range(len(self.obj.columns)):
1048-
self.data[i] = self.obj.icol(i).values[slicer].tolist()
1023+
for i in range(len(self.blocks)):
1024+
b = self.blocks[i]
1025+
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1026+
for i, item in enumerate(b.items):
1027+
1028+
# self.data is a preallocated list
1029+
self.data[self.column_map[b][i]] = d[i]
10491030

10501031
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
10511032

0 commit comments

Comments
 (0)