Skip to content

Commit 1434776

Browse files
committed
Merge pull request #4797 from jreback/clean_csv
CLN: default for tupleize_cols is now False for both to_csv and read_csv. Fair warning in 0.12 (GH3604)
2 parents 04314e6 + e19e7f0 commit 1434776

File tree

6 files changed

+16
-18
lines changed

6 files changed

+16
-18
lines changed

doc/source/io.rst

+6-9
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ They can take a number of arguments:
153153
time and lower memory usage.
154154
- ``mangle_dupe_cols``: boolean, default True, then duplicate columns will be specified
155155
as 'X.0'...'X.N', rather than 'X'...'X'
156-
- ``tupleize_cols``: boolean, default True, if False, convert a list of tuples
156+
- ``tupleize_cols``: boolean, default False, if False, convert a list of tuples
157157
to a multi-index of columns, otherwise, leave the column index as a list of tuples
158158

159159
.. ipython:: python
@@ -860,19 +860,16 @@ Reading columns with a ``MultiIndex``
860860

861861
By specifying list of row locations for the ``header`` argument, you
862862
can read in a ``MultiIndex`` for the columns. Specifying non-consecutive
863-
rows will skip the intervening rows.
863+
rows will skip the intervening rows. In order to have the pre-0.13 behavior
864+
of tupleizing columns, specify ``tupleize_cols=True``.
864865

865866
.. ipython:: python
866867
867868
from pandas.util.testing import makeCustomDataframe as mkdf
868869
df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
869-
df.to_csv('mi.csv',tupleize_cols=False)
870+
df.to_csv('mi.csv')
870871
print open('mi.csv').read()
871-
pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)
872-
873-
Note: The default behavior in 0.12 remains unchanged (``tupleize_cols=True``) from prior versions,
874-
but starting with 0.13, the default *to* write and read multi-index columns will be in the new
875-
format (``tupleize_cols=False``)
872+
pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1])
876873
877874
Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
878875
with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will be *lost*.
@@ -966,7 +963,7 @@ function takes a number of arguments. Only the first is required.
966963
- ``sep`` : Field delimiter for the output file (default ",")
967964
- ``encoding``: a string representing the encoding to use if the contents are
968965
non-ascii, for python versions prior to 3
969-
- ``tupleize_cols``: boolean, default True, if False, write as a list of tuples,
966+
- ``tupleize_cols``: boolean, default False, if True, write as a list of tuples,
970967
otherwise write in an expanded line format suitable for ``read_csv``
971968

972969
Writing a formatted string

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ API Changes
188188
a list can be passed to ``to_replace`` (:issue:`4743`).
189189
- provide automatic dtype conversions on _reduce operations (:issue:`3371`)
190190
- exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
191+
- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
191192

192193
Internal Refactoring
193194
~~~~~~~~~~~~~~~~~~~~

pandas/core/format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
787787
cols=None, header=True, index=True, index_label=None,
788788
mode='w', nanRep=None, encoding=None, quoting=None,
789789
line_terminator='\n', chunksize=None, engine=None,
790-
tupleize_cols=True, quotechar='"'):
790+
tupleize_cols=False, quotechar='"'):
791791

792792
self.engine = engine # remove for 0.13
793793
self.obj = obj

pandas/core/frame.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
11911191
is used. Different default from read_table
11921192
parse_dates : boolean, default True
11931193
Parse dates. Different default from read_table
1194-
tupleize_cols : boolean, default True
1194+
tupleize_cols : boolean, default False
11951195
read multi_index columns as a list of tuples (if True)
11961196
or in the new (expanded) format (if False)
11971197
@@ -1208,7 +1208,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
12081208
from pandas.io.parsers import read_table
12091209
return read_table(path, header=header, sep=sep,
12101210
parse_dates=parse_dates, index_col=index_col,
1211-
encoding=encoding, tupleize_cols=False)
1211+
encoding=encoding, tupleize_cols=tupleize_cols)
12121212

12131213
def to_sparse(self, fill_value=None, kind='block'):
12141214
"""
@@ -1291,7 +1291,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
12911291
cols=None, header=True, index=True, index_label=None,
12921292
mode='w', nanRep=None, encoding=None, quoting=None,
12931293
line_terminator='\n', chunksize=None,
1294-
tupleize_cols=True, **kwds):
1294+
tupleize_cols=False, **kwds):
12951295
r"""Write DataFrame to a comma-separated values (csv) file
12961296
12971297
Parameters
@@ -1331,7 +1331,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
13311331
defaults to csv.QUOTE_MINIMAL
13321332
chunksize : int or None
13331333
rows to write at a time
1334-
tupleize_cols : boolean, default True
1334+
tupleize_cols : boolean, default False
13351335
write multi_index columns as a list of tuples (if True)
13361336
or in the new (expanded) format (if False)
13371337
"""

pandas/io/parsers.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def _read(filepath_or_buffer, kwds):
247247
'squeeze': False,
248248
'compression': None,
249249
'mangle_dupe_cols': True,
250-
'tupleize_cols':True,
250+
'tupleize_cols':False,
251251
}
252252

253253

@@ -336,7 +336,7 @@ def parser_f(filepath_or_buffer,
336336
encoding=None,
337337
squeeze=False,
338338
mangle_dupe_cols=True,
339-
tupleize_cols=True,
339+
tupleize_cols=False,
340340
):
341341

342342
# Alias sep -> delimiter.
@@ -656,7 +656,7 @@ def __init__(self, kwds):
656656
self.na_fvalues = kwds.get('na_fvalues')
657657
self.true_values = kwds.get('true_values')
658658
self.false_values = kwds.get('false_values')
659-
self.tupleize_cols = kwds.get('tupleize_cols',True)
659+
self.tupleize_cols = kwds.get('tupleize_cols',False)
660660

661661
self._date_conv = _make_date_converter(date_parser=self.date_parser,
662662
dayfirst=self.dayfirst)

pandas/parser.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ cdef class TextReader:
310310
skip_footer=0,
311311
verbose=False,
312312
mangle_dupe_cols=True,
313-
tupleize_cols=True):
313+
tupleize_cols=False):
314314

315315
self.parser = parser_new()
316316
self.parser.chunksize = tokenize_chunksize

0 commit comments

Comments
 (0)