Merge pull request #4797 from jreback/clean_csv

jreback · jreback · commit 1434776a0fd5 · 2013-09-10T06:07:32.000-07:00
CLN: default for tupleize_cols is now False for both to_csv and read_csv. Fair warning in 0.12 (GH3604)
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -153,7 +153,7 @@ They can take a number of arguments:
     time and lower memory usage.
   - ``mangle_dupe_cols``: boolean, default True, then duplicate columns will be specified
     as 'X.0'...'X.N', rather than 'X'...'X'
-  - ``tupleize_cols``: boolean, default True, if False, convert a list of tuples
+  - ``tupleize_cols``: boolean, default False, if False, convert a list of tuples
     to a multi-index of columns, otherwise, leave the column index as a list of tuples
 
 .. ipython:: python
@@ -860,19 +860,16 @@ Reading columns with a ``MultiIndex``
 
 By specifying list of row locations for the ``header`` argument, you
 can read in a ``MultiIndex`` for the columns. Specifying non-consecutive
-rows will skip the interveaning rows.
+rows will skip the intervening rows. In order to have the pre-0.13 behavior
+of tupleizing columns, specify ``tupleize_cols=True``.
 
 .. ipython:: python
 
    from pandas.util.testing import makeCustomDataframe as mkdf
    df = mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4)
-   df.to_csv('mi.csv',tupleize_cols=False)
+   df.to_csv('mi.csv')
    print open('mi.csv').read()
-   pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1],tupleize_cols=False)
-
-Note: The default behavior in 0.12 remains unchanged (``tupleize_cols=True``) from prior versions,
-but starting with 0.13, the default *to* write and read multi-index columns will be in the new
-format (``tupleize_cols=False``)
+   pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1])
 
 Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it
 with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will be *lost*.
@@ -966,7 +963,7 @@ function takes a number of arguments. Only the first is required.
   - ``sep`` : Field delimiter for the output file (default ",")
   - ``encoding``: a string representing the encoding to use if the contents are
     non-ascii, for python versions prior to 3
-  - ``tupleize_cols``: boolean, default True, if False, write as a list of tuples,
+  - ``tupleize_cols``: boolean, default False, if True, write as a list of tuples,
     otherwise write in an expanded line format suitable for ``read_csv``
 
 Writing a formatted string
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -188,6 +188,7 @@ API Changes
     a list can be passed to ``to_replace`` (:issue:`4743`).
   - provide automatic dtype conversions on _reduce operations (:issue:`3371`)
   - exclude non-numerics if mixed types with datelike in _reduce operations (:issue:`3371`)
+  - default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`)
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -787,7 +787,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
                  cols=None, header=True, index=True, index_label=None,
                  mode='w', nanRep=None, encoding=None, quoting=None,
                  line_terminator='\n', chunksize=None, engine=None,
-                 tupleize_cols=True, quotechar='"'):
+                 tupleize_cols=False, quotechar='"'):
 
         self.engine = engine  # remove for 0.13
         self.obj = obj
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1191,7 +1191,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
             is used. Different default from read_table
         parse_dates : boolean, default True
             Parse dates. Different default from read_table
-        tupleize_cols : boolean, default True
+        tupleize_cols : boolean, default False
             write multi_index columns as a list of tuples (if True)
             or new (expanded format) if False)
 
@@ -1208,7 +1208,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
         from pandas.io.parsers import read_table
         return read_table(path, header=header, sep=sep,
                           parse_dates=parse_dates, index_col=index_col,
-                          encoding=encoding, tupleize_cols=False)
+                          encoding=encoding, tupleize_cols=tupleize_cols)
 
     def to_sparse(self, fill_value=None, kind='block'):
         """
@@ -1291,7 +1291,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
                cols=None, header=True, index=True, index_label=None,
                mode='w', nanRep=None, encoding=None, quoting=None,
                line_terminator='\n', chunksize=None,
-               tupleize_cols=True, **kwds):
+               tupleize_cols=False, **kwds):
         r"""Write DataFrame to a comma-separated values (csv) file
 
         Parameters
@@ -1331,7 +1331,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
             defaults to csv.QUOTE_MINIMAL
         chunksize : int or None
             rows to write at a time
-        tupleize_cols : boolean, default True
+        tupleize_cols : boolean, default False
             write multi_index columns as a list of tuples (if True)
             or new (expanded format) if False)
         """
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -247,7 +247,7 @@ def _read(filepath_or_buffer, kwds):
     'squeeze': False,
     'compression': None,
     'mangle_dupe_cols': True,
-    'tupleize_cols':True,
+    'tupleize_cols':False,
 }
 
 
@@ -336,7 +336,7 @@ def parser_f(filepath_or_buffer,
                  encoding=None,
                  squeeze=False,
                  mangle_dupe_cols=True,
-                 tupleize_cols=True,
+                 tupleize_cols=False,
                  ):
 
         # Alias sep -> delimiter.
@@ -656,7 +656,7 @@ def __init__(self, kwds):
         self.na_fvalues = kwds.get('na_fvalues')
         self.true_values = kwds.get('true_values')
         self.false_values = kwds.get('false_values')
-        self.tupleize_cols = kwds.get('tupleize_cols',True)
+        self.tupleize_cols = kwds.get('tupleize_cols',False)
 
         self._date_conv = _make_date_converter(date_parser=self.date_parser,
                                                dayfirst=self.dayfirst)
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -310,7 +310,7 @@ cdef class TextReader:
                   skip_footer=0,
                   verbose=False,
                   mangle_dupe_cols=True,
-                  tupleize_cols=True):
+                  tupleize_cols=False):
 
         self.parser = parser_new()
         self.parser.chunksize = tokenize_chunksize