DEPR: Deprecate read_csv arguments fully (#17865)

gfyoung · jreback · commit b59413792fca · 2017-10-14T10:39:06.000-04:00
Warn about every deprecated `read_csv` argument that is passed explicitly, even when the value passed equals its old default. Closes gh-17828.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1294,7 +1294,7 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
 
     @classmethod
     def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
-                 encoding=None, tupleize_cols=False,
+                 encoding=None, tupleize_cols=None,
                  infer_datetime_format=False):
         """
         Read CSV file (DEPRECATED, please use :func:`pandas.read_csv`
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -529,6 +529,14 @@ def _read(filepath_or_buffer, kwds):
     'buffer_lines',
     'float_precision',
 }
+
+_deprecated_defaults = {
+    'as_recarray': None,
+    'buffer_lines': None,
+    'compact_ints': None,
+    'use_unsigned': None,
+    'tupleize_cols': None
+}
 _deprecated_args = {
     'as_recarray',
     'buffer_lines',
@@ -594,7 +602,7 @@ def parser_f(filepath_or_buffer,
                  comment=None,
                  encoding=None,
                  dialect=None,
-                 tupleize_cols=False,
+                 tupleize_cols=None,
 
                  # Error Handling
                  error_bad_lines=True,
@@ -606,9 +614,9 @@ def parser_f(filepath_or_buffer,
                  # Internal
                  doublequote=True,
                  delim_whitespace=False,
-                 as_recarray=False,
-                 compact_ints=False,
-                 use_unsigned=False,
+                 as_recarray=None,
+                 compact_ints=None,
+                 use_unsigned=None,
                  low_memory=_c_parser_defaults['low_memory'],
                  buffer_lines=None,
                  memory_map=False,
@@ -831,12 +839,14 @@ def _get_options_with_defaults(self, engine):
                     if ('python' in engine and
                             argname not in _python_unsupported):
                         pass
+                    elif value == _deprecated_defaults.get(argname, default):
+                        pass
                     else:
                         raise ValueError(
                             'The %r option is not supported with the'
                             ' %r engine' % (argname, engine))
             else:
-                value = default
+                value = _deprecated_defaults.get(argname, default)
             options[argname] = value
 
         if engine == 'python-fwf':
@@ -962,6 +972,8 @@ def _clean_options(self, options, engine):
 
         for arg in _deprecated_args:
             parser_default = _c_parser_defaults[arg]
+            depr_default = _deprecated_defaults[arg]
+
             msg = ("The '{arg}' argument has been deprecated "
                    "and will be removed in a future version."
                    .format(arg=arg))
@@ -970,10 +982,13 @@ def _clean_options(self, options, engine):
                 msg += ' Please call pd.to_csv(...).to_records() instead.'
             elif arg == 'tupleize_cols':
                 msg += (' Column tuples will then '
-                        'always be converted to MultiIndex')
+                        'always be converted to MultiIndex.')
 
-            if result.get(arg, parser_default) != parser_default:
+            if result.get(arg, depr_default) != depr_default:
+                # raise Exception(result.get(arg, depr_default), depr_default)
                 depr_warning += msg + '\n\n'
+            else:
+                result[arg] = parser_default
 
         if depr_warning != '':
             warnings.warn(depr_warning, FutureWarning, stacklevel=2)
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -267,8 +267,8 @@ def _do_test(df, r_dtype=None, c_dtype=None,
 
                 with ensure_clean('__tmp_to_csv_moar__') as path:
                     df.to_csv(path, encoding='utf8',
-                              chunksize=chunksize, tupleize_cols=False)
-                    recons = self.read_csv(path, tupleize_cols=False, **kwargs)
+                              chunksize=chunksize)
+                    recons = self.read_csv(path, **kwargs)
             else:
                 kwargs['header'] = 0
 
@@ -542,35 +542,35 @@ def _make_frame(names=None):
 
             # column & index are multi-index
             df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
-            df.to_csv(path, tupleize_cols=False)
-            result = read_csv(path, header=[0, 1, 2, 3], index_col=[
-                              0, 1], tupleize_cols=False)
+            df.to_csv(path)
+            result = read_csv(path, header=[0, 1, 2, 3],
+                              index_col=[0, 1])
             assert_frame_equal(df, result)
 
             # column is mi
             df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
-            df.to_csv(path, tupleize_cols=False)
+            df.to_csv(path)
             result = read_csv(
-                path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False)
+                path, header=[0, 1, 2, 3], index_col=0)
             assert_frame_equal(df, result)
 
             # dup column names?
             df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
-            df.to_csv(path, tupleize_cols=False)
-            result = read_csv(path, header=[0, 1, 2, 3], index_col=[
-                              0, 1, 2], tupleize_cols=False)
+            df.to_csv(path)
+            result = read_csv(path, header=[0, 1, 2, 3],
+                              index_col=[0, 1, 2])
             assert_frame_equal(df, result)
 
             # writing with no index
             df = _make_frame()
-            df.to_csv(path, tupleize_cols=False, index=False)
-            result = read_csv(path, header=[0, 1], tupleize_cols=False)
+            df.to_csv(path, index=False)
+            result = read_csv(path, header=[0, 1])
             assert_frame_equal(df, result)
 
             # we lose the names here
             df = _make_frame(True)
-            df.to_csv(path, tupleize_cols=False, index=False)
-            result = read_csv(path, header=[0, 1], tupleize_cols=False)
+            df.to_csv(path, index=False)
+            result = read_csv(path, header=[0, 1])
             assert _all_none(*result.columns.names)
             result.columns.names = df.columns.names
             assert_frame_equal(df, result)
@@ -589,15 +589,15 @@ def _make_frame(names=None):
 
             # whatsnew example
             df = _make_frame()
-            df.to_csv(path, tupleize_cols=False)
-            result = read_csv(path, header=[0, 1], index_col=[
-                              0], tupleize_cols=False)
+            df.to_csv(path)
+            result = read_csv(path, header=[0, 1],
+                              index_col=[0])
             assert_frame_equal(df, result)
 
             df = _make_frame(True)
-            df.to_csv(path, tupleize_cols=False)
-            result = read_csv(path, header=[0, 1], index_col=[
-                              0], tupleize_cols=False)
+            df.to_csv(path)
+            result = read_csv(path, header=[0, 1],
+                              index_col=[0])
             assert_frame_equal(df, result)
 
             # column & index are multi-index (compatibility)
@@ -613,18 +613,17 @@ def _make_frame(names=None):
 
             # invalid options
             df = _make_frame(True)
-            df.to_csv(path, tupleize_cols=False)
+            df.to_csv(path)
 
             for i in [6, 7]:
                 msg = 'len of {i}, but only 5 lines in file'.format(i=i)
                 with tm.assert_raises_regex(ParserError, msg):
-                    read_csv(path, tupleize_cols=False,
-                             header=lrange(i), index_col=0)
+                    read_csv(path, header=lrange(i), index_col=0)
 
             # write with cols
             with tm.assert_raises_regex(TypeError, 'cannot specify cols '
                                         'with a MultiIndex'):
-                df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar'])
+                df.to_csv(path, columns=['foo', 'bar'])
 
         with ensure_clean('__tmp_to_csv_multiindex__') as path:
             # empty
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
@@ -129,10 +129,15 @@ class TestDeprecatedFeatures(object):
 
     @pytest.mark.parametrize("engine", ["c", "python"])
     @pytest.mark.parametrize("kwargs", [{"as_recarray": True},
+                                        {"as_recarray": False},
                                         {"buffer_lines": True},
+                                        {"buffer_lines": False},
                                         {"compact_ints": True},
+                                        {"compact_ints": False},
                                         {"use_unsigned": True},
+                                        {"use_unsigned": False},
                                         {"tupleize_cols": True},
+                                        {"tupleize_cols": False},
                                         {"skip_footer": 1}])
     def test_deprecated_args(self, engine, kwargs):
         data = "1,2,3"