Skip to content

DEPR: Deprecate read_csv arguments fully #17865

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1294,7 +1294,7 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):

@classmethod
def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
encoding=None, tupleize_cols=False,
encoding=None, tupleize_cols=None,
infer_datetime_format=False):
"""
Read CSV file (DEPRECATED, please use :func:`pandas.read_csv`
Expand Down
29 changes: 22 additions & 7 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,14 @@ def _read(filepath_or_buffer, kwds):
'buffer_lines',
'float_precision',
}

_deprecated_defaults = {
'as_recarray': None,
'buffer_lines': None,
'compact_ints': None,
'use_unsigned': None,
'tupleize_cols': None
}
_deprecated_args = {
'as_recarray',
'buffer_lines',
Expand Down Expand Up @@ -594,7 +602,7 @@ def parser_f(filepath_or_buffer,
comment=None,
encoding=None,
dialect=None,
tupleize_cols=False,
tupleize_cols=None,

# Error Handling
error_bad_lines=True,
Expand All @@ -606,9 +614,9 @@ def parser_f(filepath_or_buffer,
# Internal
doublequote=True,
delim_whitespace=False,
as_recarray=False,
compact_ints=False,
use_unsigned=False,
as_recarray=None,
compact_ints=None,
use_unsigned=None,
low_memory=_c_parser_defaults['low_memory'],
buffer_lines=None,
memory_map=False,
Expand Down Expand Up @@ -831,12 +839,14 @@ def _get_options_with_defaults(self, engine):
if ('python' in engine and
argname not in _python_unsupported):
pass
elif value == _deprecated_defaults.get(argname, default):
pass
else:
raise ValueError(
'The %r option is not supported with the'
' %r engine' % (argname, engine))
else:
value = default
value = _deprecated_defaults.get(argname, default)
options[argname] = value

if engine == 'python-fwf':
Expand Down Expand Up @@ -962,6 +972,8 @@ def _clean_options(self, options, engine):

for arg in _deprecated_args:
parser_default = _c_parser_defaults[arg]
depr_default = _deprecated_defaults[arg]

msg = ("The '{arg}' argument has been deprecated "
"and will be removed in a future version."
.format(arg=arg))
Expand All @@ -970,10 +982,13 @@ def _clean_options(self, options, engine):
msg += ' Please call pd.to_csv(...).to_records() instead.'
elif arg == 'tupleize_cols':
msg += (' Column tuples will then '
'always be converted to MultiIndex')
'always be converted to MultiIndex.')

if result.get(arg, parser_default) != parser_default:
if result.get(arg, depr_default) != depr_default:
# raise Exception(result.get(arg, depr_default), depr_default)
depr_warning += msg + '\n\n'
else:
result[arg] = parser_default

if depr_warning != '':
warnings.warn(depr_warning, FutureWarning, stacklevel=2)
Expand Down
47 changes: 23 additions & 24 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ def _do_test(df, r_dtype=None, c_dtype=None,

with ensure_clean('__tmp_to_csv_moar__') as path:
df.to_csv(path, encoding='utf8',
chunksize=chunksize, tupleize_cols=False)
recons = self.read_csv(path, tupleize_cols=False, **kwargs)
chunksize=chunksize)
recons = self.read_csv(path, **kwargs)
else:
kwargs['header'] = 0

Expand Down Expand Up @@ -542,35 +542,35 @@ def _make_frame(names=None):

# column & index are multi-index
df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4)
df.to_csv(path, tupleize_cols=False)
result = read_csv(path, header=[0, 1, 2, 3], index_col=[
0, 1], tupleize_cols=False)
df.to_csv(path)
result = read_csv(path, header=[0, 1, 2, 3],
index_col=[0, 1])
assert_frame_equal(df, result)

# column is mi
df = mkdf(5, 3, r_idx_nlevels=1, c_idx_nlevels=4)
df.to_csv(path, tupleize_cols=False)
df.to_csv(path)
result = read_csv(
path, header=[0, 1, 2, 3], index_col=0, tupleize_cols=False)
path, header=[0, 1, 2, 3], index_col=0)
assert_frame_equal(df, result)

# dup column names?
df = mkdf(5, 3, r_idx_nlevels=3, c_idx_nlevels=4)
df.to_csv(path, tupleize_cols=False)
result = read_csv(path, header=[0, 1, 2, 3], index_col=[
0, 1, 2], tupleize_cols=False)
df.to_csv(path)
result = read_csv(path, header=[0, 1, 2, 3],
index_col=[0, 1, 2])
assert_frame_equal(df, result)

# writing with no index
df = _make_frame()
df.to_csv(path, tupleize_cols=False, index=False)
result = read_csv(path, header=[0, 1], tupleize_cols=False)
df.to_csv(path, index=False)
result = read_csv(path, header=[0, 1])
assert_frame_equal(df, result)

# we lose the names here
df = _make_frame(True)
df.to_csv(path, tupleize_cols=False, index=False)
result = read_csv(path, header=[0, 1], tupleize_cols=False)
df.to_csv(path, index=False)
result = read_csv(path, header=[0, 1])
assert _all_none(*result.columns.names)
result.columns.names = df.columns.names
assert_frame_equal(df, result)
Expand All @@ -589,15 +589,15 @@ def _make_frame(names=None):

# whatsnew example
df = _make_frame()
df.to_csv(path, tupleize_cols=False)
result = read_csv(path, header=[0, 1], index_col=[
0], tupleize_cols=False)
df.to_csv(path)
result = read_csv(path, header=[0, 1],
index_col=[0])
assert_frame_equal(df, result)

df = _make_frame(True)
df.to_csv(path, tupleize_cols=False)
result = read_csv(path, header=[0, 1], index_col=[
0], tupleize_cols=False)
df.to_csv(path)
result = read_csv(path, header=[0, 1],
index_col=[0])
assert_frame_equal(df, result)

# column & index are multi-index (compatibility)
Expand All @@ -613,18 +613,17 @@ def _make_frame(names=None):

# invalid options
df = _make_frame(True)
df.to_csv(path, tupleize_cols=False)
df.to_csv(path)

for i in [6, 7]:
msg = 'len of {i}, but only 5 lines in file'.format(i=i)
with tm.assert_raises_regex(ParserError, msg):
read_csv(path, tupleize_cols=False,
header=lrange(i), index_col=0)
read_csv(path, header=lrange(i), index_col=0)

# write with cols
with tm.assert_raises_regex(TypeError, 'cannot specify cols '
'with a MultiIndex'):
df.to_csv(path, tupleize_cols=False, columns=['foo', 'bar'])
df.to_csv(path, columns=['foo', 'bar'])

with ensure_clean('__tmp_to_csv_multiindex__') as path:
# empty
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/io/parser/test_unsupported.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,15 @@ class TestDeprecatedFeatures(object):

@pytest.mark.parametrize("engine", ["c", "python"])
@pytest.mark.parametrize("kwargs", [{"as_recarray": True},
{"as_recarray": False},
{"buffer_lines": True},
{"buffer_lines": False},
{"compact_ints": True},
{"compact_ints": False},
{"use_unsigned": True},
{"use_unsigned": False},
{"tupleize_cols": True},
{"tupleize_cols": False},
{"skip_footer": 1}])
def test_deprecated_args(self, engine, kwargs):
data = "1,2,3"
Expand Down