Skip to content

Commit 8e7904f

Browse files
gfyoung authored and nateGeorge committed
CLN: Remove the engine parameter in CSVFormatter and to_csv
closes pandas-dev#13419 xref pandas-dev#11274
1 parent 5d99cff commit 8e7904f

File tree

5 files changed

+56
-202
lines changed

5 files changed

+56
-202
lines changed

doc/source/whatsnew/v0.19.0.txt

+9
Original file line number | Diff line number | Diff line change
@@ -436,6 +436,15 @@ Deprecations
436436
- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
437437
- ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`)
438438

439+
440+
.. _whatsnew_0190.prior_deprecations:
441+
442+
Removal of prior version deprecations/changes
443+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
444+
445+
- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`)
446+
447+
439448
.. _whatsnew_0190.performance:
440449

441450
Performance Improvements

pandas/core/frame.py

-1
Original file line number | Diff line number | Diff line change
@@ -1342,7 +1342,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
13421342
cols=columns, header=header, index=index,
13431343
index_label=index_label, mode=mode,
13441344
chunksize=chunksize, quotechar=quotechar,
1345-
engine=kwds.get("engine"),
13461345
tupleize_cols=tupleize_cols,
13471346
date_format=date_format,
13481347
doublequote=doublequote,

pandas/formats/format.py

+5 -128
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030

3131
import itertools
3232
import csv
33-
import warnings
3433

3534
common_docstring = """
3635
Parameters
@@ -1326,15 +1325,10 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
13261325
float_format=None, cols=None, header=True, index=True,
13271326
index_label=None, mode='w', nanRep=None, encoding=None,
13281327
compression=None, quoting=None, line_terminator='\n',
1329-
chunksize=None, engine=None, tupleize_cols=False,
1330-
quotechar='"', date_format=None, doublequote=True,
1331-
escapechar=None, decimal='.'):
1332-
1333-
if engine is not None:
1334-
warnings.warn("'engine' keyword is deprecated and will be "
1335-
"removed in a future version", FutureWarning,
1336-
stacklevel=3)
1337-
self.engine = engine # remove for 0.18
1328+
chunksize=None, tupleize_cols=False, quotechar='"',
1329+
date_format=None, doublequote=True, escapechar=None,
1330+
decimal='.'):
1331+
13381332
self.obj = obj
13391333

13401334
if path_or_buf is None:
@@ -1369,11 +1363,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
13691363

13701364
self.date_format = date_format
13711365

1372-
# GH3457
1373-
if not self.obj.columns.is_unique and engine == 'python':
1374-
raise NotImplementedError("columns.is_unique == False not "
1375-
"supported with engine='python'")
1376-
13771366
self.tupleize_cols = tupleize_cols
13781367
self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and
13791368
not self.tupleize_cols)
@@ -1430,108 +1419,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
14301419
if not index:
14311420
self.nlevels = 0
14321421

1433-
# original python implem. of df.to_csv
1434-
# invoked by df.to_csv(engine=python)
1435-
def _helper_csv(self, writer, na_rep=None, cols=None, header=True,
1436-
index=True, index_label=None, float_format=None,
1437-
date_format=None):
1438-
if cols is None:
1439-
cols = self.columns
1440-
1441-
has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
1442-
if has_aliases or header:
1443-
if index:
1444-
# should write something for index label
1445-
if index_label is not False:
1446-
if index_label is None:
1447-
if isinstance(self.obj.index, MultiIndex):
1448-
index_label = []
1449-
for i, name in enumerate(self.obj.index.names):
1450-
if name is None:
1451-
name = ''
1452-
index_label.append(name)
1453-
else:
1454-
index_label = self.obj.index.name
1455-
if index_label is None:
1456-
index_label = ['']
1457-
else:
1458-
index_label = [index_label]
1459-
elif not isinstance(index_label,
1460-
(list, tuple, np.ndarray, Index)):
1461-
# given a string for a DF with Index
1462-
index_label = [index_label]
1463-
1464-
encoded_labels = list(index_label)
1465-
else:
1466-
encoded_labels = []
1467-
1468-
if has_aliases:
1469-
if len(header) != len(cols):
1470-
raise ValueError(('Writing %d cols but got %d aliases'
1471-
% (len(cols), len(header))))
1472-
else:
1473-
write_cols = header
1474-
else:
1475-
write_cols = cols
1476-
encoded_cols = list(write_cols)
1477-
1478-
writer.writerow(encoded_labels + encoded_cols)
1479-
else:
1480-
encoded_cols = list(cols)
1481-
writer.writerow(encoded_cols)
1482-
1483-
if date_format is None:
1484-
date_formatter = lambda x: Timestamp(x)._repr_base
1485-
else:
1486-
1487-
def strftime_with_nulls(x):
1488-
x = Timestamp(x)
1489-
if notnull(x):
1490-
return x.strftime(date_format)
1491-
1492-
date_formatter = lambda x: strftime_with_nulls(x)
1493-
1494-
data_index = self.obj.index
1495-
1496-
if isinstance(self.obj.index, PeriodIndex):
1497-
data_index = self.obj.index.to_timestamp()
1498-
1499-
if isinstance(data_index, DatetimeIndex) and date_format is not None:
1500-
data_index = Index([date_formatter(x) for x in data_index])
1501-
1502-
values = self.obj.copy()
1503-
values.index = data_index
1504-
values.columns = values.columns.to_native_types(
1505-
na_rep=na_rep, float_format=float_format, date_format=date_format,
1506-
quoting=self.quoting)
1507-
values = values[cols]
1508-
1509-
series = {}
1510-
for k, v in compat.iteritems(values._series):
1511-
series[k] = v._values
1512-
1513-
nlevels = getattr(data_index, 'nlevels', 1)
1514-
for j, idx in enumerate(data_index):
1515-
row_fields = []
1516-
if index:
1517-
if nlevels == 1:
1518-
row_fields = [idx]
1519-
else: # handle MultiIndex
1520-
row_fields = list(idx)
1521-
for i, col in enumerate(cols):
1522-
val = series[col][j]
1523-
if lib.checknull(val):
1524-
val = na_rep
1525-
1526-
if float_format is not None and com.is_float(val):
1527-
val = float_format % val
1528-
elif isinstance(val, (np.datetime64, Timestamp)):
1529-
val = date_formatter(val)
1530-
1531-
row_fields.append(val)
1532-
1533-
writer.writerow(row_fields)
1534-
15351422
def save(self):
15361423
# create the writer & save
15371424
if hasattr(self.path_or_buf, 'write'):
@@ -1555,17 +1442,7 @@ def save(self):
15551442
else:
15561443
self.writer = csv.writer(f, **writer_kwargs)
15571444

1558-
if self.engine == 'python':
1559-
# to be removed in 0.13
1560-
self._helper_csv(self.writer, na_rep=self.na_rep,
1561-
float_format=self.float_format,
1562-
cols=self.cols, header=self.header,
1563-
index=self.index,
1564-
index_label=self.index_label,
1565-
date_format=self.date_format)
1566-
1567-
else:
1568-
self._save()
1445+
self._save()
15691446

15701447
finally:
15711448
if close:

pandas/tests/formats/test_format.py

-6
Original file line number | Diff line number | Diff line change
@@ -3329,12 +3329,6 @@ def test_to_csv_date_format(self):
33293329
self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'),
33303330
expected_ymd_sec)
33313331

3332-
# deprecation GH11274
3333-
def test_to_csv_engine_kw_deprecation(self):
3334-
with tm.assert_produces_warning(FutureWarning):
3335-
df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})
3336-
df.to_csv(engine='python')
3337-
33383332
def test_period(self):
33393333
# GH 12615
33403334
df = pd.DataFrame({'A': pd.period_range('2013-01',

pandas/tests/frame/test_to_csv.py

+42 -67
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
from pandas.compat import (lmap, range, lrange, StringIO, u)
1111
from pandas.parser import CParserError
1212
from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
13-
date_range, read_csv, compat)
13+
date_range, read_csv, compat, to_datetime)
1414
import pandas as pd
1515

1616
from pandas.util.testing import (assert_almost_equal,
@@ -139,7 +139,7 @@ def test_to_csv_from_csv5(self):
139139
self.tzframe.to_csv(path)
140140
result = pd.read_csv(path, index_col=0, parse_dates=['A'])
141141

142-
converter = lambda c: pd.to_datetime(result[c]).dt.tz_localize(
142+
converter = lambda c: to_datetime(result[c]).dt.tz_localize(
143143
'UTC').dt.tz_convert(self.tzframe[c].dt.tz)
144144
result['B'] = converter('B')
145145
result['C'] = converter('C')
@@ -162,15 +162,6 @@ def test_to_csv_cols_reordering(self):
162162

163163
assert_frame_equal(df[cols], rs_c, check_names=False)
164164

165-
def test_to_csv_legacy_raises_on_dupe_cols(self):
166-
df = mkdf(10, 3)
167-
df.columns = ['a', 'a', 'b']
168-
with ensure_clean() as path:
169-
with tm.assert_produces_warning(FutureWarning,
170-
check_stacklevel=False):
171-
self.assertRaises(NotImplementedError,
172-
df.to_csv, path, engine='python')
173-
174165
def test_to_csv_new_dupe_cols(self):
175166
import pandas as pd
176167

@@ -712,7 +703,6 @@ def test_to_csv_dups_cols(self):
712703
cols.extend([0, 1, 2])
713704
df.columns = cols
714705

715-
from pandas import to_datetime
716706
with ensure_clean() as filename:
717707
df.to_csv(filename)
718708
result = read_csv(filename, index_col=0)
@@ -993,72 +983,57 @@ def test_to_csv_compression_value_error(self):
993983
filename, compression="zip")
994984

995985
def test_to_csv_date_format(self):
996-
from pandas import to_datetime
997986
with ensure_clean('__tmp_to_csv_date_format__') as path:
998-
for engine in [None, 'python']:
999-
w = FutureWarning if engine == 'python' else None
1000-
1001-
dt_index = self.tsframe.index
1002-
datetime_frame = DataFrame(
1003-
{'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
1004-
1005-
with tm.assert_produces_warning(w, check_stacklevel=False):
1006-
datetime_frame.to_csv(
1007-
path, date_format='%Y%m%d', engine=engine)
1008-
1009-
# Check that the data was put in the specified format
1010-
test = read_csv(path, index_col=0)
1011-
1012-
datetime_frame_int = datetime_frame.applymap(
1013-
lambda x: int(x.strftime('%Y%m%d')))
1014-
datetime_frame_int.index = datetime_frame_int.index.map(
1015-
lambda x: int(x.strftime('%Y%m%d')))
987+
dt_index = self.tsframe.index
988+
datetime_frame = DataFrame(
989+
{'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
990+
datetime_frame.to_csv(path, date_format='%Y%m%d')
1016991

1017-
assert_frame_equal(test, datetime_frame_int)
992+
# Check that the data was put in the specified format
993+
test = read_csv(path, index_col=0)
1018994

1019-
with tm.assert_produces_warning(w, check_stacklevel=False):
1020-
datetime_frame.to_csv(
1021-
path, date_format='%Y-%m-%d', engine=engine)
995+
datetime_frame_int = datetime_frame.applymap(
996+
lambda x: int(x.strftime('%Y%m%d')))
997+
datetime_frame_int.index = datetime_frame_int.index.map(
998+
lambda x: int(x.strftime('%Y%m%d')))
1022999

1023-
# Check that the data was put in the specified format
1024-
test = read_csv(path, index_col=0)
1025-
datetime_frame_str = datetime_frame.applymap(
1026-
lambda x: x.strftime('%Y-%m-%d'))
1027-
datetime_frame_str.index = datetime_frame_str.index.map(
1028-
lambda x: x.strftime('%Y-%m-%d'))
1000+
assert_frame_equal(test, datetime_frame_int)
10291001

1030-
assert_frame_equal(test, datetime_frame_str)
1002+
datetime_frame.to_csv(path, date_format='%Y-%m-%d')
10311003

1032-
# Check that columns get converted
1033-
datetime_frame_columns = datetime_frame.T
1004+
# Check that the data was put in the specified format
1005+
test = read_csv(path, index_col=0)
1006+
datetime_frame_str = datetime_frame.applymap(
1007+
lambda x: x.strftime('%Y-%m-%d'))
1008+
datetime_frame_str.index = datetime_frame_str.index.map(
1009+
lambda x: x.strftime('%Y-%m-%d'))
10341010

1035-
with tm.assert_produces_warning(w, check_stacklevel=False):
1036-
datetime_frame_columns.to_csv(
1037-
path, date_format='%Y%m%d', engine=engine)
1011+
assert_frame_equal(test, datetime_frame_str)
10381012

1039-
test = read_csv(path, index_col=0)
1013+
# Check that columns get converted
1014+
datetime_frame_columns = datetime_frame.T
1015+
datetime_frame_columns.to_csv(path, date_format='%Y%m%d')
10401016

1041-
datetime_frame_columns = datetime_frame_columns.applymap(
1042-
lambda x: int(x.strftime('%Y%m%d')))
1043-
# Columns don't get converted to ints by read_csv
1044-
datetime_frame_columns.columns = (
1045-
datetime_frame_columns.columns
1046-
.map(lambda x: x.strftime('%Y%m%d')))
1017+
test = read_csv(path, index_col=0)
10471018

1048-
assert_frame_equal(test, datetime_frame_columns)
1019+
datetime_frame_columns = datetime_frame_columns.applymap(
1020+
lambda x: int(x.strftime('%Y%m%d')))
1021+
# Columns don't get converted to ints by read_csv
1022+
datetime_frame_columns.columns = (
1023+
datetime_frame_columns.columns
1024+
.map(lambda x: x.strftime('%Y%m%d')))
10491025

1050-
# test NaTs
1051-
nat_index = to_datetime(
1052-
['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
1053-
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
1026+
assert_frame_equal(test, datetime_frame_columns)
10541027

1055-
with tm.assert_produces_warning(w, check_stacklevel=False):
1056-
nat_frame.to_csv(
1057-
path, date_format='%Y-%m-%d', engine=engine)
1028+
# test NaTs
1029+
nat_index = to_datetime(
1030+
['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
1031+
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
1032+
nat_frame.to_csv(path, date_format='%Y-%m-%d')
10581033

1059-
test = read_csv(path, parse_dates=[0, 1], index_col=0)
1034+
test = read_csv(path, parse_dates=[0, 1], index_col=0)
10601035

1061-
assert_frame_equal(test, nat_frame)
1036+
assert_frame_equal(test, nat_frame)
10621037

10631038
def test_to_csv_with_dst_transitions(self):
10641039

@@ -1077,7 +1052,7 @@ def test_to_csv_with_dst_transitions(self):
10771052
# we have to reconvert the index as we
10781053
# don't parse the tz's
10791054
result = read_csv(path, index_col=0)
1080-
result.index = pd.to_datetime(result.index).tz_localize(
1055+
result.index = to_datetime(result.index).tz_localize(
10811056
'UTC').tz_convert('Europe/London')
10821057
assert_frame_equal(result, df)
10831058

@@ -1089,9 +1064,9 @@ def test_to_csv_with_dst_transitions(self):
10891064
with ensure_clean('csv_date_format_with_dst') as path:
10901065
df.to_csv(path, index=True)
10911066
result = read_csv(path, index_col=0)
1092-
result.index = pd.to_datetime(result.index).tz_localize(
1067+
result.index = to_datetime(result.index).tz_localize(
10931068
'UTC').tz_convert('Europe/Paris')
1094-
result['idx'] = pd.to_datetime(result['idx']).astype(
1069+
result['idx'] = to_datetime(result['idx']).astype(
10951070
'datetime64[ns, Europe/Paris]')
10961071
assert_frame_equal(result, df)
10971072

0 commit comments

Comments
 (0)