CLN: Remove the engine parameter in CSVFormatter and to_csv

gfyoung · nateGeorge · commit 8e7904f205b4 · 2016-08-15T14:11:19.000-06:00
closes pandas-dev#13419 xref pandas-dev#11274
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -436,6 +436,15 @@ Deprecations
 - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
 - ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`)
 
+
+.. _whatsnew_0190.prior_deprecations:
+
+Removal of prior version deprecations/changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, which was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`)
+
+
 .. _whatsnew_0190.performance:
 
 Performance Improvements
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1342,7 +1342,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
                                      cols=columns, header=header, index=index,
                                      index_label=index_label, mode=mode,
                                      chunksize=chunksize, quotechar=quotechar,
-                                     engine=kwds.get("engine"),
                                      tupleize_cols=tupleize_cols,
                                      date_format=date_format,
                                      doublequote=doublequote,
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
@@ -30,7 +30,6 @@
 
 import itertools
 import csv
-import warnings
 
 common_docstring = """
     Parameters
@@ -1326,15 +1325,10 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
                  float_format=None, cols=None, header=True, index=True,
                  index_label=None, mode='w', nanRep=None, encoding=None,
                  compression=None, quoting=None, line_terminator='\n',
-                 chunksize=None, engine=None, tupleize_cols=False,
-                 quotechar='"', date_format=None, doublequote=True,
-                 escapechar=None, decimal='.'):
-
-        if engine is not None:
-            warnings.warn("'engine' keyword is deprecated and will be "
-                          "removed in a future version", FutureWarning,
-                          stacklevel=3)
-        self.engine = engine  # remove for 0.18
+                 chunksize=None, tupleize_cols=False, quotechar='"',
+                 date_format=None, doublequote=True, escapechar=None,
+                 decimal='.'):
+
         self.obj = obj
 
         if path_or_buf is None:
@@ -1369,11 +1363,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
 
         self.date_format = date_format
 
-        # GH3457
-        if not self.obj.columns.is_unique and engine == 'python':
-            raise NotImplementedError("columns.is_unique == False not "
-                                      "supported with engine='python'")
-
         self.tupleize_cols = tupleize_cols
         self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and
                                not self.tupleize_cols)
@@ -1430,108 +1419,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='',
         if not index:
             self.nlevels = 0
 
-    # original python implem. of df.to_csv
-    # invoked by df.to_csv(engine=python)
-    def _helper_csv(self, writer, na_rep=None, cols=None, header=True,
-                    index=True, index_label=None, float_format=None,
-                    date_format=None):
-        if cols is None:
-            cols = self.columns
-
-        has_aliases = isinstance(header, (tuple, list, np.ndarray, Index))
-        if has_aliases or header:
-            if index:
-                # should write something for index label
-                if index_label is not False:
-                    if index_label is None:
-                        if isinstance(self.obj.index, MultiIndex):
-                            index_label = []
-                            for i, name in enumerate(self.obj.index.names):
-                                if name is None:
-                                    name = ''
-                                index_label.append(name)
-                        else:
-                            index_label = self.obj.index.name
-                            if index_label is None:
-                                index_label = ['']
-                            else:
-                                index_label = [index_label]
-                    elif not isinstance(index_label,
-                                        (list, tuple, np.ndarray, Index)):
-                        # given a string for a DF with Index
-                        index_label = [index_label]
-
-                    encoded_labels = list(index_label)
-                else:
-                    encoded_labels = []
-
-                if has_aliases:
-                    if len(header) != len(cols):
-                        raise ValueError(('Writing %d cols but got %d aliases'
-                                          % (len(cols), len(header))))
-                    else:
-                        write_cols = header
-                else:
-                    write_cols = cols
-                encoded_cols = list(write_cols)
-
-                writer.writerow(encoded_labels + encoded_cols)
-            else:
-                encoded_cols = list(cols)
-                writer.writerow(encoded_cols)
-
-        if date_format is None:
-            date_formatter = lambda x: Timestamp(x)._repr_base
-        else:
-
-            def strftime_with_nulls(x):
-                x = Timestamp(x)
-                if notnull(x):
-                    return x.strftime(date_format)
-
-            date_formatter = lambda x: strftime_with_nulls(x)
-
-        data_index = self.obj.index
-
-        if isinstance(self.obj.index, PeriodIndex):
-            data_index = self.obj.index.to_timestamp()
-
-        if isinstance(data_index, DatetimeIndex) and date_format is not None:
-            data_index = Index([date_formatter(x) for x in data_index])
-
-        values = self.obj.copy()
-        values.index = data_index
-        values.columns = values.columns.to_native_types(
-            na_rep=na_rep, float_format=float_format, date_format=date_format,
-            quoting=self.quoting)
-        values = values[cols]
-
-        series = {}
-        for k, v in compat.iteritems(values._series):
-            series[k] = v._values
-
-        nlevels = getattr(data_index, 'nlevels', 1)
-        for j, idx in enumerate(data_index):
-            row_fields = []
-            if index:
-                if nlevels == 1:
-                    row_fields = [idx]
-                else:  # handle MultiIndex
-                    row_fields = list(idx)
-            for i, col in enumerate(cols):
-                val = series[col][j]
-                if lib.checknull(val):
-                    val = na_rep
-
-                if float_format is not None and com.is_float(val):
-                    val = float_format % val
-                elif isinstance(val, (np.datetime64, Timestamp)):
-                    val = date_formatter(val)
-
-                row_fields.append(val)
-
-            writer.writerow(row_fields)
-
     def save(self):
         # create the writer & save
         if hasattr(self.path_or_buf, 'write'):
@@ -1555,17 +1442,7 @@ def save(self):
             else:
                 self.writer = csv.writer(f, **writer_kwargs)
 
-            if self.engine == 'python':
-                # to be removed in 0.13
-                self._helper_csv(self.writer, na_rep=self.na_rep,
-                                 float_format=self.float_format,
-                                 cols=self.cols, header=self.header,
-                                 index=self.index,
-                                 index_label=self.index_label,
-                                 date_format=self.date_format)
-
-            else:
-                self._save()
+            self._save()
 
         finally:
             if close:
diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py
@@ -3329,12 +3329,6 @@ def test_to_csv_date_format(self):
         self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'),
                          expected_ymd_sec)
 
-    # deprecation GH11274
-    def test_to_csv_engine_kw_deprecation(self):
-        with tm.assert_produces_warning(FutureWarning):
-            df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]})
-            df.to_csv(engine='python')
-
     def test_period(self):
         # GH 12615
         df = pd.DataFrame({'A': pd.period_range('2013-01',
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -10,7 +10,7 @@
 from pandas.compat import (lmap, range, lrange, StringIO, u)
 from pandas.parser import CParserError
 from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
-                    date_range, read_csv, compat)
+                    date_range, read_csv, compat, to_datetime)
 import pandas as pd
 
 from pandas.util.testing import (assert_almost_equal,
@@ -139,7 +139,7 @@ def test_to_csv_from_csv5(self):
             self.tzframe.to_csv(path)
             result = pd.read_csv(path, index_col=0, parse_dates=['A'])
 
-            converter = lambda c: pd.to_datetime(result[c]).dt.tz_localize(
+            converter = lambda c: to_datetime(result[c]).dt.tz_localize(
                 'UTC').dt.tz_convert(self.tzframe[c].dt.tz)
             result['B'] = converter('B')
             result['C'] = converter('C')
@@ -162,15 +162,6 @@ def test_to_csv_cols_reordering(self):
 
         assert_frame_equal(df[cols], rs_c, check_names=False)
 
-    def test_to_csv_legacy_raises_on_dupe_cols(self):
-        df = mkdf(10, 3)
-        df.columns = ['a', 'a', 'b']
-        with ensure_clean() as path:
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
-                self.assertRaises(NotImplementedError,
-                                  df.to_csv, path, engine='python')
-
     def test_to_csv_new_dupe_cols(self):
         import pandas as pd
 
@@ -712,7 +703,6 @@ def test_to_csv_dups_cols(self):
             cols.extend([0, 1, 2])
         df.columns = cols
 
-        from pandas import to_datetime
         with ensure_clean() as filename:
             df.to_csv(filename)
             result = read_csv(filename, index_col=0)
@@ -993,72 +983,57 @@ def test_to_csv_compression_value_error(self):
                               filename, compression="zip")
 
     def test_to_csv_date_format(self):
-        from pandas import to_datetime
         with ensure_clean('__tmp_to_csv_date_format__') as path:
-            for engine in [None, 'python']:
-                w = FutureWarning if engine == 'python' else None
-
-                dt_index = self.tsframe.index
-                datetime_frame = DataFrame(
-                    {'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
-
-                with tm.assert_produces_warning(w, check_stacklevel=False):
-                    datetime_frame.to_csv(
-                        path, date_format='%Y%m%d', engine=engine)
-
-                # Check that the data was put in the specified format
-                test = read_csv(path, index_col=0)
-
-                datetime_frame_int = datetime_frame.applymap(
-                    lambda x: int(x.strftime('%Y%m%d')))
-                datetime_frame_int.index = datetime_frame_int.index.map(
-                    lambda x: int(x.strftime('%Y%m%d')))
+            dt_index = self.tsframe.index
+            datetime_frame = DataFrame(
+                {'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
+            datetime_frame.to_csv(path, date_format='%Y%m%d')
 
-                assert_frame_equal(test, datetime_frame_int)
+            # Check that the data was put in the specified format
+            test = read_csv(path, index_col=0)
 
-                with tm.assert_produces_warning(w, check_stacklevel=False):
-                    datetime_frame.to_csv(
-                        path, date_format='%Y-%m-%d', engine=engine)
+            datetime_frame_int = datetime_frame.applymap(
+                lambda x: int(x.strftime('%Y%m%d')))
+            datetime_frame_int.index = datetime_frame_int.index.map(
+                lambda x: int(x.strftime('%Y%m%d')))
 
-                # Check that the data was put in the specified format
-                test = read_csv(path, index_col=0)
-                datetime_frame_str = datetime_frame.applymap(
-                    lambda x: x.strftime('%Y-%m-%d'))
-                datetime_frame_str.index = datetime_frame_str.index.map(
-                    lambda x: x.strftime('%Y-%m-%d'))
+            assert_frame_equal(test, datetime_frame_int)
 
-                assert_frame_equal(test, datetime_frame_str)
+            datetime_frame.to_csv(path, date_format='%Y-%m-%d')
 
-                # Check that columns get converted
-                datetime_frame_columns = datetime_frame.T
+            # Check that the data was put in the specified format
+            test = read_csv(path, index_col=0)
+            datetime_frame_str = datetime_frame.applymap(
+                lambda x: x.strftime('%Y-%m-%d'))
+            datetime_frame_str.index = datetime_frame_str.index.map(
+                lambda x: x.strftime('%Y-%m-%d'))
 
-                with tm.assert_produces_warning(w, check_stacklevel=False):
-                    datetime_frame_columns.to_csv(
-                        path, date_format='%Y%m%d', engine=engine)
+            assert_frame_equal(test, datetime_frame_str)
 
-                test = read_csv(path, index_col=0)
+            # Check that columns get converted
+            datetime_frame_columns = datetime_frame.T
+            datetime_frame_columns.to_csv(path, date_format='%Y%m%d')
 
-                datetime_frame_columns = datetime_frame_columns.applymap(
-                    lambda x: int(x.strftime('%Y%m%d')))
-                # Columns don't get converted to ints by read_csv
-                datetime_frame_columns.columns = (
-                    datetime_frame_columns.columns
-                    .map(lambda x: x.strftime('%Y%m%d')))
+            test = read_csv(path, index_col=0)
 
-                assert_frame_equal(test, datetime_frame_columns)
+            datetime_frame_columns = datetime_frame_columns.applymap(
+                lambda x: int(x.strftime('%Y%m%d')))
+            # Columns don't get converted to ints by read_csv
+            datetime_frame_columns.columns = (
+                datetime_frame_columns.columns
+                .map(lambda x: x.strftime('%Y%m%d')))
 
-                # test NaTs
-                nat_index = to_datetime(
-                    ['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
-                nat_frame = DataFrame({'A': nat_index}, index=nat_index)
+            assert_frame_equal(test, datetime_frame_columns)
 
-                with tm.assert_produces_warning(w, check_stacklevel=False):
-                    nat_frame.to_csv(
-                        path, date_format='%Y-%m-%d', engine=engine)
+            # test NaTs
+            nat_index = to_datetime(
+                ['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
+            nat_frame = DataFrame({'A': nat_index}, index=nat_index)
+            nat_frame.to_csv(path, date_format='%Y-%m-%d')
 
-                test = read_csv(path, parse_dates=[0, 1], index_col=0)
+            test = read_csv(path, parse_dates=[0, 1], index_col=0)
 
-                assert_frame_equal(test, nat_frame)
+            assert_frame_equal(test, nat_frame)
 
     def test_to_csv_with_dst_transitions(self):
 
@@ -1077,7 +1052,7 @@ def test_to_csv_with_dst_transitions(self):
                 # we have to reconvert the index as we
                 # don't parse the tz's
                 result = read_csv(path, index_col=0)
-                result.index = pd.to_datetime(result.index).tz_localize(
+                result.index = to_datetime(result.index).tz_localize(
                     'UTC').tz_convert('Europe/London')
                 assert_frame_equal(result, df)
 
@@ -1089,9 +1064,9 @@ def test_to_csv_with_dst_transitions(self):
         with ensure_clean('csv_date_format_with_dst') as path:
             df.to_csv(path, index=True)
             result = read_csv(path, index_col=0)
-            result.index = pd.to_datetime(result.index).tz_localize(
+            result.index = to_datetime(result.index).tz_localize(
                 'UTC').tz_convert('Europe/Paris')
-            result['idx'] = pd.to_datetime(result['idx']).astype(
+            result['idx'] = to_datetime(result['idx']).astype(
                 'datetime64[ns, Europe/Paris]')
             assert_frame_equal(result, df)