diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ed3be71852299..2eefc7ec1b636 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -715,6 +715,7 @@ Other API Changes Deprecations ~~~~~~~~~~~~ +- :meth:`DataFrame.from_csv` and :meth:`Series.from_csv` have been deprecated in favor of :func:`read_csv()`. Some default arguments differ (for example, ``from_csv`` uses ``index_col=0`` and ``parse_dates=True``), so pass them explicitly when migrating (:issue:`4191`) - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) - :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d907492759dbd..c536cc9f2b82c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -298,7 +298,7 @@ def _constructor(self): _constructor_sliced = Series _deprecations = NDFrame._deprecations | frozenset( - ['sortlevel', 'get_value', 'set_value']) + ['sortlevel', 'get_value', 'set_value', 'from_csv']) @property def _constructor_expanddim(self): @@ -1291,7 +1291,7 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, encoding=None, tupleize_cols=False, infer_datetime_format=False): """ - Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv` + Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` instead). It is preferable to use the more powerful :func:`pandas.read_csv` @@ -1339,6 +1339,13 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True, y : DataFrame """ + + warnings.warn("from_csv is deprecated. Please use read_csv(...) " + "instead. 
Note that some of the default arguments are " + "different, so please refer to the documentation " + "for from_csv when changing your function calls", + FutureWarning, stacklevel=2) + from pandas.io.parsers import read_table return read_table(path, header=header, sep=sep, parse_dates=parse_dates, index_col=index_col, diff --git a/pandas/core/series.py b/pandas/core/series.py index 49b6a6651367b..be4066f0c39b9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -147,7 +147,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _metadata = ['name'] _accessors = frozenset(['dt', 'cat', 'str']) _deprecations = generic.NDFrame._deprecations | frozenset( - ['sortlevel', 'reshape', 'get_value', 'set_value']) + ['sortlevel', 'reshape', 'get_value', 'set_value', 'from_csv']) _allow_index_ops = True def __init__(self, data=None, index=None, dtype=None, name=None, @@ -2688,7 +2688,7 @@ def between(self, left, right, inclusive=True): def from_csv(cls, path, sep=',', parse_dates=True, header=None, index_col=0, encoding=None, infer_datetime_format=False): """ - Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv` + Read CSV file (DEPRECATED, please use :func:`pandas.read_csv` instead). It is preferable to use the more powerful :func:`pandas.read_csv` @@ -2736,6 +2736,9 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None, ------- y : Series """ + + # The `DataFrame.from_csv` call below emits the deprecation warning, + # but its stacklevel=2 attributes it to this method, not the caller. 
from pandas.core.frame import DataFrame df = DataFrame.from_csv(path, header=header, index_col=index_col, sep=sep, parse_dates=parse_dates, diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index a61a157181253..ab34ce877a726 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -31,6 +31,21 @@ class TestDataFrameToCSV(TestData): + def read_csv(self, path, **kwargs): + params = dict(index_col=0, parse_dates=True) + params.update(**kwargs) + + return pd.read_csv(path, **params) + + def test_from_csv_deprecation(self): + # see gh-17812 + with ensure_clean('__tmp_from_csv_deprecation__') as path: + self.tsframe.to_csv(path) + + with tm.assert_produces_warning(FutureWarning): + depr_recons = DataFrame.from_csv(path) + assert_frame_equal(self.tsframe, depr_recons) + def test_to_csv_from_csv1(self): with ensure_clean('__tmp_to_csv_from_csv1__') as path: @@ -43,24 +58,25 @@ def test_to_csv_from_csv1(self): # test roundtrip self.tsframe.to_csv(path) - recons = DataFrame.from_csv(path) - + recons = self.read_csv(path) assert_frame_equal(self.tsframe, recons) self.tsframe.to_csv(path, index_label='index') - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) + assert(len(recons.columns) == len(self.tsframe.columns) + 1) # no index self.tsframe.to_csv(path, index=False) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert_almost_equal(self.tsframe.values, recons.values) # corner case dm = DataFrame({'s1': Series(lrange(3), lrange(3)), 's2': Series(lrange(2), lrange(2))}) dm.to_csv(path) - recons = DataFrame.from_csv(path) + + recons = self.read_csv(path) assert_frame_equal(dm, recons) def test_to_csv_from_csv2(self): @@ -71,27 +87,26 @@ def test_to_csv_from_csv2(self): df = DataFrame(np.random.randn(3, 3), index=['a', 'a', 'b'], columns=['x', 'y', 'z']) df.to_csv(path) - result = DataFrame.from_csv(path) + result 
= self.read_csv(path) assert_frame_equal(result, df) midx = MultiIndex.from_tuples( [('A', 1, 2), ('A', 1, 2), ('B', 1, 2)]) df = DataFrame(np.random.randn(3, 3), index=midx, columns=['x', 'y', 'z']) + df.to_csv(path) - result = DataFrame.from_csv(path, index_col=[0, 1, 2], - parse_dates=False) - # TODO from_csv names index ['Unnamed: 1', 'Unnamed: 2'] should it - # ? + result = self.read_csv(path, index_col=[0, 1, 2], + parse_dates=False) assert_frame_equal(result, df, check_names=False) # column aliases col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_csv(path, header=col_aliases) - rs = DataFrame.from_csv(path) + + rs = self.read_csv(path) xp = self.frame2.copy() xp.columns = col_aliases - assert_frame_equal(xp, rs) pytest.raises(ValueError, self.frame2.to_csv, path, @@ -231,8 +246,9 @@ def make_dtnat_arr(n, nnat=None): with ensure_clean('1.csv') as pth: df = DataFrame(dict(a=s1, b=s2)) df.to_csv(pth, chunksize=chunksize) - recons = DataFrame.from_csv(pth)._convert(datetime=True, - coerce=True) + + recons = self.read_csv(pth)._convert(datetime=True, + coerce=True) assert_frame_equal(df, recons, check_names=False, check_less_precise=True) @@ -247,16 +263,17 @@ def _do_test(df, r_dtype=None, c_dtype=None, if rnlvl is not None: kwargs['index_col'] = lrange(rnlvl) kwargs['header'] = lrange(cnlvl) + with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize, tupleize_cols=False) - recons = DataFrame.from_csv( - path, tupleize_cols=False, **kwargs) + recons = self.read_csv(path, tupleize_cols=False, **kwargs) else: kwargs['header'] = 0 + with ensure_clean('__tmp_to_csv_moar__') as path: df.to_csv(path, encoding='utf8', chunksize=chunksize) - recons = DataFrame.from_csv(path, **kwargs) + recons = self.read_csv(path, **kwargs) def _to_uni(x): if not isinstance(x, compat.text_type): @@ -398,7 +415,7 @@ def test_to_csv_from_csv_w_some_infs(self): with ensure_clean() as path: self.frame.to_csv(path) - recons = 
DataFrame.from_csv(path) + recons = self.read_csv(path) # TODO to_csv drops column name assert_frame_equal(self.frame, recons, check_names=False) @@ -413,7 +430,7 @@ def test_to_csv_from_csv_w_all_infs(self): with ensure_clean() as path: self.frame.to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) # TODO to_csv drops column name assert_frame_equal(self.frame, recons, check_names=False) @@ -448,11 +465,13 @@ def test_to_csv_headers(self): to_df = DataFrame([[1, 2], [3, 4]], columns=['X', 'Y']) with ensure_clean('__tmp_to_csv_headers__') as path: from_df.to_csv(path, header=['X', 'Y']) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + assert_frame_equal(to_df, recons) from_df.to_csv(path, index=False, header=['X', 'Y']) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + recons.reset_index(inplace=True) assert_frame_equal(to_df, recons) @@ -471,13 +490,15 @@ def test_to_csv_multiindex(self): # round trip frame.to_csv(path) - df = DataFrame.from_csv(path, index_col=[0, 1], parse_dates=False) + + df = self.read_csv(path, index_col=[0, 1], + parse_dates=False) # TODO to_csv drops column name assert_frame_equal(frame, df, check_names=False) assert frame.index.names == df.index.names - # needed if setUP becomes a classmethod + # needed if setUp becomes a class method self.frame.index = old_index # try multiindex with dates @@ -487,21 +508,22 @@ def test_to_csv_multiindex(self): tsframe.index = MultiIndex.from_arrays(new_index) tsframe.to_csv(path, index_label=['time', 'foo']) - recons = DataFrame.from_csv(path, index_col=[0, 1]) + recons = self.read_csv(path, index_col=[0, 1]) + # TODO to_csv drops column name assert_frame_equal(tsframe, recons, check_names=False) # do not load index tsframe.to_csv(path) - recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert len(recons.columns) == len(tsframe.columns) + 2 # no index tsframe.to_csv(path, index=False) - 
recons = DataFrame.from_csv(path, index_col=None) + recons = self.read_csv(path, index_col=None) assert_almost_equal(recons.values, self.tsframe.values) - # needed if setUP becomes classmethod + # needed if setUp becomes class method self.tsframe.index = old_index with ensure_clean('__tmp_to_csv_multiindex__') as path: @@ -606,7 +628,8 @@ def _make_frame(names=None): with ensure_clean('__tmp_to_csv_multiindex__') as path: # empty tsframe[:0].to_csv(path) - recons = DataFrame.from_csv(path) + recons = self.read_csv(path) + exp = tsframe[:0] exp.index = [] @@ -631,7 +654,7 @@ def test_to_csv_withcommas(self): with ensure_clean('__tmp_to_csv_withcommas__.csv') as path: df.to_csv(path) - df2 = DataFrame.from_csv(path) + df2 = self.read_csv(path) assert_frame_equal(df2, df) def test_to_csv_mixed(self): @@ -746,7 +769,7 @@ def test_to_csv_wide_frame_formatting(self): def test_to_csv_bug(self): f1 = StringIO('a,1.0\nb,2.0') - df = DataFrame.from_csv(f1, header=None) + df = self.read_csv(f1, header=None) newdf = DataFrame({'t': df[df.columns[0]]}) with ensure_clean() as path: diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 5b7fd1ec94a90..ad51261a47c5c 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -20,43 +20,73 @@ class TestSeriesToCSV(TestData): + def read_csv(self, path, **kwargs): + params = dict(squeeze=True, index_col=0, + header=None, parse_dates=True) + params.update(**kwargs) + + header = params.get("header") + out = pd.read_csv(path, **params) + + if header is None: + out.name = out.index.name = None + + return out + + def test_from_csv_deprecation(self): + # see gh-17812 + with ensure_clean() as path: + self.ts.to_csv(path) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = self.read_csv(path) + depr_ts = Series.from_csv(path) + assert_series_equal(depr_ts, ts) + def test_from_csv(self): with ensure_clean() as path: self.ts.to_csv(path) - ts = 
Series.from_csv(path) + ts = self.read_csv(path) assert_series_equal(self.ts, ts, check_names=False) + assert ts.name is None assert ts.index.name is None - # GH10483 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + depr_ts = Series.from_csv(path) + assert_series_equal(depr_ts, ts) + + # see gh-10483 self.ts.to_csv(path, header=True) - ts_h = Series.from_csv(path, header=0) - assert ts_h.name == 'ts' + ts_h = self.read_csv(path, header=0) + assert ts_h.name == "ts" self.series.to_csv(path) - series = Series.from_csv(path) - assert series.name is None - assert series.index.name is None + series = self.read_csv(path) assert_series_equal(self.series, series, check_names=False) + assert series.name is None assert series.index.name is None self.series.to_csv(path, header=True) - series_h = Series.from_csv(path, header=0) - assert series_h.name == 'series' + series_h = self.read_csv(path, header=0) + assert series_h.name == "series" - outfile = open(path, 'w') - outfile.write('1998-01-01|1.0\n1999-01-01|2.0') + outfile = open(path, "w") + outfile.write("1998-01-01|1.0\n1999-01-01|2.0") outfile.close() - series = Series.from_csv(path, sep='|') - checkseries = Series({datetime(1998, 1, 1): 1.0, - datetime(1999, 1, 1): 2.0}) - assert_series_equal(checkseries, series) - series = Series.from_csv(path, sep='|', parse_dates=False) - checkseries = Series({'1998-01-01': 1.0, '1999-01-01': 2.0}) - assert_series_equal(checkseries, series) + series = self.read_csv(path, sep="|") + check_series = Series({datetime(1998, 1, 1): 1.0, + datetime(1999, 1, 1): 2.0}) + assert_series_equal(check_series, series) + + series = self.read_csv(path, sep="|", parse_dates=False) + check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0}) + assert_series_equal(check_series, series) def test_to_csv(self): import io @@ -76,20 +106,19 @@ def test_to_csv_unicode_index(self): buf = StringIO() s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) - s.to_csv(buf, 
encoding='UTF-8') + s.to_csv(buf, encoding="UTF-8") buf.seek(0) - s2 = Series.from_csv(buf, index_col=0, encoding='UTF-8') - + s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") assert_series_equal(s, s2) def test_to_csv_float_format(self): with ensure_clean() as filename: ser = Series([0.123456, 0.234567, 0.567567]) - ser.to_csv(filename, float_format='%.2f') + ser.to_csv(filename, float_format="%.2f") - rs = Series.from_csv(filename) + rs = self.read_csv(filename) xp = Series([0.12, 0.23, 0.57]) assert_series_equal(rs, xp)