DEPR: pd.read_table

dahlbaek · dahlbaek · commit f417c9275cf7 · 2018-08-02T09:56:12.000+02:00
- pd.read_table is deprecated and replaced by pd.read_csv.

- add whatsnew note

- change tests to test for warning messages

- change DataFrame.from_csv to use pandas.read_csv instead of
  pandas.read_table

- Change pandas.read_clipboard to use pandas.read_csv instead
  of pandas.read_table
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -478,6 +478,7 @@ Deprecations
 - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
 - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
 - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
+- :func:`pandas.read_table` is deprecated. Use :func:`pandas.read_csv` instead (:issue:`21948`)
 
 .. _whatsnew_0240.prior_deprecations:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1594,11 +1594,11 @@ def from_csv(cls, path, header=0, sep=',', index_col=0, parse_dates=True,
                       "for from_csv when changing your function calls",
                       FutureWarning, stacklevel=2)
 
-        from pandas.io.parsers import read_table
-        return read_table(path, header=header, sep=sep,
-                          parse_dates=parse_dates, index_col=index_col,
-                          encoding=encoding, tupleize_cols=tupleize_cols,
-                          infer_datetime_format=infer_datetime_format)
+        from pandas.io.parsers import read_csv
+        return read_csv(path, header=header, sep=sep,
+                        parse_dates=parse_dates, index_col=index_col,
+                        encoding=encoding, tupleize_cols=tupleize_cols,
+                        infer_datetime_format=infer_datetime_format)
 
     def to_sparse(self, fill_value=None, kind='block'):
         """
diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py
@@ -9,7 +9,7 @@
 
 def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
     r"""
-    Read text from clipboard and pass to read_table. See read_table for the
+    Read text from clipboard and pass to read_csv. See read_csv for the
     full argument list
 
     Parameters
@@ -31,7 +31,7 @@ def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
             'reading from clipboard only supports utf-8 encoding')
 
     from pandas.io.clipboard import clipboard_get
-    from pandas.io.parsers import read_table
+    from pandas.io.parsers import read_csv
     text = clipboard_get()
 
     # try to decode (if needed on PY3)
@@ -51,7 +51,7 @@ def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
     # that this came from excel and set 'sep' accordingly
     lines = text[:10000].split('\n')[:-1][:10]
 
-    # Need to remove leading white space, since read_table
+    # Need to remove leading white space, since read_csv
     # accepts:
     #    a  b
     # 0  1  2
@@ -80,7 +80,7 @@ def read_clipboard(sep=r'\s+', **kwargs):  # pragma: no cover
     if kwargs.get('engine') == 'python' and PY2:
         text = text.encode('utf-8')
 
-    return read_table(StringIO(text), sep=sep, **kwargs)
+    return read_csv(StringIO(text), sep=sep, **kwargs)
 
 
 def to_clipboard(obj, excel=True, sep=None, **kwargs):  # pragma: no cover
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -331,6 +331,10 @@
 """ % (_parser_params % (_sep_doc.format(default="','"), _engine_doc))
 
 _read_table_doc = """
+
+.. deprecated:: 0.24.0
+   Use :func:`pandas.read_csv` instead, passing `sep='\t'` if necessary.
+
 Read general delimited file into DataFrame
 
 %s
@@ -540,9 +544,13 @@ def _read(filepath_or_buffer, kwds):
 }
 
 
-def _make_parser_function(name, sep=','):
+def _make_parser_function(name, default_sep=','):
 
-    default_sep = sep
+    # prepare read_table deprecation
+    if name == "read_table":
+        sep = False
+    else:
+        sep = default_sep
 
     def parser_f(filepath_or_buffer,
                  sep=sep,
@@ -611,11 +619,24 @@ def parser_f(filepath_or_buffer,
                  memory_map=False,
                  float_precision=None):
 
+        # deprecate read_table GH21948
+        if name == "read_table":
+            if sep is False and delimiter is None:
+                warnings.warn("read_table is deprecated, use read_csv "
+                              "instead, passing sep='\\t'.",
+                              FutureWarning, stacklevel=2)
+            else:
+                warnings.warn("read_table is deprecated, use read_csv "
+                              "instead.",
+                              FutureWarning, stacklevel=2)
+            if sep is False:
+                sep = default_sep
+
         # Alias sep -> delimiter.
         if delimiter is None:
             delimiter = sep
 
-        if delim_whitespace and delimiter is not default_sep:
+        if delim_whitespace and delimiter != default_sep:
             raise ValueError("Specified a delimiter with both sep and"
                              " delim_whitespace=True; you can only"
                              " specify one.")
@@ -687,10 +708,10 @@ def parser_f(filepath_or_buffer,
     return parser_f
 
 
-read_csv = _make_parser_function('read_csv', sep=',')
+read_csv = _make_parser_function('read_csv', default_sep=',')
 read_csv = Appender(_read_csv_doc)(read_csv)
 
-read_table = _make_parser_function('read_table', sep='\t')
+read_table = _make_parser_function('read_table', default_sep='\t')
 read_table = Appender(_read_table_doc)(read_table)
 
 
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
@@ -1,5 +1,5 @@
 import pytest
-from pandas.io.parsers import read_table
+from pandas.io.parsers import read_csv
 
 
 @pytest.fixture
@@ -17,7 +17,7 @@ def jsonl_file(datapath):
 @pytest.fixture
 def salaries_table(datapath):
     """DataFrame with the salaries dataset"""
-    return read_table(datapath('io', 'parser', 'data', 'salaries.csv'))
+    return read_csv(datapath('io', 'parser', 'data', 'salaries.csv'), sep='\t')
 
 
 @pytest.fixture
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -21,7 +21,7 @@
 import numpy as np
 import pandas as pd
 from pandas import (DataFrame, Series, Index, Timestamp, MultiIndex,
-                    date_range, NaT, read_table)
+                    date_range, NaT, read_csv)
 from pandas.compat import (range, zip, lrange, StringIO, PY3,
                            u, lzip, is_platform_windows,
                            is_platform_32bit)
@@ -1225,8 +1225,8 @@ def test_to_string(self):
         lines = result.split('\n')
         header = lines[0].strip().split()
         joined = '\n'.join(re.sub(r'\s+', ' ', x).strip() for x in lines[1:])
-        recons = read_table(StringIO(joined), names=header,
-                            header=None, sep=' ')
+        recons = read_csv(StringIO(joined), names=header,
+                          header=None, sep=' ')
         tm.assert_series_equal(recons['B'], biggie['B'])
         assert recons['A'].count() == biggie['A'].count()
         assert (np.abs(recons['A'].dropna() -
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
@@ -12,7 +12,7 @@
 import pandas.util.testing as tm
 import pandas.util._test_decorators as td
 from pandas import DataFrame
-from pandas.io.parsers import read_csv, read_table
+from pandas.io.parsers import read_csv
 from pandas.compat import BytesIO, StringIO
 
 
@@ -44,7 +44,7 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
     if mode != 'explicit':
         compression = mode
 
-    url_table = read_table(url, compression=compression, engine=engine)
+    url_table = read_csv(url, sep='\t', compression=compression, engine=engine)
     tm.assert_frame_equal(url_table, salaries_table)
 
 
diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
@@ -70,7 +70,9 @@ def read_table(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         kwds['low_memory'] = self.low_memory
-        return read_table(*args, **kwds)
+        with tm.assert_produces_warning(FutureWarning):
+            df = read_table(*args, **kwds)
+        return df
 
 
 class TestCParserLowMemory(BaseParser, CParserTests):
@@ -88,7 +90,9 @@ def read_table(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         kwds['low_memory'] = True
-        return read_table(*args, **kwds)
+        with tm.assert_produces_warning(FutureWarning):
+            df = read_table(*args, **kwds)
+        return df
 
 
 class TestPythonParser(BaseParser, PythonParserTests):
@@ -103,7 +107,9 @@ def read_csv(self, *args, **kwds):
     def read_table(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
-        return read_table(*args, **kwds)
+        with tm.assert_produces_warning(FutureWarning):
+            df = read_table(*args, **kwds)
+        return df
 
 
 class TestUnsortedUsecols(object):
diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py
@@ -14,7 +14,7 @@
 
 from pandas.compat import StringIO
 from pandas.errors import ParserError
-from pandas.io.parsers import read_csv, read_table
+from pandas.io.parsers import read_csv
 
 import pytest
 
@@ -43,24 +43,24 @@ def test_c_engine(self):
 
         # specify C engine with unsupported options (raise)
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c',
-                       sep=None, delim_whitespace=False)
+            read_csv(StringIO(data), engine='c',
+                     sep=None, delim_whitespace=False)
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c', sep=r'\s')
+            read_csv(StringIO(data), engine='c', sep=r'\s')
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c', quotechar=chr(128))
+            read_csv(StringIO(data), engine='c', sep='\t', quotechar=chr(128))
         with tm.assert_raises_regex(ValueError, msg):
-            read_table(StringIO(data), engine='c', skipfooter=1)
+            read_csv(StringIO(data), engine='c', skipfooter=1)
 
         # specify C-unsupported options without python-unsupported options
         with tm.assert_produces_warning(parsers.ParserWarning):
-            read_table(StringIO(data), sep=None, delim_whitespace=False)
+            read_csv(StringIO(data), sep=None, delim_whitespace=False)
         with tm.assert_produces_warning(parsers.ParserWarning):
-            read_table(StringIO(data), quotechar=chr(128))
+            read_csv(StringIO(data), sep=r'\s')
         with tm.assert_produces_warning(parsers.ParserWarning):
-            read_table(StringIO(data), sep=r'\s')
+            read_csv(StringIO(data), sep='\t', quotechar=chr(128))
         with tm.assert_produces_warning(parsers.ParserWarning):
-            read_table(StringIO(data), skipfooter=1)
+            read_csv(StringIO(data), skipfooter=1)
 
         text = """                      A       B       C       D        E
 one two three   four
@@ -70,9 +70,9 @@ def test_c_engine(self):
         msg = 'Error tokenizing data'
 
         with tm.assert_raises_regex(ParserError, msg):
-            read_table(StringIO(text), sep='\\s+')
+            read_csv(StringIO(text), sep='\\s+')
         with tm.assert_raises_regex(ParserError, msg):
-            read_table(StringIO(text), engine='c', sep='\\s+')
+            read_csv(StringIO(text), engine='c', sep='\\s+')
 
         msg = "Only length-1 thousands markers supported"
         data = """A|B|C
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -131,7 +131,6 @@ def test_iterator(self):
 
     @pytest.mark.parametrize('reader, module, error_class, fn_ext', [
         (pd.read_csv, 'os', FileNotFoundError, 'csv'),
-        (pd.read_table, 'os', FileNotFoundError, 'csv'),
         (pd.read_fwf, 'os', FileNotFoundError, 'txt'),
         (pd.read_excel, 'xlrd', FileNotFoundError, 'xlsx'),
         (pd.read_feather, 'feather', Exception, 'feather'),
@@ -149,9 +148,14 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
         with pytest.raises(error_class):
             reader(path)
 
+    def test_read_non_existant_read_table(self):
+        path = os.path.join(HERE, 'data', 'does_not_exist.' + 'csv')
+        with pytest.raises(FileNotFoundError):
+            with tm.assert_produces_warning(FutureWarning):
+                pd.read_table(path)
+
     @pytest.mark.parametrize('reader, module, path', [
         (pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
-        (pd.read_table, 'os', ('io', 'data', 'iris.csv')),
         (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
         (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
         (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
@@ -170,6 +174,22 @@ def test_read_fspath_all(self, reader, module, path, datapath):
         mypath = CustomFSPath(path)
         result = reader(mypath)
         expected = reader(path)
+
+        if path.endswith('.pickle'):
+            # categorical
+            tm.assert_categorical_equal(result, expected)
+        else:
+            tm.assert_frame_equal(result, expected)
+
+    def test_read_fspath_all_read_table(self, datapath):
+        path = datapath('io', 'data', 'iris.csv')
+
+        mypath = CustomFSPath(path)
+        with tm.assert_produces_warning(FutureWarning):
+            result = pd.read_table(mypath)
+        with tm.assert_produces_warning(FutureWarning):
+            expected = pd.read_table(path)
+
         if path.endswith('.pickle'):
             # categorical
             tm.assert_categorical_equal(result, expected)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
@@ -10,7 +10,7 @@
 import numpy as np
 
 from pandas.core.index import Index, MultiIndex
-from pandas import Panel, DataFrame, Series, notna, isna, Timestamp
+from pandas import Panel, DataFrame, Series, notna, isna, Timestamp, read_csv
 
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
 import pandas.core.common as com
@@ -512,14 +512,13 @@ def f(x):
         pytest.raises(com.SettingWithCopyError, f, result)
 
     def test_xs_level_multiple(self):
-        from pandas import read_table
         text = """                      A       B       C       D        E
 one two three   four
 a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
 a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
 x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
 
-        df = read_table(StringIO(text), sep=r'\s+', engine='python')
+        df = read_csv(StringIO(text), sep=r'\s+', engine='python')
 
         result = df.xs(('a', 4), level=['one', 'four'])
         expected = df.xs('a').xs(4, level='four')
@@ -547,14 +546,13 @@ def f(x):
         tm.assert_frame_equal(rs, xp)
 
     def test_xs_level0(self):
-        from pandas import read_table
         text = """                      A       B       C       D        E
 one two three   four
 a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
 a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
 x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
 
-        df = read_table(StringIO(text), sep=r'\s+', engine='python')
+        df = read_csv(StringIO(text), sep=r'\s+', engine='python')
 
         result = df.xs('a', level=0)
         expected = df.xs('a')