Merge pull request #10163 from jorisvandenbossche/depr-from_csv

jorisvandenbossche · jorisvandenbossche · commit bd804aa75ac9 · 2015-08-21T09:33:31.000+02:00
DOC: clarify purpose of DataFrame.from_csv (GH4191)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -180,7 +180,6 @@ class DataFrame(NDFrame):
     --------
     DataFrame.from_records : constructor from tuples, also record arrays
     DataFrame.from_dict : from dicts of Series, arrays, or dicts
-    DataFrame.from_csv : from CSV files
     DataFrame.from_items : from sequence of (key, value) pairs
     pandas.read_csv, pandas.read_table, pandas.read_clipboard
     """
@@ -1059,13 +1058,29 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
                  parse_dates=True, encoding=None, tupleize_cols=False,
                  infer_datetime_format=False):
         """
-        Read delimited file into DataFrame
+        Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv` instead).
+
+        It is preferable to use the more powerful :func:`pandas.read_csv`
+        for most general purposes, but ``from_csv`` makes for an easy
+        roundtrip to and from a file (the exact counterpart of
+        ``to_csv``), especially with a DataFrame of time series data.
+
+        This method only differs from the preferred :func:`pandas.read_csv`
+        in some defaults:
+
+        - `index_col` is ``0`` instead of ``None`` (take first column as index
+          by default)
+        - `parse_dates` is ``True`` instead of ``False`` (try parsing the index
+          as datetime by default)
+
+        So ``pd.DataFrame.from_csv(path)`` can be replaced by
+        ``pd.read_csv(path, index_col=0, parse_dates=True)``.
 
         Parameters
         ----------
         path : string file path or file handle / StringIO
         header : int, default 0
-            Row to use at header (skip prior rows)
+            Row to use as header (skip prior rows)
         sep : string, default ','
             Field delimiter
         index_col : int or sequence, default 0
@@ -1081,15 +1096,14 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
             datetime format based on the first datetime string. If the format
             can be inferred, there often will be a large parsing speed-up.
 
-        Notes
-        -----
-        Preferable to use read_table for most general purposes but from_csv
-        makes for an easy roundtrip to and from file, especially with a
-        DataFrame of time series data
+        See also
+        --------
+        pandas.read_csv
 
         Returns
         -------
         y : DataFrame
+
         """
         from pandas.io.parsers import read_table
         return read_table(path, header=header, sep=sep,
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2317,7 +2317,24 @@ def between(self, left, right, inclusive=True):
     def from_csv(cls, path, sep=',', parse_dates=True, header=None,
                  index_col=0, encoding=None, infer_datetime_format=False):
         """
-        Read delimited file into Series
+        Read CSV file (DISCOURAGED, please use :func:`pandas.read_csv` instead).
+
+        It is preferable to use the more powerful :func:`pandas.read_csv`
+        for most general purposes, but ``from_csv`` makes for an easy
+        roundtrip to and from a file (the exact counterpart of
+        ``to_csv``), especially with a time series.
+
+        This method only differs from :func:`pandas.read_csv` in some defaults:
+
+        - `index_col` is ``0`` instead of ``None`` (take first column as index
+          by default)
+        - `header` is ``None`` instead of ``0`` (the first row is not used as
+          the column names)
+        - `parse_dates` is ``True`` instead of ``False`` (try parsing the index
+          as datetime by default)
+
+        With :func:`pandas.read_csv`, the option ``squeeze=True`` can be used
+        to return a Series like ``from_csv``.
 
         Parameters
         ----------
@@ -2326,8 +2343,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
             Field delimiter
         parse_dates : boolean, default True
             Parse dates. Different default from read_table
-        header : int, default 0
-            Row to use at header (skip prior rows)
+        header : int or None, default None
+            Row to use as header (skip prior rows)
         index_col : int or sequence, default 0
             Column to use for index. If a sequence is given, a MultiIndex
             is used. Different default from read_table
@@ -2339,6 +2356,10 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
             datetime format based on the first datetime string. If the format
             can be inferred, there often will be a large parsing speed-up.
 
+        See also
+        --------
+        pandas.read_csv
+
         Returns
         -------
         y : Series