Improved docs for infer_datetime_format

robintw · jreback · commit 4f5099b4ee5f · 2016-03-14T09:01:53.000-04:00
Fixes pandas-dev#12152. Author: Robin Wilson <robin@rtwilson.com>. Closes pandas-dev#12606 from robintw/infer_datetime_format-docs and squashes the following commits: 4dbb8ec [Robin Wilson] Added example to docs; 50311ce [Robin Wilson] Updated docs; 19fd9d4 [Robin Wilson] Fix linter error; d3ce9a1 [Robin Wilson] Improved docs infer_datetime_format.
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -132,8 +132,10 @@ class ParserWarning(Warning):
 
     Note: A fast-path exists for iso8601-formatted dates.
 infer_datetime_format : boolean, default False
-    If True and parse_dates is enabled for a column, attempt to infer
-    the datetime format to speed up the processing
+    If True and parse_dates is enabled, pandas will attempt to infer the format
+    of the datetime strings in the columns, and if it can be inferred, switch
+    to a faster method of parsing them. In some cases this can increase the
+    parsing speed by ~5-10x.
 keep_date_col : boolean, default False
     If True and parse_dates specifies combining multiple columns then
     keep the original columns.
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
@@ -231,8 +231,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
     unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch
         (e.g. a unix timestamp), which is an integer/float number.
     infer_datetime_format : boolean, default False
-        If no `format` is given, try to infer the format based on the first
-        datetime string. Provides a large speed-up in many cases.
+        If True and no `format` is given, attempt to infer the format of the
+        datetime strings, and if it can be inferred, switch to a faster
+        method of parsing them. In some cases this can increase the parsing
+        speed by ~5-10x.
 
     Returns
     -------
@@ -264,15 +266,28 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
 
     Or from strings
 
-    >>> df = df.astype(str)
-    >>> pd.to_datetime(df.day + df.month + df.year, format="%d%m%Y")
+    >>> dfs = df.astype(str)
+    >>> pd.to_datetime(dfs.day + dfs.month + dfs.year, format="%d%m%Y")
     0    2000-01-01
     1    2000-01-02
     ...
     98   2000-04-08
     99   2000-04-09
     Length: 100, dtype: datetime64[ns]
 
+    Infer the format from the first entry
+
+    >>> pd.to_datetime(dfs.month + '/' + dfs.day + '/' + dfs.year,
+    ...                infer_datetime_format=True)
+    0    2000-01-01
+    1    2000-01-02
+    ...
+    98   2000-04-08
+    99   2000-04-09
+
+    This gives the same results as omitting `infer_datetime_format=True`,
+    but can be much faster.
+
     Date that does not meet timestamp limitations:
 
     >>> pd.to_datetime('13000101', format='%Y%m%d')