diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2604b6e0784cf..fa9a5cf12570d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -132,8 +132,10 @@ class ParserWarning(Warning): Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default False - If True and parse_dates is enabled for a column, attempt to infer - the datetime format to speed up the processing + If True and parse_dates is enabled, pandas will attempt to infer the format + of the datetime strings in the columns, and if it can be inferred, switch + to a faster method of parsing them. In some cases this can increase the + parsing speed by ~5-10x. keep_date_col : boolean, default False If True and parse_dates specifies combining multiple columns then keep the original columns. diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d92cfef5280fc..f9df0d082f2ff 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -231,8 +231,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch (e.g. a unix timestamp), which is an integer/float number. infer_datetime_format : boolean, default False - If no `format` is given, try to infer the format based on the first - datetime string. Provides a large speed-up in many cases. + If True and no `format` is given, attempt to infer the format of the + datetime strings, and if it can be inferred, switch to a faster + method of parsing them. In some cases this can increase the parsing + speed by ~5-10x. Returns ------- @@ -273,6 +275,19 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 99 2000-04-09 Length: 100, dtype: datetime64[ns] + Infer the format from the first entry + + >>> pd.to_datetime(df.month + '/' + df.day + '/' + df.year, + ... infer_datetime_format=True) + 0 2000-01-01 + 1 2000-01-02 + ... 
+ 98 2000-04-08 + 99 2000-04-09 + + This gives the same result as omitting `infer_datetime_format=True`, + but is much faster. + Date that does not meet timestamp limitations: >>> pd.to_datetime('13000101', format='%Y%m%d')