From d3ce9a147b0b769ba3815c97e28a35836c5c72a8 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Sun, 13 Mar 2016 12:26:19 +0000 Subject: [PATCH 1/4] Improved docs infer_datetime_format Fixes #12152 --- pandas/io/parsers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2604b6e0784cf..244a4f40727e3 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -132,8 +132,10 @@ class ParserWarning(Warning): Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default False - If True and parse_dates is enabled for a column, attempt to infer - the datetime format to speed up the processing + If parse_dates is enabled and this flag is set, pandas will attempt to infer + the format of the datetime strings in the columns, and if it can be + inferred, switch to a faster method of parsing them. In some cases this + can increase the parsing speed by ~5-10x. keep_date_col : boolean, default False If True and parse_dates specifies combining multiple columns then keep the original columns. From 19fd9d4f22f234aad23b16110cdc4ceafa0771c3 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Sun, 13 Mar 2016 15:04:49 +0000 Subject: [PATCH 2/4] Fix linter error --- pandas/io/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 244a4f40727e3..8cd822e81e9ab 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -132,8 +132,8 @@ class ParserWarning(Warning): Note: A fast-path exists for iso8601-formatted dates. 
infer_datetime_format : boolean, default False - If parse_dates is enabled and this flag is set, pandas will attempt to infer - the format of the datetime strings in the columns, and if it can be + If parse_dates is enabled and this flag is set, pandas will attempt to + infer the format of the datetime strings in the columns, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. keep_date_col : boolean, default False From 50311cef05596e2a4d151c5ffa8e4913f1bfd902 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Sun, 13 Mar 2016 18:36:03 +0000 Subject: [PATCH 3/4] Updated docs --- pandas/io/parsers.py | 8 ++++---- pandas/tseries/tools.py | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8cd822e81e9ab..fa9a5cf12570d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -132,10 +132,10 @@ class ParserWarning(Warning): Note: A fast-path exists for iso8601-formatted dates. infer_datetime_format : boolean, default False - If parse_dates is enabled and this flag is set, pandas will attempt to - infer the format of the datetime strings in the columns, and if it can be - inferred, switch to a faster method of parsing them. In some cases this - can increase the parsing speed by ~5-10x. + If True and parse_dates is enabled, pandas will attempt to infer the format + of the datetime strings in the columns, and if it can be inferred, switch + to a faster method of parsing them. In some cases this can increase the + parsing speed by ~5-10x. keep_date_col : boolean, default False If True and parse_dates specifies combining multiple columns then keep the original columns. 
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d92cfef5280fc..28404fd4447d1 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -231,8 +231,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch (e.g. a unix timestamp), which is an integer/float number. infer_datetime_format : boolean, default False - If no `format` is given, try to infer the format based on the first - datetime string. Provides a large speed-up in many cases. + If True and no `format` is given, attempt to infer the format of the + datetime strings, and if it can be inferred, switch to a faster + method of parsing them. In some cases this can increase the parsing + speed by ~5-10x. Returns ------- From 4dbb8ec3393c352fff61619c2e88a1b4211b8386 Mon Sep 17 00:00:00 2001 From: Robin Wilson Date: Sun, 13 Mar 2016 18:55:41 +0000 Subject: [PATCH 4/4] Added example to docs --- pandas/tseries/tools.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 28404fd4447d1..f9df0d082f2ff 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -275,6 +275,19 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 99 2000-04-09 Length: 100, dtype: datetime64[ns] + Infer the format from the first entry: + + >>> pd.to_datetime(df.month + '/' + df.day + '/' + df.year, + ... infer_datetime_format=True) + 0 2000-01-01 + 1 2000-01-02 + ... + 98 2000-04-08 + 99 2000-04-09 + + This gives the same results as omitting `infer_datetime_format=True`, + but is much faster. + Date that does not meet timestamp limitations: >>> pd.to_datetime('13000101', format='%Y%m%d')