From 20fae8be04e9b7fd4965b5536255cfa1e161042a Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 4 Apr 2019 21:08:53 +0300
Subject: [PATCH 1/9] added new cache_dates parameter for read_csv func

---
 asv_bench/benchmarks/io/csv.py | 20 ++++++++++++++++++++
 pandas/io/parsers.py           | 28 +++++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index add0943c5ce9f..0be5fcc8d94c9 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -4,6 +4,7 @@
 import numpy as np
 import pandas.util.testing as tm
 from pandas import DataFrame, Categorical, date_range, read_csv
+from pandas.io.parsers import _parser_defaults
 from io import StringIO
 
 from ..pandas_vb_common import BaseIO
@@ -232,6 +233,25 @@ def time_baseline(self):
                  names=list(string.digits[:9]))
 
 
+class ReadCSVCachedParseDates(StringIORewind):
+    params = ([True, False],)
+    param_names = ['do_cache']
+
+    def setup(self, do_cache):
+        data = ('\n'.join('10/{}'.format(year)
+                for year in range(2000, 2100)) + '\n') * 10
+        self.StringIO_input = StringIO(data)
+
+    def time_read_csv_cached(self, do_cache):
+        # kwds setting here is used to avoid breaking tests in
+        # previuos version of pandas, because this is api changes
+        kwds = {}
+        if 'cache_dates' in _parser_defaults:
+            kwds['cache_dates'] = do_cache
+        read_csv(self.data(self.StringIO_input), header=None,
+                 parse_dates=[0], **kwds)
+
+
 class ReadCSVMemoryGrowth(BaseIO):
 
     chunksize = 20
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 701d4bb58bb08..fa017c69e8e45 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -327,6 +327,12 @@
     values. The options are `None` for the ordinary converter,
     `high` for the high-precision converter, and `round_trip` for the
     round-trip converter.
+cache_dates : boolean, default False
+    If True, use a cache of unique, converted dates to apply the datetime
+    conversion. May produce significant speed-up when parsing duplicate
+    date strings, especially ones with timezone offsets.
+
+    .. versionadded:: 0.23.0
 
 Returns
 -------
@@ -476,6 +482,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
     'false_values': None,
     'converters': None,
     'dtype': None,
+    'cache_dates': False,
 
     'thousands': None,
     'comment': None,
@@ -577,6 +584,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
                  keep_date_col=False,
                  date_parser=None,
                  dayfirst=False,
+                 cache_dates=False,
 
                  # Iteration
                  iterator=False,
@@ -683,6 +691,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
                     keep_date_col=keep_date_col,
                     dayfirst=dayfirst,
                     date_parser=date_parser,
+                    cache_dates=cache_dates,
 
                     nrows=nrows,
                     iterator=iterator,
@@ -1379,11 +1388,13 @@ def __init__(self, kwds):
         self.tupleize_cols = kwds.get('tupleize_cols', False)
         self.mangle_dupe_cols = kwds.get('mangle_dupe_cols', True)
         self.infer_datetime_format = kwds.pop('infer_datetime_format', False)
+        self.cache_dates = kwds.pop('cache_dates', False)
 
         self._date_conv = _make_date_converter(
             date_parser=self.date_parser,
             dayfirst=self.dayfirst,
-            infer_datetime_format=self.infer_datetime_format
+            infer_datetime_format=self.infer_datetime_format,
+            cache_dates = self.cache_dates
         )
 
         # validate header options for mi
@@ -3173,7 +3184,7 @@ def _get_lines(self, rows=None):
 
 
 def _make_date_converter(date_parser=None, dayfirst=False,
-                         infer_datetime_format=False):
+                         infer_datetime_format=False, cache_dates=False):
     def converter(*date_cols):
         if date_parser is None:
             strs = _concat_date_cols(date_cols)
@@ -3184,16 +3195,22 @@ def converter(*date_cols):
                     utc=None,
                     dayfirst=dayfirst,
                     errors='ignore',
-                    infer_datetime_format=infer_datetime_format
+                    infer_datetime_format=infer_datetime_format,
+                    cache=cache_dates
                 ).to_numpy()
 
             except ValueError:
                 return tools.to_datetime(
-                    parsing.try_parse_dates(strs, dayfirst=dayfirst))
+                    parsing.try_parse_dates(strs, dayfirst=dayfirst),
+                    cache=cache_dates
+                )
         else:
             try:
                 result = tools.to_datetime(
-                    date_parser(*date_cols), errors='ignore')
+                    date_parser(*date_cols),
+                    errors='ignore',
+                    cache=cache_dates
+                )
                 if isinstance(result, datetime.datetime):
                     raise Exception('scalar parser')
                 return result
@@ -3203,6 +3220,7 @@ def converter(*date_cols):
                         parsing.try_parse_dates(_concat_date_cols(date_cols),
                                                 parser=date_parser,
                                                 dayfirst=dayfirst),
+                        cache=cache_dates,
                         errors='ignore')
                 except Exception:
                     return generic_parser(date_parser, *date_cols)

From 028b55ddf3deb57637e16e166625f858a3e65ec1 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 4 Apr 2019 21:17:42 +0300
Subject: [PATCH 2/9] fix PEP 8 issues

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index fa017c69e8e45..f1f9fd330a95d 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1394,7 +1394,7 @@ def __init__(self, kwds):
             date_parser=self.date_parser,
             dayfirst=self.dayfirst,
             infer_datetime_format=self.infer_datetime_format,
-            cache_dates = self.cache_dates
+            cache_dates=self.cache_dates
         )
 
         # validate header options for mi

From f83c65c403574384372ea4259b32e19fa4288e87 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 4 Apr 2019 22:11:33 +0300
Subject: [PATCH 3/9] changed default value of cache_dates var to true

---
 pandas/io/parsers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f1f9fd330a95d..f76c4f59d022d 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -327,7 +327,7 @@
     values. The options are `None` for the ordinary converter,
     `high` for the high-precision converter, and `round_trip` for the
     round-trip converter.
-cache_dates : boolean, default False
+cache_dates : boolean, default True
     If True, use a cache of unique, converted dates to apply the datetime
     conversion. May produce significant speed-up when parsing duplicate
     date strings, especially ones with timezone offsets.
@@ -482,7 +482,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
     'false_values': None,
     'converters': None,
     'dtype': None,
-    'cache_dates': False,
+    'cache_dates': True,
 
     'thousands': None,
     'comment': None,
@@ -584,7 +584,7 @@ def parser_f(filepath_or_buffer: FilePathOrBuffer,
                  keep_date_col=False,
                  date_parser=None,
                  dayfirst=False,
-                 cache_dates=False,
+                 cache_dates=True,
 
                  # Iteration
                  iterator=False,
@@ -1388,7 +1388,7 @@ def __init__(self, kwds):
         self.tupleize_cols = kwds.get('tupleize_cols', False)
         self.mangle_dupe_cols = kwds.get('mangle_dupe_cols', True)
         self.infer_datetime_format = kwds.pop('infer_datetime_format', False)
-        self.cache_dates = kwds.pop('cache_dates', False)
+        self.cache_dates = kwds.pop('cache_dates', True)
 
         self._date_conv = _make_date_converter(
             date_parser=self.date_parser,
@@ -3184,7 +3184,7 @@ def _get_lines(self, rows=None):
 
 
 def _make_date_converter(date_parser=None, dayfirst=False,
-                         infer_datetime_format=False, cache_dates=False):
+                         infer_datetime_format=False, cache_dates=True):
     def converter(*date_cols):
         if date_parser is None:
             strs = _concat_date_cols(date_cols)

From 4da942d5561ecdcc56d1f01f79481b7a153c2a22 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 4 Apr 2019 22:20:11 +0300
Subject: [PATCH 4/9] added whatsnew entry

---
 doc/source/whatsnew/v0.25.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 1f4176b18c2e0..8e7fbd86aebc4 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -216,6 +216,7 @@ Other API Changes
 - Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`)
 - Bug in :meth:`DatetimeIndex.snap` which didn't preserving the ``name`` of the input :class:`Index` (:issue:`25575`)
 - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`)
+- Added ``cache_dates`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`)
 
 .. _whatsnew_0250.deprecations:
 

From 995d8d550b9d4e890a19ef22f2e2192cc3681a96 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 4 Apr 2019 22:37:39 +0300
Subject: [PATCH 5/9] move cache_dates var in docstring to the right place

---
 pandas/io/parsers.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f76c4f59d022d..4d19c95f97696 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -235,6 +235,12 @@
     arguments.
 dayfirst : bool, default False
     DD/MM format dates, international and European format.
+cache_dates : bool, default True
+    If True, use a cache of unique, converted dates to apply the datetime
+    conversion. May produce significant speed-up when parsing duplicate
+    date strings, especially ones with timezone offsets.
+
+    .. versionadded:: 0.23.0
 iterator : bool, default False
     Return TextFileReader object for iteration or getting chunks with
     ``get_chunk()``.
@@ -327,12 +333,6 @@
     values. The options are `None` for the ordinary converter,
     `high` for the high-precision converter, and `round_trip` for the
     round-trip converter.
-cache_dates : boolean, default True
-    If True, use a cache of unique, converted dates to apply the datetime
-    conversion. May produce significant speed-up when parsing duplicate
-    date strings, especially ones with timezone offsets.
-
-    .. versionadded:: 0.23.0
 
 Returns
 -------

From 07575a2aaeb13731f9766dce426ddbbb623f2794 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Thu, 4 Apr 2019 23:30:37 +0300
Subject: [PATCH 6/9] fix wrong pandas version in docstring

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 4d19c95f97696..f85863c4384bd 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -240,7 +240,7 @@
     conversion. May produce significant speed-up when parsing duplicate
     date strings, especially ones with timezone offsets.
 
-    .. versionadded:: 0.23.0
+    .. versionadded:: 0.25.0
 iterator : bool, default False
     Return TextFileReader object for iteration or getting chunks with
     ``get_chunk()``.

From f3186a257fa1f576c787f15f4425858e264b1607 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Fri, 5 Apr 2019 00:17:54 +0300
Subject: [PATCH 7/9] added cache_dates argument for read_csv func in
 doc/source/user_guide/io.rst

---
 doc/source/user_guide/io.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 9ec39c0ff2b23..0abd073c7dc07 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -271,6 +271,12 @@ date_parser : function, default ``None``
   (corresponding to the columns defined by parse_dates) as arguments.
 dayfirst : boolean, default ``False``
   DD/MM format dates, international and European format.
+cache_dates : boolean, default ``True``
+  If True, use a cache of unique, converted dates to apply the datetime
+  conversion. May produce significant speed-up when parsing duplicate
+  date strings, especially ones with timezone offsets.
+
+  .. versionadded:: 0.25.0
 
 Iteration
 +++++++++

From fba9407366dbf01992b0781d7e8856c51eadd077 Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Wed, 10 Apr 2019 08:54:23 +0300
Subject: [PATCH 8/9] fixed typo; wrote 'cache_dates=True' in
 doc/source/whatsnew/v0.25.0.rst

---
 asv_bench/benchmarks/io/csv.py  | 2 +-
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 0be5fcc8d94c9..c51fb09ad8671 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -244,7 +244,7 @@ def setup(self, do_cache):
 
     def time_read_csv_cached(self, do_cache):
         # kwds setting here is used to avoid breaking tests in
-        # previuos version of pandas, because this is api changes
+        # previous versions of pandas, because this is an API change
         kwds = {}
         if 'cache_dates' in _parser_defaults:
             kwds['cache_dates'] = do_cache
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 8e7fbd86aebc4..7e88186a3302b 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -216,7 +216,7 @@ Other API Changes
 - Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`)
 - Bug in :meth:`DatetimeIndex.snap` which didn't preserving the ``name`` of the input :class:`Index` (:issue:`25575`)
 - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`)
-- Added ``cache_dates`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`)
+- Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`)
 
 .. _whatsnew_0250.deprecations:
 

From 687d0c4c738a876d962d2cad634c7d000e86c0dd Mon Sep 17 00:00:00 2001
From: Anatoly Myachev <anatoly.myachev@intel.com>
Date: Sun, 28 Apr 2019 21:17:31 +0300
Subject: [PATCH 9/9] replaced information about cache_dates=True in IO section

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 7e88186a3302b..3e1ce702f0423 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -216,7 +216,6 @@ Other API Changes
 - Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`)
 - Bug in :meth:`DatetimeIndex.snap` which didn't preserving the ``name`` of the input :class:`Index` (:issue:`25575`)
 - The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`)
-- Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`)
 
 .. _whatsnew_0250.deprecations:
 
@@ -376,6 +375,7 @@ I/O
 - Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. (:issue:`26104`)
 - Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`)
 - Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`)
+- Added ``cache_dates=True`` parameter to :func:`read_csv`, which allows caching of unique dates when they are parsed (:issue:`25990`)
 
 Plotting
 ^^^^^^^^