From 518ab4707cc6aa41e998c72bd57c10e20877b00c Mon Sep 17 00:00:00 2001 From: Ming Li Date: Mon, 19 Feb 2018 16:12:21 +0800 Subject: [PATCH 01/39] add require_iso8601 parameter and documentation in dataframe method iterrows --- pandas/core/frame.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bc045d74cee52..81240f2462566 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -710,10 +710,16 @@ def iteritems(self): for i, k in enumerate(self.columns): yield k, self._ixs(i, axis=1) - def iterrows(self): + def iterrows(self, require_iso8601=False): """ Iterate over DataFrame rows as (index, Series) pairs. + Parameters + ---------- + require_iso8601 : boolean, default False + If True, only try to infer ISO8601-compliant datetime string in + iterated rows. + Notes ----- @@ -755,9 +761,13 @@ def iterrows(self): columns = self.columns klass = self._constructor_sliced for k, v in zip(self.index, self.values): - s = klass(v, index=columns, name=k) + s = klass(v, + index=columns, + name=k, + require_iso8601=require_iso8601) yield k, s + def itertuples(self, index=True, name="Pandas"): """ Iterate over DataFrame rows as namedtuples, with index value as first From 156adbbcc455e72cdd3162a92101a2241ee33a8c Mon Sep 17 00:00:00 2001 From: Ming Li Date: Mon, 19 Feb 2018 16:22:18 +0800 Subject: [PATCH 02/39] remove blank line --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 81240f2462566..bb4d3e9b19a77 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -767,7 +767,6 @@ def iterrows(self, require_iso8601=False): require_iso8601=require_iso8601) yield k, s - def itertuples(self, index=True, name="Pandas"): """ Iterate over DataFrame rows as namedtuples, with index value as first From 6d06cf1256af6caf6367b15083c8a8b46d019ccf Mon Sep 17 00:00:00 2001 From: Ming Li Date: Mon, 19 Feb 2018 17:00:56 +0800 
Subject: [PATCH 03/39] expose require_iso8601 parameter --- pandas/core/dtypes/cast.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 55919fb2bea0d..65e949a6dcaac 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -860,7 +860,9 @@ def maybe_castable(arr): return arr.dtype.name not in _POSSIBLY_CAST_DTYPES -def maybe_infer_to_datetimelike(value, convert_dates=False): +def maybe_infer_to_datetimelike(value, + convert_dates=False, + require_iso8601=False): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -875,6 +877,8 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): convert_dates : boolean, default False if True try really hard to convert dates (such as datetime.date), other leave inferred dtype 'date' alone + require_iso8601 : boolean, default False + If True, only try to infer ISO8601-compliant datetime string. """ @@ -901,18 +905,19 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): if not len(v): return value - def try_datetime(v): + def try_datetime(v, require_iso8601=require_iso8601): # safe coerce to datetime64 try: - v = tslib.array_to_datetime(v, errors='raise') + v = tslib.array_to_datetime(v, + require_iso8601=require_iso8601, + errors='raise') except ValueError: - # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype try: from pandas import to_datetime - return to_datetime(v) + return to_datetime(v, require_iso8601=require_iso8601) except Exception: pass @@ -933,6 +938,7 @@ def try_timedelta(v): inferred_type = lib.infer_datetimelike_array(_ensure_object(v)) + # TODO if any column is datetime and others are numeric or object, the inferred_type will be datetime. 
if inferred_type == 'date' and convert_dates: value = try_datetime(v) elif inferred_type == 'datetime': @@ -957,7 +963,8 @@ def try_timedelta(v): return value -def maybe_cast_to_datetime(value, dtype, errors='raise'): +def maybe_cast_to_datetime(value, dtype, require_iso8601=False, + errors='raise'): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -1074,7 +1081,9 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): # conversion elif not (is_array and not (issubclass(value.dtype.type, np.integer) or value.dtype == np.object_)): - value = maybe_infer_to_datetimelike(value) + value = \ + maybe_infer_to_datetimelike(value, + require_iso8601=require_iso8601) return value From f2617ddf53112a3833f711a02eff96f9b128bd78 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Mon, 19 Feb 2018 17:01:57 +0800 Subject: [PATCH 04/39] expose require_iso8601 parameter --- pandas/core/series.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 655eaa5373f5a..a770e2c84ae7d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -146,7 +146,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): 'from_csv', 'valid']) def __init__(self, data=None, index=None, dtype=None, name=None, - copy=False, fastpath=False): + copy=False, fastpath=False, require_iso8601=False): # we are called internally, so short-circuit if fastpath: @@ -236,7 +236,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None, data = data.copy() else: data = _sanitize_array(data, index, dtype, copy, - raise_cast_failure=True) + raise_cast_failure=True, + require_iso8601=require_iso8601) data = SingleBlockManager(data, index, fastpath=True) @@ -3129,7 +3130,7 @@ def _sanitize_index(data, index, copy=False): def _sanitize_array(data, index, dtype=None, copy=False, - raise_cast_failure=False): + raise_cast_failure=False, require_iso8601=False): """ sanitize input data 
to an ndarray, copy if specified, coerce to the dtype if specified """ @@ -3145,7 +3146,7 @@ def _sanitize_array(data, index, dtype=None, copy=False, else: data = data.copy() - def _try_cast(arr, take_fast_path): + def _try_cast(arr, take_fast_path, require_iso8601=require_iso8601): # perf shortcut as this is the most common case if take_fast_path: @@ -3153,7 +3154,9 @@ def _try_cast(arr, take_fast_path): return arr try: - subarr = maybe_cast_to_datetime(arr, dtype) + subarr = maybe_cast_to_datetime(arr, + dtype, + require_iso8601=require_iso8601) if not is_extension_type(subarr): subarr = np.array(subarr, dtype=dtype, copy=copy) except (ValueError, TypeError): @@ -3211,7 +3214,9 @@ def _try_cast(arr, take_fast_path): else: subarr = maybe_convert_platform(data) - subarr = maybe_cast_to_datetime(subarr, dtype) + subarr = maybe_cast_to_datetime(subarr, + dtype, + require_iso8601=require_iso8601) elif isinstance(data, range): # GH 16804 @@ -3233,7 +3238,9 @@ def _try_cast(arr, take_fast_path): dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here - value = maybe_cast_to_datetime(value, dtype) + value = maybe_cast_to_datetime(subarr, + dtype, + require_iso8601=require_iso8601) subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) From 09ae4e5fb6df0c0d28e7e577bdcf1962987aaab3 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Mon, 19 Feb 2018 17:02:55 +0800 Subject: [PATCH 05/39] expose require_8601 parameter --- pandas/core/tools/datetimes.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1de43116d0b49..e47839c21653c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -105,8 +105,8 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, - unit=None, 
infer_datetime_format=False, origin='unix', - cache=False): + unit=None, infer_datetime_format=False, require_iso8601=False, + origin='unix', cache=False): """ Convert argument to datetime. @@ -167,6 +167,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, datetime strings, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. + require_iso8601 : boolean, default False + If True, only try to infer ISO8601-compliant datetime string. origin : scalar, default is 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. @@ -273,7 +275,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, tz = 'utc' if utc else None - def _convert_listlike(arg, box, format, name=None, tz=tz): + def _convert_listlike(arg, box, format, name=None, tz=tz, + require_iso8601=require_iso8601): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -313,11 +316,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): '1-d array, or Series') arg = _ensure_object(arg) - require_iso8601 = False - if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) - if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred From 7ea24ecb7e4755cb0c87f84b62e9bdebbe51f859 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Mon, 19 Feb 2018 17:20:42 +0800 Subject: [PATCH 06/39] remove redundant TODO --- pandas/core/dtypes/cast.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 65e949a6dcaac..96347d2d95985 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -938,7 +938,6 @@ def try_timedelta(v): inferred_type = lib.infer_datetimelike_array(_ensure_object(v)) - # TODO if any column is 
datetime and others are numeric or object, the inferred_type will be datetime. if inferred_type == 'date' and convert_dates: value = try_datetime(v) elif inferred_type == 'datetime': From fac665b0cf1176484f7ef227eb265028055d0d8f Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 02:20:51 +0800 Subject: [PATCH 07/39] revert pandas.core.frame --- pandas/core/frame.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb4d3e9b19a77..bc045d74cee52 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -710,16 +710,10 @@ def iteritems(self): for i, k in enumerate(self.columns): yield k, self._ixs(i, axis=1) - def iterrows(self, require_iso8601=False): + def iterrows(self): """ Iterate over DataFrame rows as (index, Series) pairs. - Parameters - ---------- - require_iso8601 : boolean, default False - If True, only try to infer ISO8601-compliant datetime string in - iterated rows. - Notes ----- @@ -761,10 +755,7 @@ def iterrows(self, require_iso8601=False): columns = self.columns klass = self._constructor_sliced for k, v in zip(self.index, self.values): - s = klass(v, - index=columns, - name=k, - require_iso8601=require_iso8601) + s = klass(v, index=columns, name=k) yield k, s def itertuples(self, index=True, name="Pandas"): From 068fde21f3a628efdeb9a98c2219b445d7594ff0 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 02:23:46 +0800 Subject: [PATCH 08/39] revert pandas.core.series --- pandas/core/series.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a770e2c84ae7d..655eaa5373f5a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -146,7 +146,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): 'from_csv', 'valid']) def __init__(self, data=None, index=None, dtype=None, name=None, - copy=False, fastpath=False, require_iso8601=False): + copy=False, 
fastpath=False): # we are called internally, so short-circuit if fastpath: @@ -236,8 +236,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, data = data.copy() else: data = _sanitize_array(data, index, dtype, copy, - raise_cast_failure=True, - require_iso8601=require_iso8601) + raise_cast_failure=True) data = SingleBlockManager(data, index, fastpath=True) @@ -3130,7 +3129,7 @@ def _sanitize_index(data, index, copy=False): def _sanitize_array(data, index, dtype=None, copy=False, - raise_cast_failure=False, require_iso8601=False): + raise_cast_failure=False): """ sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified """ @@ -3146,7 +3145,7 @@ def _sanitize_array(data, index, dtype=None, copy=False, else: data = data.copy() - def _try_cast(arr, take_fast_path, require_iso8601=require_iso8601): + def _try_cast(arr, take_fast_path): # perf shortcut as this is the most common case if take_fast_path: @@ -3154,9 +3153,7 @@ def _try_cast(arr, take_fast_path, require_iso8601=require_iso8601): return arr try: - subarr = maybe_cast_to_datetime(arr, - dtype, - require_iso8601=require_iso8601) + subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): subarr = np.array(subarr, dtype=dtype, copy=copy) except (ValueError, TypeError): @@ -3214,9 +3211,7 @@ def _try_cast(arr, take_fast_path, require_iso8601=require_iso8601): else: subarr = maybe_convert_platform(data) - subarr = maybe_cast_to_datetime(subarr, - dtype, - require_iso8601=require_iso8601) + subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH 16804 @@ -3238,9 +3233,7 @@ def _try_cast(arr, take_fast_path, require_iso8601=require_iso8601): dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here - value = maybe_cast_to_datetime(subarr, - dtype, - require_iso8601=require_iso8601) + value = maybe_cast_to_datetime(value, dtype) subarr = construct_1d_arraylike_from_scalar( value, 
len(index), dtype) From 8ceeb62982590f675890ca60772969034286d900 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 02:30:36 +0800 Subject: [PATCH 09/39] update documentation for typo and versionadded tag --- pandas/core/tools/datetimes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e47839c21653c..fd7a26165b9b8 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -168,7 +168,9 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, method of parsing them. In some cases this can increase the parsing speed by ~5-10x. require_iso8601 : boolean, default False - If True, only try to infer ISO8601-compliant datetime string. + If True, only try to infer ISO8601-compliant datetime strings. + + .. versionadded:: 0.23.0 origin : scalar, default is 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. 
From d105732b207477c9ae507bc6826dac0015c87ac1 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 02:44:16 +0800 Subject: [PATCH 10/39] change default behaviour to require iso8601 and revert unnecessary changes --- pandas/core/dtypes/cast.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 96347d2d95985..36fd1b7c83135 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -860,9 +860,7 @@ def maybe_castable(arr): return arr.dtype.name not in _POSSIBLY_CAST_DTYPES -def maybe_infer_to_datetimelike(value, - convert_dates=False, - require_iso8601=False): +def maybe_infer_to_datetimelike(value, convert_dates=False): """ we might have a array (or single object) that is datetime like, and no dtype is passed don't change the value unless we find a @@ -877,8 +875,6 @@ def maybe_infer_to_datetimelike(value, convert_dates : boolean, default False if True try really hard to convert dates (such as datetime.date), other leave inferred dtype 'date' alone - require_iso8601 : boolean, default False - If True, only try to infer ISO8601-compliant datetime string. 
""" @@ -905,19 +901,20 @@ def maybe_infer_to_datetimelike(value, if not len(v): return value - def try_datetime(v, require_iso8601=require_iso8601): + def try_datetime(v): # safe coerce to datetime64 try: v = tslib.array_to_datetime(v, - require_iso8601=require_iso8601, + require_iso8601=True, errors='raise') except ValueError: + # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype try: from pandas import to_datetime - return to_datetime(v, require_iso8601=require_iso8601) + return to_datetime(v, require_iso8601=True) except Exception: pass @@ -962,8 +959,7 @@ def try_timedelta(v): return value -def maybe_cast_to_datetime(value, dtype, require_iso8601=False, - errors='raise'): +def maybe_cast_to_datetime(value, dtype, errors='raise'): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ @@ -1080,9 +1076,7 @@ def maybe_cast_to_datetime(value, dtype, require_iso8601=False, # conversion elif not (is_array and not (issubclass(value.dtype.type, np.integer) or value.dtype == np.object_)): - value = \ - maybe_infer_to_datetimelike(value, - require_iso8601=require_iso8601) + value = maybe_infer_to_datetimelike(value) return value From 26fd14f3601629ace151df9237bc08148875b40a Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 03:53:30 +0800 Subject: [PATCH 11/39] add whatsnew documentation for `require_iso8601` parameter in to_datetime --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 72f63a4da0f4d..78120d00048cd 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -323,6 +323,7 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods 
(``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- :func:`to_datetime` has gained ``require_iso8601`` parameter to mandate ISO8601-compliant datetime string conversion. (:issue:`19671`) .. _whatsnew_0230.api_breaking: From ab5214a022a7da027cd26ed6a5552baac88c04fc Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 03:55:18 +0800 Subject: [PATCH 12/39] new test case in test_maybe_infer_to_datetimelike for non-iso8601 string conversion during construction --- pandas/tests/dtypes/test_cast.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index d13d781f03117..a60640373109d 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -299,6 +299,9 @@ def test_maybe_infer_to_datetimelike(self): result = DataFrame(np.array([[NaT, 'a', 0], [NaT, 'b', 1]])) assert result.size == 6 + # GH19671 + result = Series(['M1701', Timestamp('20130101')]) + assert result.dtype.kind == 'O' class TestConvert(object): From 37aa8dd3661bf21b7077fec405ce2a775d0498ff Mon Sep 17 00:00:00 2001 From: Ming Li Date: Wed, 21 Feb 2018 03:55:46 +0800 Subject: [PATCH 13/39] comment with issue number --- pandas/core/dtypes/cast.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 36fd1b7c83135..240721b2bcc8a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -904,6 +904,7 @@ def maybe_infer_to_datetimelike(value, convert_dates=False): def try_datetime(v): # safe coerce to datetime64 try: + # GH19671 v = tslib.array_to_datetime(v, require_iso8601=True, errors='raise') @@ -914,6 +915,7 @@ def try_datetime(v): # then these stay as object dtype try: from pandas import to_datetime + # GH19671 return to_datetime(v, require_iso8601=True) except Exception: pass From 389a9d958dbda315738e92589f41540229bfa5aa Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 22 
Feb 2018 03:25:35 +0800 Subject: [PATCH 14/39] example for to_datetime api --- pandas/core/tools/datetimes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index fd7a26165b9b8..92e37fb48e3d1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -250,6 +250,23 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop + Passing require_iso8601=True will only parse ISO8601-compliant datetime + strings and treat others as non-parseable dates. + + >>> s = pd.Series(['M1809', 'M1701', pd.Timestamp('20130101')]) + + >>> pd.to_datetime(s, require_iso8601=False, errors='raise') + 0 1809-01-01 + 1 1701-01-01 + 2 2013-01-01 + dtype: datetime64[ns] + + >>> pd.to_datetime(s, require_iso8601=True, errors='coerce') + 0 NaT + 1 NaT + 2 2013-01-01 + dtype: datetime64[ns] + Using a unix epoch time >>> pd.to_datetime(1490195805, unit='s') From 959ae622f5b7494062535d72ee3837cef38fffd7 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 22 Feb 2018 22:00:02 +0800 Subject: [PATCH 15/39] reference to iso8601 standard --- pandas/core/tools/datetimes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 92e37fb48e3d1..ed48dfd9a4dbe 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -250,8 +250,9 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop - Passing require_iso8601=True will only parse ISO8601-compliant datetime - strings and treat others as non-parseable dates. + Passing require_iso8601=True will only parse datetime strings compliant + to `ISO8601`_ and treat others + as non-parseable dates. 
>>> s = pd.Series(['M1809', 'M1701', pd.Timestamp('20130101')]) From 700fa38de104d8bd415c6938fad5b7d7723316fc Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 22 Feb 2018 22:20:39 +0800 Subject: [PATCH 16/39] blank line before issue comment --- pandas/tests/dtypes/test_cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index a60640373109d..c92694027c663 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -299,6 +299,7 @@ def test_maybe_infer_to_datetimelike(self): result = DataFrame(np.array([[NaT, 'a', 0], [NaT, 'b', 1]])) assert result.size == 6 + # GH19671 result = Series(['M1701', Timestamp('20130101')]) assert result.dtype.kind == 'O' From f8159c2c6f5d948ccf68b402d86e383971e19ecb Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 22 Feb 2018 22:55:35 +0800 Subject: [PATCH 17/39] test datetime require iso8601 parameter --- pandas/tests/indexes/datetimes/test_tools.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b5926933544e8..8ed71c7b56040 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -467,6 +467,22 @@ def test_datetime_bool(self, cache): NaT, to_datetime(0, cache=cache)])) + def test_datetime_require_iso8601(self): + # GH19671 + s = Series(['M1809', 'M1701', Timestamp('20130101')]) + with pytest.raises(ValueError): + to_datetime(s, require_iso8601=True, errors='raise') + tm.assert_series_equal( + to_datetime(s, require_iso8601=True, errors='ignore'), + Series(['M1809', 'M1701', Timestamp('20130101')])) + tm.assert_series_equal( + to_datetime(s, require_iso8601=True, errors='coerce'), + Series([NaT, NaT, Timestamp('20130101')])) + tm.assert_series_equal( + to_datetime(s, require_iso8601=False, errors='raise'), + Series([Timestamp('18090101'), Timestamp('17010101'), + 
Timestamp('20130101')])) + def test_datetime_invalid_datatype(self): # GH13176 From 3708f4b05a4a6b6f6c4cb8af5128352cae9fe990 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 22 Feb 2018 23:11:41 +0800 Subject: [PATCH 18/39] add wikipedia reference to ISO 8601 standard --- pandas/core/tools/datetimes.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ed48dfd9a4dbe..de9da0ccaca6e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -250,9 +250,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop - Passing require_iso8601=True will only parse datetime strings compliant - to `ISO8601`_ and treat others - as non-parseable dates. + Passing require_iso8601=True will only parse datetime strings compliant to + `ISO8601 `_ and treat others as non-parseable dates. >>> s = pd.Series(['M1809', 'M1701', pd.Timestamp('20130101')]) From cb798d2964911e58c13e7c5d432dd306d8f697aa Mon Sep 17 00:00:00 2001 From: Ming Li Date: Thu, 22 Feb 2018 23:12:15 +0800 Subject: [PATCH 19/39] add wikipedia reference to ISO 8601 standard --- pandas/core/tools/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index de9da0ccaca6e..66c0510cdf5f7 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -251,7 +251,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 loop, best of 3: 471 ms per loop Passing require_iso8601=True will only parse datetime strings compliant to - `ISO8601 `_ and treat others as non-parseable dates. + `ISO8601 `_ and treat others as + non-parseable dates. 
>>> s = pd.Series(['M1809', 'M1701', pd.Timestamp('20130101')]) From 2e27f224512026dfc1eea53da052b34bee9e370b Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 23 Feb 2018 00:16:24 +0800 Subject: [PATCH 20/39] fix url --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 66c0510cdf5f7..26928842c27b0 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -251,7 +251,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 loop, best of 3: 471 ms per loop Passing require_iso8601=True will only parse datetime strings compliant to - `ISO8601 `_ and treat others as + `ISO8601 `_ and treat others as non-parseable dates. >>> s = pd.Series(['M1809', 'M1701', pd.Timestamp('20130101')]) From 8384d5e88fe1560149a8c3d85a5d16392d90bba3 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 23 Feb 2018 21:18:21 +0800 Subject: [PATCH 21/39] private argument _require_iso8601 and remove example and param doc --- pandas/core/tools/datetimes.py | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 26928842c27b0..56a49464298a5 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -105,11 +105,17 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, - unit=None, infer_datetime_format=False, require_iso8601=False, - origin='unix', cache=False): + unit=None, infer_datetime_format=False, origin='unix', + cache=False, _require_iso8601=False): """ Convert argument to datetime. + Passing _require_iso8601=True will only parse datetime strings similar to + `ISO8601 `_ and treat others as + non-parseable dates. + + .. 
versionadded:: 0.23.0 + Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series @@ -167,10 +173,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, datetime strings, and if it can be inferred, switch to a faster method of parsing them. In some cases this can increase the parsing speed by ~5-10x. - require_iso8601 : boolean, default False - If True, only try to infer ISO8601-compliant datetime strings. - - .. versionadded:: 0.23.0 origin : scalar, default is 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. @@ -250,24 +252,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop - Passing require_iso8601=True will only parse datetime strings compliant to - `ISO8601 `_ and treat others as - non-parseable dates. - - >>> s = pd.Series(['M1809', 'M1701', pd.Timestamp('20130101')]) - - >>> pd.to_datetime(s, require_iso8601=False, errors='raise') - 0 1809-01-01 - 1 1701-01-01 - 2 2013-01-01 - dtype: datetime64[ns] - - >>> pd.to_datetime(s, require_iso8601=True, errors='coerce') - 0 NaT - 1 NaT - 2 2013-01-01 - dtype: datetime64[ns] - Using a unix epoch time >>> pd.to_datetime(1490195805, unit='s') @@ -296,7 +280,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, tz = 'utc' if utc else None def _convert_listlike(arg, box, format, name=None, tz=tz, - require_iso8601=require_iso8601): + require_iso8601=_require_iso8601): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') From 166592242b4e44eb895c1b99875a40bc6722458f Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 23 Feb 2018 21:26:10 +0800 Subject: [PATCH 22/39] remove whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt 
index 72e68d2a829f7..ed93503388893 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -295,7 +295,6 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) - Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) -- :func:`to_datetime` has gained ``require_iso8601`` parameter to mandate ISO8601-compliant datetime string conversion. (:issue:`19671`) - :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) .. _whatsnew_0230.api_breaking: From 21f7c150ff65ffa5563d5c22bb400e83f31e5ae4 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 23 Feb 2018 21:26:31 +0800 Subject: [PATCH 23/39] modified kwarg --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 240721b2bcc8a..51f9954c21022 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -916,7 +916,7 @@ def try_datetime(v): try: from pandas import to_datetime # GH19671 - return to_datetime(v, require_iso8601=True) + return to_datetime(v, _require_iso8601=True) except Exception: pass From 5dc7a37ff0ce9317a2ec44adf23bb54457da304c Mon Sep 17 00:00:00 2001 From: Ming Li Date: Fri, 23 Feb 2018 21:26:46 +0800 Subject: [PATCH 24/39] modified kwarg --- pandas/tests/indexes/datetimes/test_tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 6b06c87608a12..8cb7b766ea2b7 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ 
b/pandas/tests/indexes/datetimes/test_tools.py @@ -471,15 +471,15 @@ def test_datetime_require_iso8601(self): # GH19671 s = Series(['M1809', 'M1701', Timestamp('20130101')]) with pytest.raises(ValueError): - to_datetime(s, require_iso8601=True, errors='raise') + to_datetime(s, _require_iso8601=True, errors='raise') tm.assert_series_equal( - to_datetime(s, require_iso8601=True, errors='ignore'), + to_datetime(s, _require_iso8601=True, errors='ignore'), Series(['M1809', 'M1701', Timestamp('20130101')])) tm.assert_series_equal( - to_datetime(s, require_iso8601=True, errors='coerce'), + to_datetime(s, _require_iso8601=True, errors='coerce'), Series([NaT, NaT, Timestamp('20130101')])) tm.assert_series_equal( - to_datetime(s, require_iso8601=False, errors='raise'), + to_datetime(s, _require_iso8601=False, errors='raise'), Series([Timestamp('18090101'), Timestamp('17010101'), Timestamp('20130101')])) From f9240b5285f5abf611ec31d4aa5827c0f17bb58d Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 24 Feb 2018 00:08:52 +0800 Subject: [PATCH 25/39] use kwargs to hide require_iso8601 --- pandas/core/dtypes/cast.py | 2 +- pandas/core/tools/datetimes.py | 11 +++-------- pandas/tests/indexes/datetimes/test_tools.py | 8 ++++---- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 51f9954c21022..240721b2bcc8a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -916,7 +916,7 @@ def try_datetime(v): try: from pandas import to_datetime # GH19671 - return to_datetime(v, _require_iso8601=True) + return to_datetime(v, require_iso8601=True) except Exception: pass diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 56a49464298a5..730e990499f95 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -106,16 +106,10 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): def to_datetime(arg, errors='raise', dayfirst=False, 
yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', - cache=False, _require_iso8601=False): + cache=False, **kwargs): """ Convert argument to datetime. - Passing _require_iso8601=True will only parse datetime strings similar to - `ISO8601 `_ and treat others as - non-parseable dates. - - .. versionadded:: 0.23.0 - Parameters ---------- arg : integer, float, string, datetime, list, tuple, 1-d array, Series @@ -278,9 +272,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, from pandas.core.indexes.datetimes import DatetimeIndex tz = 'utc' if utc else None + require_iso8601 = kwargs.get('require_iso8601', None) def _convert_listlike(arg, box, format, name=None, tz=tz, - require_iso8601=_require_iso8601): + require_iso8601=require_iso8601): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 8cb7b766ea2b7..6b06c87608a12 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -471,15 +471,15 @@ def test_datetime_require_iso8601(self): # GH19671 s = Series(['M1809', 'M1701', Timestamp('20130101')]) with pytest.raises(ValueError): - to_datetime(s, _require_iso8601=True, errors='raise') + to_datetime(s, require_iso8601=True, errors='raise') tm.assert_series_equal( - to_datetime(s, _require_iso8601=True, errors='ignore'), + to_datetime(s, require_iso8601=True, errors='ignore'), Series(['M1809', 'M1701', Timestamp('20130101')])) tm.assert_series_equal( - to_datetime(s, _require_iso8601=True, errors='coerce'), + to_datetime(s, require_iso8601=True, errors='coerce'), Series([NaT, NaT, Timestamp('20130101')])) tm.assert_series_equal( - to_datetime(s, _require_iso8601=False, errors='raise'), + to_datetime(s, require_iso8601=False, errors='raise'), Series([Timestamp('18090101'), Timestamp('17010101'), 
Timestamp('20130101')])) From 6e670707091d5d5e884ebd2488107a174ef33e17 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 24 Feb 2018 22:51:06 +0800 Subject: [PATCH 26/39] revert core.tools.datetimes --- pandas/core/tools/datetimes.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 730e990499f95..1de43116d0b49 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -106,7 +106,7 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', - cache=False, **kwargs): + cache=False): """ Convert argument to datetime. @@ -272,10 +272,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, from pandas.core.indexes.datetimes import DatetimeIndex tz = 'utc' if utc else None - require_iso8601 = kwargs.get('require_iso8601', None) - def _convert_listlike(arg, box, format, name=None, tz=tz, - require_iso8601=require_iso8601): + def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') @@ -315,8 +313,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz, '1-d array, or Series') arg = _ensure_object(arg) + require_iso8601 = False + if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) + if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred From 9e6e2a7291a228c16a8b0f31e9e47cc6d603f403 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 24 Feb 2018 23:31:59 +0800 Subject: [PATCH 27/39] remove test case --- pandas/tests/indexes/datetimes/test_tools.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git 
a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 6b06c87608a12..98e2cd6ce7dae 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -467,21 +467,6 @@ def test_datetime_bool(self, cache): NaT, to_datetime(0, cache=cache)])) - def test_datetime_require_iso8601(self): - # GH19671 - s = Series(['M1809', 'M1701', Timestamp('20130101')]) - with pytest.raises(ValueError): - to_datetime(s, require_iso8601=True, errors='raise') - tm.assert_series_equal( - to_datetime(s, require_iso8601=True, errors='ignore'), - Series(['M1809', 'M1701', Timestamp('20130101')])) - tm.assert_series_equal( - to_datetime(s, require_iso8601=True, errors='coerce'), - Series([NaT, NaT, Timestamp('20130101')])) - tm.assert_series_equal( - to_datetime(s, require_iso8601=False, errors='raise'), - Series([Timestamp('18090101'), Timestamp('17010101'), - Timestamp('20130101')])) def test_datetime_invalid_datatype(self): # GH13176 From 27fdfac441e4366d3c5a76bbca022d092242faff Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 24 Feb 2018 23:32:34 +0800 Subject: [PATCH 28/39] replace to_datetime call with internal conversion func --- pandas/core/dtypes/cast.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 240721b2bcc8a..5fef5c3a11e81 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -913,10 +913,17 @@ def try_datetime(v): # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype + + # GH19671 + # replaced to_datetime call with + # tslibs.conversion.datetime_to_datetime64 + # in order to avoid changes to public to_datetime API try: - from pandas import to_datetime - # GH19671 - return to_datetime(v, require_iso8601=True) + from pandas._libs.tslibs import conversion + from 
pandas.core.indexes.datetimes import DatetimeIndex + + values, tz = conversion.datetime_to_datetime64(v) + return DatetimeIndex._simple_new(values, tz=tz) except Exception: pass From 6aea33d66b5d7119c58321e06ab73712a7032d9b Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 24 Feb 2018 15:59:40 +0000 Subject: [PATCH 29/39] revert test_tools --- pandas/tests/indexes/datetimes/test_tools.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 98e2cd6ce7dae..fbf0977a04d82 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -467,7 +467,6 @@ def test_datetime_bool(self, cache): NaT, to_datetime(0, cache=cache)])) - def test_datetime_invalid_datatype(self): # GH13176 From 14946f8525bc591753f75963694669e65e3c3515 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 24 Feb 2018 15:35:02 -0500 Subject: [PATCH 30/39] use DTI constructor --- pandas/core/dtypes/cast.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4dbb1d024c293..b1d0dc2a2442e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -912,19 +912,15 @@ def try_datetime(v): # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype - - # GH19671 - # replaced to_datetime call with - # tslibs.conversion.datetime_to_datetime64 - # in order to avoid changes to public to_datetime API + # then these stay as object dtype, xref GH19671 try: from pandas._libs.tslibs import conversion - from pandas.core.indexes.datetimes import DatetimeIndex + from pandas import DatetimeIndex values, tz = conversion.datetime_to_datetime64(v) - return DatetimeIndex._simple_new(values, tz=tz) - except Exception: + return DatetimeIndex(values).tz_localize( + 'UTC').tz_convert(tz=tz) 
+ except (ValueError, TypeError): pass except Exception: From 9e11b4368ba752f8388b38abcd0884426aa53a4c Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sat, 24 Feb 2018 21:57:32 +0000 Subject: [PATCH 31/39] test case for issue 19671, iterrows --- pandas/tests/frame/test_api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 91fe7f99ca681..c414666de91ca 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -214,6 +214,14 @@ def test_iterrows(self): exp = self.mixed_frame.loc[k] self._assert_series_equal(v, exp) + # GH19671 + s = DataFrame( + {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], + 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) + for k, v in s.iterrows(): + exp = s.loc[k] + self._assert_series_equal(v, exp) + def test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) From 2fe7057d6049b32efc01427e7dfeb1a41e2bb9d9 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 25 Feb 2018 10:37:11 +0000 Subject: [PATCH 32/39] using klass for construction --- pandas/tests/frame/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index c414666de91ca..5172ba4a3ae7d 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -215,7 +215,7 @@ def test_iterrows(self): self._assert_series_equal(v, exp) # GH19671 - s = DataFrame( + s = self.klass( {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) for k, v in s.iterrows(): From 910f759fdaa8887c785afe41b3003a4b5c1b5ddb Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 25 Feb 2018 14:28:15 +0000 Subject: [PATCH 33/39] test DataFrame only --- pandas/tests/frame/test_api.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git 
a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 5172ba4a3ae7d..3fd583389a956 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -214,14 +214,6 @@ def test_iterrows(self): exp = self.mixed_frame.loc[k] self._assert_series_equal(v, exp) - # GH19671 - s = self.klass( - {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], - 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) - for k, v in s.iterrows(): - exp = s.loc[k] - self._assert_series_equal(v, exp) - def test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) @@ -512,3 +504,12 @@ def test_tab_complete_warning(self, ip): with tm.assert_produces_warning(None): with provisionalcompleter('ignore'): list(ip.Completer.completions('df.', 1)) + + def test_iterrows_iso8601(self): + # GH19671, SparseBlock cannot be tested due to lack of implementation. + s = self.klass( + {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], + 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) + for k, v in s.iterrows(): + exp = s.loc[k] + self._assert_series_equal(v, exp) From 0b72b725cf2c258f2db6a8c2ca9a90a123e6109f Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 25 Feb 2018 14:30:24 +0000 Subject: [PATCH 34/39] fix a typo --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index d385185fbb558..4c6a67920e456 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2602,7 +2602,7 @@ def _maybe_coerce_values(self, values): """Input validation for values passed to __init__. Ensure that we have datetime64ns, coercing if necessary. 
- Parametetrs + Parameters ----------- values : array-like Must be convertible to datetime64 From e69f4abe1094feebb83f63b513685915be2bf005 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 25 Feb 2018 10:35:16 -0500 Subject: [PATCH 35/39] correction --- pandas/core/internals.py | 2 +- pandas/tests/frame/test_api.py | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4c6a67920e456..00ef8f9cef598 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2603,7 +2603,7 @@ def _maybe_coerce_values(self, values): we have datetime64ns, coercing if necessary. Parameters - ----------- + ---------- values : array-like Must be convertible to datetime64 diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 3fd583389a956..15255ef6af18e 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -214,6 +214,14 @@ def test_iterrows(self): exp = self.mixed_frame.loc[k] self._assert_series_equal(v, exp) + def test_iterrows_iso8601(self): + s = self.klass( + {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], + 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) + for k, v in s.iterrows(): + exp = s.loc[k] + self._assert_series_equal(v, exp) + def test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) @@ -504,12 +512,3 @@ def test_tab_complete_warning(self, ip): with tm.assert_produces_warning(None): with provisionalcompleter('ignore'): list(ip.Completer.completions('df.', 1)) - - def test_iterrows_iso8601(self): - # GH19671, SparseBlock cannot be tested due to lack of implementation. 
- s = self.klass( - {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], - 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) - for k, v in s.iterrows(): - exp = s.loc[k] - self._assert_series_equal(v, exp) From 5b12cfc51c753aa71172a51b6139781dc290b032 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 25 Feb 2018 19:38:30 +0000 Subject: [PATCH 36/39] fix test_iterrows --- pandas/tests/frame/test_api.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 15255ef6af18e..1f8ed6acf0f4c 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -214,13 +214,17 @@ def test_iterrows(self): exp = self.mixed_frame.loc[k] self._assert_series_equal(v, exp) - def test_iterrows_iso8601(self): s = self.klass( {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) for k, v in s.iterrows(): exp = s.loc[k] - self._assert_series_equal(v, exp) + try: + self._assert_series_equal(v, exp) + except AssertionError: + # GH19671, SparseDataFrame datetime type not implemented. 
+ if self.klass == DataFrame: + raise def test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): From a5a1f57444b87e729be7aff9ae2e0e68b8c093bc Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 25 Feb 2018 19:44:21 +0000 Subject: [PATCH 37/39] whatsnew entry --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 99a3773603fc4..3f54a927010ab 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -766,7 +766,7 @@ Datetimelike - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. (:issue:`19744`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) -- +- Bug in :func:`iterrows` infers strings not compliant to `ISO8601 `_ (:issue:`19671`) Timedelta ^^^^^^^^^ From 793ea23cccda08678055c8d88e10fa7fb7a1a545 Mon Sep 17 00:00:00 2001 From: Ming Li Date: Sun, 25 Feb 2018 21:43:46 +0000 Subject: [PATCH 38/39] imperative xfail in test --- pandas/tests/frame/test_api.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 1f8ed6acf0f4c..8ba5469480e64 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -15,7 +15,8 @@ from numpy.random import randn import numpy as np -from pandas import DataFrame, Series, date_range, timedelta_range, Categorical +from pandas import (DataFrame, Series, date_range, timedelta_range, + Categorical, SparseDataFrame) import pandas as pd from pandas.util.testing import 
(assert_almost_equal, @@ -214,17 +215,17 @@ def test_iterrows(self): exp = self.mixed_frame.loc[k] self._assert_series_equal(v, exp) + def test_iterrows_iso8601(self): + # GH19671 + if self.klass == SparseDataFrame: + pytest.xfail(reason='SparseBlock datetime type not implemented.') + s = self.klass( {'non_iso8601': ['M1701', 'M1802', 'M1903', 'M2004'], 'iso8601': date_range('2000-01-01', periods=4, freq='M')}) for k, v in s.iterrows(): exp = s.loc[k] - try: - self._assert_series_equal(v, exp) - except AssertionError: - # GH19671, SparseDataFrame datetime type not implemented. - if self.klass == DataFrame: - raise + self._assert_series_equal(v, exp) def test_itertuples(self): for i, tup in enumerate(self.frame.itertuples()): From 08d27182eaa158c2cae46a626925bb32e696b9f1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 25 Feb 2018 18:04:38 -0500 Subject: [PATCH 39/39] doc --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 3f54a927010ab..7f33372f765fb 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -766,7 +766,6 @@ Datetimelike - Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) - Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) -- Bug in :func:`iterrows` infers strings not compliant to `ISO8601 `_ (:issue:`19671`) Timedelta ^^^^^^^^^ @@ -918,6 +917,7 @@ Reshaping - :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) - Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) - Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) +- Bug in :func:`DataFrame.iterrows`, which would infer strings not compliant with `ISO8601 <https://en.wikipedia.org/wiki/ISO_8601>`_ as datetimes (:issue:`19671`) Other ^^^^^