From 655ec319f48603b30d967dfe8a0e0d0521352148 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 15 Apr 2019 15:14:52 +0300 Subject: [PATCH 01/31] fixed _convert_and_box_cache that raised ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True --- pandas/core/tools/datetimes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5893ff0e0dd8f..b4956d75b5019 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -611,6 +611,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: + errors = 'ignore' if not utc else errors result = _convert_and_box_cache(arg, cache_array, box, errors, name=arg.name) else: @@ -619,6 +620,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: + errors = 'ignore' if not utc else errors result = _convert_and_box_cache(arg, cache_array, box, errors) else: result = convert_listlike(arg, box, format) From 6a9856e5868b30e75b1c765557e20494feecae85 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Tue, 16 Apr 2019 12:09:58 -0500 Subject: [PATCH 02/31] Revert bandaid workaround --- pandas/core/tools/datetimes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index b4956d75b5019..5893ff0e0dd8f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -611,7 +611,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - errors = 'ignore' if not utc else errors result = _convert_and_box_cache(arg, cache_array, box, errors, name=arg.name) else: @@ -620,7 +619,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - errors = 'ignore' if not utc else errors result = _convert_and_box_cache(arg, cache_array, box, errors) else: result = convert_listlike(arg, box, format) From ffd9ecf4b675143d5165212cb84ad61e025e222a Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Tue, 16 Apr 2019 12:10:31 -0500 Subject: [PATCH 03/31] Add test that fails only when to_datetime gets cache=True --- pandas/tests/indexes/datetimes/test_tools.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a971a1088860a..4c977198f7327 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -504,6 +504,19 @@ def test_to_datetime_tz(self, cache): with pytest.raises(ValueError, match=msg): pd.to_datetime(arr, cache=cache) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_offset(self, cache): + # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark + # see GH-26097 for more + ts_string_1 = 'March 1, 2018 12:00:00+0400' + ts_string_2 = 'March 1, 2018 12:00:00+0500' + arr = [ts_string_1] * 5 + [ts_string_2] * 5 + expected = pd.Index([parse(x) for x in arr]) + result = pd.to_datetime(arr, cache=cache) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 From d6c584eb21598357098f54a3627dcbf056c8b1d3 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Tue, 16 Apr 2019 12:59:30 -0500 Subject: [PATCH 04/31] Fix to_datetime caching logic so test_to_datetime_offset passes --- pandas/core/tools/datetimes.py | 60 ++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5893ff0e0dd8f..e24d43f570964 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -60,7 +60,42 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _convert_and_box_cache(arg, cache_array, box, errors, name=None): +def _box_if_needed(dt_array, box, default, tz, name): + """ + Properly boxes the ndarray of datetimes (if requested) to DatetimeIndex + if it is possible or to generic Index instead + + Parameters + ---------- + dt_array: 1-d array + array of datetimes to be boxed + box : boolean + True boxes result as an Index-like, False returns an ndarray + tz : object + None or 'utc' + name : string, default None + Name for a resulting index + + Returns + ------- + result : datetime of converted dates + Returns: + + - Index-like if box=True + - ndarray if box=False + """ + if box: + from pandas import DatetimeIndex, Index + print(type(dt_array)) + if is_datetime64_dtype(dt_array): + return DatetimeIndex(dt_array, tz=tz, name=name) + #elif is_object_dtype(dt_array): + # e.g. an Index of datetime objects + return Index(dt_array, name=name) + return default + + +def _convert_and_box_cache(arg, cache_array, box, name=None): """ Convert array of dates with a cache and box the result @@ -71,8 +106,6 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): Cache of converted, unique dates box : boolean True boxes result as an Index-like, False returns an ndarray - errors : string - 'ignore' plus box=True will convert result to Index name : string, default None Name for a DatetimeIndex @@ -86,12 +119,7 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): """ from pandas import Series, DatetimeIndex, Index result = Series(arg).map(cache_array) - if box: - if errors == 'ignore': - return Index(result, name=name) - else: - return DatetimeIndex(result, name=name) - return result.values + return _box_if_needed(result, box, result.values, None, name) def _return_parsed_timezone_results(result, timezones, box, tz, name): @@ -323,15 +351,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, for ts in result] return np.array(result, dtype=object) - if box: - # Ensure we return an Index in all cases where box=True - if is_datetime64_dtype(result): - return DatetimeIndex(result, tz=tz, name=name) - elif is_object_dtype(result): - # e.g. an Index of datetime objects - from pandas import Index - return Index(result, name=name) - return result + return _box_if_needed(result, box, result, tz, name) def _adjust_to_origin(arg, origin, unit): @@ -611,7 +631,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, elif isinstance(arg, ABCIndexClass): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box, errors, + result = _convert_and_box_cache(arg, cache_array, box, name=arg.name) else: convert_listlike = partial(convert_listlike, name=arg.name) @@ -619,7 +639,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, elif is_list_like(arg): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: - result = _convert_and_box_cache(arg, cache_array, box, errors) + result = _convert_and_box_cache(arg, cache_array, box) else: result = convert_listlike(arg, box, format) else: From 00f72e04be9da63dda11d0d90eaefbf3c653b7c9 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Tue, 16 Apr 2019 13:02:38 -0500 Subject: [PATCH 05/31] Fix flake8 issues --- pandas/core/tools/datetimes.py | 3 +-- pandas/tests/indexes/datetimes/test_tools.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e24d43f570964..b7359d1c82888 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -89,7 +89,6 @@ def _box_if_needed(dt_array, box, default, tz, name): print(type(dt_array)) if is_datetime64_dtype(dt_array): return DatetimeIndex(dt_array, tz=tz, name=name) - #elif is_object_dtype(dt_array): # e.g. an Index of datetime objects return Index(dt_array, name=name) return default @@ -117,7 +116,7 @@ def _convert_and_box_cache(arg, cache_array, box, name=None): - Index-like if box=True - ndarray if box=False """ - from pandas import Series, DatetimeIndex, Index + from pandas import Series result = Series(arg).map(cache_array) return _box_if_needed(result, box, result.values, None, name) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 4c977198f7327..950b9a36629dd 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -504,7 +504,6 @@ def test_to_datetime_tz(self, cache): with pytest.raises(ValueError, match=msg): pd.to_datetime(arr, cache=cache) - @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_offset(self, cache): # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark @@ -516,7 +515,6 @@ def test_to_datetime_offset(self, cache): result = pd.to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 From 5ad99112aff8f194766e2d182cd0b92e9a5e5204 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 17 Apr 2019 14:52:09 +0300 Subject: [PATCH 06/31] removed debugging stuff; 'name' is default argument now --- pandas/core/tools/datetimes.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index b7359d1c82888..9a6ebcfa03316 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.common import ( ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype, - is_list_like, is_numeric_dtype, is_object_dtype, is_scalar) + is_list_like, is_numeric_dtype, is_scalar) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import notna @@ -60,7 +60,7 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _box_if_needed(dt_array, box, default, tz, name): +def _box_if_needed(dt_array, box, default, tz, name=None): """ Properly boxes the ndarray of datetimes (if requested) to DatetimeIndex if it is possible or to generic Index instead @@ -86,7 +86,6 @@ def _box_if_needed(dt_array, box, default, tz, name): """ if box: from pandas import DatetimeIndex, Index - print(type(dt_array)) if is_datetime64_dtype(dt_array): return DatetimeIndex(dt_array, tz=tz, name=name) # e.g. an Index of datetime objects From 428cae04310247af7a45d43dfba0aaa48c331746 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 17 Apr 2019 15:34:07 +0300 Subject: [PATCH 07/31] added 'whatsnew' --- doc/source/whatsnew/v0.25.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 2030bb4d974c3..02210ed4897f5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -705,6 +705,8 @@ Datetimelike - Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`) - Bug where adding :class:`Timestamp` to a ``np.timedelta64`` object would raise instead of returning a :class:`Timestamp` (:issue:`24775`) - Bug where comparing a zero-dimensional numpy array containing a ``np.datetime64`` object to a :class:`Timestamp` would incorrect raise ``TypeError`` (:issue:`26916`) +- Bug in :func:`to_datetime` which would raise ``ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True`` when called with ``cache=True``, with ``arg`` including datetime strings with different offset (:issue:`26097`) +- Timedelta ^^^^^^^^^ From 7ed05f2f9bac6e94217aaf4d888c158bedfc4f88 Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Thu, 18 Apr 2019 05:58:18 -0500 Subject: [PATCH 08/31] Test that to_datetime produces equal result for cache on and off --- pandas/tests/indexes/datetimes/test_tools.py | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 950b9a36629dd..e61ea26d79d44 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -26,6 +26,10 @@ from pandas.util import testing as tm from pandas.util.testing import assert_series_equal +from hypothesis.extra.pytz import timezones +from hypothesis.strategies import datetimes +from hypothesis import given + class TestTimeConversionFormats: @@ -515,6 +519,29 @@ def test_to_datetime_offset(self, cache): result = pd.to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('errors', ('ignore', 'coerce', 'raise')) + @pytest.mark.parametrize('suffix', ([], ['foo'])) + @pytest.mark.parametrize('convertor', (lambda x: x, str)) + @given(date1=datetimes(timezones=timezones()), + date2=datetimes(timezones=timezones())) + def test_to_datetime_cache_errors(self, date1, date2, suffix, + errors, convertor): + arg = [convertor(date1), convertor(date2)] * 5 + suffix + + def _get_answer(cache): + try: + return pd.to_datetime(arg, cache=cache, errors=errors) + except ValueError as err: + return err.args + + cache_on = _get_answer(cache=True) + cache_off = _get_answer(cache=False) + assert type(cache_on) == type(cache_off) + if isinstance(cache_on, pd.Index): + tm.assert_index_equal(cache_on, cache_off) + else: + assert cache_on == cache_off + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 From b60f1d5cc086a0980b5a75642c0262d1c0d78f36 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Thu, 18 Apr 2019 16:20:19 +0300 Subject: [PATCH 09/31] fixed isort errors --- pandas/tests/indexes/datetimes/test_tools.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e61ea26d79d44..4a12cda459433 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -6,6 +6,9 @@ from dateutil.parser import parse from dateutil.tz.tz import tzoffset +from hypothesis import given +from hypothesis.extra.pytz import timezones +from hypothesis.strategies import datetimes import numpy as np import pytest import pytz @@ -26,10 +29,6 @@ from pandas.util import testing as tm from pandas.util.testing import assert_series_equal -from hypothesis.extra.pytz import timezones -from hypothesis.strategies import datetimes -from hypothesis import given - class TestTimeConversionFormats: From 3e2df79f9e8b5ec5b82263c91f5009ffa667ef7c Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Fri, 19 Apr 2019 11:45:15 +0300 Subject: [PATCH 10/31] Rework _box_if_needed into _box_as_indexlike --- pandas/core/tools/datetimes.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9a6ebcfa03316..d904c5c6448d0 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -60,17 +60,15 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _box_if_needed(dt_array, box, default, tz, name=None): +def _box_as_indexlike(dt_array, tz=None, name=None): """ - Properly boxes the ndarray of datetimes (if requested) to DatetimeIndex + Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead Parameters ---------- dt_array: 1-d array array of datetimes to be boxed - box : boolean - True boxes result as an Index-like, False returns an ndarray tz : object None or 'utc' name : string, default None @@ -79,18 +77,13 @@ def _box_if_needed(dt_array, box, default, tz, name=None): Returns ------- result : datetime of converted dates - Returns: - - - Index-like if box=True - - ndarray if box=False + - DatetimeIndex if convertible to sole datetime64 type + - general Index otherwise """ - if box: - from pandas import DatetimeIndex, Index - if is_datetime64_dtype(dt_array): - return DatetimeIndex(dt_array, tz=tz, name=name) - # e.g. an Index of datetime objects - return Index(dt_array, name=name) - return default + from pandas import DatetimeIndex, Index + if is_datetime64_dtype(dt_array): + return DatetimeIndex(dt_array, tz=tz, name=name) + return Index(dt_array, name=name) def _convert_and_box_cache(arg, cache_array, box, name=None): @@ -117,7 +110,9 @@ def _convert_and_box_cache(arg, cache_array, box, name=None): """ from pandas import Series result = Series(arg).map(cache_array) - return _box_if_needed(result, box, result.values, None, name) + if box: + return _box_as_indexlike(result, tz=None, name=name) + return result.values def _return_parsed_timezone_results(result, timezones, box, tz, name): @@ -349,7 +344,9 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, for ts in result] return np.array(result, dtype=object) - return _box_if_needed(result, box, result, tz, name) + if box: + return _box_as_indexlike(result, tz=tz, name=name) + return result def _adjust_to_origin(arg, origin, unit): From 3f0285e6df4c29692f56c65cf24d395124bb194c Mon Sep 17 00:00:00 2001 From: Vasily Litvinov Date: Fri, 19 Apr 2019 12:01:27 +0300 Subject: [PATCH 11/31] Clarify added tests intention --- pandas/tests/indexes/datetimes/test_tools.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 4a12cda459433..b0970e225ac22 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -508,7 +508,7 @@ def test_to_datetime_tz(self, cache): pd.to_datetime(arr, cache=cache) @pytest.mark.parametrize('cache', [True, False]) - def test_to_datetime_offset(self, cache): + def test_to_datetime_different_offsets(self, cache): # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark # see GH-26097 for more ts_string_1 = 'March 1, 2018 12:00:00+0400' @@ -523,8 +523,10 @@ def test_to_datetime_offset(self, cache): @pytest.mark.parametrize('convertor', (lambda x: x, str)) @given(date1=datetimes(timezones=timezones()), date2=datetimes(timezones=timezones())) - def test_to_datetime_cache_errors(self, date1, date2, suffix, - errors, convertor): + def test_to_datetime_cache_invariance(self, date1, date2, suffix, + errors, convertor): + # prepare a list of dates to parse with some duplicates + # and possible invalid string arg = [convertor(date1), convertor(date2)] * 5 + suffix def _get_answer(cache): From d19c2cfba8ea362c8824a9b9a0e0b039bd77d0d5 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 22 Apr 2019 14:41:55 +0300 Subject: [PATCH 12/31] first using notations --- pandas/core/tools/datetimes.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d904c5c6448d0..736bc0ddaf78b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -18,9 +18,16 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import notna +from pandas._typing import ( + ArrayLike, IndexLike, IndexLikeOrNdarray, SeriesLike, Union) from pandas.core import algorithms +# notations +IntFltStrDateLisTuplArrSer = Union[int, float, str, list, tuple, + datetime, ArrayLike, SeriesLike] + + def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element non_nan_elements = notna(arr).nonzero()[0] @@ -60,7 +67,7 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _box_as_indexlike(dt_array, tz=None, name=None): +def _box_as_indexlike(dt_array: ArrayLike, tz=None, name=None) -> IndexLike: """ Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead @@ -86,7 +93,9 @@ def _box_as_indexlike(dt_array, tz=None, name=None): return Index(dt_array, name=name) -def _convert_and_box_cache(arg, cache_array, box, name=None): +def _convert_and_box_cache(arg: IntFltStrDateLisTuplArrSer, + cache_array: SeriesLike, + box: bool, name=None) -> IndexLikeOrNdarray: """ Convert array of dates with a cache and box the result From b1cf14055a6abe20345cd414ace285ccdc6d94c3 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 22 Apr 2019 22:08:05 +0300 Subject: [PATCH 13/31] changed wildcard import in '/pandas/core/index.py' to explicit import --- pandas/core/index.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 2d1c22f5623a1..04df32c891062 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,3 +1,12 @@ # flake8: noqa -from pandas.core.indexes.api import * +from pandas.core.indexes.api import ( + Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex, Float64Index, + MultiIndex, IntervalIndex, TimedeltaIndex, DatetimeIndex, PeriodIndex, + NumericIndex, InvalidIndexError, ensure_index, ensure_index_from_sequences, + NaT, + + # private methods + _new_Index, _get_combined_index, _get_objs_combined_axis, _union_indexes, + _get_consensus_names, _all_indexes_same +) from pandas.core.indexes.multi import _sparsify From 56db677ca4efd28fcb80612e8509812273794d58 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 22 Apr 2019 22:10:34 +0300 Subject: [PATCH 14/31] changed 'notations' -> 'annotations' --- pandas/core/tools/datetimes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 736bc0ddaf78b..583ec3b124555 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -22,8 +22,7 @@ ArrayLike, IndexLike, IndexLikeOrNdarray, SeriesLike, Union) from pandas.core import algorithms - -# notations +# annotations IntFltStrDateLisTuplArrSer = Union[int, float, str, list, tuple, datetime, ArrayLike, SeriesLike] From 4f9ea367f4bf634f8a8cf1a2954df0eb410a9528 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Mon, 22 Apr 2019 23:18:12 +0300 Subject: [PATCH 15/31] fixed isort errors --- pandas/core/index.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 04df32c891062..f2215639daac4 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,12 +1,8 @@ # flake8: noqa from pandas.core.indexes.api import ( - Index, CategoricalIndex, Int64Index, UInt64Index, RangeIndex, Float64Index, - MultiIndex, IntervalIndex, TimedeltaIndex, DatetimeIndex, PeriodIndex, - NumericIndex, InvalidIndexError, ensure_index, ensure_index_from_sequences, - NaT, - - # private methods - _new_Index, _get_combined_index, _get_objs_combined_axis, _union_indexes, - _get_consensus_names, _all_indexes_same -) + CategoricalIndex, DatetimeIndex, Float64Index, Index, Int64Index, + IntervalIndex, InvalidIndexError, MultiIndex, NaT, NumericIndex, + PeriodIndex, RangeIndex, TimedeltaIndex, UInt64Index, _all_indexes_same, + _get_combined_index, _get_consensus_names, _get_objs_combined_axis, + _new_Index, _union_indexes, ensure_index, ensure_index_from_sequences) from pandas.core.indexes.multi import _sparsify From c72a561b04a09b861266c3b3d0cd264f9888ef57 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 23 Apr 2019 22:43:58 +0300 Subject: [PATCH 16/31] rollback of a certain style for annotations --- pandas/core/tools/datetimes.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 583ec3b124555..d42784ee3b091 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -15,16 +15,16 @@ ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype, is_list_like, is_numeric_dtype, is_scalar) -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCDatetimeIndex, ABCIndex, ABCIndexClass, ABCSeries) from pandas.core.dtypes.missing import notna -from pandas._typing import ( - ArrayLike, IndexLike, IndexLikeOrNdarray, SeriesLike, Union) +from pandas._typing import ArrayLike, Union from pandas.core import algorithms # annotations -IntFltStrDateLisTuplArrSer = Union[int, float, str, list, tuple, - datetime, ArrayLike, SeriesLike] +DatetimeScalarOrArrayConvertible = Union[int, float, str, list, tuple, + datetime, ArrayLike, ABCSeries] def _guess_datetime_format_for_array(arr, **kwargs): @@ -66,7 +66,8 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _box_as_indexlike(dt_array: ArrayLike, tz=None, name=None) -> IndexLike: +def _box_as_indexlike(dt_array: ArrayLike, + tz=None, name=None) -> Union[ABCIndex, ABCDatetimeIndex]: """ Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead @@ -92,9 +93,10 @@ def _box_as_indexlike(dt_array: ArrayLike, tz=None, name=None) -> IndexLike: return Index(dt_array, name=name) -def _convert_and_box_cache(arg: IntFltStrDateLisTuplArrSer, - cache_array: SeriesLike, - box: bool, name=None) -> IndexLikeOrNdarray: +def _convert_and_box_cache(arg: DatetimeScalarOrArrayConvertible, + cache_array: ABCSeries, + box: bool, name=None) -> Union[ABCIndex, + np.ndarray]: """ Convert array of dates with a cache and box the result @@ -111,8 +113,6 @@ def _convert_and_box_cache(arg: IntFltStrDateLisTuplArrSer, Returns ------- result : datetime of converted dates - Returns: - - Index-like if box=True - ndarray if box=False """ From 67a0c40a2a61c1f28d9489cdbecca64c06f93953 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 24 Apr 2019 10:15:47 +0300 Subject: [PATCH 17/31] added 'Scalar' and 'DatetimeScalar' unions --- pandas/_typing.py | 1 + pandas/core/tools/datetimes.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 8947e98bf52ce..a833c2c67e3ec 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,3 +1,4 @@ +from datetime import datetime from pathlib import Path from typing import IO, AnyStr, TypeVar, Union diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d42784ee3b091..e9742828cd5c6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,6 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial +from typing import Union import numpy as np @@ -19,12 +20,12 @@ ABCDataFrame, ABCDatetimeIndex, ABCIndex, ABCIndexClass, ABCSeries) from pandas.core.dtypes.missing import notna -from pandas._typing import ArrayLike, Union +from pandas._typing import ArrayLike, DatetimeScalar from pandas.core import algorithms # annotations -DatetimeScalarOrArrayConvertible = Union[int, float, str, list, tuple, - datetime, ArrayLike, ABCSeries] +DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, list, tuple, + ArrayLike, ABCSeries] def _guess_datetime_format_for_array(arr, **kwargs): From 1e0d9538190a143be842b01fe9bdb48f8c8c9322 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 26 Apr 2019 11:23:47 +0300 Subject: [PATCH 18/31] added annotations for some arguments; changed formatting --- pandas/core/tools/datetimes.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index e9742828cd5c6..eeb4feea84a2a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,7 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial -from typing import Union +from typing import Optional, Union import numpy as np @@ -23,11 +23,18 @@ from pandas._typing import ArrayLike, DatetimeScalar from pandas.core import algorithms -# annotations + +# --------------------------------------------------------------------- +# types used in annotations + + DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, list, tuple, ArrayLike, ABCSeries] +# --------------------------------------------------------------------- + + def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element non_nan_elements = notna(arr).nonzero()[0] @@ -67,8 +74,11 @@ def _maybe_cache(arg, format, cache, convert_listlike): return cache_array -def _box_as_indexlike(dt_array: ArrayLike, - tz=None, name=None) -> Union[ABCIndex, ABCDatetimeIndex]: +def _box_as_indexlike( + dt_array: ArrayLike, + tz: Optional[object] = None, + name: Optional[str] = None +) -> Union[ABCIndex, ABCDatetimeIndex]: """ Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead @@ -94,10 +104,12 @@ def _box_as_indexlike(dt_array: ArrayLike, return Index(dt_array, name=name) -def _convert_and_box_cache(arg: DatetimeScalarOrArrayConvertible, - cache_array: ABCSeries, - box: bool, name=None) -> Union[ABCIndex, - np.ndarray]: +def _convert_and_box_cache( + arg: DatetimeScalarOrArrayConvertible, + cache_array: ABCSeries, + box: bool, + name: Optional[str] = None +) -> Union[ABCIndex, np.ndarray]: """ Convert array of dates with a cache and box the result From 1942bbe5b7e052fdff6915c97160d6572447a2bb Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Sun, 28 Apr 2019 22:17:57 +0300 Subject: [PATCH 19/31] using 'assert_almost_equal' now --- pandas/core/tools/datetimes.py | 1 - pandas/tests/indexes/datetimes/test_tools.py | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index eeb4feea84a2a..58d3f3673eb8c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -23,7 +23,6 @@ from pandas._typing import ArrayLike, DatetimeScalar from pandas.core import algorithms - # --------------------------------------------------------------------- # types used in annotations diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b0970e225ac22..9f020e60d98a2 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -537,11 +537,7 @@ def _get_answer(cache): cache_on = _get_answer(cache=True) cache_off = _get_answer(cache=False) - assert type(cache_on) == type(cache_off) - if isinstance(cache_on, pd.Index): - tm.assert_index_equal(cache_on, cache_off) - else: - assert cache_on == cache_off + tm.assert_almost_equal(cache_on, cache_off) @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): From 97e45489518722cabde0833d4e2bbbbc8a38849f Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 12 Jun 2019 20:58:13 +0300 Subject: [PATCH 20/31] rerun CI tests --- pandas/core/tools/datetimes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 58d3f3673eb8c..78a1c44112df1 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -159,7 +159,6 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name): - Index-like if box=True - ndarray of Timestamps if box=False - """ if tz is not None: raise ValueError("Cannot pass a tz argument when " From 342d7d0e36437750c18954e8ba4472b659acdbaf Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 12 Jun 2019 22:27:29 +0300 Subject: [PATCH 21/31] fixed problems found by review --- pandas/_typing.py | 1 - pandas/core/tools/datetimes.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index a833c2c67e3ec..8947e98bf52ce 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,4 +1,3 @@ -from datetime import datetime from pathlib import Path from typing import IO, AnyStr, TypeVar, Union diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 78a1c44112df1..aca069c4e3d2e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -20,13 +20,14 @@ ABCDataFrame, ABCDatetimeIndex, ABCIndex, ABCIndexClass, ABCSeries) from pandas.core.dtypes.missing import notna -from pandas._typing import ArrayLike, DatetimeScalar +from pandas._typing import ArrayLike from pandas.core import algorithms # --------------------------------------------------------------------- # types used in annotations - +Scalar = Union[int, float, str] +DatetimeScalar = Union[Scalar, datetime] DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, list, tuple, ArrayLike, ABCSeries] From 12f9853bac77b353ba8c3ad80937428c5d25563b Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 14 Jun 2019 15:59:36 +0300 Subject: [PATCH 22/31] replaced '# noqa' statement for flake8 linter --- pandas/core/index.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index f2215639daac4..855497eccebcb 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,8 +1,7 @@ -# flake8: noqa -from pandas.core.indexes.api import ( +from pandas.core.indexes.api import ( # noqa CategoricalIndex, DatetimeIndex, Float64Index, Index, Int64Index, IntervalIndex, InvalidIndexError, MultiIndex, NaT, NumericIndex, PeriodIndex, RangeIndex, TimedeltaIndex, UInt64Index, _all_indexes_same, _get_combined_index, _get_consensus_names, _get_objs_combined_axis, _new_Index, _union_indexes, ensure_index, ensure_index_from_sequences) -from pandas.core.indexes.multi import _sparsify +from pandas.core.indexes.multi import _sparsify # noqa From 62e75f8b128be2a67f7d3908661751e9f6988995 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 14 Jun 2019 16:13:38 +0300 Subject: [PATCH 23/31] removed 'Optional' using --- pandas/core/index.py | 4 ++-- pandas/core/tools/datetimes.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 855497eccebcb..f14f32c67d4e1 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,7 @@ -from pandas.core.indexes.api import ( # noqa +from pandas.core.indexes.api import ( # noqa:F401 CategoricalIndex, DatetimeIndex, Float64Index, Index, Int64Index, IntervalIndex, InvalidIndexError, MultiIndex, NaT, NumericIndex, PeriodIndex, RangeIndex, TimedeltaIndex, UInt64Index, _all_indexes_same, _get_combined_index, _get_consensus_names, _get_objs_combined_axis, _new_Index, _union_indexes, ensure_index, ensure_index_from_sequences) -from pandas.core.indexes.multi import _sparsify # noqa +from pandas.core.indexes.multi import _sparsify # noqa:F401 diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index aca069c4e3d2e..90f4f4218bce2 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,7 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial -from typing import Optional, Union +from typing import Union import numpy as np @@ -76,8 +76,8 @@ def _maybe_cache(arg, format, cache, convert_listlike): def _box_as_indexlike( dt_array: ArrayLike, - tz: Optional[object] = None, - name: Optional[str] = None + tz=None, + name: str = None ) -> Union[ABCIndex, ABCDatetimeIndex]: """ Properly boxes the ndarray of datetimes to DatetimeIndex @@ -108,7 +108,7 @@ def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: ABCSeries, box: bool, - name: Optional[str] = None + name: str = None ) -> Union[ABCIndex, np.ndarray]: """ Convert array of dates with a cache and box the result From 71ca9bed80ddf807b1d96f0ebfd30c92376f493b Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 14 Jun 2019 16:32:36 +0300 Subject: [PATCH 24/31] using TypeVar for 'DatetimeScalar' definition --- pandas/core/tools/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 90f4f4218bce2..d94d082218f67 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,7 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial -from typing import Union +from typing import Union, TypeVar import numpy as np @@ -27,7 +27,7 @@ # types used in annotations Scalar = Union[int, float, str] -DatetimeScalar = Union[Scalar, datetime] +DatetimeScalar = TypeVar('DatetimeScalar', Scalar, datetime) DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, list, tuple, ArrayLike, ABCSeries] From c35e124b0476ce17f4645d0cbb595512d6311d59 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 14 Jun 2019 19:01:34 +0300 Subject: [PATCH 25/31] fixed isort error --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d94d082218f67..31777f00394ab 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,7 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial -from typing import Union, TypeVar +from typing import TypeVar, Union import numpy as np From d3412e250a52cb59b39bb36b6002713083178843 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Sun, 16 Jun 2019 18:09:33 +0300 Subject: [PATCH 26/31] fixed bug: 'UTC' and 'Etc/GMT' should be the same --- pandas/_libs/tslibs/timezones.pyx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index cbfbc14c35b35..3e3e9940f5f06 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -18,6 +18,9 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() +UTC_EQ_STR = 'Etc/GMT' +UTC_EQ = pytz.timezone(UTC_EQ_STR) + # ---------------------------------------------------------------------- from pandas._libs.tslibs.util cimport is_integer_object, get_nat @@ -76,6 +79,8 @@ cpdef inline object get_timezone(object tz): zone = tz.zone if zone is None: return tz + if zone == UTC_EQ or zone == UTC_EQ_STR: + return UTC return zone except AttributeError: return tz From 3141cb64aa023563cd184e3cf58a4a6e9e4b26fe Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Sun, 16 Jun 2019 19:04:22 +0300 Subject: [PATCH 27/31] added comment about timezones --- pandas/_libs/tslibs/timezones.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 3e3e9940f5f06..d621b33a8dee9 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -79,6 +79,8 @@ cpdef inline object get_timezone(object tz): zone = tz.zone if zone is None: return tz + + # UTC and Etc/GMT are the same timezones if zone == UTC_EQ or zone == UTC_EQ_STR: return UTC return zone From ca200cdf2c8b89278cdcf33994ea9d8fbaad7057 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Sun, 16 Jun 2019 20:51:25 +0300 Subject: [PATCH 28/31] removed 'UTC_EQ_STR', 'UTC_EQ' --- pandas/_libs/tslibs/timezones.pyx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index d621b33a8dee9..c50c3512fb13b 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -18,9 +18,6 @@ cimport numpy as cnp from numpy cimport int64_t cnp.import_array() -UTC_EQ_STR = 'Etc/GMT' -UTC_EQ = pytz.timezone(UTC_EQ_STR) - # ---------------------------------------------------------------------- from pandas._libs.tslibs.util cimport is_integer_object, get_nat @@ -80,8 +77,13 @@ cpdef inline object get_timezone(object tz): if zone is None: return tz - # UTC and Etc/GMT are the same timezones - if zone == UTC_EQ or zone == UTC_EQ_STR: + # UTC and Etc/GMT or Etc/GMT+0 are the same timezones + if not isinstance(zone, str): + str_zone = str(zone) + else: + str_zone = zone + + if str_zone == 'Etc/GMT' or str_zone == 'Etc/GMT+0': return UTC return zone except AttributeError: From 1cc469ef8aa8d18474b9352d70bca580cd28ac1c Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 2 Jul 2019 14:04:57 +0300 Subject: [PATCH 29/31] removing extra stuff --- pandas/_libs/tslibs/timezones.pyx | 9 -------- pandas/tests/indexes/datetimes/test_tools.py | 24 -------------------- 2 files changed, 33 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index c50c3512fb13b..cbfbc14c35b35 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -76,15 +76,6 @@ cpdef inline object get_timezone(object tz): zone = tz.zone if zone is None: return tz - - # UTC and Etc/GMT or Etc/GMT+0 are the same timezones - if not isinstance(zone, str): - str_zone = str(zone) - else: - str_zone = zone - - if str_zone == 'Etc/GMT' or str_zone == 'Etc/GMT+0': - return UTC return zone except AttributeError: return tz diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9f020e60d98a2..f401a7f7c9e9b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -6,9 +6,6 @@ from dateutil.parser import parse from dateutil.tz.tz import tzoffset -from hypothesis import given -from hypothesis.extra.pytz import timezones -from hypothesis.strategies import datetimes import numpy as np import pytest import pytz @@ -518,27 +515,6 @@ def test_to_datetime_different_offsets(self, cache): result = pd.to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize('errors', ('ignore', 'coerce', 'raise')) - @pytest.mark.parametrize('suffix', ([], ['foo'])) - @pytest.mark.parametrize('convertor', (lambda x: x, str)) - @given(date1=datetimes(timezones=timezones()), - date2=datetimes(timezones=timezones())) - def test_to_datetime_cache_invariance(self, date1, date2, suffix, - errors, convertor): - # prepare a list of dates to parse with some duplicates - # and possible invalid string - arg = [convertor(date1), convertor(date2)] * 5 + suffix - - def _get_answer(cache): - try: - return pd.to_datetime(arg, cache=cache, errors=errors) - except ValueError as err: - return err.args - - cache_on = _get_answer(cache=True) - cache_off = _get_answer(cache=False) - tm.assert_almost_equal(cache_on, cache_off) - @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 From 137395f899e8983213d9601cb5d42c171da98648 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Tue, 2 Jul 2019 14:42:43 +0300 Subject: [PATCH 30/31] fixed mypy errors --- pandas/core/tools/datetimes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 31777f00394ab..a6beac4ae1c6e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,7 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial -from typing import TypeVar, Union +from typing import Optional, TypeVar, Union import numpy as np @@ -77,7 +77,7 @@ def _maybe_cache(arg, format, cache, convert_listlike): def _box_as_indexlike( dt_array: ArrayLike, tz=None, - name: str = None + name: Optional[str] = None ) -> Union[ABCIndex, ABCDatetimeIndex]: """ Properly boxes the ndarray of datetimes to DatetimeIndex @@ -108,7 +108,7 @@ def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: ABCSeries, box: bool, - name: str = None + name: Optional[str] = None ) -> Union[ABCIndex, np.ndarray]: """ Convert array of dates with a cache and box the result From 2d8921bb61cdfdd71f93a3a185ebb476cbc95179 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Wed, 3 Jul 2019 14:44:12 +0300 Subject: [PATCH 31/31] renamed arg in '_box_as_indexlike' func: tz -> utc --- pandas/core/tools/datetimes.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a6beac4ae1c6e..d543ae91ad344 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -76,7 +76,7 @@ def _maybe_cache(arg, format, cache, convert_listlike): def _box_as_indexlike( dt_array: ArrayLike, - tz=None, + utc: Optional[bool] = None, name: Optional[str] = None ) -> Union[ABCIndex, ABCDatetimeIndex]: """ @@ -100,6 +100,7 @@ def _box_as_indexlike( """ from pandas import DatetimeIndex, Index if is_datetime64_dtype(dt_array): + tz = 'utc' if utc else None return DatetimeIndex(dt_array, tz=tz, name=name) return Index(dt_array, name=name) @@ -132,7 +133,7 @@ def _convert_and_box_cache( from pandas import Series result = Series(arg).map(cache_array) if box: - return _box_as_indexlike(result, tz=None, name=name) + return _box_as_indexlike(result, utc=None, name=name) return result.values @@ -365,7 +366,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, return np.array(result, dtype=object) if box: - return _box_as_indexlike(result, tz=tz, name=name) + utc = tz == 'utc' + return _box_as_indexlike(result, utc=utc, name=name) return result