Skip to content

Commit 9bee334

Browse files
anmyachev authored and jreback committed
BUG: _convert_and_box_cache raise ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True (#26097)
1 parent f5038de commit 9bee334

File tree

4 files changed

+79
-25
lines changed

4 files changed

+79
-25
lines changed

doc/source/whatsnew/v0.25.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,8 @@ Datetimelike
910910
- Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`)
911911
- Bug where adding :class:`Timestamp` to a ``np.timedelta64`` object would raise instead of returning a :class:`Timestamp` (:issue:`24775`)
912912
- Bug where comparing a zero-dimensional numpy array containing a ``np.datetime64`` object to a :class:`Timestamp` would incorrectly raise ``TypeError`` (:issue:`26916`)
913+
- Bug in :func:`to_datetime` which would raise ``ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True`` when called with ``cache=True``, with ``arg`` including datetime strings with different offsets (:issue:`26097`)
914+
-
913915
914916
Timedelta
915917
^^^^^^^^^

pandas/core/index.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1-
# flake8: noqa
2-
from pandas.core.indexes.api import *
3-
from pandas.core.indexes.multi import _sparsify
1+
from pandas.core.indexes.api import ( # noqa:F401
2+
CategoricalIndex, DatetimeIndex, Float64Index, Index, Int64Index,
3+
IntervalIndex, InvalidIndexError, MultiIndex, NaT, NumericIndex,
4+
PeriodIndex, RangeIndex, TimedeltaIndex, UInt64Index, _all_indexes_same,
5+
_get_combined_index, _get_consensus_names, _get_objs_combined_axis,
6+
_new_Index, _union_indexes, ensure_index, ensure_index_from_sequences)
7+
from pandas.core.indexes.multi import _sparsify # noqa:F401

pandas/core/tools/datetimes.py

+59-22
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from collections import abc
22
from datetime import datetime, time
33
from functools import partial
4+
from typing import Optional, TypeVar, Union
45

56
import numpy as np
67

@@ -14,12 +15,25 @@
1415
from pandas.core.dtypes.common import (
1516
ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype,
1617
is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype,
17-
is_list_like, is_numeric_dtype, is_object_dtype, is_scalar)
18-
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
18+
is_list_like, is_numeric_dtype, is_scalar)
19+
from pandas.core.dtypes.generic import (
20+
ABCDataFrame, ABCDatetimeIndex, ABCIndex, ABCIndexClass, ABCSeries)
1921
from pandas.core.dtypes.missing import notna
2022

23+
from pandas._typing import ArrayLike
2124
from pandas.core import algorithms
2225

26+
# ---------------------------------------------------------------------
27+
# types used in annotations
28+
29+
Scalar = Union[int, float, str]
30+
DatetimeScalar = TypeVar('DatetimeScalar', Scalar, datetime)
31+
DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, list, tuple,
32+
ArrayLike, ABCSeries]
33+
34+
35+
# ---------------------------------------------------------------------
36+
2337

2438
def _guess_datetime_format_for_array(arr, **kwargs):
2539
# Try to guess the format based on the first non-NaN element
@@ -60,7 +74,43 @@ def _maybe_cache(arg, format, cache, convert_listlike):
6074
return cache_array
6175

6276

63-
def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
77+
def _box_as_indexlike(
78+
dt_array: ArrayLike,
79+
utc: Optional[bool] = None,
80+
name: Optional[str] = None
81+
) -> Union[ABCIndex, ABCDatetimeIndex]:
82+
"""
83+
Properly boxes the ndarray of datetimes to DatetimeIndex
84+
if it is possible or to generic Index instead
85+
86+
Parameters
87+
----------
88+
dt_array: 1-d array
89+
array of datetimes to be boxed
90+
utc : bool, default None
91+
If True, a datetime64 result is boxed as a DatetimeIndex with tz='utc'
92+
name : string, default None
93+
Name for a resulting index
94+
95+
Returns
96+
-------
97+
result : datetime of converted dates
98+
- DatetimeIndex if convertible to sole datetime64 type
99+
- general Index otherwise
100+
"""
101+
from pandas import DatetimeIndex, Index
102+
if is_datetime64_dtype(dt_array):
103+
tz = 'utc' if utc else None
104+
return DatetimeIndex(dt_array, tz=tz, name=name)
105+
return Index(dt_array, name=name)
106+
107+
108+
def _convert_and_box_cache(
109+
arg: DatetimeScalarOrArrayConvertible,
110+
cache_array: ABCSeries,
111+
box: bool,
112+
name: Optional[str] = None
113+
) -> Union[ABCIndex, np.ndarray]:
64114
"""
65115
Convert array of dates with a cache and box the result
66116
@@ -71,26 +121,19 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
71121
Cache of converted, unique dates
72122
box : boolean
73123
True boxes result as an Index-like, False returns an ndarray
74-
errors : string
75-
'ignore' plus box=True will convert result to Index
76124
name : string, default None
77125
Name for a DatetimeIndex
78126
79127
Returns
80128
-------
81129
result : datetime of converted dates
82-
Returns:
83-
84130
- Index-like if box=True
85131
- ndarray if box=False
86132
"""
87-
from pandas import Series, DatetimeIndex, Index
133+
from pandas import Series
88134
result = Series(arg).map(cache_array)
89135
if box:
90-
if errors == 'ignore':
91-
return Index(result, name=name)
92-
else:
93-
return DatetimeIndex(result, name=name)
136+
return _box_as_indexlike(result, utc=None, name=name)
94137
return result.values
95138

96139

@@ -118,7 +161,6 @@ def _return_parsed_timezone_results(result, timezones, box, tz, name):
118161
119162
- Index-like if box=True
120163
- ndarray of Timestamps if box=False
121-
122164
"""
123165
if tz is not None:
124166
raise ValueError("Cannot pass a tz argument when "
@@ -324,13 +366,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
324366
return np.array(result, dtype=object)
325367

326368
if box:
327-
# Ensure we return an Index in all cases where box=True
328-
if is_datetime64_dtype(result):
329-
return DatetimeIndex(result, tz=tz, name=name)
330-
elif is_object_dtype(result):
331-
# e.g. an Index of datetime objects
332-
from pandas import Index
333-
return Index(result, name=name)
369+
utc = tz == 'utc'
370+
return _box_as_indexlike(result, utc=utc, name=name)
334371
return result
335372

336373

@@ -611,15 +648,15 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
611648
elif isinstance(arg, ABCIndexClass):
612649
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
613650
if not cache_array.empty:
614-
result = _convert_and_box_cache(arg, cache_array, box, errors,
651+
result = _convert_and_box_cache(arg, cache_array, box,
615652
name=arg.name)
616653
else:
617654
convert_listlike = partial(convert_listlike, name=arg.name)
618655
result = convert_listlike(arg, box, format)
619656
elif is_list_like(arg):
620657
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
621658
if not cache_array.empty:
622-
result = _convert_and_box_cache(arg, cache_array, box, errors)
659+
result = _convert_and_box_cache(arg, cache_array, box)
623660
else:
624661
result = convert_listlike(arg, box, format)
625662
else:

pandas/tests/indexes/datetimes/test_tools.py

+11
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,17 @@ def test_to_datetime_tz(self, cache):
504504
with pytest.raises(ValueError, match=msg):
505505
pd.to_datetime(arr, cache=cache)
506506

507+
@pytest.mark.parametrize('cache', [True, False])
508+
def test_to_datetime_different_offsets(self, cache):
509+
# inspired by asv timeseries.ToDatetimeNONISO8601 benchmark
510+
# see GH-26097 for more
511+
ts_string_1 = 'March 1, 2018 12:00:00+0400'
512+
ts_string_2 = 'March 1, 2018 12:00:00+0500'
513+
arr = [ts_string_1] * 5 + [ts_string_2] * 5
514+
expected = pd.Index([parse(x) for x in arr])
515+
result = pd.to_datetime(arr, cache=cache)
516+
tm.assert_index_equal(result, expected)
517+
507518
@pytest.mark.parametrize('cache', [True, False])
508519
def test_to_datetime_tz_pytz(self, cache):
509520
# see gh-8260

0 commit comments

Comments
 (0)