Skip to content

Commit 58a59bd

Browse files
jbrockmendeljreback
authored and committed
BUG: Casting tz-aware DatetimeIndex to object-dtype ndarray/Index (pandas-dev#23524)
1 parent da23030 commit 58a59bd

File tree

9 files changed

+149
-14
lines changed

9 files changed

+149
-14
lines changed

doc/source/whatsnew/v0.24.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -1128,6 +1128,9 @@ Datetimelike
11281128
- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
11291129
- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`)
11301130
- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`)
1131+
- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`)
1132+
- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`)
1133+
- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`)
11311134

11321135
Timedelta
11331136
^^^^^^^^^
@@ -1174,6 +1177,7 @@ Offsets
11741177
- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
11751178
- Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`)
11761179
- Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`)
1180+
- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`)
11771181

11781182
Numeric
11791183
^^^^^^^

pandas/_libs/tslibs/offsets.pyx

+7-2
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,13 @@ class _BaseOffset(object):
308308

309309
def __eq__(self, other):
310310
if is_string_object(other):
311-
other = to_offset(other)
312-
311+
try:
312+
# GH#23524 if to_offset fails, we are dealing with an
313+
# incomparable type so == is False and != is True
314+
other = to_offset(other)
315+
except ValueError:
316+
# e.g. "infer"
317+
return False
313318
try:
314319
return self._params == other._params
315320
except AttributeError:

pandas/core/arrays/datetimes.py

+10
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from pandas.core.dtypes.common import (
2020
_NS_DTYPE,
2121
is_object_dtype,
22+
is_int64_dtype,
2223
is_datetime64tz_dtype,
2324
is_datetime64_dtype,
2425
ensure_int64)
@@ -388,6 +389,15 @@ def _resolution(self):
388389
# ----------------------------------------------------------------
389390
# Array-like Methods
390391

392+
def __array__(self, dtype=None):
393+
if is_object_dtype(dtype):
394+
return np.array(list(self), dtype=object)
395+
elif is_int64_dtype(dtype):
396+
return self.asi8
397+
398+
# TODO: warn that conversion may be lossy?
399+
return self._data.view(np.ndarray) # follow Index.__array__
400+
391401
def __iter__(self):
392402
"""
393403
Return an iterator over the boxed values

pandas/core/indexes/base.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -301,11 +301,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
301301
(dtype is not None and is_datetime64_any_dtype(dtype)) or
302302
'tz' in kwargs):
303303
from pandas import DatetimeIndex
304-
result = DatetimeIndex(data, copy=copy, name=name,
305-
dtype=dtype, **kwargs)
304+
306305
if dtype is not None and is_dtype_equal(_o_dtype, dtype):
307-
return Index(result.to_pydatetime(), dtype=_o_dtype)
306+
# GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
307+
# will raise in the case where `data` is already tz-aware. So
308+
# we leave it out of this step and cast to object-dtype after
309+
# the DatetimeIndex construction.
310+
# Note we can pass copy=False because the .astype below
311+
# will always make a copy
312+
result = DatetimeIndex(data, copy=False, name=name, **kwargs)
313+
return result.astype(object)
308314
else:
315+
result = DatetimeIndex(data, copy=copy, name=name,
316+
dtype=dtype, **kwargs)
309317
return result
310318

311319
elif (is_timedelta64_dtype(data) or

pandas/tests/arrays/test_datetimelike.py

+48
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,54 @@ def timedelta_index(request):
5757

5858
class TestDatetimeArray(object):
5959

60+
def test_array_object_dtype(self, tz_naive_fixture):
61+
# GH#23524
62+
tz = tz_naive_fixture
63+
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
64+
arr = DatetimeArrayMixin(dti)
65+
66+
expected = np.array(list(dti))
67+
68+
result = np.array(arr, dtype=object)
69+
tm.assert_numpy_array_equal(result, expected)
70+
71+
# also test the DatetimeIndex method while we're at it
72+
result = np.array(dti, dtype=object)
73+
tm.assert_numpy_array_equal(result, expected)
74+
75+
def test_array(self, tz_naive_fixture):
76+
# GH#23524
77+
tz = tz_naive_fixture
78+
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
79+
arr = DatetimeArrayMixin(dti)
80+
81+
expected = dti.asi8.view('M8[ns]')
82+
result = np.array(arr)
83+
tm.assert_numpy_array_equal(result, expected)
84+
85+
# check that we are not making copies when setting copy=False
86+
result = np.array(arr, copy=False)
87+
assert result.base is expected.base
88+
assert result.base is not None
89+
90+
def test_array_i8_dtype(self, tz_naive_fixture):
91+
# GH#23524
92+
tz = tz_naive_fixture
93+
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
94+
arr = DatetimeArrayMixin(dti)
95+
96+
expected = dti.asi8
97+
result = np.array(arr, dtype='i8')
98+
tm.assert_numpy_array_equal(result, expected)
99+
100+
result = np.array(arr, dtype=np.int64)
101+
tm.assert_numpy_array_equal(result, expected)
102+
103+
# check that we are not making copies when setting copy=False
104+
result = np.array(arr, dtype='i8', copy=False)
105+
assert result.base is expected.base
106+
assert result.base is not None
107+
60108
def test_from_dti(self, tz_naive_fixture):
61109
tz = tz_naive_fixture
62110
dti = pd.date_range('2016-01-01', periods=3, tz=tz)

pandas/tests/indexes/test_base.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype):
132132
@pytest.mark.parametrize("cast_as_obj", [True, False])
133133
@pytest.mark.parametrize("index", [
134134
pd.date_range('2015-01-01 10:00', freq='D', periods=3,
135-
tz='US/Eastern'), # DTI with tz
135+
tz='US/Eastern', name='Green Eggs & Ham'), # DTI with tz
136136
pd.date_range('2015-01-01 10:00', freq='D', periods=3), # DTI no tz
137137
pd.timedelta_range('1 days', freq='D', periods=3), # td
138138
pd.period_range('2015-01-01', freq='D', periods=3) # period
@@ -145,8 +145,16 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index):
145145

146146
tm.assert_index_equal(result, index)
147147

148-
if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'):
148+
if isinstance(index, pd.DatetimeIndex):
149149
assert result.tz == index.tz
150+
if cast_as_obj:
151+
# GH#23524 check that Index(dti, dtype=object) does not
152+
# incorrectly raise ValueError, and that nanoseconds are not
153+
# dropped
154+
index += pd.Timedelta(nanoseconds=50)
155+
result = pd.Index(index, dtype=object)
156+
assert result.dtype == np.object_
157+
assert list(result) == list(index)
150158

151159
@pytest.mark.parametrize("index,has_tz", [
152160
(pd.date_range('2015-01-01 10:00', freq='D', periods=3,

pandas/tests/tseries/offsets/test_offsets.py

+13
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,19 @@ def testMult2(self):
187187
assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50)
188188
assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)
189189

190+
def test_compare_str(self):
191+
# GH#23524
192+
# comparing to strings that cannot be cast to DateOffsets should
193+
# not raise for __eq__ or __ne__
194+
if self._offset is None:
195+
return
196+
off = self._get_offset(self._offset)
197+
198+
assert not off == "infer"
199+
assert off != "foo"
200+
# Note: inequalities are only implemented for Tick subclasses;
201+
# tests for this are in test_ticks
202+
190203

191204
class TestCommon(Base):
192205
# expected value created by Base._get_offset

pandas/tests/tseries/offsets/test_ticks.py

+22
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,25 @@ def test_compare_ticks(cls):
267267
assert cls(4) > three
268268
assert cls(3) == cls(3)
269269
assert cls(3) != cls(4)
270+
271+
272+
@pytest.mark.parametrize('cls', tick_classes)
273+
def test_compare_ticks_to_strs(cls):
274+
# GH#23524
275+
off = cls(19)
276+
277+
# These tests should work with any strings, but we particularly are
278+
# interested in "infer" as that comparison is convenient to make in
279+
# Datetime/Timedelta Array/Index constructors
280+
assert not off == "infer"
281+
assert not "foo" == off
282+
283+
for left, right in [("infer", off), (off, "infer")]:
284+
with pytest.raises(TypeError):
285+
left < right
286+
with pytest.raises(TypeError):
287+
left <= right
288+
with pytest.raises(TypeError):
289+
left > right
290+
with pytest.raises(TypeError):
291+
left >= right

pandas/tseries/offsets.py

+24-7
Original file line numberDiff line numberDiff line change
@@ -2199,9 +2199,18 @@ def apply_index(self, i):
21992199

22002200

22012201
def _tick_comp(op):
2202+
assert op not in [operator.eq, operator.ne]
2203+
22022204
def f(self, other):
2203-
return op(self.delta, other.delta)
2205+
try:
2206+
return op(self.delta, other.delta)
2207+
except AttributeError:
2208+
# comparing with a non-Tick object
2209+
raise TypeError("Invalid comparison between {cls} and {typ}"
2210+
.format(cls=type(self).__name__,
2211+
typ=type(other).__name__))
22042212

2213+
f.__name__ = '__{opname}__'.format(opname=op.__name__)
22052214
return f
22062215

22072216

@@ -2220,8 +2229,6 @@ def __init__(self, n=1, normalize=False):
22202229
__ge__ = _tick_comp(operator.ge)
22212230
__lt__ = _tick_comp(operator.lt)
22222231
__le__ = _tick_comp(operator.le)
2223-
__eq__ = _tick_comp(operator.eq)
2224-
__ne__ = _tick_comp(operator.ne)
22252232

22262233
def __add__(self, other):
22272234
if isinstance(other, Tick):
@@ -2242,8 +2249,13 @@ def __add__(self, other):
22422249
def __eq__(self, other):
22432250
if isinstance(other, compat.string_types):
22442251
from pandas.tseries.frequencies import to_offset
2245-
2246-
other = to_offset(other)
2252+
try:
2253+
# GH#23524 if to_offset fails, we are dealing with an
2254+
# incomparable type so == is False and != is True
2255+
other = to_offset(other)
2256+
except ValueError:
2257+
# e.g. "infer"
2258+
return False
22472259

22482260
if isinstance(other, Tick):
22492261
return self.delta == other.delta
@@ -2258,8 +2270,13 @@ def __hash__(self):
22582270
def __ne__(self, other):
22592271
if isinstance(other, compat.string_types):
22602272
from pandas.tseries.frequencies import to_offset
2261-
2262-
other = to_offset(other)
2273+
try:
2274+
# GH#23524 if to_offset fails, we are dealing with an
2275+
# incomparable type so == is False and != is True
2276+
other = to_offset(other)
2277+
except ValueError:
2278+
# e.g. "infer"
2279+
return True
22632280

22642281
if isinstance(other, Tick):
22652282
return self.delta != other.delta

0 commit comments

Comments
 (0)