Skip to content

Commit e716520

Browse files
jbrockmendel authored and yehoshuadimarsky committed
BUG: tz_localize(UTC) not making a copy (pandas-dev#46460)
1 parent acfdf23 commit e716520

File tree

5 files changed

+50
-10
lines changed

5 files changed

+50
-10
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,7 @@ Datetimelike
391391
- Bug in :class:`Timestamp` construction when passing datetime components as positional arguments and ``tzinfo`` as a keyword argument incorrectly raising (:issue:`31929`)
392392
- Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`)
393393
- Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`)
394+
- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`)
394395
-
395396

396397
Timedelta

pandas/_libs/tslibs/tzconversion.pyx

+17-7
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,6 @@ cdef int64_t tz_localize_to_utc_single(
6464
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True)
6565

6666
elif is_fixed_offset(tz):
67-
# TODO: in this case we should be able to use get_utcoffset,
68-
# that returns None for e.g. 'dateutil//usr/share/zoneinfo/Etc/GMT-9'
6967
_, deltas, _ = get_dst_info(tz)
7068
delta = deltas[0]
7169
return val - delta
@@ -121,9 +119,10 @@ timedelta-like}
121119
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
122120
int64_t *tdata
123121
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
124-
int64_t first_delta
122+
int64_t first_delta, delta
125123
int64_t shift_delta = 0
126-
ndarray[int64_t] trans, result, result_a, result_b, dst_hours
124+
ndarray[int64_t] trans, result_a, result_b, dst_hours
125+
int64_t[::1] result
127126
npy_datetimestruct dts
128127
bint infer_dst = False, is_dst = False, fill = False
129128
bint shift_forward = False, shift_backward = False
@@ -132,7 +131,7 @@ timedelta-like}
132131

133132
# Vectorized version of DstTzInfo.localize
134133
if is_utc(tz) or tz is None:
135-
return vals
134+
return vals.copy()
136135

137136
result = np.empty(n, dtype=np.int64)
138137

@@ -143,7 +142,18 @@ timedelta-like}
143142
result[i] = NPY_NAT
144143
else:
145144
result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True)
146-
return result
145+
return result.base # to return underlying ndarray
146+
147+
elif is_fixed_offset(tz):
148+
_, deltas, _ = get_dst_info(tz)
149+
delta = deltas[0]
150+
for i in range(n):
151+
v = vals[i]
152+
if v == NPY_NAT:
153+
result[i] = NPY_NAT
154+
else:
155+
result[i] = v - delta
156+
return result.base # to return underlying ndarray
147157

148158
# silence false-positive compiler warning
149159
ambiguous_array = np.empty(0, dtype=bool)
@@ -298,7 +308,7 @@ timedelta-like}
298308
stamp = _render_tstamp(val)
299309
raise pytz.NonExistentTimeError(stamp)
300310

301-
return result
311+
return result.base # .base to get underlying ndarray
302312

303313

304314
cdef inline Py_ssize_t bisect_right_i8(int64_t *data,

pandas/core/arrays/datetimes.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -456,9 +456,13 @@ def _generate_range(
456456
endpoint_tz = start.tz if start is not None else end.tz
457457

458458
if tz is not None and endpoint_tz is None:
459-
i8values = tzconversion.tz_localize_to_utc(
460-
i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
461-
)
459+
460+
if not timezones.is_utc(tz):
461+
# skip tz_localize_to_utc for UTC: the values would come back
462+
# unchanged, but it would still make an unnecessary copy.
463+
i8values = tzconversion.tz_localize_to_utc(
464+
i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
465+
)
462466

463467
# i8values is localized datetime64 array -> have to convert
464468
# start/end as well to compare
@@ -2126,6 +2130,8 @@ def _sequence_to_dt64ns(
21262130
if tz is not None:
21272131
# Convert tz-naive to UTC
21282132
tz = timezones.maybe_get_tz(tz)
2133+
# TODO: if tz is UTC, are there situations where we *don't* want a
2134+
# copy? tz_localize_to_utc always makes one.
21292135
data = tzconversion.tz_localize_to_utc(
21302136
data.view("i8"), tz, ambiguous=ambiguous
21312137
)

pandas/tests/indexes/datetimes/test_timezones.py

+11
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,17 @@ def test_tz_convert_unsorted(self, tzstr):
327327
# -------------------------------------------------------------
328328
# DatetimeIndex.tz_localize
329329

330+
def test_tz_localize_utc_copies(self, utc_fixture):
    # GH#46460
    # Localizing a tz-naive index to UTC must not hand back an object that
    # shares memory with the object it was localized from.
    naive = DatetimeIndex(
        ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
    )

    # Index-level tz_localize.
    localized_index = naive.tz_localize(utc_fixture)
    assert not tm.shares_memory(localized_index, naive)

    # Same check on the array that backs the index.
    naive_array = naive._data
    localized_array = naive_array.tz_localize(utc_fixture)
    assert not tm.shares_memory(naive_array, localized_array)
340+
330341
def test_dti_tz_localize_nonexistent_raise_coerce(self):
331342
# GH#13057
332343
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]

pandas/tests/tslibs/test_conversion.py

+12
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ def _compare_local_to_utc(tz_didx, naive_didx):
5050
tm.assert_numpy_array_equal(result, expected)
5151

5252

53+
def test_tz_localize_to_utc_copies():
    # GH#46460
    # For tz=UTC and tz=None, tz_localize_to_utc returns the same i8 values,
    # but the result must be a new array and not share memory with the input.
    # Call tz_localize_to_utc itself (not tz_convert_from_utc) so the function
    # named in this test is the one being checked.
    arr = np.arange(5, dtype="i8")

    result = tzconversion.tz_localize_to_utc(arr, UTC)
    tm.assert_numpy_array_equal(result, arr)
    assert not np.shares_memory(arr, result)

    result = tzconversion.tz_localize_to_utc(arr, None)
    tm.assert_numpy_array_equal(result, arr)
    assert not np.shares_memory(arr, result)
63+
64+
5365
def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture):
5466
tz = tz_aware_fixture
5567
tz_didx = date_range("2014-03-01", "2015-01-10", freq="H", tz=tz)

0 commit comments

Comments
 (0)