Skip to content

BUG: tz_localize(UTC) not making a copy #46460

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 22, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 17 additions & 7 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ cdef int64_t tz_localize_to_utc_single(
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True)

elif is_fixed_offset(tz):
# TODO: in this case we should be able to use get_utcoffset,
# that returns None for e.g. 'dateutil//usr/share/zoneinfo/Etc/GMT-9'
_, deltas, _ = get_dst_info(tz)
delta = deltas[0]
return val - delta
Expand Down Expand Up @@ -121,9 +119,10 @@ timedelta-like}
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
int64_t *tdata
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
int64_t first_delta
int64_t first_delta, delta
int64_t shift_delta = 0
ndarray[int64_t] trans, result, result_a, result_b, dst_hours
ndarray[int64_t] trans, result_a, result_b, dst_hours
int64_t[::1] result
npy_datetimestruct dts
bint infer_dst = False, is_dst = False, fill = False
bint shift_forward = False, shift_backward = False
Expand All @@ -132,7 +131,7 @@ timedelta-like}

# Vectorized version of DstTzInfo.localize
if is_utc(tz) or tz is None:
return vals
return vals.copy()

result = np.empty(n, dtype=np.int64)

Expand All @@ -143,7 +142,18 @@ timedelta-like}
result[i] = NPY_NAT
else:
result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True)
return result
return result.base # to return underlying ndarray

elif is_fixed_offset(tz):
_, deltas, _ = get_dst_info(tz)
delta = deltas[0]
for i in range(n):
v = vals[i]
if v == NPY_NAT:
result[i] = NPY_NAT
else:
result[i] = v - delta
return result.base # to return underlying ndarray

# silence false-positive compiler warning
ambiguous_array = np.empty(0, dtype=bool)
Expand Down Expand Up @@ -298,7 +308,7 @@ timedelta-like}
stamp = _render_tstamp(val)
raise pytz.NonExistentTimeError(stamp)

return result
return result.base # .base to get underlying ndarray


cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,9 +456,13 @@ def _generate_range(
endpoint_tz = start.tz if start is not None else end.tz

if tz is not None and endpoint_tz is None:
i8values = tzconversion.tz_localize_to_utc(
i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
)

if not timezones.is_utc(tz):
# Short-circuit: with UTC, tz_localize_to_utc would leave the values
# unchanged but still make an unnecessary copy.
i8values = tzconversion.tz_localize_to_utc(
i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent
)

# i8values is localized datetime64 array -> have to convert
# start/end as well to compare
Expand Down Expand Up @@ -2126,6 +2130,8 @@ def _sequence_to_dt64ns(
if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
# TODO: if tz is UTC, are there situations where we *don't* want a
# copy? tz_localize_to_utc always makes one.
data = tzconversion.tz_localize_to_utc(
data.view("i8"), tz, ambiguous=ambiguous
)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,16 @@ def test_tz_convert_unsorted(self, tzstr):
# -------------------------------------------------------------
# DatetimeIndex.tz_localize

def test_tz_localize_utc_copies(self):
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
index = DatetimeIndex(times)

res = index.tz_localize("UTC")
Copy link
Member

@mroeschke mroeschke Mar 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably be good to test this with utc_fixture.

Also, based on the code change, None now returns a copy as well (and should that be tested too?).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. I will use the fixture in the index tests and cover the None case just once in the tslibs tests.

assert not tm.shares_memory(res, index)

res2 = index._data.tz_localize("UTC")
assert not tm.shares_memory(index._data, res2)

def test_dti_tz_localize_nonexistent_raise_coerce(self):
# GH#13057
times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/tslibs/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ def _compare_local_to_utc(tz_didx, naive_didx):
tm.assert_numpy_array_equal(result, expected)


# Check that converting an int64 array with tz=UTC yields an array equal to
# the input that does not share memory with it (i.e. a copy is returned).
# NOTE(review): the test name refers to tz_localize_to_utc, but the call below
# is tzconversion.tz_convert_from_utc -- confirm which function is intended.
def test_tz_localize_to_utc_copies():
arr = np.arange(5, dtype="i8")
result = tzconversion.tz_convert_from_utc(arr, tz=UTC)
tm.assert_numpy_array_equal(result, arr)
assert not np.shares_memory(arr, result)


def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture):
tz = tz_aware_fixture
tz_didx = date_range("2014-03-01", "2015-01-10", freq="H", tz=tz)
Expand Down