Skip to content

Commit 99c4f2c

Browse files
jbrockmendel authored and luckyvs1 committed
CLN: use .view(i8) instead of .astype(i8) for datetimelike values (pandas-dev#38535)
1 parent 1db5946 commit 99c4f2c

File tree

7 files changed

+20
-9
lines changed

7 files changed

+20
-9
lines changed

pandas/core/window/ewm.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas.util._decorators import Appender, Substitution, doc
1313

1414
from pandas.core.dtypes.common import is_datetime64_ns_dtype
15+
from pandas.core.dtypes.missing import isna
1516

1617
import pandas.core.common as common
1718
from pandas.core.util.numba_ import maybe_use_numba
@@ -252,7 +253,9 @@ def __init__(
252253
raise ValueError(
253254
"halflife must be a string or datetime.timedelta object"
254255
)
255-
self.times = np.asarray(times.astype(np.int64))
256+
if isna(times).any():
257+
raise ValueError("Cannot convert NaT values to integer")
258+
self.times = np.asarray(times.view(np.int64))
256259
self.halflife = Timedelta(halflife).value
257260
# Halflife is no longer applicable when calculating COM
258261
# But allow COM to still be calculated if the user passes other decay args

pandas/io/formats/format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1739,7 +1739,7 @@ def get_format_timedelta64(
17391739
17401740
If box, then show the return in quotes
17411741
"""
1742-
values_int = values.astype(np.int64)
1742+
values_int = values.view(np.int64)
17431743

17441744
consider_values = values_int != iNaT
17451745

pandas/io/stata.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -371,15 +371,15 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
371371
if is_datetime64_dtype(dates.dtype):
372372
if delta:
373373
time_delta = dates - stata_epoch
374-
d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds
374+
d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds
375375
if days or year:
376376
date_index = DatetimeIndex(dates)
377377
d["year"] = date_index._data.year
378378
d["month"] = date_index._data.month
379379
if days:
380-
days_in_ns = dates.astype(np.int64) - to_datetime(
380+
days_in_ns = dates.view(np.int64) - to_datetime(
381381
d["year"], format="%Y"
382-
).astype(np.int64)
382+
).view(np.int64)
383383
d["days"] = days_in_ns // NS_PER_DAY
384384

385385
elif infer_dtype(dates, skipna=False) == "datetime":

pandas/tests/indexes/period/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def test_constructor_simple_new(self):
329329
msg = "Should be numpy array of type i8"
330330
with pytest.raises(AssertionError, match=msg):
331331
# Need ndarray, not Int64Index
332-
type(idx._data)._simple_new(idx.astype("i8"), freq=idx.freq)
332+
type(idx._data)._simple_new(idx._int64index, freq=idx.freq)
333333

334334
arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq)
335335
result = idx._simple_new(arr, name="p")

pandas/tests/io/json/test_pandas.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def test_frame_non_unique_columns(self, orient, data):
112112
# in milliseconds; these are internally stored in nanosecond,
113113
# so divide to get where we need
114114
# TODO: a to_epoch method would also solve; see GH 14772
115-
expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
115+
expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000
116116
elif orient == "split":
117117
expected = df
118118

@@ -254,7 +254,7 @@ def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame):
254254

255255
if not convert_axes: # one off for ts handling
256256
# DTI gets converted to epoch values
257-
idx = expected.index.astype(np.int64) // 1000000
257+
idx = expected.index.view(np.int64) // 1000000
258258
if orient != "split": # TODO: handle consistently across orients
259259
idx = idx.astype(str)
260260

pandas/tests/io/test_sql.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ def test_timedelta(self):
841841
with tm.assert_produces_warning(UserWarning):
842842
df.to_sql("test_timedelta", self.conn)
843843
result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn)
844-
tm.assert_series_equal(result["foo"], df["foo"].astype("int64"))
844+
tm.assert_series_equal(result["foo"], df["foo"].view("int64"))
845845

846846
def test_complex_raises(self):
847847
df = DataFrame({"a": [1 + 1j, 2j]})

pandas/tests/window/test_ewm.py

+8
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,11 @@ def test_ewma_with_times_variable_spacing(tz_aware_fixture):
127127
result = df.ewm(halflife=halflife, times=times).mean()
128128
expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
129129
tm.assert_frame_equal(result, expected)
130+
131+
132+
def test_ewm_with_nat_raises(halflife_with_times):
133+
# GH#38535
134+
ser = Series(range(1))
135+
times = DatetimeIndex(["NaT"])
136+
with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
137+
ser.ewm(com=0.1, halflife=halflife_with_times, times=times)

0 commit comments

Comments
 (0)