CLN: use .view(i8) instead of .astype(i8) for datetimelike values (pandas-dev#38535)

jbrockmendel · luckyvs1 · commit 99c4f2c76260 · 2021-01-19T23:18:35.000-08:00
diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py
@@ -12,6 +12,7 @@
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.common import is_datetime64_ns_dtype
+from pandas.core.dtypes.missing import isna
 
 import pandas.core.common as common
 from pandas.core.util.numba_ import maybe_use_numba
@@ -252,7 +253,9 @@ def __init__(
                 raise ValueError(
                     "halflife must be a string or datetime.timedelta object"
                 )
-            self.times = np.asarray(times.astype(np.int64))
+            if isna(times).any():
+                raise ValueError("Cannot convert NaT values to integer")
+            self.times = np.asarray(times.view(np.int64))
             self.halflife = Timedelta(halflife).value
             # Halflife is no longer applicable when calculating COM
             # But allow COM to still be calculated if the user passes other decay args
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1739,7 +1739,7 @@ def get_format_timedelta64(
 
     If box, then show the return in quotes
     """
-    values_int = values.astype(np.int64)
+    values_int = values.view(np.int64)
 
     consider_values = values_int != iNaT
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -371,15 +371,15 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
         if is_datetime64_dtype(dates.dtype):
             if delta:
                 time_delta = dates - stata_epoch
-                d["delta"] = time_delta._values.astype(np.int64) // 1000  # microseconds
+                d["delta"] = time_delta._values.view(np.int64) // 1000  # microseconds
             if days or year:
                 date_index = DatetimeIndex(dates)
                 d["year"] = date_index._data.year
                 d["month"] = date_index._data.month
             if days:
-                days_in_ns = dates.astype(np.int64) - to_datetime(
+                days_in_ns = dates.view(np.int64) - to_datetime(
                     d["year"], format="%Y"
-                ).astype(np.int64)
+                ).view(np.int64)
                 d["days"] = days_in_ns // NS_PER_DAY
 
         elif infer_dtype(dates, skipna=False) == "datetime":
diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py
@@ -329,7 +329,7 @@ def test_constructor_simple_new(self):
         msg = "Should be numpy array of type i8"
         with pytest.raises(AssertionError, match=msg):
             # Need ndarray, not Int64Index
-            type(idx._data)._simple_new(idx.astype("i8"), freq=idx.freq)
+            type(idx._data)._simple_new(idx._int64index, freq=idx.freq)
 
         arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq)
         result = idx._simple_new(arr, name="p")
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -112,7 +112,7 @@ def test_frame_non_unique_columns(self, orient, data):
                 # in milliseconds; these are internally stored in nanosecond,
                 # so divide to get where we need
                 # TODO: a to_epoch method would also solve; see GH 14772
-                expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
+                expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000
         elif orient == "split":
             expected = df
 
@@ -254,7 +254,7 @@ def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame):
 
         if not convert_axes:  # one off for ts handling
             # DTI gets converted to epoch values
-            idx = expected.index.astype(np.int64) // 1000000
+            idx = expected.index.view(np.int64) // 1000000
             if orient != "split":  # TODO: handle consistently across orients
                 idx = idx.astype(str)
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -841,7 +841,7 @@ def test_timedelta(self):
         with tm.assert_produces_warning(UserWarning):
             df.to_sql("test_timedelta", self.conn)
         result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn)
-        tm.assert_series_equal(result["foo"], df["foo"].astype("int64"))
+        tm.assert_series_equal(result["foo"], df["foo"].view("int64"))
 
     def test_complex_raises(self):
         df = DataFrame({"a": [1 + 1j, 2j]})
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
@@ -127,3 +127,11 @@ def test_ewma_with_times_variable_spacing(tz_aware_fixture):
     result = df.ewm(halflife=halflife, times=times).mean()
     expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459])
     tm.assert_frame_equal(result, expected)
+
+
+def test_ewm_with_nat_raises(halflife_with_times):
+    # GH#38535
+    ser = Series(range(1))
+    times = DatetimeIndex(["NaT"])
+    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
+        ser.ewm(com=0.1, halflife=halflife_with_times, times=times)