From 6a23a3cf95ee7e630de88835aa9bfd757fa73b58 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 16 Dec 2020 19:05:07 -0800 Subject: [PATCH 1/2] CLN: use .view(i8) instead of .astype(i8) --- pandas/core/window/ewm.py | 5 ++++- pandas/io/formats/format.py | 2 +- pandas/io/stata.py | 6 +++--- pandas/tests/indexes/period/test_constructors.py | 2 +- pandas/tests/io/json/test_pandas.py | 4 ++-- pandas/tests/io/test_sql.py | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index f8237a436f436..0fc6c8a23f5f2 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -12,6 +12,7 @@ from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.common import is_datetime64_ns_dtype +from pandas.core.dtypes.missing import isna import pandas.core.common as common from pandas.core.util.numba_ import maybe_use_numba @@ -252,7 +253,9 @@ def __init__( raise ValueError( "halflife must be a string or datetime.timedelta object" ) - self.times = np.asarray(times.astype(np.int64)) + if isna(times).any(): + raise ValueError("Cannot convert NaT values to integer") + self.times = np.asarray(times.view(np.int64)) self.halflife = Timedelta(halflife).value # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 527ee51873631..2620c562aefeb 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1739,7 +1739,7 @@ def get_format_timedelta64( If box, then show the return in quotes """ - values_int = values.astype(np.int64) + values_int = values.view(np.int64) consider_values = values_int != iNaT diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 853a982536d40..88485f99c07aa 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -371,15 +371,15 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): if is_datetime64_dtype(dates.dtype): if delta: time_delta = dates - stata_epoch - d["delta"] = time_delta._values.astype(np.int64) // 1000 # microseconds + d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds if days or year: date_index = DatetimeIndex(dates) d["year"] = date_index._data.year d["month"] = date_index._data.month if days: - days_in_ns = dates.astype(np.int64) - to_datetime( + days_in_ns = dates.view(np.int64) - to_datetime( d["year"], format="%Y" - ).astype(np.int64) + ).view(np.int64) d["days"] = days_in_ns // NS_PER_DAY elif infer_dtype(dates, skipna=False) == "datetime": diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index 678967db72a0b..75c8c766b0e67 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -329,7 +329,7 @@ def test_constructor_simple_new(self): msg = "Should be numpy array of type i8" with pytest.raises(AssertionError, match=msg): # Need ndarray, not Int64Index - type(idx._data)._simple_new(idx.astype("i8"), freq=idx.freq) + type(idx._data)._simple_new(idx._int64index, freq=idx.freq) arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq) result = idx._simple_new(arr, name="p") diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index ce95eb59ed3c4..8f9b6699503ee 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -112,7 +112,7 @@ def test_frame_non_unique_columns(self, orient, data): # in milliseconds; these are internally stored in nanosecond, # so divide to get where we need # TODO: a to_epoch method would also solve; see GH 14772 - expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000 + expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000 elif orient == "split": expected = df @@ -254,7 +254,7 @@ def test_roundtrip_timestamp(self, orient, convert_axes, numpy, datetime_frame): if not convert_axes: # one off for ts handling # DTI gets converted to epoch values - idx = expected.index.astype(np.int64) // 1000000 + idx = expected.index.view(np.int64) // 1000000 if orient != "split": # TODO: handle consistently across orients idx = idx.astype(str) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 497039de99196..4442d47c9f535 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -841,7 +841,7 @@ def test_timedelta(self): with tm.assert_produces_warning(UserWarning): df.to_sql("test_timedelta", self.conn) result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn) - tm.assert_series_equal(result["foo"], df["foo"].astype("int64")) + tm.assert_series_equal(result["foo"], df["foo"].view("int64")) def test_complex_raises(self): df = DataFrame({"a": [1 + 1j, 2j]}) From 8322bc0bf6cdf7a12fef0e1b359e3baa4c462771 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 20:12:37 -0800 Subject: [PATCH 2/2] test for ewm --- pandas/tests/window/test_ewm.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index c026f52e94482..6b57d2f55e4ff 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -127,3 +127,11 @@ def test_ewma_with_times_variable_spacing(tz_aware_fixture): result = df.ewm(halflife=halflife, times=times).mean() expected = DataFrame([0.0, 0.5674161888241773, 1.545239952073459]) tm.assert_frame_equal(result, expected) + + +def test_ewm_with_nat_raises(halflife_with_times): + # GH#38535 + ser = Series(range(1)) + times = DatetimeIndex(["NaT"]) + with pytest.raises(ValueError, match="Cannot convert NaT values to integer"): + ser.ewm(com=0.1, halflife=halflife_with_times, times=times)