From 29e55c67d22c36718f93c408919b2bf49edd3cde Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 22 Oct 2023 19:11:14 -0700 Subject: [PATCH 001/105] ENH: read_stata return non-nano --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 9d29044c27833..36b5d6222d3e4 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -350,6 +350,7 @@ Other enhancements - Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as ``"BMS"`` (:issue:`56243`) - Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`) - The dtypes ``string[pyarrow]`` and ``string[pyarrow_numpy]`` now both utilize the ``large_string`` type from PyArrow to avoid overflow for long columns (:issue:`56259`) +- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`??`) .. --------------------------------------------------------------------------- .. _whatsnew_220.notable_bug_fixes: From d11a67c3d6211994f17c684763df21d09dbbd74a Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 22 Oct 2023 19:13:23 -0700 Subject: [PATCH 002/105] GH ref --- doc/source/whatsnew/v2.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 36b5d6222d3e4..91ab06747f83e 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -339,6 +339,7 @@ Other enhancements - :func:`read_spss` now returns a :class:`DataFrame` that stores the metadata in :attr:`DataFrame.attrs` (:issue:`54264`) - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) +- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) From dc53920ad098f8f98211a5463f81d3c2628da21f Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Jan 2024 10:27:46 -0800 Subject: [PATCH 003/105] move whatsnew --- doc/source/whatsnew/v2.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 91ab06747f83e..36b5d6222d3e4 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -339,7 +339,6 @@ Other enhancements - :func:`read_spss` now returns a :class:`DataFrame` that stores the metadata in :attr:`DataFrame.attrs` (:issue:`54264`) - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) -- :func:`read_stata` 
now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) - :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`) From 1bf05fa52658cada2b0aa3d98e22fd18b072537d Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Jan 2024 18:55:38 -0800 Subject: [PATCH 004/105] remove outdated whatsnew --- doc/source/whatsnew/v2.2.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 36b5d6222d3e4..9d29044c27833 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -350,7 +350,6 @@ Other enhancements - Improved error message that appears in :meth:`DatetimeIndex.to_period` with frequencies which are not supported as period frequencies, such as ``"BMS"`` (:issue:`56243`) - Improved error message when constructing :class:`Period` with invalid offsets such as ``"QS"`` (:issue:`55785`) - The dtypes ``string[pyarrow]`` and ``string[pyarrow_numpy]`` now both utilize the ``large_string`` type from PyArrow to avoid overflow for long columns (:issue:`56259`) -- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`??`) .. --------------------------------------------------------------------------- .. 
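A minimal round-trip sketch of the behavior this entry describes, assuming the returned resolutions track the Stata storage formats (``%tc`` is stored in milliseconds); the scratch path is hypothetical::

    import pandas as pd

    df = pd.DataFrame({"dates": pd.date_range("2020-01-01", periods=3)})
    # to_stata stores datetime64 columns in the %tc (millisecond) format
    df.to_stata("scratch.dta")  # hypothetical scratch path
    result = pd.read_stata("scratch.dta")
    # previously always datetime64[ns]; datetime64[ms] under this change
    print(result["dates"].dtype)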
_whatsnew_220.notable_bug_fixes: From 4371b178853ea12c26fe6444b6e5e228319d47c4 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 22 Oct 2023 19:11:14 -0700 Subject: [PATCH 005/105] ENH: read_stata return non-nano --- pandas/io/stata.py | 19 +++++++++---------- pandas/tests/io/test_stata.py | 4 +--- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 37ea940b3938a..170cdd39eb76d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -284,14 +284,14 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: if fmt.startswith(("%tc", "tc")): # Delta ms relative to base td = np.timedelta64(stata_epoch - unix_epoch, "ms") - res = np.array(dates._values, dtype="M8[ms]") + td - return Series(res, index=dates.index) + conv_dates = np.array(dates._values, dtype="M8[ms]") + td + return Series(conv_dates, index=dates.index) elif fmt.startswith(("%td", "td", "%d", "d")): # Delta days relative to base td = np.timedelta64(stata_epoch - unix_epoch, "D") - res = np.array(dates._values, dtype="M8[D]") + td - return Series(res, index=dates.index) + conv_dates = np.array(dates._values, dtype="M8[D]") + td + return Series(conv_dates, index=dates.index) elif fmt.startswith(("%tm", "tm")): # Delta months relative to base @@ -338,12 +338,11 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: elif fmt.startswith(("%tw", "tw")): year = stata_epoch.year + dates // 52 days = (dates % 52) * 7 - per_y = (year - 1970).array.view("Period[Y]") - per_d = per_y.asfreq("D", how="S") - per_d_shifted = per_d + days._values - per_s = per_d_shifted.asfreq("s", how="S") - conv_dates_arr = per_s.view("M8[s]") - conv_dates = Series(conv_dates_arr, index=dates.index) + per_y = (year - 1970).view("Period[Y]") + per_d = per_y.dt.asfreq("D", how="S") + per_d_shifted = per_d + days + per_s = per_d_shifted.dt.asfreq("s", how="S") + conv_dates = per_s.view("M8[s]") else: raise ValueError(f"Date fmt {fmt} not understood") diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 42a9e84218a81..0d55bd48263c2 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -181,9 +181,7 @@ def test_read_dta2(self, datapath): expected["monthly_date"] = expected["monthly_date"].astype("M8[s]") expected["quarterly_date"] = expected["quarterly_date"].astype("M8[s]") expected["half_yearly_date"] = expected["half_yearly_date"].astype("M8[s]") - expected["yearly_date"] = ( - expected["yearly_date"].astype("Period[s]").array.view("M8[s]") - ) + expected["yearly_date"] = expected["yearly_date"].astype("M8[s]") path1 = datapath("io", "data", "stata", "stata2_114.dta") path2 = datapath("io", "data", "stata", "stata2_115.dta") From 9b37cd39bc42a58568ea5432c57d4bf845e044ac Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 16:29:23 -0800 Subject: [PATCH 006/105] avoid Series.view --- pandas/io/stata.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 170cdd39eb76d..45d65c5bd505c 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -338,11 +338,12 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: elif fmt.startswith(("%tw", "tw")): year = stata_epoch.year + dates // 52 days = (dates % 52) * 7 - per_y = (year - 1970).view("Period[Y]") - per_d = per_y.dt.asfreq("D", how="S") + per_y = (year - 1970)._values.view("Period[Y]") + per_d = per_y.asfreq("D", how="S") per_d_shifted = per_d + days - per_s = 
per_d_shifted.dt.asfreq("s", how="S") + per_s = per_d_shifted.asfreq("s", how="S") conv_dates = per_s.view("M8[s]") + conv_dates = Series(conv_dates, index=dates.index) else: raise ValueError(f"Date fmt {fmt} not understood") From 04a9a7d6d6a35b4cbfa9f9bd4afd0a7809fd8767 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 1 Dec 2023 08:32:45 -0800 Subject: [PATCH 007/105] dont go through Series --- pandas/io/stata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 45d65c5bd505c..347f9e40e37de 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -338,9 +338,9 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: elif fmt.startswith(("%tw", "tw")): year = stata_epoch.year + dates // 52 days = (dates % 52) * 7 - per_y = (year - 1970)._values.view("Period[Y]") + per_y = (year - 1970).array.view("Period[Y]") per_d = per_y.asfreq("D", how="S") - per_d_shifted = per_d + days + per_d_shifted = per_d + days._values per_s = per_d_shifted.asfreq("s", how="S") conv_dates = per_s.view("M8[s]") conv_dates = Series(conv_dates, index=dates.index) From ef0de5abfc1d36d7ee423a8bd779ba132a2721a6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 08:31:14 -0800 Subject: [PATCH 008/105] TST: dt64 units --- pandas/tests/io/excel/test_writers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ca5c98f49f09c..a8024fc44ddac 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -560,6 +560,7 @@ def test_sheets(self, frame, tmp_excel): columns=Index(list("ABCD")), index=date_range("2000-01-01", periods=5, freq="B"), ) + index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) tsframe.index = index From 8a0eab1e6129bdc549fcf6730f3b511829feeb05 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 13:10:25 -0800 Subject: [PATCH 009/105] BUG: cut with non-nano --- pandas/core/reshape/tile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 82c697306edb2..7eb22fe83f58c 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -392,7 +392,6 @@ def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index: else: # adjust end points after binning if _is_dt_or_td(x_idx.dtype): # Use DatetimeArray/TimedeltaArray method instead of linspace - # error: Argument 1 to "dtype_to_unit" has incompatible type # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" unit = dtype_to_unit(x_idx.dtype) # type: ignore[arg-type] From 1c00ecfa08b9de5f3926199b6c931e05c78cf437 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 15:07:39 -0800 Subject: [PATCH 010/105] BUG: round with non-nanosecond raising OverflowError --- pandas/_libs/tslibs/fields.pyx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index ff4fb4d635d17..a375d07db9537 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -749,7 +749,7 @@ cdef ndarray[int64_t] _rounddown_int64(values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") - int64_t res, value, remainder, half + int64_t res, value, half, remainder, quotient half = unit // 2 @@ -761,15 +761,18 @@ cdef ndarray[int64_t] _rounddown_int64(values, int64_t unit): res = NPY_NAT else: # This 
adjustment is the only difference between rounddown_int64 - # and _ceil_int64 - value = value - half - remainder = value % unit - if remainder == 0: - res = value - else: + # and _round_nearest_int64 + value = value - unit // 2 + quotient, remainder = divmod(value, unit) + if remainder > half: + res = value + (unit - remainder) + elif remainder == half and quotient % 2: res = value + (unit - remainder) + else: + res = value - remainder result[i] = res + return result From 9b1a937fb4cc2448a37161c2e3ed111561e9d3ca Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 18:35:03 -0800 Subject: [PATCH 011/105] woops --- pandas/_libs/tslibs/fields.pyx | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index a375d07db9537..ff4fb4d635d17 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -749,7 +749,7 @@ cdef ndarray[int64_t] _rounddown_int64(values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") - int64_t res, value, half, remainder, quotient + int64_t res, value, remainder, half half = unit // 2 @@ -761,18 +761,15 @@ cdef ndarray[int64_t] _rounddown_int64(values, int64_t unit): res = NPY_NAT else: # This adjustment is the only difference between rounddown_int64 - # and _round_nearest_int64 - value = value - unit // 2 - quotient, remainder = divmod(value, unit) - if remainder > half: - res = value + (unit - remainder) - elif remainder == half and quotient % 2: - res = value + (unit - remainder) + # and _ceil_int64 + value = value - half + remainder = value % unit + if remainder == 0: + res = value else: - res = value - remainder + res = value + (unit - remainder) result[i] = res - return result From 6f6936d48fbae2359d0c6075f6135625f1256393 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 13:10:25 -0800 Subject: [PATCH 012/105] BUG: cut with non-nano --- pandas/core/reshape/tile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 7eb22fe83f58c..d6b9b6349cf34 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -397,6 +397,7 @@ def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index: unit = dtype_to_unit(x_idx.dtype) # type: ignore[arg-type] # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]" # has no attribute "_generate_range" + unit = dtype_to_unit(x_idx.dtype) bins = x_idx._values._generate_range( # type: ignore[union-attr] start=mn, end=mx, periods=nbins + 1, freq=None, unit=unit ) From d08f6169d8e7e58a62fdead694fe632cb34e46bf Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 16:01:42 -0800 Subject: [PATCH 013/105] TST: parametrize tests over dt64 unit --- pandas/tests/util/test_hashing.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index a54e0071aa006..e654534ccd453 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -260,14 +260,14 @@ def test_categorical_consistency(s1, categorize): tm.assert_series_equal(h1, h3) -def test_categorical_with_nan_consistency(): - c = pd.Categorical.from_codes( - [-1, 0, 1, 2, 3, 4], categories=pd.date_range("2012-01-01", periods=5, name="B") - ) - expected = hash_array(c, categorize=False) - - c = pd.Categorical.from_codes([-1, 0], categories=[pd.Timestamp("2012-01-01")]) - result = 
hash_array(c, categorize=False) +def test_categorical_with_nan_consistency(unit): + dti = pd.date_range("2012-01-01", periods=5, name="B", unit=unit) + cat = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4], categories=dti) + expected = hash_array(cat, categorize=False) + + ts = pd.Timestamp("2012-01-01").as_unit(unit) + cat2 = pd.Categorical.from_codes([-1, 0], categories=[ts]) + result = hash_array(cat2, categorize=False) assert result[0] in expected assert result[1] in expected From b66ea82294f143897a1c335d55257ccde9df8340 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 11:25:17 -0800 Subject: [PATCH 014/105] xfail non-nano --- pandas/tests/window/test_ewm.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 35c896dc0090b..2c9f123f1f308 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -102,6 +102,30 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "unit", + [ + pytest.param( + "s", + marks=pytest.mark.xfail( + reason="ExponentialMovingWindow constructor raises on non-nano" + ), + ), + pytest.param( + "ms", + marks=pytest.mark.xfail( + reason="ExponentialMovingWindow constructor raises on non-nano" + ), + ), + pytest.param( + "us", + marks=pytest.mark.xfail( + reason="ExponentialMovingWindow constructor raises on non-nano" + ), + ), + "ns", + ], +) def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit): tz = tz_aware_fixture halflife = "23 days" From a23e319b2ff76806bd7ff6225bf550e1716c40ec Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 13:12:33 -0800 Subject: [PATCH 015/105] revert --- pandas/tests/util/test_hashing.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index e654534ccd453..a54e0071aa006 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -260,14 +260,14 @@ def test_categorical_consistency(s1, categorize): tm.assert_series_equal(h1, h3) -def test_categorical_with_nan_consistency(unit): - dti = pd.date_range("2012-01-01", periods=5, name="B", unit=unit) - cat = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4], categories=dti) - expected = hash_array(cat, categorize=False) - - ts = pd.Timestamp("2012-01-01").as_unit(unit) - cat2 = pd.Categorical.from_codes([-1, 0], categories=[ts]) - result = hash_array(cat2, categorize=False) +def test_categorical_with_nan_consistency(): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], categories=pd.date_range("2012-01-01", periods=5, name="B") + ) + expected = hash_array(c, categorize=False) + + c = pd.Categorical.from_codes([-1, 0], categories=[pd.Timestamp("2012-01-01")]) + result = hash_array(c, categorize=False) assert result[0] in expected assert result[1] in expected From c9e93d6672c3bb77fb1eb3b8cec29cec7b1b73ae Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Nov 2023 09:30:07 -0700 Subject: [PATCH 016/105] BUG: mixed-type mixed-timezone/awareness --- pandas/_libs/tslibs/strptime.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index cd2475830b013..9d77387dde87b 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -253,7 +253,6 @@ cdef class DatetimeParseState: # datetime. 
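# Why the parser tracks this state, as I read it: mixing tz-naive and
# tz-aware inputs is rejected unless utc=True is passed (the exact
# message may differ by version), e.g.
#   pd.to_datetime(["2020-01-01", "2020-01-01 00:00:00+00:00"])  # raises
#   pd.to_datetime(["2020-01-01", "2020-01-01 00:00:00+00:00"], utc=True)
# where the second call localizes the naive value and returns a UTC index.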
self.found_naive_str = False self.found_other = False - self.creso = creso self.creso_ever_changed = False From ffcdd1a38dda547d023911c3f4de28d084e9dce5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 08:29:17 -0800 Subject: [PATCH 017/105] commit so i can unstash something else i hope --- pandas/_libs/tslib.pyx | 2 +- pandas/tests/tslibs/test_array_to_datetime.py | 60 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a09f4321c0d3c..6ffabe69d5837 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -533,7 +533,7 @@ cpdef array_to_datetime( # GH#32264 np.str_ object val = str(val) - if parse_today_now(val, &iresult[i], utc, creso): + if parse_today_now(val, &iresult[i], utc, creso, infer_reso=infer_reso): # We can't _quite_ dispatch this to convert_str_to_tsobject # bc there isn't a nice way to pass "utc" item_reso = NPY_DATETIMEUNIT.NPY_FR_us diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 30ea3a70552aa..d9bec358858cf 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -277,3 +277,63 @@ def test_datetime_subclass(klass): expected = np.array(["2000-01-01T00:00:00.000000000"], dtype="M8[ns]") tm.assert_numpy_array_equal(result, expected) + + +class TestArrayToDatetimeResolutionInference: + # TODO: tests that include tzs, ints + + def test_infer_homogeoneous_datetimes(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + arr = np.array([dt, dt, dt], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([dt, dt, dt], dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_date_objects(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + dt2 = dt.date() + arr = np.array([None, dt2, dt2, dt2], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT"), dt2, dt2, dt2], dtype="M8[s]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_dt64(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + dt64 = np.datetime64(dt, "ms") + arr = np.array([None, dt64, dt64, dt64], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT"), dt64, dt64, dt64], dtype="M8[ms]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_timestamps(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + ts = Timestamp(dt).as_unit("ns") + arr = np.array([None, ts, ts, ts], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT")] + [ts.asm8] * 3, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_datetimes_strings(self): + item = "2023-10-27 18:03:05.678000" + arr = np.array([None, item, item, item], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT"), item, item, item], dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_heterogeneous(self): + dtstr = "2023-10-27 18:03:05.678000" + + arr = np.array([dtstr, dtstr[:-3], dtstr[:-7], None], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + 
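# The heterogeneous case is the interesting one: the three strings carry
# microsecond, millisecond and second precision respectively, and the
# inferred resolution settles on the finest unit seen ("us" here), the
# parser upgrading its working resolution whenever a finer element
# appears.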
assert tz is None + expected = np.array(arr, dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + result, tz = tslib.array_to_datetime(arr[::-1], creso=creso_infer) + assert tz is None + tm.assert_numpy_array_equal(result, expected[::-1]) From b1e587e9ad8eb9a0150c2708dc733c816512ea24 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Nov 2023 14:00:53 -0800 Subject: [PATCH 018/105] ENH: infer resolution in to_datetime, DatetimeIndex --- pandas/_libs/lib.pyx | 10 +- pandas/_libs/tslib.pyx | 15 +- pandas/_libs/tslibs/strptime.pyx | 4 +- pandas/_testing/asserters.py | 3 + pandas/core/arrays/datetimes.py | 42 ++-- pandas/core/series.py | 5 +- pandas/core/window/ewm.py | 1 + pandas/tests/arrays/test_array.py | 6 +- pandas/tests/base/test_constructors.py | 6 +- pandas/tests/base/test_conversion.py | 4 +- pandas/tests/dtypes/test_inference.py | 6 +- .../frame/constructors/test_from_records.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 3 +- pandas/tests/frame/methods/test_asfreq.py | 5 +- .../tests/frame/methods/test_combine_first.py | 29 ++- .../tests/frame/methods/test_infer_objects.py | 2 +- pandas/tests/frame/methods/test_map.py | 2 +- pandas/tests/frame/methods/test_replace.py | 4 +- .../tests/frame/methods/test_reset_index.py | 19 +- pandas/tests/frame/methods/test_to_csv.py | 46 ++-- pandas/tests/frame/test_arithmetic.py | 1 + pandas/tests/frame/test_constructors.py | 70 ++++-- pandas/tests/groupby/test_apply.py | 4 +- pandas/tests/groupby/test_timegrouper.py | 31 ++- .../tests/groupby/transform/test_transform.py | 2 +- .../indexes/datetimes/methods/test_astype.py | 2 +- .../indexes/datetimes/test_constructors.py | 93 ++++--- .../indexes/datetimes/test_date_range.py | 2 +- .../tests/indexes/datetimes/test_timezones.py | 2 +- .../indexes/interval/test_interval_range.py | 12 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 2 +- .../tests/indexing/multiindex/test_setitem.py | 10 +- pandas/tests/indexing/test_coercion.py | 20 +- pandas/tests/indexing/test_loc.py | 9 +- pandas/tests/indexing/test_partial.py | 2 +- pandas/tests/io/excel/test_readers.py | 21 +- pandas/tests/io/excel/test_writers.py | 14 +- pandas/tests/io/json/test_pandas.py | 9 +- .../io/parser/common/test_common_basic.py | 11 +- pandas/tests/io/parser/common/test_index.py | 1 + pandas/tests/io/parser/test_parse_dates.py | 92 ++++--- pandas/tests/io/parser/test_read_fwf.py | 3 +- pandas/tests/io/parser/test_skiprows.py | 4 +- .../io/parser/usecols/test_parse_dates.py | 4 +- pandas/tests/io/pytables/test_store.py | 2 +- pandas/tests/io/test_fsspec.py | 16 +- pandas/tests/io/test_gcs.py | 6 +- pandas/tests/io/test_html.py | 16 +- pandas/tests/io/test_orc.py | 2 + pandas/tests/io/test_parquet.py | 28 +++ pandas/tests/io/test_sql.py | 25 +- pandas/tests/io/test_stata.py | 20 +- pandas/tests/resample/test_datetime_index.py | 2 + pandas/tests/resample/test_period_index.py | 2 +- .../tests/resample/test_resampler_grouper.py | 15 ++ pandas/tests/resample/test_time_grouper.py | 5 +- .../reshape/concat/test_append_common.py | 4 +- pandas/tests/reshape/concat/test_datetimes.py | 7 +- pandas/tests/reshape/merge/test_merge_asof.py | 2 + pandas/tests/reshape/test_cut.py | 39 ++- pandas/tests/reshape/test_pivot.py | 2 + pandas/tests/reshape/test_qcut.py | 2 +- pandas/tests/scalar/test_nat.py | 4 +- .../series/accessors/test_dt_accessor.py | 2 +- pandas/tests/series/indexing/test_setitem.py | 2 +- pandas/tests/series/methods/test_astype.py | 2 +- 
.../series/methods/test_combine_first.py | 2 +- pandas/tests/series/methods/test_fillna.py | 2 +- pandas/tests/series/methods/test_map.py | 4 +- pandas/tests/series/methods/test_to_csv.py | 5 +- .../tests/series/methods/test_value_counts.py | 2 +- pandas/tests/series/test_constructors.py | 24 +- pandas/tests/tools/test_to_datetime.py | 228 +++++++++++------- pandas/tests/tseries/holiday/test_holiday.py | 2 +- pandas/tests/tslibs/test_array_to_datetime.py | 61 +++-- pandas/tests/util/test_hashing.py | 4 +- pandas/tests/window/test_groupby.py | 10 +- pandas/tests/window/test_rolling.py | 16 +- pandas/tests/window/test_timeseries_window.py | 16 +- 80 files changed, 778 insertions(+), 440 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 00668576d5d53..133c63ca3fb8b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -96,16 +96,12 @@ from pandas._libs.missing cimport ( is_null_datetime64, is_null_timedelta64, ) -from pandas._libs.tslibs.conversion cimport ( - _TSObject, - convert_to_tsobject, -) +from pandas._libs.tslibs.conversion cimport convert_to_tsobject from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, checknull_with_nat, ) -from pandas._libs.tslibs.np_datetime cimport NPY_FR_ns from pandas._libs.tslibs.offsets cimport is_offset_object from pandas._libs.tslibs.period cimport is_period_object from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 @@ -2487,7 +2483,6 @@ def maybe_convert_objects(ndarray[object] objects, ndarray[uint8_t] mask Seen seen = Seen() object val - _TSObject tsobj float64_t fnan = NaN if dtype_if_all_nat is not None: @@ -2594,8 +2589,7 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.datetime_ = True try: - tsobj = convert_to_tsobject(val, None, None, 0, 0) - tsobj.ensure_reso(NPY_FR_ns) + convert_to_tsobject(val, None, None, 0, 0) except OutOfBoundsDatetime: # e.g. test_out_of_s_bounds_datetime64 seen.object_ = True diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6ffabe69d5837..38cbc5d561ca3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -67,7 +67,10 @@ from pandas._libs.tslibs.conversion cimport ( get_datetime64_nanos, parse_pydatetime, ) -from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev +from pandas._libs.tslibs.dtypes cimport ( + get_supported_reso, + npy_unit_to_abbrev, +) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, @@ -410,7 +413,7 @@ cpdef array_to_datetime( bint dayfirst=False, bint yearfirst=False, bint utc=False, - NPY_DATETIMEUNIT creso=NPY_FR_ns, + NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_GENERIC, ): """ Converts a 1D array of date-like values to a numpy array of either: @@ -437,8 +440,8 @@ cpdef array_to_datetime( yearfirst parsing behavior when encountering datetime strings utc : bool, default False indicator whether the dates should be UTC - creso : NPY_DATETIMEUNIT, default NPY_FR_ns - Set to NPY_FR_GENERIC to infer a resolution. + creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC + If NPY_FR_GENERIC, conduct inference. Returns ------- @@ -710,7 +713,9 @@ def array_to_datetime_with_tz( if state.creso_ever_changed: # We encountered mismatched resolutions, need to re-parse with # the correct one. - return array_to_datetime_with_tz(values, tz=tz, creso=creso) + return array_to_datetime_with_tz( + values, tz=tz, dayfirst=dayfirst, yearfirst=yearfirst, creso=creso + ) elif creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # i.e. we never encountered anything non-NaT, default to "s". 
This # ensures that insert and concat-like operations with NaT diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 9d77387dde87b..fdef43e665327 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -298,7 +298,7 @@ def array_strptime( bint exact=True, errors="raise", bint utc=False, - NPY_DATETIMEUNIT creso=NPY_FR_ns, + NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_GENERIC, ): """ Calculates the datetime structs represented by the passed array of strings @@ -684,7 +684,7 @@ cdef tzinfo _parse_with_format( elif len(s) <= 6: item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us else: - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ns + item_reso[0] = NPY_FR_ns # Pad to always return nanoseconds s += "0" * (9 - len(s)) us = int(s) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 3aacd3099c334..cb4aa1c764ef7 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1010,6 +1010,9 @@ def assert_series_equal( # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal + # TODO: this works for object-vs-dt64 but not e.g. dt64[ns] vs dt64[us], + # which AFAICT would have been intended at the time + # check_datetimelike_compat was implemented, xref GH#55638 if not Index(left._values).equals(Index(right._values)): msg = ( f"[datetimelike_compat=True] {left._values} " diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 29681539d146b..c29117bd73a0a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -603,7 +603,7 @@ def tz(self) -> tzinfo | None: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.tz datetime.timezone.utc @@ -1045,7 +1045,7 @@ def tz_localize( 4 2018-10-28 02:30:00+01:00 5 2018-10-28 03:00:00+01:00 6 2018-10-28 03:30:00+01:00 - dtype: datetime64[ns, CET] + dtype: datetime64[s, CET] In some cases, inferring the DST is impossible. 
In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly @@ -1057,7 +1057,7 @@ def tz_localize( 0 2018-10-28 01:20:00+02:00 1 2018-10-28 02:36:00+02:00 2 2018-10-28 03:46:00+01:00 - dtype: datetime64[ns, CET] + dtype: datetime64[s, CET] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` @@ -1068,17 +1068,17 @@ def tz_localize( >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 0 2015-03-29 03:00:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] + dtype: datetime64[s, Europe/Warsaw] >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 0 2015-03-29 01:59:59.999999999+01:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] + dtype: datetime64[s, Europe/Warsaw] >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) 0 2015-03-29 03:30:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[ns, Europe/Warsaw] + dtype: datetime64[s, Europe/Warsaw] """ nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( @@ -1402,7 +1402,7 @@ def time(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.time 0 10:00:00 1 11:00:00 @@ -1439,7 +1439,7 @@ def timetz(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.timetz 0 10:00:00+00:00 1 11:00:00+00:00 @@ -1473,7 +1473,7 @@ def date(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.date 0 2020-01-01 1 2020-02-01 @@ -1774,7 +1774,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.dayofyear 0 1 1 32 @@ -1804,7 +1804,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-04-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.quarter 0 1 1 2 @@ -1831,7 +1831,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[ns, UTC] + dtype: datetime64[s, UTC] >>> s.dt.daysinmonth 0 31 1 29 @@ -2246,9 +2246,11 @@ def _sequence_to_dt64( data, copy = maybe_convert_dtype(data, copy, tz=tz) data_dtype = getattr(data, "dtype", None) - if out_unit is None: - out_unit = "ns" - out_dtype = np.dtype(f"M8[{out_unit}]") + out_dtype = DT64NS_DTYPE + out_reso = abbrev_to_npy_unit(None) # NPY_FR_GENERIC + if out_unit is not None: + out_dtype = np.dtype(f"M8[{out_unit}]") + out_reso = abbrev_to_npy_unit(out_unit) if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, @@ -2274,7 +2276,7 @@ def _sequence_to_dt64( dayfirst=dayfirst, yearfirst=yearfirst, allow_object=False, - out_unit=out_unit or "ns", + out_reso=out_reso, ) copy = False if tz and inferred_tz: @@ -2382,7 +2384,7 @@ def objects_to_datetime64( utc: bool = False, errors: DateTimeErrorChoices = "raise", allow_object: bool = False, - out_unit: str = "ns", + out_reso: int = 14, ) -> tuple[np.ndarray, tzinfo | None]: """ Convert data to array of timestamps. 
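The user-facing effect this commit is after, sketched on the assumption that input precision drives the inferred unit (consistent with the test updates in this patch, not a documented guarantee at this point in the series)::

    import pandas as pd

    pd.to_datetime(["2020-01-01"]).dtype                       # datetime64[s]
    pd.to_datetime(["2020-01-01 00:00:00.123"]).dtype          # datetime64[ms]
    pd.to_datetime(["2020-01-01 00:00:00.123456"]).dtype       # datetime64[us]
    pd.DatetimeIndex(["2020-01-01 00:00:00.123456789"]).dtype  # datetime64[ns]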
@@ -2398,7 +2400,9 @@ def objects_to_datetime64( allow_object : bool Whether to return an object-dtype ndarray instead of raising if the data contains more than one timezone. - out_unit : str, default "ns" + out_reso : int, default 14 + 14 corresponds to NPY_FR_GENERIC, which indicates to infer + a resolution. Returns ------- @@ -2425,7 +2429,7 @@ def objects_to_datetime64( utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, - creso=abbrev_to_npy_unit(out_unit), + creso=out_reso, ) if tz_parsed is not None: diff --git a/pandas/core/series.py b/pandas/core/series.py index 5956fa59528a7..a4841de10fffe 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2105,14 +2105,14 @@ def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation >>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique() ['2016-01-01 00:00:00'] - Length: 1, dtype: datetime64[ns] + Length: 1, dtype: datetime64[s] >>> pd.Series( ... [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)] ... ).unique() ['2016-01-01 00:00:00-05:00'] - Length: 1, dtype: datetime64[ns, US/Eastern] + Length: 1, dtype: datetime64[s, US/Eastern] An Categorical will return categories in the order of appearance and with the same dtype. @@ -3262,6 +3262,7 @@ def combine_first(self, other) -> Series: other = other.reindex(keep_other) if this.dtype.kind == "M" and other.dtype.kind != "M": + # TODO: try to match resos? other = to_datetime(other) combined = concat([this, other]) combined = combined.reindex(new_index) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 9b21a23b1aefe..d58dc3efb1ba8 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -7,6 +7,7 @@ import numpy as np +from pandas._libs import lib from pandas._libs.tslibs import Timedelta import pandas._libs.window.aggregations as window_aggregations from pandas.util._decorators import doc diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index a84fefebf044c..72b596e00f500 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -292,7 +292,7 @@ def test_array_copy(): ), ( [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], - DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[us]"), ), ( np.array([1, 2], dtype="M8[ns]"), @@ -308,7 +308,7 @@ def test_array_copy(): ( [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], DatetimeArray._from_sequence( - ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns") + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="s") ), ), ( @@ -317,7 +317,7 @@ def test_array_copy(): datetime.datetime(2001, 1, 1, tzinfo=cet), ], DatetimeArray._from_sequence( - ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns") + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="us") ), ), # timedelta diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py index f3ac60f672ee1..c4b02423f8cf0 100644 --- a/pandas/tests/base/test_constructors.py +++ b/pandas/tests/base/test_constructors.py @@ -146,10 +146,12 @@ def test_constructor_datetime_outofbound( # No dtype specified (dtype inference) # datetime64[non-ns] raise error, other cases result in object dtype # and preserve original data - if a.dtype.kind == "M": + result = constructor(a) + if a.dtype.kind == "M" or isinstance(a[0], np.datetime64): # Can't fit in nanosecond bounds -> get the nearest supported unit - result 
= constructor(a) assert result.dtype == "M8[s]" + elif isinstance(a[0], datetime): + assert result.dtype == "M8[us]", result.dtype else: result = constructor(a) if using_infer_string and "object-string" in request.node.callspec.id: diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index ad35742a7b337..f18f2c84e44e2 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -412,7 +412,7 @@ def test_to_numpy_dtype(as_series, unit): [Timestamp("2000"), Timestamp("2000"), pd.NaT], None, Timestamp("2000"), - [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + [np.datetime64("2000-01-01T00:00:00", "s")] * 3, ), ], ) @@ -454,7 +454,7 @@ def test_to_numpy_na_value_numpy_dtype( [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))], None, Timestamp("2000"), - [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + [np.datetime64("2000-01-01T00:00:00", "s")] * 3, ), ], ) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0434ad7e50568..59663346e82f4 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -830,7 +830,11 @@ def test_maybe_convert_objects_datetime_overflow_safe(self, dtype): out = lib.maybe_convert_objects(arr, convert_non_numeric=True) # no OutOfBoundsDatetime/OutOfBoundsTimedeltas - tm.assert_numpy_array_equal(out, arr) + if dtype == "datetime64[ns]": + expected = np.array(["2363-10-04"], dtype="M8[us]") + else: + expected = arr + tm.assert_numpy_array_equal(out, expected) def test_maybe_convert_objects_mixed_datetimes(self): ts = Timestamp("now") diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 66fc234e79b4d..35e143fcedf7b 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -39,7 +39,7 @@ def test_from_records_with_datetimes(self): expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]}) arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] - dtypes = [("EXPIRY", " str: - return "ns" + unit = "us" + if (read_ext == ".ods") ^ (engine == "calamine"): + # TODO: why is .ods & calamine a separate special case? 
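# A plausible reading of the split: engines that hand cells back as
# Python datetime.datetime objects carry microsecond precision, so
# construction infers M8[us], while this format/engine pairing yields
# only second-precision values and therefore infers M8[s].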
+ unit = "s" + return unit -def adjust_expected(expected: DataFrame, read_ext: str, engine: str) -> None: +def adjust_expected(expected: DataFrame, read_ext: str, engine: str | None) -> None: expected.index.name = None unit = get_exp_unit(read_ext, engine) # error: "Index" has no attribute "as_unit" expected.index = expected.index.as_unit(unit) # type: ignore[attr-defined] + def xfail_datetimes_with_pyxlsb(engine, request): if engine == "pyxlsb": request.applymarker( @@ -483,6 +488,8 @@ def test_reader_special_dtypes(self, request, engine, read_ext): ), }, ) + if (read_ext == ".ods") ^ (engine == "calamine"): + expected["DateCol"] = expected["DateCol"].astype("M8[s]") basename = "test_types" # should read in correctly and infer types @@ -1117,13 +1124,12 @@ def test_read_excel_multiindex_blank_after_name( mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"]) unit = get_exp_unit(read_ext, engine) - expected = DataFrame( [ - [1, 2.5, pd.Timestamp("2015-01-01"), True], - [2, 3.5, pd.Timestamp("2015-01-02"), False], - [3, 4.5, pd.Timestamp("2015-01-03"), False], - [4, 5.5, pd.Timestamp("2015-01-04"), True], + [1, 2.5, pd.Timestamp("2015-01-01").as_unit(unit), True], + [2, 3.5, pd.Timestamp("2015-01-02").as_unit(unit), False], + [3, 4.5, pd.Timestamp("2015-01-03").as_unit(unit), False], + [4, 5.5, pd.Timestamp("2015-01-04").as_unit(unit), True], ], columns=mi, index=MultiIndex.from_arrays( @@ -1675,6 +1681,7 @@ def test_read_datetime_multiindex(self, request, engine, read_ext): actual = pd.read_excel(excel, header=[0, 1], index_col=0, engine=engine) unit = get_exp_unit(read_ext, engine) + dti = pd.DatetimeIndex(["2020-02-29", "2020-03-01"], dtype=f"M8[{unit}]") expected_column_index = MultiIndex.from_arrays( [dti[:1], dti[1:]], diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index a8024fc44ddac..67e3668c48580 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -293,7 +293,9 @@ def test_read_excel_parse_dates(self, tmp_excel): tm.assert_frame_equal(df2, res) res = pd.read_excel(tmp_excel, parse_dates=["date_strings"], index_col=0) - tm.assert_frame_equal(df, res) + expected = df[:] + expected["date_strings"] = expected["date_strings"].astype("M8[s]") + tm.assert_frame_equal(res, expected) date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y") with tm.assert_produces_warning( @@ -307,11 +309,16 @@ def test_read_excel_parse_dates(self, tmp_excel): date_parser=date_parser, index_col=0, ) - tm.assert_frame_equal(df, res) + + expected = df[:] + expected["date_strings"] = expected["date_strings"].astype("M8[us]") + tm.assert_frame_equal(expected, res) + res = pd.read_excel( tmp_excel, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0 ) - tm.assert_frame_equal(df, res) + expected["date_strings"] = expected["date_strings"].astype("M8[s]") + tm.assert_frame_equal(expected, res) def test_multiindex_interval_datetimes(self, tmp_excel): # GH 30986 @@ -709,7 +716,6 @@ def test_excel_date_datetime_format(self, ext, tmp_excel, tmp_path): # # Excel output format strings unit = get_exp_unit(tmp_excel) - df = DataFrame( [ [date(2014, 1, 31), date(1999, 9, 24)], diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index db120588b234c..d7ca7366854fd 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -141,7 +141,7 @@ def test_frame_non_unique_columns(self, orient, data): ) if orient == "values": expected = 
DataFrame(data) - if expected.iloc[:, 0].dtype == "datetime64[ns]": + if expected.iloc[:, 0].dtype == "datetime64[s]": # orient == "values" by default will write Timestamp objects out # in milliseconds; these are internally stored in nanosecond, # so divide to get where we need @@ -150,6 +150,8 @@ def test_frame_non_unique_columns(self, orient, data): elif orient == "split": expected = df expected.columns = ["x", "x.1"] + # if isinstance(data[0][0], Timestamp): + # # FIXME: in this case result is integer dtype instead of dt64 tm.assert_frame_equal(result, expected) @@ -835,6 +837,10 @@ def test_date_index_and_values(self, date_format, as_object, date_typ): data.append("a") ser = Series(data, index=data) + if not as_object: + ser = ser.astype("M8[ns]") + if isinstance(ser.index, DatetimeIndex): + ser.index = ser.index.as_unit("ns") result = ser.to_json(date_format=date_format) if date_format == "epoch": @@ -868,6 +874,7 @@ def test_convert_dates_infer(self, infer_word): expected = DataFrame( [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] ) + expected[infer_word] = expected[infer_word].astype("M8[ns]") result = read_json(StringIO(ujson_dumps(data)))[["id", infer_word]] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 7ffc49e941c14..933b08bdfac53 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -90,9 +90,7 @@ def test_read_csv_local(all_parsers, csv1): fname = prefix + str(os.path.abspath(csv1)) result = parser.read_csv(fname, index_col=0, parse_dates=True) - # TODO: make unit check more specific - if parser.engine == "pyarrow": - result.index = result.index.as_unit("ns") + expected = DataFrame( [ [0.980269, 3.685731, -0.364216805298, -1.159738], @@ -115,7 +113,7 @@ def test_read_csv_local(all_parsers, csv1): datetime(2000, 1, 11), ], name="index", - ), + ).as_unit("s"), ) tm.assert_frame_equal(result, expected) @@ -194,9 +192,6 @@ def test_read_csv_low_memory_no_rows_with_index(all_parsers): def test_read_csv_dataframe(all_parsers, csv1): parser = all_parsers result = parser.read_csv(csv1, index_col=0, parse_dates=True) - # TODO: make unit check more specific - if parser.engine == "pyarrow": - result.index = result.index.as_unit("ns") expected = DataFrame( [ [0.980269, 3.685731, -0.364216805298, -1.159738], @@ -219,7 +214,7 @@ def test_read_csv_dataframe(all_parsers, csv1): datetime(2000, 1, 11), ], name="index", - ), + ).as_unit("s"), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 7cdaac1a284cd..a4e5b580366c4 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -262,6 +262,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path): ] ), ) + expected.index = expected.index.as_unit("s") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 0bc0c3e744db7..1a5231dd4096c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -127,6 +127,7 @@ def test_separator_date_conflict(all_parsers): expected = DataFrame( [[datetime(2013, 6, 2, 13, 0, 0), 1000.215]], columns=["Date", 2] ) + expected["Date"] = expected["Date"].astype("M8[s]") depr_msg = ( "Support for nested sequences for 
'parse_dates' in pd.read_csv is deprecated" @@ -442,6 +443,8 @@ def test_multiple_date_col(all_parsers, keep_date_col, request): "X8", ], ) + expected["X1_X2"] = expected["X1_X2"].astype("M8[s]") + expected["X1_X3"] = expected["X1_X3"].astype("M8[s]") if not keep_date_col: expected = expected.drop(["X1", "X2", "X3"], axis=1) @@ -475,7 +478,7 @@ def test_date_col_as_index_col(all_parsers): datetime(1999, 1, 27, 22, 0), ], name="X1", - ) + ).as_unit("s") expected = DataFrame( [ ["KORD", " 18:56:00", 0.81, 2.81, 7.2, 0.0, 280.0], @@ -551,6 +554,8 @@ def test_multiple_date_cols_int_cast(all_parsers): ], columns=["actual", "nominal", 0, 4], ) + expected["actual"] = expected["actual"].astype("M8[s]") + expected["nominal"] = expected["nominal"].astype("M8[s]") # Python can sometimes be flaky about how # the aggregated columns are entered, so @@ -692,6 +697,7 @@ def test_multiple_date_cols_with_header(all_parsers): "WindDir", ], ) + expected["nominal"] = expected["nominal"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -752,6 +758,9 @@ def test_date_parser_int_bug(all_parsers): ), raise_on_extra_warnings=False, ) + dti = Index([Timestamp("2012-07-24 04:12:30")], name="posix_timestamp").as_unit( + "us" + ) expected = DataFrame( [ [ @@ -783,7 +792,7 @@ def test_date_parser_int_bug(all_parsers): "silo", "method", ], - index=Index([Timestamp("2012-07-24 04:12:30")], name="posix_timestamp"), + index=dti, ) tm.assert_frame_equal(result, expected) @@ -795,7 +804,7 @@ def test_nat_parse(all_parsers): df = DataFrame( { "A": np.arange(10, dtype="float64"), - "B": Timestamp("20010101").as_unit("ns"), + "B": Timestamp("20010101"), } ) df.iloc[3:6, :] = np.nan @@ -822,8 +831,10 @@ def test_csv_custom_parser(all_parsers): date_parser=lambda x: datetime.strptime(x, "%Y%m%d"), ) expected = parser.read_csv(StringIO(data), parse_dates=True) + expected.index = expected.index.as_unit("us") tm.assert_frame_equal(result, expected) result = parser.read_csv(StringIO(data), date_format="%Y%m%d") + expected.index = expected.index.as_unit("s") tm.assert_frame_equal(result, expected) @@ -851,7 +862,7 @@ def test_parse_dates_string(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col="date", parse_dates=["date"]) # freq doesn't round-trip - index = date_range("1/1/2009", periods=3, name="date")._with_freq(None) + index = date_range("1/1/2009", periods=3, name="date", unit="s")._with_freq(None) expected = DataFrame( {"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}, index=index @@ -899,6 +910,8 @@ def test_parse_dates_column_list(all_parsers, parse_dates): expected = DataFrame( {"a": [datetime(2010, 1, 1)], "b": [1], "c": [datetime(2010, 2, 15)]} ) + expected["a"] = expected["a"].astype("M8[s]") + expected["c"] = expected["c"].astype("M8[s]") expected = expected.set_index(["a", "b"]) result = parser.read_csv( @@ -922,9 +935,10 @@ def test_multi_index_parse_dates(all_parsers, index_col): 20090103,three,c,4,5 """ parser = all_parsers + dti = date_range("2009-01-01", periods=3, freq="D", unit="s") index = MultiIndex.from_product( [ - (datetime(2009, 1, 1), datetime(2009, 1, 2), datetime(2009, 1, 3)), + dti, ("one", "two", "three"), ], names=["index1", "index2"], @@ -1012,9 +1026,6 @@ def test_parse_tz_aware(all_parsers): data = "Date,x\n2012-06-13T01:39:00Z,0.5" result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) - # TODO: make unit check more specific - if parser.engine == "pyarrow": - result.index = result.index.as_unit("ns") expected = DataFrame( {"x": 
[0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date") ) @@ -1117,6 +1128,7 @@ def test_multiple_date_cols_index(all_parsers, parse_dates, index_col): ], ) expected = expected.set_index("nominal") + expected.index = expected.index.as_unit("s") if not isinstance(parse_dates, dict): expected.index.name = "date_NominalTime" @@ -1212,6 +1224,7 @@ def test_multiple_date_cols_chunked(all_parsers): columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"], ) expected = expected.set_index("nominal") + expected.index = expected.index.as_unit("s") depr_msg = ( "Support for nested sequences for 'parse_dates' in pd.read_csv is deprecated" @@ -1380,6 +1393,7 @@ def test_parse_dates_empty_string(all_parsers): expected = DataFrame( [[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"] ) + expected["Date"] = expected["Date"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -1429,18 +1443,22 @@ def test_parse_dates_date_parser_and_date_format(all_parsers, reader): ( "a\n04.15.2016", {"parse_dates": ["a"]}, - DataFrame([datetime(2016, 4, 15)], columns=["a"]), + DataFrame([datetime(2016, 4, 15)], columns=["a"], dtype="M8[s]"), ), ( "a\n04.15.2016", {"parse_dates": True, "index_col": 0}, - DataFrame(index=DatetimeIndex(["2016-04-15"], name="a"), columns=[]), + DataFrame( + index=DatetimeIndex(["2016-04-15"], dtype="M8[s]", name="a"), columns=[] + ), ), ( "a,b\n04.15.2016,09.16.2013", {"parse_dates": ["a", "b"]}, DataFrame( - [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], columns=["a", "b"] + [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], + dtype="M8[s]", + columns=["a", "b"], ), ), ( @@ -1448,7 +1466,13 @@ def test_parse_dates_date_parser_and_date_format(all_parsers, reader): {"parse_dates": True, "index_col": [0, 1]}, DataFrame( index=MultiIndex.from_tuples( - [(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"] + [ + ( + Timestamp(2016, 4, 15).as_unit("s"), + Timestamp(2013, 9, 16).as_unit("s"), + ) + ], + names=["a", "b"], ), columns=[], ), @@ -1486,6 +1510,7 @@ def test_parse_date_time_multi_level_column_name(all_parsers): [datetime(2001, 1, 6, 0, 0, 0), 1.0, 11.0], ] expected = DataFrame(expected_data, columns=["date_time", ("A", "a"), ("B", "b")]) + expected["date_time"] = expected["date_time"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -1576,6 +1601,11 @@ def test_parse_date_time(all_parsers, data, kwargs, expected): # the aggregated columns are entered, so # this standardizes the order. 
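# The astype("M8[s]") adjustments below follow from the new resolution
# inference: these parsed fields carry no sub-second component, so the
# result now comes back as datetime64[s] rather than the old blanket
# datetime64[ns].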
result = result[expected.columns] + if "date_time" in expected.columns: + expected["date_time"] = expected["date_time"].astype("M8[s]") + else: + expected["actual"] = expected["actual"].astype("M8[s]") + expected["nominal"] = expected["nominal"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -1596,6 +1626,7 @@ def test_parse_date_fields(all_parsers): [[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]], columns=["ymd", "a"], ) + expected["ymd"] = expected["ymd"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -1633,6 +1664,7 @@ def test_parse_date_all_fields(all_parsers, key, value, warn): ], columns=["ymdHMS", "a", "b"], ) + expected["ymdHMS"] = expected["ymdHMS"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -1670,6 +1702,7 @@ def test_datetime_fractional_seconds(all_parsers, key, value, warn): ], columns=["ymdHMS", "a", "b"], ) + # expected["ymdHMS"] = expected["ymdHMS"].astype("M8[us]") tm.assert_frame_equal(result, expected) @@ -1693,7 +1726,7 @@ def parse_function(yy, mm): [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]], columns=["ym", "day", "a"], ) - expected["ym"] = expected["ym"].astype("datetime64[ns]") + expected["ym"] = expected["ym"].astype("datetime64[s]") tm.assert_frame_equal(result, expected) @@ -1789,7 +1822,9 @@ def test_parse_timezone(all_parsers): end="2018-01-04 09:05:00", freq="1min", tz=timezone(timedelta(minutes=540)), + unit="s", )._with_freq(None) + expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} expected = DataFrame(expected_data) @@ -1827,7 +1862,7 @@ def test_parse_delimited_date_swap_no_warning( all_parsers, date_string, dayfirst, expected, request ): parser = all_parsers - expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + expected = DataFrame({0: [expected]}, dtype="datetime64[s]") if parser.engine == "pyarrow": if not dayfirst: # "CSV parse error: Empty CSV file or block" @@ -1860,7 +1895,7 @@ def test_parse_delimited_date_swap_with_warning( all_parsers, date_string, dayfirst, expected ): parser = all_parsers - expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + expected = DataFrame({0: [expected]}, dtype="datetime64[s]") warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " "Pass `dayfirst=.*` or specify a format to silence this warning." 
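The delimited-date tests above pair the new ``datetime64[s]`` expectation with a warning pandas already emits; a small sketch of the behavior being pinned down (the dtype assumes the new inference)::

    import io

    import pandas as pd

    data = "date\n31/12/2014\n10/03/2011"
    res = pd.read_csv(io.StringIO(data), parse_dates=["date"], dayfirst=True)
    print(res["date"].dtype)  # datetime64[s]: date-only strings
    # omitting dayfirst=True parses the same values day-first anyway, but
    # emits the "Parsing dates in DD/MM/YYYY format ..." UserWarning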
@@ -1960,9 +1995,7 @@ def test_date_parser_multiindex_columns(all_parsers): 1,2 2019-12-31,6""" result = parser.read_csv(StringIO(data), parse_dates=[("a", "1")], header=[0, 1]) - expected = DataFrame( - {("a", "1"): Timestamp("2019-12-31").as_unit("ns"), ("b", "2"): [6]} - ) + expected = DataFrame({("a", "1"): Timestamp("2019-12-31"), ("b", "2"): [6]}) tm.assert_frame_equal(result, expected) @@ -2027,6 +2060,7 @@ def test_date_parser_usecols_thousands(all_parsers): thousands="-", ) expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2}) + expected["C"] = expected["C"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -2046,7 +2080,7 @@ def test_parse_dates_and_keep_original_column(all_parsers): StringIO(data), parse_dates={"date": ["A"]}, keep_date_col=True ) expected_data = [Timestamp("2015-09-08"), Timestamp("2015-09-09")] - expected = DataFrame({"date": expected_data, "A": expected_data}) + expected = DataFrame({"date": expected_data, "A": expected_data}, dtype="M8[s]") tm.assert_frame_equal(result, expected) @@ -2056,7 +2090,7 @@ def test_dayfirst_warnings(): # CASE 1: valid input input = "date\n31/12/2014\n10/03/2011" expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date" + ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None, name="date" ) warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " @@ -2117,7 +2151,7 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst): # GH47880 initial_value = f"date\n{date_string}" expected = DatetimeIndex( - ["2014-01-31"], dtype="datetime64[ns]", freq=None, name="date" + ["2014-01-31"], dtype="datetime64[s]", freq=None, name="date" ) warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. 
" @@ -2181,7 +2215,8 @@ def test_replace_nans_before_parsing_dates(all_parsers, key, value, warn): pd.NaT, Timestamp("2017-09-09"), ] - } + }, + dtype="M8[s]", ) tm.assert_frame_equal(result, expected) @@ -2196,6 +2231,7 @@ def test_parse_dates_and_string_dtype(all_parsers): result = parser.read_csv(StringIO(data), dtype="string", parse_dates=["b"]) expected = DataFrame({"a": ["1"], "b": [Timestamp("2019-12-31")]}) expected["a"] = expected["a"].astype("string") + expected["b"] = expected["b"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -2215,7 +2251,7 @@ def test_parse_dot_separated_dates(all_parsers): else: expected_index = DatetimeIndex( ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], - dtype="datetime64[ns]", + dtype="datetime64[ms]", name="a", ) warn = UserWarning @@ -2248,7 +2284,8 @@ def test_parse_dates_dict_format(all_parsers): { "a": [Timestamp("2019-12-31"), Timestamp("2020-12-31")], "b": [Timestamp("2019-12-31"), Timestamp("2020-12-31")], - } + }, + dtype="M8[s]", ) tm.assert_frame_equal(result, expected) @@ -2275,7 +2312,8 @@ def test_parse_dates_dict_format_two_columns(all_parsers, key, parse_dates): expected = DataFrame( { key: [Timestamp("2019-12-31"), Timestamp("2020-12-31")], - } + }, + dtype="M8[s]", ) tm.assert_frame_equal(result, expected) @@ -2308,9 +2346,6 @@ def test_parse_dates_arrow_engine(all_parsers): 2000-01-01 00:00:01,1""" result = parser.read_csv(StringIO(data), parse_dates=["a"]) - # TODO: make unit check more specific - if parser.engine == "pyarrow": - result["a"] = result["a"].dt.as_unit("ns") expected = DataFrame( { "a": [ @@ -2320,6 +2355,7 @@ def test_parse_dates_arrow_engine(all_parsers): "b": 1, } ) + expected["a"] = expected["a"].astype("M8[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index b62fcc04c375c..ea43d3951af23 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -311,7 +311,7 @@ def test_fwf_regression(): "2009-06-13 20:50:00", "2009-06-13 21:00:00", ] - ), + ).as_unit("us"), columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"], ) tm.assert_frame_equal(result, expected) @@ -324,6 +324,7 @@ def test_fwf_regression(): parse_dates=True, date_format="%Y%j%H%M%S", ) + expected.index = expected.index.as_unit("s") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 3cd2351f84c7a..6d974344fcfe2 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -43,7 +43,7 @@ def test_skip_rows_bug(all_parsers, skiprows): ) index = Index( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 - ) + ).as_unit("s") expected = DataFrame( np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index @@ -86,7 +86,7 @@ def test_skip_rows_blank(all_parsers): ) index = Index( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 - ) + ).as_unit("s") expected = DataFrame( np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index bc66189ca064e..f444fe9c1b6ee 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -80,7 +80,7 @@ def test_usecols_with_parse_dates2(all_parsers): Timestamp("2008-02-07 10:00"), ], name="date", - ) 
@@ -105,7 +105,7 @@ def test_usecols_with_parse_dates3(all_parsers):
     parse_dates = [0]
 
     cols = {
-        "a": Timestamp("2016-09-21").as_unit("ns"),
+        "a": Timestamp("2016-09-21"),
         "b": [1],
         "c": [1],
         "d": [2],
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 4866ef78d79a2..f94955fbec657 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -614,7 +614,7 @@ def test_store_index_name_numpy_str(tmp_path, table_format, setup_path, unit, tz
     idx = DatetimeIndex(
         [dt.date(2000, 1, 1), dt.date(2000, 1, 2)],
         name="cols\u05d2",
-    ).tz_localize(tz)
+    ).tz_localize(tz).as_unit(unit)
     idx1 = (
         DatetimeIndex(
             [dt.date(2010, 1, 1), dt.date(2010, 1, 2)],
diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py
index f6fb032b9d51a..c609ae999d47d 100644
--- a/pandas/tests/io/test_fsspec.py
+++ b/pandas/tests/io/test_fsspec.py
@@ -72,7 +72,9 @@ def test_read_csv(cleared_fs, df1):
         w.write(text)
 
     df2 = read_csv("memory://test/test.csv", parse_dates=["dt"])
-    tm.assert_frame_equal(df1, df2)
+    expected = df1.copy()
+    expected["dt"] = expected["dt"].astype("M8[s]")
+    tm.assert_frame_equal(df2, expected)
 
 
 def test_reasonable_error(monkeypatch, cleared_fs):
@@ -95,7 +97,9 @@ def test_to_csv(cleared_fs, df1):
 
     df2 = read_csv("memory://test/test.csv", parse_dates=["dt"], index_col=0)
 
-    tm.assert_frame_equal(df1, df2)
+    expected = df1.copy()
+    expected["dt"] = expected["dt"].astype("M8[s]")
+    tm.assert_frame_equal(df2, expected)
 
 
 def test_to_excel(cleared_fs, df1):
@@ -106,7 +110,9 @@ def test_to_excel(cleared_fs, df1):
 
     df2 = read_excel(path, parse_dates=["dt"], index_col=0)
 
-    tm.assert_frame_equal(df1, df2)
+    expected = df1.copy()
+    expected["dt"] = expected["dt"].astype("M8[s]")
+    tm.assert_frame_equal(df2, expected)
 
 
 @pytest.mark.parametrize("binary_mode", [False, True])
@@ -128,7 +134,9 @@ def test_to_csv_fsspec_object(cleared_fs, binary_mode, df1):
     )
     assert not fsspec_object.closed
 
-    tm.assert_frame_equal(df1, df2)
+    expected = df1.copy()
+    expected["dt"] = expected["dt"].astype("M8[s]")
+    tm.assert_frame_equal(df2, expected)
 
 
 def test_csv_options(fsspectest):
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index 0ce6a8bf82cd8..85ccc54f0c8d2 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -102,7 +102,11 @@ def from_uri(path):
         df1.to_markdown(path)
         df2 = df1
 
-    tm.assert_frame_equal(df1, df2)
+    expected = df1[:]
+    if format in ["csv", "excel"]:
+        expected["dt"] = expected["dt"].dt.as_unit("s")
+
+    tm.assert_frame_equal(df2, expected)
 
 
 def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 2251fa20f0b63..2e6ea250f3f1b 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -1045,11 +1045,15 @@ def test_header_inferred_from_rows_with_only_th(self, flavor_read_html):
 
     def test_parse_dates_list(self, flavor_read_html):
         df = DataFrame({"date": date_range("1/1/2001", periods=10)})
-        expected = df.to_html()
-        res = flavor_read_html(StringIO(expected), parse_dates=[1], index_col=0)
-        tm.assert_frame_equal(df, res[0])
-        res = flavor_read_html(StringIO(expected), parse_dates=["date"], index_col=0)
-        tm.assert_frame_equal(df, res[0])
+
+        expected = df[:]
+        expected["date"] = expected["date"].dt.as_unit("s")
+
+        str_df = 
df.to_html() + res = flavor_read_html(StringIO(str_df), parse_dates=[1], index_col=0) + tm.assert_frame_equal(expected, res[0]) + res = flavor_read_html(StringIO(str_df), parse_dates=["date"], index_col=0) + tm.assert_frame_equal(expected, res[0]) def test_parse_dates_combine(self, flavor_read_html): raw_dates = Series(date_range("1/1/2001", periods=10)) @@ -1062,7 +1066,7 @@ def test_parse_dates_combine(self, flavor_read_html): res = flavor_read_html( StringIO(df.to_html()), parse_dates={"datetime": [1, 2]}, index_col=1 ) - newdf = DataFrame({"datetime": raw_dates}) + newdf = DataFrame({"datetime": raw_dates}).astype("M8[s]") tm.assert_frame_equal(newdf, res[0]) def test_wikipedia_states_table(self, datapath, flavor_read_html): diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py index b4a8c713d99ab..8533e812c3b2d 100644 --- a/pandas/tests/io/test_orc.py +++ b/pandas/tests/io/test_orc.py @@ -320,6 +320,8 @@ def test_orc_dtype_backend_pyarrow(): ], } ) + # FIXME: without casting to ns we do not round-trip correctly + df["datetime_with_nat"] = df["datetime_with_nat"].astype("M8[ns]") bytes_data = df.copy().to_orc() result = read_orc(BytesIO(bytes_data), dtype_backend="pyarrow") diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3cba7b7da347e..6b8e1520b7a66 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -668,6 +668,7 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): + @pytest.mark.xfail(reason="datetime_with_nat unit doesn't round-trip") def test_basic(self, pa, df_full): df = df_full @@ -703,6 +704,7 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): expected = df_full.copy() expected.loc[1, "string_with_nan"] = None + expected["datetime_with_nat"] = expected["datetime_with_nat"].astype("M8[ms]") tm.assert_frame_equal(res, expected) def test_duplicate_columns(self, pa): @@ -989,6 +991,10 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full): expected["datetime_tz"] = expected["datetime_tz"].astype( pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels")) ) + else: + expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( + "timestamp[ms][pyarrow]" + ) check_round_trip( df, @@ -1013,6 +1019,7 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa): expected=expected, ) + @pytest.mark.xfail(reason="pa.pandas_compat passes 'datetime64' to .astype") def test_columns_dtypes_not_invalid(self, pa): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) @@ -1102,9 +1109,11 @@ def test_infer_string_large_string_type(self, tmp_path, pa): # df.to_parquet(tmp_path / "test.parquet") # result = read_parquet(tmp_path / "test.parquet") # assert result["strings"].dtype == "string" + # FIXME: don't leave commented-out class TestParquetFastParquet(Base): + @pytest.mark.xfail(reason="datetime_with_nat gets incorrect values") def test_basic(self, fp, df_full): df = df_full @@ -1249,6 +1258,25 @@ def test_error_on_using_partition_cols_and_partition_on( partition_cols=partition_cols, ) + def test_empty_dataframe(self, fp): + # GH #27339 + df = pd.DataFrame() + expected = df.copy() + check_round_trip(df, fp, expected=expected) + + @pytest.mark.xfail( + reason="fastparquet passed mismatched values/dtype to DatetimeArray " + "constructor, see https://github.com/dask/fastparquet/issues/891" + ) + def test_timezone_aware_index(self, fp, timezone_aware_date_list): + idx = 5 * [timezone_aware_date_list] + + df = 
pd.DataFrame(index=idx, data={"index_as_col": idx}) + + expected = df.copy() + expected.index.name = "index" + check_round_trip(df, fp, expected=expected) + def test_close_file_handle_on_read_error(self): with tm.ensure_clean("test.parquet") as path: pathlib.Path(path).write_bytes(b"breakit") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index c8f4d68230e5b..65e04d7920f18 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1800,7 +1800,7 @@ def test_api_custom_dateparsing_error( pytest.mark.xfail(reason="failing combination of arguments") ) - expected = types_data_frame.astype({"DateCol": "datetime64[ns]"}) + expected = types_data_frame.astype({"DateCol": "datetime64[s]"}) result = read_sql( text, @@ -2819,7 +2819,7 @@ def test_datetime_with_timezone_roundtrip(conn, request): # For dbs that support timestamps with timezones, should get back UTC # otherwise naive data should be returned expected = DataFrame( - {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} + {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific", unit="us")} ) assert expected.to_sql(name="test_datetime_tz", con=conn, index=False) == 3 @@ -2837,7 +2837,7 @@ def test_datetime_with_timezone_roundtrip(conn, request): if "sqlite" in conn_name: # read_sql_query does not return datetime type like read_sql_table assert isinstance(result.loc[0, "A"], str) - result["A"] = to_datetime(result["A"]) + result["A"] = to_datetime(result["A"]).dt.as_unit("us") tm.assert_frame_equal(result, expected) @@ -2848,7 +2848,9 @@ def test_out_of_bounds_datetime(conn, request): data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) assert data.to_sql(name="test_datetime_obb", con=conn, index=False) == 1 result = sql.read_sql_table("test_datetime_obb", conn) - expected = DataFrame([pd.NaT], columns=["date"]) + expected = DataFrame( + np.array([datetime(9999, 1, 1)], dtype="M8[us]"), columns=["date"] + ) tm.assert_frame_equal(result, expected) @@ -2857,7 +2859,7 @@ def test_naive_datetimeindex_roundtrip(conn, request): # GH 23510 # Ensure that a naive DatetimeIndex isn't converted to UTC conn = request.getfixturevalue(conn) - dates = date_range("2018-01-01", periods=5, freq="6h")._with_freq(None) + dates = date_range("2018-01-01", periods=5, freq="6h", unit="us")._with_freq(None) expected = DataFrame({"nums": range(5)}, index=dates) assert expected.to_sql(name="foo_table", con=conn, index_label="info_date") == 5 result = sql.read_sql_table("foo_table", conn, index_col="info_date") @@ -2909,7 +2911,10 @@ def test_datetime(conn, request): # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", conn) result = result.drop("index", axis=1) - tm.assert_frame_equal(result, df) + + expected = df[:] + expected["A"] = expected["A"].astype("M8[us]") + tm.assert_frame_equal(result, expected) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query("SELECT * FROM test_datetime", conn) @@ -2917,7 +2922,7 @@ def test_datetime(conn, request): if "sqlite" in conn_name: assert isinstance(result.loc[0, "A"], str) result["A"] = to_datetime(result["A"]) - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) else: tm.assert_frame_equal(result, df) @@ -2934,14 +2939,16 @@ def test_datetime_NaT(conn, request): # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", conn) - tm.assert_frame_equal(result, df) + expected = df[:] + expected["A"] = 
expected["A"].astype("M8[us]") + tm.assert_frame_equal(result, expected) # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query("SELECT * FROM test_datetime", conn) if "sqlite" in conn_name: assert isinstance(result.loc[0, "A"], str) result["A"] = to_datetime(result["A"], errors="coerce") - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) else: tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 0d55bd48263c2..94ef11fb59f7b 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -203,9 +203,9 @@ def test_read_dta2(self, datapath): # buggy test because of the NaT comparison on certain platforms # Format 113 test fails since it does not support tc and tC formats # tm.assert_frame_equal(parsed_113, expected) - tm.assert_frame_equal(parsed_114, expected, check_datetimelike_compat=True) - tm.assert_frame_equal(parsed_115, expected, check_datetimelike_compat=True) - tm.assert_frame_equal(parsed_117, expected, check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_114, expected) # , check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_115, expected) # , check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_117, expected) # , check_datetimelike_compat=True) @pytest.mark.parametrize( "file", ["stata3_113", "stata3_114", "stata3_115", "stata3_117"] @@ -905,8 +905,8 @@ def test_big_dates(self, datapath): parsed_115 = read_stata(datapath("io", "data", "stata", "stata9_115.dta")) parsed_117 = read_stata(datapath("io", "data", "stata", "stata9_117.dta")) - tm.assert_frame_equal(expected, parsed_115, check_datetimelike_compat=True) - tm.assert_frame_equal(expected, parsed_117, check_datetimelike_compat=True) + tm.assert_frame_equal(expected, parsed_115) # , check_datetimelike_compat=True) + tm.assert_frame_equal(expected, parsed_117) # , check_datetimelike_compat=True) date_conversion = {c: c[-2:] for c in columns} # {c : c[-2:] for c in columns} @@ -918,7 +918,7 @@ def test_big_dates(self, datapath): tm.assert_frame_equal( written_and_read_again.set_index("index"), expected.set_index(expected.index.astype(np.int32)), - check_datetimelike_compat=True, + # check_datetimelike_compat=True, ) def test_dtype_conversion(self, datapath): @@ -1205,7 +1205,9 @@ def test_read_chunks_117( from_frame = parsed.iloc[pos : pos + chunksize, :].copy() from_frame = self._convert_categorical(from_frame) tm.assert_frame_equal( - from_frame, chunk, check_dtype=False, check_datetimelike_compat=True + from_frame, + chunk, + check_dtype=False, # , check_datetimelike_compat=True ) pos += chunksize @@ -1297,7 +1299,9 @@ def test_read_chunks_115( from_frame = parsed.iloc[pos : pos + chunksize, :].copy() from_frame = self._convert_categorical(from_frame) tm.assert_frame_equal( - from_frame, chunk, check_dtype=False, check_datetimelike_compat=True + from_frame, + chunk, + check_dtype=False, # , check_datetimelike_compat=True ) pos += chunksize diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index c5ef0f39ece19..2ff1c7642ea6a 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1264,6 +1264,7 @@ def test_resample_median_bug_1688(dtype, unit): index=dti, dtype=dtype, ) + df.index = df.index.as_unit("ns") result = df.resample("min").apply(lambda x: x.mean()) exp = df.asfreq("min") @@ -2129,6 +2130,7 @@ def test_resample_c_b_closed_right(freq: 
str, unit): }, index=exp_dti, ).astype(f"M8[{unit}]") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 6fdc398b13835..76d4f04f9e796 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -551,7 +551,7 @@ def test_resample_tz_localized(self, unit): ts_local_naive.index = ts_local_naive.index.tz_localize(None) exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles") - exp.index = pd.DatetimeIndex(exp.index, freq="W") + exp.index = pd.DatetimeIndex(exp.index, freq="W").as_unit("ns") tm.assert_series_equal(result, exp) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index b312d708ade1e..405942a1b68bc 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -128,6 +128,9 @@ def test_getitem_multiple(): index=exp_mi, name="buyer", ) + expected.index = expected.index.set_levels( + expected.index.levels[1].as_unit("ns"), level=1 + ) tm.assert_series_equal(result, expected) result = r["buyer"].count() @@ -545,6 +548,9 @@ def test_groupby_resample_with_list_of_keys(): }, index=mi_exp, ) + expected.index = expected.index.set_levels( + expected.index.levels[1].as_unit("ns"), level=1 + ) tm.assert_frame_equal(result, expected) @@ -618,6 +624,9 @@ def test_groupby_resample_size_all_index_same(): 3, index=mi_exp, ) + expected.index = expected.index.set_levels( + expected.index.levels[1].as_unit("ns"), level=1 + ) tm.assert_series_equal(result, expected) @@ -641,6 +650,9 @@ def test_groupby_resample_on_index_with_list_of_keys(): }, index=mi_exp, ) + expected.index = expected.index.set_levels( + expected.index.levels[1].as_unit("ns"), level=1 + ) tm.assert_frame_equal(result, expected) @@ -667,6 +679,9 @@ def test_groupby_resample_on_index_with_list_of_keys_multi_columns(): }, index=mi_exp, ) + expected.index = expected.index.set_levels( + expected.index.levels[1].as_unit("ns"), level=1 + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index c5e202f36659b..b4a2a3d330114 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -331,7 +331,7 @@ def test_upsample_sum(method, method_args, expected_values): ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"], dtype="M8[ns]", freq="30min", - ) + ).as_unit("ns") result = methodcaller(method, **method_args)(resampled) expected = Series(expected_values, index=index) tm.assert_series_equal(result, expected) @@ -362,6 +362,9 @@ def test_groupby_resample_interpolate(): [volume, week_starting], names=["volume", "week_starting"], ) + expected_ind = expected_ind.set_levels( + expected_ind.levels[1].as_unit("ns"), level=1 + ) expected = DataFrame( data={ diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 31c3ef3176222..49bdcc67e4927 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -19,12 +19,12 @@ "float64": [1.1, np.nan, 3.3], "category": Categorical(["X", "Y", "Z"]), "object": ["a", "b", "c"], - "datetime64[ns]": [ + "datetime64[s]": [ pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02"), pd.Timestamp("2011-01-03"), ], - "datetime64[ns, US/Eastern]": [ + "datetime64[s, 
US/Eastern]": [ pd.Timestamp("2011-01-01", tz="US/Eastern"), pd.Timestamp("2011-01-02", tz="US/Eastern"), pd.Timestamp("2011-01-03", tz="US/Eastern"), diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index d7791ec38a7ae..158ff2a502cde 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -213,7 +213,7 @@ def test_concat_NaT_dataframes(self, tz): @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) - @pytest.mark.parametrize("item", [pd.NaT, Timestamp("20150101")]) + @pytest.mark.parametrize("item", [pd.NaT, Timestamp("20150101").as_unit("ns")]) def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, item): # GH 12396 @@ -260,7 +260,8 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): # GH 12396 # tz-naive - first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1) + # FIXME: without as_unit we get a FutureWarning about all-NA + first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1).dt.as_unit("s") second = DataFrame( [ [Timestamp("2015/01/01", tz=tz2)], @@ -367,7 +368,7 @@ def test_concat_tz_series_tzlocal(self): result = concat([Series(x), Series(y)], ignore_index=True) tm.assert_series_equal(result, Series(x + y)) - assert result.dtype == "datetime64[ns, tzlocal()]" + assert result.dtype == "datetime64[s, tzlocal()]" def test_concat_tz_series_with_datetimelike(self): # see gh-12620: tz and timedelta diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 4fc57c14ec4c3..542cc657c54ff 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3482,6 +3482,7 @@ def test_merge_asof_array_as_on(unit): "ts": dti, } ) + right["ts"] = right["ts"].astype("M8[ns]") ts_merge = pd.date_range( start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h", unit=unit ) @@ -3512,6 +3513,7 @@ def test_merge_asof_array_as_on(unit): "b": [4, 8], } ) + expected["ts"] = expected["ts"].astype("M8[ns]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0811c69859c0d..3f24b4becf592 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -438,11 +438,18 @@ def test_datetime_bin(conv): data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")] bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"] + unit = Timestamp(conv(bin_data[0])).unit expected = Series( IntervalIndex( [ - Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), - Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])), + Interval( + Timestamp(bin_data[0]).as_unit(unit), + Timestamp(bin_data[1]).as_unit(unit), + ), + Interval( + Timestamp(bin_data[1]).as_unit(unit), + Timestamp(bin_data[2]).as_unit(unit), + ), ] ) ).astype(CategoricalDtype(ordered=True)) @@ -514,10 +521,18 @@ def test_datetime_tz_cut_mismatched_tzawareness(box): [ 3, [ - Timestamp("2013-01-01 04:57:07.200000", tz="UTC").tz_convert("US/Eastern"), - Timestamp("2013-01-01 21:00:00", tz="UTC").tz_convert("US/Eastern"), - Timestamp("2013-01-02 13:00:00", tz="UTC").tz_convert("US/Eastern"), - Timestamp("2013-01-03 05:00:00", tz="UTC").tz_convert("US/Eastern"), + Timestamp("2013-01-01 04:57:07.200000", tz="UTC") + .tz_convert("US/Eastern") + .as_unit("ns"), + Timestamp("2013-01-01 21:00:00", tz="UTC") + .tz_convert("US/Eastern") + .as_unit("ns"), + Timestamp("2013-01-02 13:00:00", 
tz="UTC") + .tz_convert("US/Eastern") + .as_unit("ns"), + Timestamp("2013-01-03 05:00:00", tz="UTC") + .tz_convert("US/Eastern") + .as_unit("ns"), ], ], ) @@ -535,16 +550,16 @@ def test_datetime_tz_cut(bins, box): IntervalIndex( [ Interval( - Timestamp("2012-12-31 23:57:07.200000", tz=tz), - Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2012-12-31 23:57:07.200000", tz=tz).as_unit("ns"), + Timestamp("2013-01-01 16:00:00", tz=tz).as_unit("ns"), ), Interval( - Timestamp("2013-01-01 16:00:00", tz=tz), - Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz).as_unit("ns"), + Timestamp("2013-01-02 08:00:00", tz=tz).as_unit("ns"), ), Interval( - Timestamp("2013-01-02 08:00:00", tz=tz), - Timestamp("2013-01-03 00:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz).as_unit("ns"), + Timestamp("2013-01-03 00:00:00", tz=tz).as_unit("ns"), ), ] ) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index d6b61bae850af..3903034db490f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -578,6 +578,8 @@ def test_pivot_with_tz(self, method, unit): "data2": np.arange(4, dtype="int64"), } ) + df["dt1"] = df["dt1"].astype("M8[ns]") + df["dt2"] = df["dt2"].astype("M8[ns]") exp_col1 = Index(["data1", "data1", "data2", "data2"]) exp_col2 = pd.DatetimeIndex( diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 53af673e0f7b0..a3d1b677fcf86 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -267,7 +267,7 @@ def test_datetime_tz_qcut(bins): ], [ date_range("20180101", periods=3), - DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]), + DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]).as_unit("ns"), ], ], ) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index e352e2601cef3..59b970afaec89 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -439,8 +439,8 @@ def test_nat_rfloordiv_timedelta(val, expected): @pytest.mark.parametrize( "value", [ - DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), - DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), + DatetimeIndex(["2011-01-01", "2011-01-02"], dtype="M8[ns]", name="x"), + DatetimeIndex(["2011-01-01", "2011-01-02"], dtype="M8[ns, US/Eastern]", name="x"), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], dtype="M8[ns]"), DatetimeArray._from_sequence( ["2011-01-01", "2011-01-02"], dtype=DatetimeTZDtype(tz="US/Pacific") diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 5f0057ac50b47..0a18f9551d191 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -368,7 +368,7 @@ def test_dt_round_tz_nonexistent(self, method, ts_str, freq): tm.assert_series_equal(result, expected) result = getattr(ser.dt, method)(freq, nonexistent="NaT") - expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) + expected = Series([pd.NaT], dtype="M8[s]").dt.tz_localize(result.dt.tz) tm.assert_series_equal(result, expected) with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 6be325073bb67..9da3d4e444019 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -506,7 +506,7 @@ def 
test_setitem_empty_series_datetimeindex_preserves_freq(self): # GH#33573 our index should retain its freq dti = DatetimeIndex([], freq="D", dtype="M8[ns]") series = Series([], index=dti, dtype=object) - key = Timestamp("2012-01-01") + key = Timestamp("2012-01-01").as_unit("ns") series[key] = 47 expected = Series(47, DatetimeIndex([key], freq="D").as_unit("ns")) tm.assert_series_equal(series, expected) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 4b2122e25f819..4676e1543abe5 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -236,7 +236,7 @@ def test_astype_datetime(self, unit): ser = ser.astype("O") assert ser.dtype == np.object_ - ser = Series([datetime(2001, 1, 2, 0, 0)]) + ser = Series([datetime(2001, 1, 2, 0, 0)], dtype="M8[ns]") ser = ser.astype("O") assert ser.dtype == np.object_ diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index e1ec8afda33a9..7344272063a29 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -80,7 +80,7 @@ def test_combine_first_dt64(self, unit): s1 = Series([np.nan, "2011"]) rs = s0.combine_first(s1) - xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]") + xp = Series([datetime(2010, 1, 1), "2011"], dtype=f"datetime64[{unit}]") tm.assert_series_equal(rs, xp) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index a458b31480375..8b264e04cd817 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -433,7 +433,7 @@ def test_datetime64_tz_fillna(self, tz, unit): Timestamp("2011-01-02 10:00", tz=tz), Timestamp("2011-01-03 10:00"), Timestamp("2011-01-02 10:00", tz=tz), - ] + ], ) tm.assert_series_equal(expected, result) tm.assert_series_equal(isna(ser), null_loc) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index f4f72854e50d3..a87907e28fbdb 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -450,8 +450,8 @@ def test_map_box_dt64(unit): def test_map_box_dt64tz(unit): vals = [ - pd.Timestamp("2011-01-01", tz="US/Eastern"), - pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-01", tz="US/Eastern").as_unit("ns"), + pd.Timestamp("2011-01-02", tz="US/Eastern").as_unit("ns"), ] ser = Series(vals).dt.as_unit(unit) assert ser.dtype == f"datetime64[{unit}, US/Eastern]" diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index e292861012c8f..e2919b38539db 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -31,7 +31,9 @@ def test_from_csv(self, datetime_series, string_series): with tm.ensure_clean() as path: datetime_series.to_csv(path, header=False) ts = self.read_csv(path, parse_dates=True) - tm.assert_series_equal(datetime_series, ts, check_names=False) + expected = datetime_series.copy() + expected.index = expected.index.as_unit("s") + tm.assert_series_equal(expected, ts, check_names=False) assert ts.name is None assert ts.index.name is None @@ -59,6 +61,7 @@ def test_from_csv(self, datetime_series, string_series): check_series = Series( {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0} ) + check_series.index = check_series.index.as_unit("s") tm.assert_series_equal(check_series, 
series) series = self.read_csv(path, sep="|", parse_dates=False) diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index 7f882fa348b7e..e5cef04afce23 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -52,7 +52,7 @@ def test_value_counts_datetime_tz(self, unit): exp_idx = pd.DatetimeIndex( ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"], - tz="US/Eastern", + dtype="M8[ns, US/Eastern]", name="xxx", ).as_unit(unit) exp = Series([3, 2, 1], index=exp_idx, name="count") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index b00074c04257e..70f8f4c09ac41 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -934,7 +934,7 @@ def test_constructor_datetimes_with_nulls(self): np.array([None, None, datetime.now(), None]), ]: result = Series(arr) - assert result.dtype == "M8[ns]" + assert result.dtype == "M8[us]" def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype="M8[ns]", index=range(5)) @@ -962,15 +962,15 @@ def test_constructor_dtype_datetime64_10(self): dates = [np.datetime64(x) for x in pydates] ser = Series(dates) - assert ser.dtype == "M8[ns]" + assert ser.dtype == "M8[us]" ser.iloc[0] = np.nan - assert ser.dtype == "M8[ns]" + assert ser.dtype == "M8[us]" # GH3414 related expected = Series(pydates, dtype="datetime64[ms]") - result = Series(Series(dates).astype(np.int64) / 1000000, dtype="M8[ms]") + result = Series(Series(dates).view(np.int64) / 1000, dtype="M8[ms]") tm.assert_series_equal(result, expected) result = Series(dates, dtype="datetime64[ms]") @@ -1155,7 +1155,7 @@ def test_constructor_with_datetime_tz4(self): Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), ] ) - assert ser.dtype == "datetime64[ns, US/Pacific]" + assert ser.dtype == "datetime64[s, US/Pacific]" assert lib.infer_dtype(ser, skipna=True) == "datetime64" def test_constructor_with_datetime_tz3(self): @@ -1215,7 +1215,7 @@ def test_construction_to_datetimelike_unit(self, arr_dtype, kind, unit): def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): # GH 17415: With naive string result = Series([arg], dtype="datetime64[ns, CET]") - expected = Series(Timestamp(arg)).dt.tz_localize("CET") + expected = Series(Timestamp(arg).as_unit("ns")).dt.tz_localize("CET") tm.assert_series_equal(result, expected) def test_constructor_datetime64_bigendian(self): @@ -1356,14 +1356,8 @@ def test_constructor_dict_order(self): expected = Series([1, 0, 2], index=list("bac")) tm.assert_series_equal(result, expected) - def test_constructor_dict_extension(self, ea_scalar_and_dtype, request): + def test_constructor_dict_extension(self, ea_scalar_and_dtype): ea_scalar, ea_dtype = ea_scalar_and_dtype - if isinstance(ea_scalar, Timestamp): - mark = pytest.mark.xfail( - reason="Construction from dict goes through " - "maybe_convert_objects which casts to nano" - ) - request.applymarker(mark) d = {"a": ea_scalar} result = Series(d, index=["a"]) expected = Series(ea_scalar, index=["a"], dtype=ea_dtype) @@ -1408,7 +1402,9 @@ def create_data(constructor): result_Timestamp = Series(data_Timestamp) tm.assert_series_equal(result_datetime64, expected) - tm.assert_series_equal(result_datetime, expected) + tm.assert_series_equal( + result_datetime, expected.set_axis(expected.index.as_unit("us")) + ) tm.assert_series_equal(result_Timestamp, expected) def 
test_constructor_dict_tuple_indexer(self):
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 3e617138c4a6a..1982404d3bd05 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -116,7 +116,9 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
         ser = Series([19801222, 19801222] + [19810105] * 5, dtype="float")
         # with NaT
         expected = Series(
-            [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5
+            [Timestamp("19801222"), Timestamp("19801222")]
+            + [Timestamp("19810105")] * 5,
+            dtype="M8[s]",
         )
         expected[2] = np.nan
         ser[2] = np.nan
@@ -142,19 +144,32 @@ def test_to_datetime_format_YYYYMM_with_nat(self, cache):
         # Explicit cast to float to explicit cast when setting np.nan
         ser = Series([198012, 198012] + [198101] * 5, dtype="float")
         expected = Series(
-            [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5
+            [Timestamp("19801201"), Timestamp("19801201")]
+            + [Timestamp("19810101")] * 5,
+            dtype="M8[s]",
         )
         expected[2] = np.nan
         ser[2] = np.nan
         result = to_datetime(ser, format="%Y%m", cache=cache)
         tm.assert_series_equal(result, expected)
 
+    def test_to_datetime_format_YYYYMMDD_oob_for_ns(self, cache):
+        # coercion
+        # GH 7930, GH 14487
+        ser = Series([20121231, 20141231, 99991231])
+        result = to_datetime(ser, format="%Y%m%d", errors="raise", cache=cache)
+        expected = Series(
+            np.array(["2012-12-31", "2014-12-31", "9999-12-31"], dtype="M8[s]"),
+            dtype="M8[s]",
+        )
+        tm.assert_series_equal(result, expected)
+
     def test_to_datetime_format_YYYYMMDD_coercion(self, cache):
         # coercion
         # GH 7930
-        ser = Series([20121231, 20141231, 99991231])
+        ser = Series([20121231, 20141231, 999999999999999999999999999991231])
         result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache)
-        expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]")
+        expected = Series(["20121231", "20141231", "NaT"], dtype="M8[s]")
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -531,14 +549,15 @@ def test_to_datetime_overflow(self):
        res = 
to_datetime(arg, errors="coerce")
         assert res is NaT
         res = to_datetime([arg], errors="coerce")
-        tm.assert_index_equal(res, Index([NaT]))
+        exp = Index([NaT], dtype="M8[s]")
+        tm.assert_index_equal(res, exp)
 
     def test_to_datetime_mixed_datetime_and_string(self):
         # GH#47018 adapted old doctest with new behavior
         d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
         d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
         res = to_datetime(["2020-01-01 17:00 -0100", d2])
         expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60)))
         tm.assert_index_equal(res, expected)
 
     def test_to_datetime_mixed_string_and_numeric(self):
@@ -562,7 +583,7 @@ def test_to_datetime_mixed_date_and_string(self, format):
         # https://github.com/pandas-dev/pandas/issues/50108
         d1 = date(2020, 1, 2)
         res = to_datetime(["2020-01-01", d1], format=format)
-        expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[ns]")
+        expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[s]")
         tm.assert_index_equal(res, expected)
 
     @pytest.mark.parametrize(
@@ -578,7 +599,7 @@ def test_to_datetime_mixed_date_and_string(self, format):
             ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00-08:00"],
             DatetimeIndex(
                 ["2000-01-01 09:00:00+00:00", "2000-01-01 10:00:00+00:00"],
-                dtype="datetime64[ns, UTC]",
+                dtype="datetime64[us, UTC]",
             ),
             id="all tz-aware, with utc",
         ),
         pytest.param(
             ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
             DatetimeIndex(
                 ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
-            ),
+            ).as_unit("us"),
             id="all tz-aware, without utc",
         ),
         pytest.param(
             ["2000-01-01 01:00:00-08:00", "2000-01-01 02:00:00+00:00"],
             DatetimeIndex(
                 ["2000-01-01 09:00:00+00:00", "2000-01-01 02:00:00+00:00"],
-                dtype="datetime64[ns, UTC]",
+                dtype="datetime64[us, UTC]",
             ),
             id="all tz-aware, mixed offsets, with utc",
         ),
         pytest.param(
             ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"],
             DatetimeIndex(
                 ["2000-01-01 01:00:00+00:00", "2000-01-01 02:00:00+00:00"],
-                dtype="datetime64[ns, UTC]",
+                dtype="datetime64[us, UTC]",
             ),
             id="tz-aware string, naive pydatetime, with utc",
         ),
@@ -624,6 +645,8 @@ def test_to_datetime_mixed_datetime_and_string_with_format(
         ts1 = constructor(args[0])
         ts2 = args[1]
         result = to_datetime([ts1, ts2], format=fmt, utc=utc)
+        if constructor is Timestamp:
+            expected = expected.as_unit("s")
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -695,7 +718,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed(
                 "%Y-%m-%d %H:%M:%S%z",
                 DatetimeIndex(
                     ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"],
-                    dtype="datetime64[ns, UTC]",
+                    dtype="datetime64[s, UTC]",
                 ),
                 id="ISO8601, UTC",
             ),
             pytest.param(
                 "%Y-%d-%m %H:%M:%S%z",
                 DatetimeIndex(
                     ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"],
-                    dtype="datetime64[ns, UTC]",
+                    dtype="datetime64[s, UTC]",
                 ),
                 id="non-ISO8601, UTC",
             ),
@@ -963,7 +986,7 @@ def test_to_datetime_now(self):
         # See GH#18666
         with tm.set_timezone("US/Eastern"):
             # GH#18705
-            now = Timestamp("now").as_unit("ns")
+            now = Timestamp("now")
             pdnow = to_datetime("now")
             pdnow2 = to_datetime(["now"])[0]
 
@@ -985,12 
+1008,12 @@ def test_to_datetime_today(self, tz):
         # this both of these timezones _and_ UTC will all be in the same day,
         # so this test will not detect the regression introduced in #18666.
         with tm.set_timezone(tz):
-            nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64)
+            nptoday = np.datetime64("today").astype("datetime64[us]").astype(np.int64)
             pdtoday = to_datetime("today")
             pdtoday2 = to_datetime(["today"])[0]
 
-            tstoday = Timestamp("today").as_unit("ns")
-            tstoday2 = Timestamp.today().as_unit("ns")
+            tstoday = Timestamp("today")
+            tstoday2 = Timestamp.today()
 
             # These should all be equal with infinite perf; this gives
             # a generous margin of 10 seconds
@@ -1026,7 +1049,7 @@ def test_to_datetime_now_with_format(self, format, expected_ds, string, attribut
         # https://github.com/pandas-dev/pandas/issues/50359
         result = to_datetime(["2020-01-03 00:00:00Z", string], format=format, utc=True)
         expected = DatetimeIndex(
-            [expected_ds, getattr(Timestamp, attribute)()], dtype="datetime64[ns, UTC]"
+            [expected_ds, getattr(Timestamp, attribute)()], dtype="datetime64[s, UTC]"
         )
         assert (expected - result).max().total_seconds() < 1
 
@@ -1087,11 +1110,7 @@ def test_to_datetime_array_of_dt64s(self, cache, unit):
         # Assuming all datetimes are in bounds, to_datetime() returns
         # an array that is equal to Timestamp() parsing
         result = to_datetime(dts, cache=cache)
-        if cache:
-            # FIXME: behavior should not depend on cache
-            expected = DatetimeIndex([Timestamp(x).asm8 for x in dts], dtype="M8[s]")
-        else:
-            expected = DatetimeIndex([Timestamp(x).asm8 for x in dts], dtype="M8[ns]")
+        expected = DatetimeIndex([Timestamp(x).asm8 for x in dts], dtype="M8[s]")
 
         tm.assert_index_equal(result, expected)
 
@@ -1102,14 +1121,7 @@ def test_to_datetime_array_of_dt64s(self, cache, unit):
             to_datetime(dts_with_oob, errors="raise")
 
         result = to_datetime(dts_with_oob, errors="coerce", cache=cache)
-        if not cache:
-            # FIXME: shouldn't depend on cache! 
- expected = DatetimeIndex( - [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8] * 30 - + [NaT], - ) - else: - expected = DatetimeIndex(np.array(dts_with_oob, dtype="M8[s]")) + expected = DatetimeIndex(np.array(dts_with_oob, dtype="M8[s]")) tm.assert_index_equal(result, expected) def test_to_datetime_tz(self, cache): @@ -1122,7 +1134,7 @@ def test_to_datetime_tz(self, cache): result = to_datetime(arr, cache=cache) expected = DatetimeIndex( ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific" - ) + ).as_unit("s") tm.assert_index_equal(result, expected) def test_to_datetime_tz_mixed(self, cache): @@ -1141,7 +1153,7 @@ def test_to_datetime_tz_mixed(self, cache): result = to_datetime(arr, cache=cache, errors="coerce") expected = DatetimeIndex( - ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[ns, US/Pacific]" + ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[s, US/Pacific]" ) tm.assert_index_equal(result, expected) @@ -1173,7 +1185,7 @@ def test_to_datetime_tz_pytz(self, cache): result = to_datetime(arr, utc=True, cache=cache) expected = DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"], - dtype="datetime64[ns, UTC]", + dtype="datetime64[us, UTC]", freq=None, ) tm.assert_index_equal(result, expected) @@ -1260,7 +1272,7 @@ def test_to_datetime_tz_psycopg2(self, request, cache): result = to_datetime(arr, errors="coerce", utc=True, cache=cache) expected = DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"], - dtype="datetime64[ns, UTC]", + dtype="datetime64[us, UTC]", freq=None, ) tm.assert_index_equal(result, expected) @@ -1269,15 +1281,15 @@ def test_to_datetime_tz_psycopg2(self, request, cache): i = DatetimeIndex( ["2000-01-01 08:00:00"], tz=psycopg2_tz.FixedOffsetTimezone(offset=-300, name=None), - ) - assert is_datetime64_ns_dtype(i) + ).as_unit("us") + assert not is_datetime64_ns_dtype(i) # tz coercion result = to_datetime(i, errors="coerce", cache=cache) tm.assert_index_equal(result, i) result = to_datetime(i, errors="coerce", utc=True, cache=cache) - expected = DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]") + expected = DatetimeIndex(["2000-01-01 13:00:00"], dtype="datetime64[us, UTC]") tm.assert_index_equal(result, expected) @pytest.mark.parametrize("arg", [True, False]) @@ -1347,16 +1359,20 @@ def test_datetime_invalid_scalar(self, value, format): def test_datetime_outofbounds_scalar(self, value, format): # GH24763 res = to_datetime(value, errors="coerce", format=format) - assert res is NaT + if format is None: + assert isinstance(res, Timestamp) + assert res == Timestamp(value) + else: + assert res is NaT if format is not None: msg = r'^time data ".*" doesn\'t match format ".*", at position 0.' 
with pytest.raises(ValueError, match=msg): to_datetime(value, errors="raise", format=format) else: - msg = "^Out of bounds .*, at position 0$" - with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(value, errors="raise", format=format) + res = to_datetime(value, errors="raise", format=format) + assert isinstance(res, Timestamp) + assert res == Timestamp(value) @pytest.mark.parametrize( ("values"), [(["a"]), (["00:01:99"]), (["a", "b", "99:00:00"])] @@ -1429,15 +1445,17 @@ def test_to_datetime_cache_scalar(self): assert result == expected @pytest.mark.parametrize( - "datetimelikes,expected_values", + "datetimelikes,expected_values,exp_unit", ( ( (None, np.nan) + (NaT,) * start_caching_at, (NaT,) * (start_caching_at + 2), + "s", ), ( (None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, + "s", ), ( (None,) @@ -1445,11 +1463,12 @@ def test_to_datetime_cache_scalar(self): + ("2012 July 26", Timestamp("2012-07-26")), (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), Timestamp("2012-07-26")), + "s", ), ), ) def test_convert_object_to_datetime_with_cache( - self, datetimelikes, expected_values + self, datetimelikes, expected_values, exp_unit ): # GH#39882 ser = Series( @@ -1459,7 +1478,7 @@ def test_convert_object_to_datetime_with_cache( result_series = to_datetime(ser, errors="coerce") expected_series = Series( expected_values, - dtype="datetime64[ns]", + dtype=f"datetime64[{exp_unit}]", ) tm.assert_series_equal(result_series, expected_series) @@ -1480,7 +1499,7 @@ def test_convert_object_to_datetime_with_cache( ) def test_to_datetime_converts_null_like_to_nat(self, cache, input): # GH35888 - expected = Series([NaT] * len(input), dtype="M8[ns]") + expected = Series([NaT] * len(input), dtype="M8[s]") result = to_datetime(input, cache=cache) tm.assert_series_equal(result, expected) @@ -1531,7 +1550,17 @@ def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds): # https://github.com/pandas-dev/pandas/issues/50255 ts_strings = [string_arg, outofbounds] result = to_datetime(ts_strings, errors="coerce", format=format) - expected = DatetimeIndex([datetime(2018, 3, 1), NaT]) + if isinstance(outofbounds, str) and ( + format.startswith("%B") ^ outofbounds.startswith("J") + ): + # the strings don't match the given format, so they raise and we coerce + expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[s]") + elif isinstance(outofbounds, datetime): + expected = DatetimeIndex( + [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" + ) + else: + expected = DatetimeIndex([datetime(2018, 3, 1), outofbounds], dtype="M8[s]") tm.assert_index_equal(result, expected) def test_to_datetime_malformed_no_raise(self): @@ -1542,7 +1571,9 @@ def test_to_datetime_malformed_no_raise(self): UserWarning, match="Could not infer format", raise_on_extra_warnings=False ): result = to_datetime(ts_strings, errors="coerce") - tm.assert_index_equal(result, Index([NaT, NaT])) + # TODO: should Index get "s" by default here? 
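+        # All-NaT output gives no parsed resolution to infer from; assuming
+        # the second-unit fallback used throughout this series, e.g.
+        #     to_datetime(["junk", "junk"], errors="coerce").dtype == "M8[s]"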
+        exp = Index([NaT, NaT], dtype="M8[s]")
+        tm.assert_index_equal(result, exp)
 
     def test_to_datetime_malformed_raise(self):
         # GH 48633
@@ -1590,7 +1621,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self):
         result = to_datetime(ts_strings, utc=True)
         expected = DatetimeIndex(
             [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC"
-        )
+        ).as_unit("s")
         tm.assert_index_equal(result, expected)
 
     def test_mixed_offsets_with_native_datetime_utc_false_raises(self):
@@ -1616,7 +1647,7 @@ def test_non_iso_strings_with_tz_offset(self):
         result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2)
         expected = DatetimeIndex(
             [datetime(2018, 3, 1, 12, tzinfo=timezone(timedelta(minutes=240)))] * 2
-        )
+        ).as_unit("s")
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -1637,9 +1668,11 @@ def test_timestamp_utc_true(self, ts, expected):
     @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"])
     def test_to_datetime_with_format_out_of_bounds(self, dt_str):
         # GH 9107
-        msg = "Out of bounds nanosecond timestamp"
-        with pytest.raises(OutOfBoundsDatetime, match=msg):
-            to_datetime(dt_str, format="%Y%m%d")
+        res = to_datetime(dt_str, format="%Y%m%d")
+        dtobj = datetime.strptime(dt_str, "%Y%m%d")
+        expected = Timestamp(dtobj).as_unit("s")
+        assert res == expected
+        assert res.unit == expected.unit
 
     def test_to_datetime_utc(self):
         arr = np.array([parse("2012-06-13T01:39:00Z")], dtype=object)
@@ -2137,7 +2172,7 @@ def test_dataframe_utc_true(self):
         df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
         result = to_datetime(df, utc=True)
         expected = Series(
-            np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]")
+            np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[s]")
         ).dt.tz_localize("UTC")
         tm.assert_series_equal(result, expected)
 
@@ -2349,7 +2384,9 @@ def test_to_datetime_with_space_in_series(self, cache):
         with pytest.raises(ValueError, match=msg):
             to_datetime(ser, errors="raise", cache=cache)
         result_coerce = to_datetime(ser, errors="coerce", cache=cache)
-        expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT])
+        expected_coerce = Series(
+            [datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]
+        ).dt.as_unit("s")
         tm.assert_series_equal(result_coerce, expected_coerce)
 
     @td.skip_if_not_us_locale
@@ -2461,7 +2498,7 @@ def test_string_na_nat_conversion(self, cache):
 
         strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object)
 
-        expected = np.empty(4, dtype="M8[ns]")
+        expected = np.empty(4, dtype="M8[s]")
         for i, val in enumerate(strings):
             if isna(val):
                 expected[i] = iNaT
@@ -2506,7 +2543,7 @@ def test_string_na_nat_conversion_with_name(self, cache):
 
         result = to_datetime(series, cache=cache)
         dresult = to_datetime(dseries, cache=cache)
 
-        expected = Series(np.empty(5, dtype="M8[ns]"), index=idx)
+        expected = Series(np.empty(5, dtype="M8[s]"), index=idx)
         for i in range(5):
x = series.iloc[i] if isna(x): @@ -2546,7 +2583,7 @@ def test_dayfirst(self, cache): arr = ["10/02/2014", "11/02/2014", "12/02/2014"] expected = DatetimeIndex( [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)] - ) + ).as_unit("s") idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) idx3 = to_datetime(arr, dayfirst=True, cache=cache) @@ -2570,7 +2607,7 @@ def test_dayfirst_warnings_valid_input(self): # CASE 1: valid input arr = ["31/12/2014", "10/03/2011"] expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None + ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None ) # A. dayfirst arg correct, no warning @@ -2675,7 +2712,7 @@ def test_to_datetime_consistent_format(self, cache): ser = Series(np.array(data)) result = to_datetime(ser, cache=cache) expected = Series( - ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[ns]" + ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[s]" ) tm.assert_series_equal(result, expected) @@ -2687,9 +2724,7 @@ def test_to_datetime_series_with_nans(self, cache): ) ) result = to_datetime(ser, cache=cache) - expected = Series( - ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[ns]" - ) + expected = Series(["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[s]") tm.assert_series_equal(result, expected) def test_to_datetime_series_start_with_nans(self, cache): @@ -2708,7 +2743,7 @@ def test_to_datetime_series_start_with_nans(self, cache): result = to_datetime(ser, cache=cache) expected = Series( - [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[ns]" + [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[s]" ) tm.assert_series_equal(result, expected) @@ -2722,6 +2757,7 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset): result = to_datetime(ser) tz = timezone(timedelta(minutes=offset)) expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)]) + expected = expected.dt.as_unit("s") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -2878,9 +2914,16 @@ def test_parsers(self, date_str, expected, cache): # https://github.com/dateutil/dateutil/issues/217 yearfirst = True - result1, _ = parsing.parse_datetime_string_with_reso( + result1, reso_attrname = parsing.parse_datetime_string_with_reso( date_str, yearfirst=yearfirst ) + + reso = { + "nanosecond": "ns", + "microsecond": "us", + "millisecond": "ms", + "second": "s", + }.get(reso_attrname, "s") result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below @@ -2894,8 +2937,16 @@ def test_parsers(self, date_str, expected, cache): for res in [result1, result2]: assert res == expected - for res in [result3, result4, result6, result8, result9]: + for res in [result3, result4]: exp = DatetimeIndex([Timestamp(expected)]) + exp = exp.as_unit(reso) + tm.assert_index_equal(res, exp) + + for res in [result6, result8, result9]: + # These cases go through array_to_datetime, not array_to_strptime. + # This means that as of GH#??? they do resolution inference + # while the other cases do not. 
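+            # Rough sketch of the `reso` lookup above: sub-second resolutions
+            # ("ns"/"us"/"ms") pass through and coarser ones such as "day"
+            # fall back to "s", so a date-only string pins these expected
+            # indexes to "M8[s]".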
+ exp = DatetimeIndex([Timestamp(expected)]).as_unit(reso) tm.assert_index_equal(res, exp) # these really need to have yearfirst, but we don't support @@ -2909,7 +2960,7 @@ def test_na_values_with_cache( self, cache, unique_nulls_fixture, unique_nulls_fixture2 ): # GH22305 - expected = Index([NaT, NaT], dtype="datetime64[ns]") + expected = Index([NaT, NaT], dtype="datetime64[s]") result = to_datetime([unique_nulls_fixture, unique_nulls_fixture2], cache=cache) tm.assert_index_equal(result, expected) @@ -3185,9 +3236,16 @@ def test_incorrect_value_exception(self): ) def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning): # see gh-23830 - msg = r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00, at position 0" - with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime("2417-10-10 00:00:00", format=format) + if format is None: + res = to_datetime("2417-10-10 00:00:00.00", format=format) + assert isinstance(res, Timestamp) + assert res.year == 2417 + assert res.month == 10 + assert res.day == 10 + else: + msg = "unconverted data remains when parsing with format.*, at position 0" + with pytest.raises(ValueError, match=msg): + to_datetime("2417-10-10 00:00:00.00", format=format) @pytest.mark.parametrize( "arg, origin, expected_str", @@ -3319,7 +3377,7 @@ def test_empty_string_datetime(errors, args, format): # coerce empty string to pd.NaT result = to_datetime(td, format=format, errors=errors) - expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[ns]") + expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[s]") tm.assert_series_equal(expected, result) @@ -3359,14 +3417,12 @@ def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length): ) result1 = to_datetime(ser, errors="coerce", utc=True) - expected1 = Series( - [NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length) - ) - + expected1 = Series([Timestamp(x) for x in ser]) + assert expected1.dtype == "M8[us, UTC]" tm.assert_series_equal(result1, expected1) - with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"): - to_datetime(ser, errors="raise", utc=True) + result3 = to_datetime(ser, errors="raise", utc=True) + tm.assert_series_equal(result3, expected1) def test_to_datetime_format_f_parse_nanos(): @@ -3390,7 +3446,9 @@ def test_to_datetime_format_f_parse_nanos(): def test_to_datetime_mixed_iso8601(): # https://github.com/pandas-dev/pandas/issues/50411 result = to_datetime(["2020-01-01", "2020-01-01 05:00:00"], format="ISO8601") - expected = DatetimeIndex(["2020-01-01 00:00:00", "2020-01-01 05:00:00"]) + expected = DatetimeIndex( + ["2020-01-01 00:00:00", "2020-01-01 05:00:00"] + ) # .as_unit("ns") tm.assert_index_equal(result, expected) @@ -3451,7 +3509,7 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): # GH 50887 vals = ["2020-01-01 00:00+00:00", ""] result = to_datetime(vals, format="mixed") - expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[ns, UTC]") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[s, UTC]") tm.assert_index_equal(result, expected) # Check that a couple of other similar paths work the same way diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index b2eefd04ef93b..56b457743628e 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -321,7 +321,7 @@ def test_holidays_with_timezone_specified_but_no_occurences(): # GH 54580 
# _apply_rule() in holiday.py was silently dropping timezones if you passed it # an empty list of holiday dates that had timezone information - start_date = Timestamp("2018-01-01", tz="America/Chicago") + start_date = Timestamp("2018-01-01", tz="America/Chicago").as_unit("ns") end_date = Timestamp("2018-01-11", tz="America/Chicago") test_case = USFederalHolidayCalendar().holidays( start_date, end_date, return_name=True diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index d9bec358858cf..0d8858aa77e59 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -156,7 +156,7 @@ def test_parsing_valid_dates(data, expected): arr = np.array(data, dtype=object) result, _ = tslib.array_to_datetime(arr) - expected = np.array(expected, dtype="M8[ns]") + expected = np.array(expected, dtype="M8[s]") tm.assert_numpy_array_equal(result, expected) @@ -174,6 +174,8 @@ def test_parsing_timezone_offsets(dt_string, expected_tz): # to the same datetime after the timezone offset is added. arr = np.array(["01-01-2013 00:00:00"], dtype=object) expected, _ = tslib.array_to_datetime(arr) + if "000000000" in dt_string: + expected = expected.astype("M8[ns]") arr = np.array([dt_string], dtype=object) result, result_tz = tslib.array_to_datetime(arr) @@ -183,6 +185,7 @@ def test_parsing_timezone_offsets(dt_string, expected_tz): def test_parsing_non_iso_timezone_offset(): + # FIXME: Timestamp(dt_string).unit should be nanos, is seconds dt_string = "01-01-2013T00:00:00.000000000+0000" arr = np.array([dt_string], dtype=object) @@ -206,38 +209,48 @@ def test_parsing_different_timezone_offsets(): @pytest.mark.parametrize( - "invalid_date", + "invalid_date,exp_unit", [ - date(1000, 1, 1), - datetime(1000, 1, 1), - "1000-01-01", - "Jan 1, 1000", - np.datetime64("1000-01-01"), + (date(1000, 1, 1), "s"), + (datetime(1000, 1, 1), "us"), + ("1000-01-01", "s"), + ("Jan 1, 1000", "s"), + (np.datetime64("1000-01-01"), "s"), ], ) @pytest.mark.parametrize("errors", ["coerce", "raise"]) -def test_coerce_outside_ns_bounds(invalid_date, errors): +def test_coerce_outside_ns_bounds(invalid_date, exp_unit, errors): arr = np.array([invalid_date], dtype="object") - kwargs = {"values": arr, "errors": errors} - if errors == "raise": - msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" + result, _ = tslib.array_to_datetime(arr, errors=errors) + out_reso = np.datetime_data(result.dtype)[0] + assert out_reso == exp_unit + ts = Timestamp(invalid_date) + assert ts.unit == exp_unit - with pytest.raises(OutOfBoundsDatetime, match=msg): - tslib.array_to_datetime(**kwargs) - else: # coerce. - result, _ = tslib.array_to_datetime(**kwargs) - expected = np.array([iNaT], dtype="M8[ns]") + expected = np.array([ts._value], dtype=f"M8[{exp_unit}]") + tm.assert_numpy_array_equal(result, expected) - tm.assert_numpy_array_equal(result, expected) + # FIXME: don't leave commented-out + # kwargs = {"values": arr, "errors": errors} + # if errors == "raise": + # msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" + + # with pytest.raises(ValueError, match=msg): + # tslib.array_to_datetime(**kwargs) + # else: # coerce. 
+ # result, _, _ = tslib.array_to_datetime(**kwargs) + # expected = np.array([iNaT], dtype="M8[ns]") + # + # tm.assert_numpy_array_equal(result, expected) def test_coerce_outside_ns_bounds_one_valid(): arr = np.array(["1/1/1000", "1/1/2000"], dtype=object) result, _ = tslib.array_to_datetime(arr, errors="coerce") - expected = [iNaT, "2000-01-01T00:00:00.000000000"] - expected = np.array(expected, dtype="M8[ns]") + expected = ["1000-01-01T00:00:00.000000000", "2000-01-01T00:00:00.000000000"] + expected = np.array(expected, dtype="M8[s]") tm.assert_numpy_array_equal(result, expected) @@ -247,7 +260,13 @@ def test_coerce_of_invalid_datetimes(): # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] - tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[ns]")) + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[s]")) + + # With coercing, the invalid dates becomes iNaT + result, _ = tslib.array_to_datetime(arr, errors="coerce") + expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] + + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[s]")) def test_to_datetime_barely_out_of_bounds(): @@ -275,7 +294,7 @@ def test_datetime_subclass(klass): arr = np.array([klass(2000, 1, 1)], dtype=object) result, _ = tslib.array_to_datetime(arr) - expected = np.array(["2000-01-01T00:00:00.000000000"], dtype="M8[ns]") + expected = np.array(["2000-01-01T00:00:00.000000"], dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index a54e0071aa006..e86459d0776c0 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -266,7 +266,9 @@ def test_categorical_with_nan_consistency(): ) expected = hash_array(c, categorize=False) - c = pd.Categorical.from_codes([-1, 0], categories=[pd.Timestamp("2012-01-01")]) + c = pd.Categorical.from_codes( + [-1, 0], categories=[pd.Timestamp("2012-01-01").as_unit("ns")] + ) result = hash_array(c, categorize=False) assert result[0] in expected diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 120470b09a92b..7a4c975771265 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -579,6 +579,7 @@ def test_groupby_rolling_string_index(self): ], columns=["index", "group", "eventTime"], ).set_index("index") + df["eventTime"] = df["eventTime"].astype("M8[ns]") groups = df.groupby("group") df["count_to_date"] = groups.cumcount() @@ -594,6 +595,7 @@ def test_groupby_rolling_string_index(self): ], columns=["index", "group", "eventTime", "count_to_date"], ).set_index(["group", "index"]) + expected["eventTime"] = expected["eventTime"].astype("M8[ns]") tm.assert_frame_equal(result, expected) def test_groupby_rolling_no_sort(self): @@ -871,10 +873,10 @@ def test_groupby_level(self): ["id", "index"], { "date": [ - Timestamp("2018-01-01"), - Timestamp("2018-01-02"), - Timestamp("2018-01-01"), - Timestamp("2018-01-02"), + Timestamp("2018-01-01").as_unit("ns"), + Timestamp("2018-01-02").as_unit("ns"), + Timestamp("2018-01-01").as_unit("ns"), + Timestamp("2018-01-02").as_unit("ns"), ], "num": [100.0, 200.0, 150.0, 250.0], }, diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 47bfc219d0fe9..9684097795cc1 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -257,11 +257,13 @@ 
def test_datetimelike_centered_offset_covers_all( ): # GH 42753 - index = [ - Timestamp("20130101 09:00:01"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:02"), - ] + index = DatetimeIndex( + [ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:02"), + ] + ).as_unit("ns") df = frame_or_series([1, 1, 1], index=index) result = df.rolling(window, closed=closed, center=True).sum() @@ -292,7 +294,7 @@ def test_datetimelike_nonunique_index_centering( "2020-01-04", "2020-01-04", ] - ) + ).as_unit("ns") df = frame_or_series([1] * 8, index=index, dtype=float) expected = frame_or_series(expected, index=index, dtype=float) @@ -326,7 +328,7 @@ def test_variable_window_nonunique(closed, expected, frame_or_series): "2011-01-05", "2011-01-06", ] - ) + ).as_unit("ns") df = frame_or_series(range(10), index=index, dtype=float) expected = frame_or_series(expected, index=index, dtype=float) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 820b0134cc577..4dd02e9f3e828 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -28,13 +28,15 @@ def regular(): @pytest.fixture def ragged(): df = DataFrame({"B": range(5)}) - df.index = [ - Timestamp("20130101 09:00:00"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:05"), - Timestamp("20130101 09:00:06"), - ] + df.index = Index( + [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + ).as_unit("ns") return df From 3b5251f34b38538260e70ec6b7d2b912e9e208a8 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 08:14:39 -0800 Subject: [PATCH 019/105] revert commented-out --- pandas/tests/indexes/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 75193214cc772..4c703c3af944b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -144,7 +144,7 @@ def test_constructor_from_series_freq(self): # GH 6273 # create from a series, passing a freq dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] - expected = DatetimeIndex(dts, freq="MS") # .as_unit("ns") + expected = DatetimeIndex(dts, freq="MS") s = Series(pd.to_datetime(dts)) result = DatetimeIndex(s, freq="MS") From aea3846cff88df5c3ae47d08e4e955f4aa09ca4f Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 08:14:53 -0800 Subject: [PATCH 020/105] revert commented-out --- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 87f024321f273..de7d644698f2c 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -624,7 +624,7 @@ def test_loc_period_string_indexing(): def test_loc_datetime_mask_slicing(): # GH 16699 - dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) # .as_unit("s") + dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) df = DataFrame( data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] From cec8317e4cb29c153f575f53fe42e61a96ae2da4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 
08:17:20 -0800 Subject: [PATCH 021/105] revert commented-out --- pandas/tests/io/parser/usecols/test_parse_dates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index f444fe9c1b6ee..bc66189ca064e 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -80,7 +80,7 @@ def test_usecols_with_parse_dates2(all_parsers): Timestamp("2008-02-07 10:00"), ], name="date", - ) # .as_unit("ns") + ) cols = {"values": [1032.43, 1042.54, 1051.65]} expected = DataFrame(cols, index=index) @@ -105,7 +105,7 @@ def test_usecols_with_parse_dates3(all_parsers): parse_dates = [0] cols = { - "a": Timestamp("2016-09-21"), # .as_unit("ns"), + "a": Timestamp("2016-09-21").as_unit("ns"), "b": [1], "c": [1], "d": [2], From 700e54bfbc0d2e605e6612a4d7a5d1af9c4a714a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 08:22:20 -0800 Subject: [PATCH 022/105] remove commented-out --- pandas/tests/io/test_stata.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 94ef11fb59f7b..0eea5476af36f 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -203,9 +203,9 @@ def test_read_dta2(self, datapath): # buggy test because of the NaT comparison on certain platforms # Format 113 test fails since it does not support tc and tC formats # tm.assert_frame_equal(parsed_113, expected) - tm.assert_frame_equal(parsed_114, expected) # , check_datetimelike_compat=True) - tm.assert_frame_equal(parsed_115, expected) # , check_datetimelike_compat=True) - tm.assert_frame_equal(parsed_117, expected) # , check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_114, expected) + tm.assert_frame_equal(parsed_115, expected) + tm.assert_frame_equal(parsed_117, expected) @pytest.mark.parametrize( "file", ["stata3_113", "stata3_114", "stata3_115", "stata3_117"] @@ -905,8 +905,8 @@ def test_big_dates(self, datapath): parsed_115 = read_stata(datapath("io", "data", "stata", "stata9_115.dta")) parsed_117 = read_stata(datapath("io", "data", "stata", "stata9_117.dta")) - tm.assert_frame_equal(expected, parsed_115) # , check_datetimelike_compat=True) - tm.assert_frame_equal(expected, parsed_117) # , check_datetimelike_compat=True) + tm.assert_frame_equal(expected, parsed_115) + tm.assert_frame_equal(expected, parsed_117) date_conversion = {c: c[-2:] for c in columns} # {c : c[-2:] for c in columns} @@ -918,7 +918,6 @@ def test_big_dates(self, datapath): tm.assert_frame_equal( written_and_read_again.set_index("index"), expected.set_index(expected.index.astype(np.int32)), - # check_datetimelike_compat=True, ) def test_dtype_conversion(self, datapath): @@ -1207,7 +1206,7 @@ def test_read_chunks_117( tm.assert_frame_equal( from_frame, chunk, - check_dtype=False, # , check_datetimelike_compat=True + check_dtype=False, ) pos += chunksize @@ -1301,7 +1300,7 @@ def test_read_chunks_115( tm.assert_frame_equal( from_frame, chunk, - check_dtype=False, # , check_datetimelike_compat=True + check_dtype=False, ) pos += chunksize From a1253b074aff6ac460e933086759da84cfb0da27 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 08:36:04 -0800 Subject: [PATCH 023/105] remove comment --- pandas/tests/io/test_stata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 
0eea5476af36f..a31abfde61323 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -193,7 +193,6 @@ def test_read_dta2(self, datapath): parsed_115 = self.read_dta(path2) with tm.assert_produces_warning(UserWarning): parsed_117 = self.read_dta(path3) - # FIXME: don't leave commented-out # 113 is buggy due to limits of date format support in Stata # parsed_113 = self.read_dta( # datapath("io", "data", "stata", "stata2_113.dta") From 38faad64145d12f8f10319c3d031e5d72241e6e2 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 09:10:03 -0800 Subject: [PATCH 024/105] revert unnecessary --- pandas/tests/window/test_timeseries_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 4dd02e9f3e828..07f63212d9d02 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -36,7 +36,7 @@ def ragged(): Timestamp("20130101 09:00:05"), Timestamp("20130101 09:00:06"), ] - ).as_unit("ns") + ) return df From 543172e69e6d6e48b6c66638f2bcfaee3531c060 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 09:18:06 -0800 Subject: [PATCH 025/105] revert unnecessary --- pandas/tests/window/test_rolling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9684097795cc1..356356e221dc5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -263,7 +263,7 @@ def test_datetimelike_centered_offset_covers_all( Timestamp("20130101 09:00:02"), Timestamp("20130101 09:00:02"), ] - ).as_unit("ns") + ) df = frame_or_series([1, 1, 1], index=index) result = df.rolling(window, closed=closed, center=True).sum() @@ -294,7 +294,7 @@ def test_datetimelike_nonunique_index_centering( "2020-01-04", "2020-01-04", ] - ).as_unit("ns") + ) df = frame_or_series([1] * 8, index=index, dtype=float) expected = frame_or_series(expected, index=index, dtype=float) @@ -328,7 +328,7 @@ def test_variable_window_nonunique(closed, expected, frame_or_series): "2011-01-05", "2011-01-06", ] - ).as_unit("ns") + ) df = frame_or_series(range(10), index=index, dtype=float) expected = frame_or_series(expected, index=index, dtype=float) From 176717aa291f57af425c5e52cffd6fc7399c4094 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 14:24:38 -0800 Subject: [PATCH 026/105] fix window tests --- pandas/tests/window/test_timeseries_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 07f63212d9d02..b7f1027f52666 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -28,7 +28,7 @@ def regular(): @pytest.fixture def ragged(): df = DataFrame({"B": range(5)}) - df.index = Index( + df.index = DatetimeIndex( [ Timestamp("20130101 09:00:00"), Timestamp("20130101 09:00:02"), From 49329427c65c69474c0f815f0c9a8ccd9a53d59b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 14:29:46 -0800 Subject: [PATCH 027/105] Fix resample tests --- pandas/tests/resample/test_datetime_index.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 2ff1c7642ea6a..ae486578da0b1 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ 
b/pandas/tests/resample/test_datetime_index.py @@ -1375,7 +1375,6 @@ def test_resample_timegrouper(dates, unit): @pytest.mark.parametrize("dates", [dates1, dates2, dates3]) def test_resample_timegrouper2(dates, unit): dates = DatetimeIndex(dates).as_unit(unit) - df = DataFrame({"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))}) result = df.set_index("A").resample("ME").count() From 085a192f334e08acf0fa65c690d3b7cdef910f12 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 14:49:00 -0800 Subject: [PATCH 028/105] restore comment --- pandas/tests/io/test_stata.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index a31abfde61323..0eea5476af36f 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -193,6 +193,7 @@ def test_read_dta2(self, datapath): parsed_115 = self.read_dta(path2) with tm.assert_produces_warning(UserWarning): parsed_117 = self.read_dta(path3) + # FIXME: don't leave commented-out # 113 is buggy due to limits of date format support in Stata # parsed_113 = self.read_dta( # datapath("io", "data", "stata", "stata2_113.dta") From b8df28170ee0fa5150dd75e38e26fc56e5742d6b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 15:24:56 -0800 Subject: [PATCH 029/105] revert unnecessary --- pandas/tests/window/test_rolling.py | 12 +++++------- pandas/tests/window/test_timeseries_window.py | 16 +++++++--------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 356356e221dc5..47bfc219d0fe9 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -257,13 +257,11 @@ def test_datetimelike_centered_offset_covers_all( ): # GH 42753 - index = DatetimeIndex( - [ - Timestamp("20130101 09:00:01"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:02"), - ] - ) + index = [ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:02"), + ] df = frame_or_series([1, 1, 1], index=index) result = df.rolling(window, closed=closed, center=True).sum() diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index b7f1027f52666..820b0134cc577 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -28,15 +28,13 @@ def regular(): @pytest.fixture def ragged(): df = DataFrame({"B": range(5)}) - df.index = DatetimeIndex( - [ - Timestamp("20130101 09:00:00"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:03"), - Timestamp("20130101 09:00:05"), - Timestamp("20130101 09:00:06"), - ] - ) + df.index = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] return df From 1cd91b434ca2e97674b6cae30153645c899f49e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 15:37:20 -0800 Subject: [PATCH 030/105] remove no-longer necessary --- pandas/tests/tseries/holiday/test_holiday.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index 56b457743628e..b2eefd04ef93b 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -321,7 +321,7 @@ def test_holidays_with_timezone_specified_but_no_occurences(): # GH 54580 # 
_apply_rule() in holiday.py was silently dropping timezones if you passed it # an empty list of holiday dates that had timezone information - start_date = Timestamp("2018-01-01", tz="America/Chicago").as_unit("ns") + start_date = Timestamp("2018-01-01", tz="America/Chicago") end_date = Timestamp("2018-01-11", tz="America/Chicago") test_case = USFederalHolidayCalendar().holidays( start_date, end_date, return_name=True From 99cd5818794589cbbb1b69c32b0ba259e28d732c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 15:43:13 -0800 Subject: [PATCH 031/105] revert no-longer-necessary --- pandas/tests/series/methods/test_value_counts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index e5cef04afce23..7f882fa348b7e 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -52,7 +52,7 @@ def test_value_counts_datetime_tz(self, unit): exp_idx = pd.DatetimeIndex( ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"], - dtype="M8[ns, US/Eastern]", + tz="US/Eastern", name="xxx", ).as_unit(unit) exp = Series([3, 2, 1], index=exp_idx, name="count") From 237076f977cf0cf6d9c8603cc91596a0f7888e89 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 14 Nov 2023 15:43:42 -0800 Subject: [PATCH 032/105] revert no-longer-necessary --- pandas/tests/series/methods/test_map.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index a87907e28fbdb..f4f72854e50d3 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -450,8 +450,8 @@ def test_map_box_dt64(unit): def test_map_box_dt64tz(unit): vals = [ - pd.Timestamp("2011-01-01", tz="US/Eastern").as_unit("ns"), - pd.Timestamp("2011-01-02", tz="US/Eastern").as_unit("ns"), + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), ] ser = Series(vals).dt.as_unit(unit) assert ser.dtype == f"datetime64[{unit}, US/Eastern]" From 5eb5e4e8d72361ab25ca921c3a2bf1b5c6893f51 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 07:44:23 -0800 Subject: [PATCH 033/105] update tests --- pandas/tests/io/parser/usecols/test_parse_dates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index bc66189ca064e..5cb9efc5bbcbe 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -105,7 +105,7 @@ def test_usecols_with_parse_dates3(all_parsers): parse_dates = [0] cols = { - "a": Timestamp("2016-09-21").as_unit("ns"), + "a": Timestamp("2016-09-21"), "b": [1], "c": [1], "d": [2], From 0f726bec07c0e4a89a142541e12c02a38cd818b8 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 07:46:55 -0800 Subject: [PATCH 034/105] revert no-longer-necessary --- pandas/tests/frame/methods/test_asfreq.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index ef176ff0136e7..67bd616b6d939 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -203,8 +203,7 @@ def test_asfreq_with_unsorted_index(self, frame_or_series): result = frame_or_series(range(4), index=index) expected = 
result.reindex(sorted(index)) - # TODO: better for asfreq to return "s" and avoid as_unit here? - expected.index = expected.index._with_freq("infer").as_unit("ns") + expected.index = expected.index._with_freq("infer") result = result.asfreq("D") tm.assert_equal(result, expected) From 15bfc7f99ce03cec891effda6d5a1fe9179c6452 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 08:22:32 -0800 Subject: [PATCH 035/105] update tests --- pandas/tests/tseries/holiday/test_calendar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tseries/holiday/test_calendar.py b/pandas/tests/tseries/holiday/test_calendar.py index 99829857e6836..90e2e117852a2 100644 --- a/pandas/tests/tseries/holiday/test_calendar.py +++ b/pandas/tests/tseries/holiday/test_calendar.py @@ -57,10 +57,10 @@ def __init__(self, name=None, rules=None) -> None: jan2 = TestCalendar(rules=[Holiday("jan2", year=2015, month=1, day=2)]) # Getting holidays for Jan 1 should not alter results for Jan 2. - expected = DatetimeIndex(["01-Jan-2015"]).as_unit("ns") + expected = DatetimeIndex(["01-Jan-2015"]).as_unit("us") tm.assert_index_equal(jan1.holidays(), expected) - expected2 = DatetimeIndex(["02-Jan-2015"]).as_unit("ns") + expected2 = DatetimeIndex(["02-Jan-2015"]).as_unit("us") tm.assert_index_equal(jan2.holidays(), expected2) From 01d31740dc5fe54ff41ae6c4ebcb72c4d4110efc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 15:46:51 -0800 Subject: [PATCH 036/105] revert bits --- pandas/_libs/tslibs/strptime.pyx | 6 +- pandas/tests/tslibs/test_array_to_datetime.py | 61 ------------------- pandas/tests/util/test_hashing.py | 18 +++--- 3 files changed, 11 insertions(+), 74 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index fdef43e665327..ab0604d4d1f3e 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -62,7 +62,6 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, - NPY_FR_ns, get_datetime64_unit, import_pandas_datetime, npy_datetimestruct, @@ -253,6 +252,7 @@ cdef class DatetimeParseState: # datetime. self.found_naive_str = False self.found_other = False + self.creso = creso self.creso_ever_changed = False @@ -309,7 +309,7 @@ def array_strptime( fmt : string-like regex exact : matches must be exact if True, search if False errors : string specifying error handling, {'raise', 'coerce'} - creso : NPY_DATETIMEUNIT, default NPY_FR_ns + creso : NPY_DATETIMEUNIT, default NPY_FR_GENERIC Set to NPY_FR_GENERIC to infer a resolution. 
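+        A rough sketch of the intended use (editor's note, assuming the
+        inference behavior added in this series): parsing
+        np.array(["2000-01-01"], dtype=object) with fmt="%Y-%m-%d" and
+        creso left at NPY_FR_GENERIC would come back with second ("s")
+        resolution, while an explicit nanosecond creso keeps "M8[ns]".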
""" @@ -684,7 +684,7 @@ cdef tzinfo _parse_with_format( elif len(s) <= 6: item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us else: - item_reso[0] = NPY_FR_ns + item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ns # Pad to always return nanoseconds s += "0" * (9 - len(s)) us = int(s) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 0d8858aa77e59..5990f7722f2fb 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -15,7 +15,6 @@ tslib, ) from pandas._libs.tslibs.dtypes import NpyDatetimeUnit -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas import Timestamp import pandas._testing as tm @@ -296,63 +295,3 @@ def test_datetime_subclass(klass): expected = np.array(["2000-01-01T00:00:00.000000"], dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) - - -class TestArrayToDatetimeResolutionInference: - # TODO: tests that include tzs, ints - - def test_infer_homogeoneous_datetimes(self): - dt = datetime(2023, 10, 27, 18, 3, 5, 678000) - arr = np.array([dt, dt, dt], dtype=object) - result, tz = tslib.array_to_datetime(arr, creso=creso_infer) - assert tz is None - expected = np.array([dt, dt, dt], dtype="M8[us]") - tm.assert_numpy_array_equal(result, expected) - - def test_infer_homogeoneous_date_objects(self): - dt = datetime(2023, 10, 27, 18, 3, 5, 678000) - dt2 = dt.date() - arr = np.array([None, dt2, dt2, dt2], dtype=object) - result, tz = tslib.array_to_datetime(arr, creso=creso_infer) - assert tz is None - expected = np.array([np.datetime64("NaT"), dt2, dt2, dt2], dtype="M8[s]") - tm.assert_numpy_array_equal(result, expected) - - def test_infer_homogeoneous_dt64(self): - dt = datetime(2023, 10, 27, 18, 3, 5, 678000) - dt64 = np.datetime64(dt, "ms") - arr = np.array([None, dt64, dt64, dt64], dtype=object) - result, tz = tslib.array_to_datetime(arr, creso=creso_infer) - assert tz is None - expected = np.array([np.datetime64("NaT"), dt64, dt64, dt64], dtype="M8[ms]") - tm.assert_numpy_array_equal(result, expected) - - def test_infer_homogeoneous_timestamps(self): - dt = datetime(2023, 10, 27, 18, 3, 5, 678000) - ts = Timestamp(dt).as_unit("ns") - arr = np.array([None, ts, ts, ts], dtype=object) - result, tz = tslib.array_to_datetime(arr, creso=creso_infer) - assert tz is None - expected = np.array([np.datetime64("NaT")] + [ts.asm8] * 3, dtype="M8[ns]") - tm.assert_numpy_array_equal(result, expected) - - def test_infer_homogeoneous_datetimes_strings(self): - item = "2023-10-27 18:03:05.678000" - arr = np.array([None, item, item, item], dtype=object) - result, tz = tslib.array_to_datetime(arr, creso=creso_infer) - assert tz is None - expected = np.array([np.datetime64("NaT"), item, item, item], dtype="M8[us]") - tm.assert_numpy_array_equal(result, expected) - - def test_infer_heterogeneous(self): - dtstr = "2023-10-27 18:03:05.678000" - - arr = np.array([dtstr, dtstr[:-3], dtstr[:-7], None], dtype=object) - result, tz = tslib.array_to_datetime(arr, creso=creso_infer) - assert tz is None - expected = np.array(arr, dtype="M8[us]") - tm.assert_numpy_array_equal(result, expected) - - result, tz = tslib.array_to_datetime(arr[::-1], creso=creso_infer) - assert tz is None - tm.assert_numpy_array_equal(result, expected[::-1]) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index e86459d0776c0..e654534ccd453 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -260,16 +260,14 @@ def 
test_categorical_consistency(s1, categorize): tm.assert_series_equal(h1, h3) -def test_categorical_with_nan_consistency(): - c = pd.Categorical.from_codes( - [-1, 0, 1, 2, 3, 4], categories=pd.date_range("2012-01-01", periods=5, name="B") - ) - expected = hash_array(c, categorize=False) - - c = pd.Categorical.from_codes( - [-1, 0], categories=[pd.Timestamp("2012-01-01").as_unit("ns")] - ) - result = hash_array(c, categorize=False) +def test_categorical_with_nan_consistency(unit): + dti = pd.date_range("2012-01-01", periods=5, name="B", unit=unit) + cat = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4], categories=dti) + expected = hash_array(cat, categorize=False) + + ts = pd.Timestamp("2012-01-01").as_unit(unit) + cat2 = pd.Categorical.from_codes([-1, 0], categories=[ts]) + result = hash_array(cat2, categorize=False) assert result[0] in expected assert result[1] in expected From 32efbe7cf83681b62c7b855b9ba27f08fb23420d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 17:19:47 -0800 Subject: [PATCH 037/105] update tests --- pandas/tests/indexes/datetimes/test_date_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index f7fecbae5598f..e26f35f4e8258 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -328,7 +328,7 @@ def test_date_range_convenience_periods(self, unit): ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"], dtype=f"M8[{unit}]", freq=None, - ).as_unit("ns") + ) tm.assert_index_equal(result, expected) From 55faffad7d2bcf2525756de4ef5041c119d7e080 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 15 Nov 2023 19:22:26 -0800 Subject: [PATCH 038/105] cleanup --- pandas/core/arrays/datetimes.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c29117bd73a0a..faad71844659e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2247,10 +2247,8 @@ def _sequence_to_dt64( data_dtype = getattr(data, "dtype", None) out_dtype = DT64NS_DTYPE - out_reso = abbrev_to_npy_unit(None) # NPY_FR_GENERIC if out_unit is not None: out_dtype = np.dtype(f"M8[{out_unit}]") - out_reso = abbrev_to_npy_unit(out_unit) if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, @@ -2276,7 +2274,7 @@ def _sequence_to_dt64( dayfirst=dayfirst, yearfirst=yearfirst, allow_object=False, - out_reso=out_reso, + out_unit=out_unit, ) copy = False if tz and inferred_tz: @@ -2384,7 +2382,7 @@ def objects_to_datetime64( utc: bool = False, errors: DateTimeErrorChoices = "raise", allow_object: bool = False, - out_reso: int = 14, + out_unit: str | None = None, ) -> tuple[np.ndarray, tzinfo | None]: """ Convert data to array of timestamps. @@ -2400,9 +2398,8 @@ def objects_to_datetime64( allow_object : bool Whether to return an object-dtype ndarray instead of raising if the data contains more than one timezone. - out_reso : int, default 14 - 14 corresponds to NPY_FR_GENERIC, which indicates to infer - a resolution. + out_unit : str or None, default None + None indicates we should do resolution inference. 
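+        For example (editor's illustration, assuming the inference
+        described above): object-dtype input like
+        np.array(["2000-01-01"], dtype=object) would come back as an
+        "M8[s]" array when out_unit is None, while out_unit="ns"
+        forces an "M8[ns]" result.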
Returns ------- @@ -2429,7 +2426,7 @@ def objects_to_datetime64( utc=utc, dayfirst=dayfirst, yearfirst=yearfirst, - creso=out_reso, + creso=abbrev_to_npy_unit(out_unit), ) if tz_parsed is not None: From e4f8549c8cdb7459b678583833387e304f015ee7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 18 Nov 2023 08:06:21 -0800 Subject: [PATCH 039/105] revert --- pandas/tests/series/methods/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 4676e1543abe5..4b2122e25f819 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -236,7 +236,7 @@ def test_astype_datetime(self, unit): ser = ser.astype("O") assert ser.dtype == np.object_ - ser = Series([datetime(2001, 1, 2, 0, 0)], dtype="M8[ns]") + ser = Series([datetime(2001, 1, 2, 0, 0)]) ser = ser.astype("O") assert ser.dtype == np.object_ From c0ea5305089487dcd9320c55d5f81e4ca7c74a53 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 18 Nov 2023 08:28:37 -0800 Subject: [PATCH 040/105] revert --- pandas/tests/resample/test_datetime_index.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index ae486578da0b1..c5ef0f39ece19 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1264,7 +1264,6 @@ def test_resample_median_bug_1688(dtype, unit): index=dti, dtype=dtype, ) - df.index = df.index.as_unit("ns") result = df.resample("min").apply(lambda x: x.mean()) exp = df.asfreq("min") @@ -1375,6 +1374,7 @@ def test_resample_timegrouper(dates, unit): @pytest.mark.parametrize("dates", [dates1, dates2, dates3]) def test_resample_timegrouper2(dates, unit): dates = DatetimeIndex(dates).as_unit(unit) + df = DataFrame({"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))}) result = df.set_index("A").resample("ME").count() @@ -2129,7 +2129,6 @@ def test_resample_c_b_closed_right(freq: str, unit): }, index=exp_dti, ).astype(f"M8[{unit}]") - tm.assert_frame_equal(result, expected) From f342b5f4c7804fe239c5b81c5b0302c43e58bc04 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 18 Nov 2023 17:19:54 -0800 Subject: [PATCH 041/105] parametrize over unit --- pandas/tests/reshape/test_qcut.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index a3d1b677fcf86..18e5453eb8aea 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -271,8 +271,10 @@ def test_datetime_tz_qcut(bins): ], ], ) -def test_date_like_qcut_bins(arg, expected_bins): +def test_date_like_qcut_bins(arg, expected_bins, unit): # see gh-19891 + arg = arg.as_unit(unit) + expected_bins = expected_bins.as_unit(unit) ser = Series(arg) result, result_bins = qcut(ser, 2, retbins=True) tm.assert_index_equal(result_bins, expected_bins) From c94fbe25f76a970cdb5971cd188c83a43e6c7dd7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Nov 2023 10:02:06 -0800 Subject: [PATCH 042/105] update tests --- pandas/tests/arrays/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 72b596e00f500..b2c0d30e4bcd4 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -128,7 +128,7 @@ def test_dt64_array(dtype_unit): ( 
pd.DatetimeIndex(["2000", "2001"]), None, - DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"), ), ( ["2000", "2001"], From 0a649e0fc99586a0cf9aedd120c208901f486fb7 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Nov 2023 10:11:26 -0800 Subject: [PATCH 043/105] update tests --- pandas/tests/reshape/merge/test_merge_asof.py | 2 -- pandas/tests/reshape/test_qcut.py | 2 +- pandas/tests/series/test_constructors.py | 8 ++++---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 542cc657c54ff..4fc57c14ec4c3 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3482,7 +3482,6 @@ def test_merge_asof_array_as_on(unit): "ts": dti, } ) - right["ts"] = right["ts"].astype("M8[ns]") ts_merge = pd.date_range( start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h", unit=unit ) @@ -3513,7 +3512,6 @@ def test_merge_asof_array_as_on(unit): "b": [4, 8], } ) - expected["ts"] = expected["ts"].astype("M8[ns]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 18e5453eb8aea..5f769db7f8acf 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -267,7 +267,7 @@ def test_datetime_tz_qcut(bins): ], [ date_range("20180101", periods=3), - DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]).as_unit("ns"), + DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]), ], ], ) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 70f8f4c09ac41..dd6016e015f5f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1084,16 +1084,16 @@ def test_constructor_dtype_datetime64_4(self): def test_constructor_dtype_datetime64_3(self): # if we passed a NaT it remains ser = Series([datetime(2010, 1, 1), datetime(2, 1, 1), NaT]) - assert ser.dtype == "object" + assert ser.dtype == "M8[us]" assert ser[2] is NaT assert "NaT" in str(ser) def test_constructor_dtype_datetime64_2(self): # if we passed a nan it remains ser = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) - assert ser.dtype == "object" - assert ser[2] is np.nan - assert "NaN" in str(ser) + assert ser.dtype == "M8[us]" + assert ser[2] is NaT + assert "NaT" in str(ser) def test_constructor_with_datetime_tz(self): # 8260 From 16d6c2d10160423631c82d33b5f50d999562946f Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Nov 2023 10:12:29 -0800 Subject: [PATCH 044/105] revert no-longer-needed --- pandas/tests/frame/test_arithmetic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f975fa44d33de..3cf6d31390c2f 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1032,7 +1032,6 @@ def test_frame_with_frame_reindex(self): {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, columns=["bar", "foo"], ) - expected["foo"] = expected["foo"].astype("m8[s]") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( From 0c735b6cc5db73ecacba9a56eee7fcbfc6ccb069 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 19 Nov 2023 10:14:09 -0800 Subject: [PATCH 045/105] revert no-longer-necessary --- pandas/tests/indexes/datetimes/methods/test_astype.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index 6dbdc59031d6d..c0bc6601769b1 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -171,7 +171,7 @@ def test_astype_datetime64(self): tm.assert_index_equal(result, idx) assert result is not idx - result = idx.astype("datetime64[s]", copy=False) + result = idx.astype("datetime64[ns]", copy=False) tm.assert_index_equal(result, idx) assert result is idx From ff431f2052a8b90a12d692a7266e35afe01696d9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 20 Nov 2023 13:51:15 -0800 Subject: [PATCH 046/105] revert no-longer-necessary --- pandas/tests/arithmetic/test_timedelta64.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index e47acd7d09dc0..2ac1c47868adf 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -497,7 +497,6 @@ def test_addition_ops(self): tdi + Index([1, 2, 3], dtype=np.int64) # this is a union! - # FIXME: don't leave commented-out # pytest.raises(TypeError, lambda : Index([1,2,3]) + tdi) result = tdi + dti # name will be reset From 50bf67516cc6e7490d5e55b37b5ebf9897404424 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 20 Nov 2023 13:52:58 -0800 Subject: [PATCH 047/105] revert no-longer-necessary --- pandas/_testing/asserters.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index cb4aa1c764ef7..3aacd3099c334 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1010,9 +1010,6 @@ def assert_series_equal( # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal - # TODO: this works for object-vs-dt64 but not e.g. 
dt64[ns] vs dt64[us], - # which AFAICT would have been intended at the time - # check_datetimelike_compat was implemented, xref GH#55638 if not Index(left._values).equals(Index(right._values)): msg = ( f"[datetimelike_compat=True] {left._values} " From 93a3ee4114d28852f6046ae17573af14c58e58a1 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 20 Nov 2023 13:54:52 -0800 Subject: [PATCH 048/105] revert no-longer-necessary --- pandas/tests/frame/methods/test_asfreq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 67bd616b6d939..f6b71626b6fee 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -31,7 +31,7 @@ def test_asfreq2(self, frame_or_series): ], dtype="M8[ns]", freq="BME", - ).as_unit("ns"), + ), ) daily_ts = ts.asfreq("B") From 9c259d531580b290b79c679d6e917aa4831f09a9 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 20 Nov 2023 14:46:11 -0800 Subject: [PATCH 049/105] Revert no-longer-necessary --- pandas/tests/window/test_groupby.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 7a4c975771265..120470b09a92b 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -579,7 +579,6 @@ def test_groupby_rolling_string_index(self): ], columns=["index", "group", "eventTime"], ).set_index("index") - df["eventTime"] = df["eventTime"].astype("M8[ns]") groups = df.groupby("group") df["count_to_date"] = groups.cumcount() @@ -595,7 +594,6 @@ def test_groupby_rolling_string_index(self): ], columns=["index", "group", "eventTime", "count_to_date"], ).set_index(["group", "index"]) - expected["eventTime"] = expected["eventTime"].astype("M8[ns]") tm.assert_frame_equal(result, expected) def test_groupby_rolling_no_sort(self): @@ -873,10 +871,10 @@ def test_groupby_level(self): ["id", "index"], { "date": [ - Timestamp("2018-01-01").as_unit("ns"), - Timestamp("2018-01-02").as_unit("ns"), - Timestamp("2018-01-01").as_unit("ns"), - Timestamp("2018-01-02").as_unit("ns"), + Timestamp("2018-01-01"), + Timestamp("2018-01-02"), + Timestamp("2018-01-01"), + Timestamp("2018-01-02"), ], "num": [100.0, 200.0, 150.0, 250.0], }, From d876178147dc6221bca8c347e8ed4b3bd2be5cbf Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 11:09:48 -0800 Subject: [PATCH 050/105] update test --- pandas/tests/arithmetic/test_period.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 5535fe8ff928d..5951fb734d155 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -1362,7 +1362,12 @@ def test_period_add_timestamp_raises(self, box_with_array): arr + ts with pytest.raises(TypeError, match=msg): ts + arr - msg = "cannot add PeriodArray and DatetimeArray" + if box_with_array is pd.DataFrame: + # TODO: before implementing resolution-inference we got the same + # message with DataFrame and non-DataFrame. Why did that change? 
+ msg = "cannot add PeriodArray and Timestamp" + else: + msg = "cannot add PeriodArray and DatetimeArray" with pytest.raises(TypeError, match=msg): arr + Series([ts]) with pytest.raises(TypeError, match=msg): From 589d0c5442e754226a56fe96e7f04bd5e72c74ae Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 14:11:49 -0800 Subject: [PATCH 051/105] update test --- pandas/tests/reshape/test_cut.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 3f24b4becf592..5b937ea667f98 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -468,10 +468,6 @@ def test_datetime_cut(unit, box): data = box(data) result, _ = cut(data, 3, retbins=True) - if box is list: - # We don't (yet) do inference on these, so get nanos - unit = "ns" - if unit == "s": # See https://github.com/pandas-dev/pandas/pull/56101#discussion_r1405325425 # for why we round to 8 seconds instead of 7 From 63cdcecaac3b1acf0aa9459b7a555f79a87efe25 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 21 Nov 2023 14:36:06 -0800 Subject: [PATCH 052/105] simplify --- pandas/tests/io/excel/test_readers.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index e11111c306eaf..6d42ecddac588 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -143,7 +143,6 @@ def df_ref(datapath): def get_exp_unit(read_ext: str, engine: str | None) -> str: unit = "us" if (read_ext == ".ods") ^ (engine == "calamine"): - # TODO: why is .ods & calamine a separate special case? unit = "s" return unit @@ -155,7 +154,6 @@ def adjust_expected(expected: DataFrame, read_ext: str, engine: str | None) -> N expected.index = expected.index.as_unit(unit) # type: ignore[attr-defined] - def xfail_datetimes_with_pyxlsb(engine, request): if engine == "pyxlsb": request.applymarker( @@ -488,8 +486,6 @@ def test_reader_special_dtypes(self, request, engine, read_ext): ), }, ) - if (read_ext == ".ods") ^ (engine == "calamine"): - expected["DateCol"] = expected["DateCol"].astype("M8[s]") basename = "test_types" # should read in correctly and infer types From fc27070219e84c1a06aa6b6b40434c935fa9c4f6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Nov 2023 10:55:14 -0800 Subject: [PATCH 053/105] update tests --- pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/tests/io/excel/test_readers.py | 8 ++++---- pandas/tests/io/parser/common/test_common_basic.py | 6 ++++-- pandas/tests/io/parser/common/test_index.py | 4 ++-- pandas/tests/io/parser/test_parse_dates.py | 8 ++++---- pandas/tests/io/parser/test_read_fwf.py | 5 +++-- pandas/tests/io/parser/test_skiprows.py | 12 ++++++++---- pandas/tests/series/accessors/test_dt_accessor.py | 2 +- 8 files changed, 27 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index ab0604d4d1f3e..760ec5fd975a2 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -467,7 +467,7 @@ def array_strptime( ival, tz, ambiguous="raise", nonexistent=None, creso=creso ) nsecs = (ival - iresult[i]) - if creso == NPY_FR_ns: + if creso == NPY_DATETIMEUNIT.NPY_FR_ns: nsecs = nsecs // 10**9 elif creso == NPY_DATETIMEUNIT.NPY_FR_us: nsecs = nsecs // 10**6 diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 6d42ecddac588..6d6c3ad6b77a7 100644 --- a/pandas/tests/io/excel/test_readers.py +++ 
b/pandas/tests/io/excel/test_readers.py @@ -1122,10 +1122,10 @@ def test_read_excel_multiindex_blank_after_name( unit = get_exp_unit(read_ext, engine) expected = DataFrame( [ - [1, 2.5, pd.Timestamp("2015-01-01").as_unit(unit), True], - [2, 3.5, pd.Timestamp("2015-01-02").as_unit(unit), False], - [3, 4.5, pd.Timestamp("2015-01-03").as_unit(unit), False], - [4, 5.5, pd.Timestamp("2015-01-04").as_unit(unit), True], + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], ], columns=mi, index=MultiIndex.from_arrays( diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 933b08bdfac53..5eb15b30bc1d4 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -112,8 +112,9 @@ def test_read_csv_local(all_parsers, csv1): datetime(2000, 1, 10), datetime(2000, 1, 11), ], + dtype="M8[s]", name="index", - ).as_unit("s"), + ), ) tm.assert_frame_equal(result, expected) @@ -213,8 +214,9 @@ def test_read_csv_dataframe(all_parsers, csv1): datetime(2000, 1, 10), datetime(2000, 1, 11), ], + dtype="M8[s]", name="index", - ).as_unit("s"), + ), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index a4e5b580366c4..7d2bde452ebf8 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -259,10 +259,10 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path): datetime(2000, 1, 5), datetime(2000, 1, 6), datetime(2000, 1, 7), - ] + ], + dtype="M8[s]", ), ) - expected.index = expected.index.as_unit("s") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1a5231dd4096c..f3e7ad6a68f4b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -477,8 +477,9 @@ def test_date_col_as_index_col(all_parsers): datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 22, 0), ], + dtype="M8[s]", name="X1", - ).as_unit("s") + ) expected = DataFrame( [ ["KORD", " 18:56:00", 0.81, 2.81, 7.2, 0.0, 280.0], @@ -758,8 +759,8 @@ def test_date_parser_int_bug(all_parsers): ), raise_on_extra_warnings=False, ) - dti = Index([Timestamp("2012-07-24 04:12:30")], name="posix_timestamp").as_unit( - "us" + dti = Index( + [Timestamp("2012-07-24 04:12:30")], dtype="M8[us]", name="posix_timestamp" ) expected = DataFrame( [ @@ -1824,7 +1825,6 @@ def test_parse_timezone(all_parsers): tz=timezone(timedelta(minutes=540)), unit="s", )._with_freq(None) - expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} expected = DataFrame(expected_data) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index ea43d3951af23..2972f0fa3e897 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -310,8 +310,9 @@ def test_fwf_regression(): "2009-06-13 20:40:00", "2009-06-13 20:50:00", "2009-06-13 21:00:00", - ] - ).as_unit("us"), + ], + dtype="M8[us]", + ), columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 6d974344fcfe2..fa3c5f462d917 100644 --- 
a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -42,8 +42,10 @@ def test_skip_rows_bug(all_parsers, skiprows): StringIO(text), skiprows=skiprows, header=None, index_col=0, parse_dates=True ) index = Index( - [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 - ).as_unit("s") + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + dtype="M8[s]", + name=0, + ) expected = DataFrame( np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index @@ -85,8 +87,10 @@ def test_skip_rows_blank(all_parsers): StringIO(text), skiprows=6, header=None, index_col=0, parse_dates=True ) index = Index( - [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 - ).as_unit("s") + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + dtype="M8[s]", + name=0, + ) expected = DataFrame( np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 0a18f9551d191..5f0057ac50b47 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -368,7 +368,7 @@ def test_dt_round_tz_nonexistent(self, method, ts_str, freq): tm.assert_series_equal(result, expected) result = getattr(ser.dt, method)(freq, nonexistent="NaT") - expected = Series([pd.NaT], dtype="M8[s]").dt.tz_localize(result.dt.tz) + expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) tm.assert_series_equal(result, expected) with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): From 060aeb9156c4bafaffe668303db80179c38f96a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 Nov 2023 15:55:29 -0800 Subject: [PATCH 054/105] update tests --- pandas/core/dtypes/cast.py | 2 +- pandas/tests/indexes/datetimes/test_date_range.py | 1 + pandas/tests/indexes/test_base.py | 2 +- pandas/tests/indexes/test_index_new.py | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b8b73e7dc6ddb..cdc0722bec346 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1195,7 +1195,7 @@ def maybe_infer_to_datetimelike( # numpy would have done it for us. 
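# A minimal sketch (not part of the patch; assumes this series is applied) of
# what the dtype_if_all_nat switch just below changes: all-NaT object-dtype
# data now infers second resolution instead of nanoseconds.
import pandas as pd

assert pd.Index([pd.NaT, pd.NaT]).dtype == "datetime64[s]"   # was datetime64[ns]
assert pd.Series([pd.NaT, pd.NaT]).dtype == "datetime64[s]"  # Series infers the same way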
convert_numeric=False, convert_non_numeric=True, - dtype_if_all_nat=np.dtype("M8[ns]"), + dtype_if_all_nat=np.dtype("M8[s]"), ) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e26f35f4e8258..229b45c15cc39 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -186,6 +186,7 @@ def test_date_range_edges(self, freq): [ts + n * td for n in range(1, 5)], dtype="M8[ns]", freq=freq, + dtype="M8[ns]", ) tm.assert_index_equal(idx, exp) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4c703c3af944b..83432ce07fa88 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -186,7 +186,7 @@ def test_constructor_int_dtype_nan(self): "klass,dtype,na_val", [ (Index, np.float64, np.nan), - (DatetimeIndex, "datetime64[ns]", pd.NaT), + (DatetimeIndex, "datetime64[s]", pd.NaT), ], ) def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 867d32e5c86a2..ddff225a2d32e 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -137,6 +137,9 @@ def test_constructor_infer_nat_dt_like( ) expected = klass([NaT, NaT]) + if dtype[0] == "d": + # we infer all-NaT as second resolution + expected = expected.astype("M8[ns]") assert expected.dtype == dtype data = [ctor] data.insert(pos, nulls_fixture) From 9cd33e9267a67b8fe86cf1c80c2b26a519bc7036 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 23 Nov 2023 08:05:10 -0800 Subject: [PATCH 055/105] update tests --- pandas/tests/arrays/categorical/test_missing.py | 2 +- pandas/tests/frame/methods/test_combine_first.py | 4 ---- pandas/tests/frame/test_constructors.py | 3 +++ pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/indexes/test_index_new.py | 8 ++++---- pandas/tests/series/test_constructors.py | 4 ++-- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 332d31e9e3fc2..b14e11a016701 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -149,7 +149,7 @@ def test_compare_categorical_with_missing(self, a1, a2, categories): @pytest.mark.parametrize( "na_value, dtype", [ - (pd.NaT, "datetime64[ns]"), + (pd.NaT, "datetime64[s]"), (None, "float64"), (np.nan, "float64"), (pd.NA, "float64"), diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index d1285e256b270..99c8ddc643fee 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -193,9 +193,6 @@ def test_combine_first_convert_datatime_correctly( df1, df2 = DataFrame({"a": data1}), DataFrame({"a": data2}) result = df1.combine_first(df2) expected = DataFrame({"a": data_expected}) - if df1.isna().all(axis=None): - expected = expected.astype("M8[ns]") - # equiv: expected = expected.astype(df1["a"].dtype) tm.assert_frame_equal(result, expected) def test_combine_first_align_nan(self): @@ -451,7 +448,6 @@ def test_combine_first_timestamp_bug_NaT(): expected = DataFrame( [[pd.NaT, datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["a", "b", "c"] ) - expected["b"] = expected["b"].astype("M8[ns]") tm.assert_frame_equal(result, expected) diff --git 
a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1b1b0f267974b..ea635f2d05558 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1976,6 +1976,9 @@ def test_constructor_datetimes_with_nulls(self, arr): if isinstance(arr, np.ndarray): # inferred from a pydatetime object unit = "us" + elif not any(isinstance(x, np.datetime64) for y in arr for x in y): + # TODO: this condition is not clear about why we have different behavior + unit = "s" expected = Series([np.dtype(f"datetime64[{unit}]")]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2d06c2f258539..5d565b997ab66 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1214,7 +1214,7 @@ def test_groupby_nat_exclude(): {"nan": [np.nan, np.nan, np.nan], "nat": [pd.NaT, pd.NaT, pd.NaT]} ) assert nan_df["nan"].dtype == "float64" - assert nan_df["nat"].dtype == "datetime64[ns]" + assert nan_df["nat"].dtype == "datetime64[s]" for key in ["nan", "nat"]: grouped = nan_df.groupby(key) diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index ddff225a2d32e..e904ac74d0ae9 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -60,16 +60,16 @@ def test_infer_nat(self, val): values = [NaT, val] idx = Index(values) - assert idx.dtype == "datetime64[ns]" and idx.isna().all() + assert idx.dtype == "datetime64[s]" and idx.isna().all() idx = Index(values[::-1]) - assert idx.dtype == "datetime64[ns]" and idx.isna().all() + assert idx.dtype == "datetime64[s]" and idx.isna().all() idx = Index(np.array(values, dtype=object)) - assert idx.dtype == "datetime64[ns]" and idx.isna().all() + assert idx.dtype == "datetime64[s]" and idx.isna().all() idx = Index(np.array(values, dtype=object)[::-1]) - assert idx.dtype == "datetime64[ns]" and idx.isna().all() + assert idx.dtype == "datetime64[s]" and idx.isna().all() @pytest.mark.parametrize("na_value", [None, np.nan]) @pytest.mark.parametrize("vtype", [list, tuple, iter]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index dd6016e015f5f..057af01d99569 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -752,7 +752,7 @@ def test_constructor_pass_nan_nat(self): tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) exp = Series([NaT, NaT]) - assert exp.dtype == "datetime64[ns]" + assert exp.dtype == "datetime64[s]" tm.assert_series_equal(Series([NaT, NaT]), exp) tm.assert_series_equal(Series(np.array([NaT, NaT])), exp) @@ -1215,7 +1215,7 @@ def test_construction_to_datetimelike_unit(self, arr_dtype, kind, unit): def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): # GH 17415: With naive string result = Series([arg], dtype="datetime64[ns, CET]") - expected = Series(Timestamp(arg).as_unit("ns")).dt.tz_localize("CET") + expected = Series([Timestamp(arg)], dtype="M8[ns]").dt.tz_localize("CET") tm.assert_series_equal(result, expected) def test_constructor_datetime64_bigendian(self): From aeebb39dbd4ebf08899e580b7af03e5836bc141e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 12:10:35 -0800 Subject: [PATCH 056/105] revert no-longer-necessary --- pandas/tests/resample/test_period_index.py | 2 +- pandas/tests/reshape/test_pivot.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff 
--git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 76d4f04f9e796..6fdc398b13835 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -551,7 +551,7 @@ def test_resample_tz_localized(self, unit): ts_local_naive.index = ts_local_naive.index.tz_localize(None) exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles") - exp.index = pd.DatetimeIndex(exp.index, freq="W").as_unit("ns") + exp.index = pd.DatetimeIndex(exp.index, freq="W") tm.assert_series_equal(result, exp) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3903034db490f..d6b61bae850af 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -578,8 +578,6 @@ def test_pivot_with_tz(self, method, unit): "data2": np.arange(4, dtype="int64"), } ) - df["dt1"] = df["dt1"].astype("M8[ns]") - df["dt2"] = df["dt2"].astype("M8[ns]") exp_col1 = Index(["data1", "data1", "data2", "data2"]) exp_col2 = pd.DatetimeIndex( From 40c09debd0fcdda2507d6bdafde47fd9d6855393 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 12:12:12 -0800 Subject: [PATCH 057/105] post-merge fixup --- pandas/tests/indexes/datetimes/test_date_range.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 229b45c15cc39..e26f35f4e8258 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -186,7 +186,6 @@ def test_date_range_edges(self, freq): [ts + n * td for n in range(1, 5)], dtype="M8[ns]", freq=freq, - dtype="M8[ns]", ) tm.assert_index_equal(idx, exp) From fba3b79fa6f78e7e5e175c38de21ad48ea7cfdaf Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 14:35:14 -0800 Subject: [PATCH 058/105] revert no-longer-necessary --- pandas/tests/resample/test_resampler_grouper.py | 15 --------------- pandas/tests/resample/test_time_grouper.py | 5 +---- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 405942a1b68bc..b312d708ade1e 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -128,9 +128,6 @@ def test_getitem_multiple(): index=exp_mi, name="buyer", ) - expected.index = expected.index.set_levels( - expected.index.levels[1].as_unit("ns"), level=1 - ) tm.assert_series_equal(result, expected) result = r["buyer"].count() @@ -548,9 +545,6 @@ def test_groupby_resample_with_list_of_keys(): }, index=mi_exp, ) - expected.index = expected.index.set_levels( - expected.index.levels[1].as_unit("ns"), level=1 - ) tm.assert_frame_equal(result, expected) @@ -624,9 +618,6 @@ def test_groupby_resample_size_all_index_same(): 3, index=mi_exp, ) - expected.index = expected.index.set_levels( - expected.index.levels[1].as_unit("ns"), level=1 - ) tm.assert_series_equal(result, expected) @@ -650,9 +641,6 @@ def test_groupby_resample_on_index_with_list_of_keys(): }, index=mi_exp, ) - expected.index = expected.index.set_levels( - expected.index.levels[1].as_unit("ns"), level=1 - ) tm.assert_frame_equal(result, expected) @@ -679,9 +667,6 @@ def test_groupby_resample_on_index_with_list_of_keys_multi_columns(): }, index=mi_exp, ) - expected.index = expected.index.set_levels( - expected.index.levels[1].as_unit("ns"), level=1 - ) 
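# Annotation (not part of the patch): the dtype= line deleted just above was a
# keyword argument accidentally duplicated a few commits earlier in this
# series; Python rejects a repeated keyword at compile time, which is why this
# post-merge fixup is needed. A minimal demonstration:
try:
    compile('f(dtype="M8[ns]", dtype="M8[ns]")', "<sketch>", "eval")
except SyntaxError as err:
    assert "keyword argument repeated" in str(err)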
tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index b4a2a3d330114..c5e202f36659b 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -331,7 +331,7 @@ def test_upsample_sum(method, method_args, expected_values): ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"], dtype="M8[ns]", freq="30min", - ).as_unit("ns") + ) result = methodcaller(method, **method_args)(resampled) expected = Series(expected_values, index=index) tm.assert_series_equal(result, expected) @@ -362,9 +362,6 @@ def test_groupby_resample_interpolate(): [volume, week_starting], names=["volume", "week_starting"], ) - expected_ind = expected_ind.set_levels( - expected_ind.levels[1].as_unit("ns"), level=1 - ) expected = DataFrame( data={ From 6422cce0dd1aaa667b3687370a3a1257644a935e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 24 Nov 2023 19:46:04 -0800 Subject: [PATCH 059/105] update tests --- pandas/tests/frame/methods/test_map.py | 7 +++-- pandas/tests/groupby/test_timegrouper.py | 31 +++++-------------- .../tests/groupby/transform/test_transform.py | 2 +- .../tests/indexes/datetimes/test_timezones.py | 2 +- .../indexes/interval/test_interval_range.py | 12 +++---- .../tests/indexing/multiindex/test_setitem.py | 10 ++---- pandas/tests/reshape/concat/test_datetimes.py | 3 +- pandas/tests/tslibs/test_array_to_datetime.py | 1 - 8 files changed, 23 insertions(+), 45 deletions(-) diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py index 841ef1c02ac82..9850de14b2092 100644 --- a/pandas/tests/frame/methods/test_map.py +++ b/pandas/tests/frame/methods/test_map.py @@ -158,14 +158,15 @@ def test_map_box(): tm.assert_frame_equal(result, expected) -def test_frame_map_dont_convert_datetime64(): - df = DataFrame({"x1": [datetime(1996, 1, 1)]}) +def test_frame_map_dont_convert_datetime64(unit): + dtype = f"M8[{unit}]" + df = DataFrame({"x1": [datetime(1996, 1, 1)]}, dtype=dtype) df = df.map(lambda x: x + BDay()) df = df.map(lambda x: x + BDay()) result = df.x1.dtype - assert result == "M8[us]" + assert result == dtype def test_map_function_runs_once(): diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 69301d0b0c0b7..aba3b2f27c633 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -46,7 +46,6 @@ def frame_for_truncated_bingrouper(): ], } ) - df["Date"] = df["Date"].astype("M8[ns]") return df @@ -92,7 +91,6 @@ def test_groupby_with_timegrouper(self): ], } ) - df_original["Date"] = df_original["Date"].astype("M8[ns]") # GH 6908 change target column's order df_reordered = df_original.sort_values(by="Quantity") @@ -183,7 +181,6 @@ def test_timegrouper_with_reg_groups(self): ], } ).set_index("Date") - df_original.index = df_original.index.as_unit("ns") df_sorted = df_original.sort_values(by="Quantity", ascending=False) @@ -198,9 +195,7 @@ def test_timegrouper_with_reg_groups(self): datetime(2013, 12, 31, 0, 0), ], } - ) - expected["Date"] = expected["Date"].astype("M8[ns]") - expected = expected.set_index(["Date", "Buyer"]) + ).set_index(["Date", "Buyer"]) msg = "The default value of numeric_only" result = df.groupby([Grouper(freq="YE"), "Buyer"]).sum(numeric_only=True) @@ -217,9 +212,7 @@ def test_timegrouper_with_reg_groups(self): datetime(2013, 7, 1, 0, 0), ], } - ) - expected["Date"] = expected["Date"].astype("M8[ns]") - expected = 
expected.set_index(["Date", "Buyer"]) + ).set_index(["Date", "Buyer"]) result = df.groupby([Grouper(freq="6MS"), "Buyer"]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -239,9 +232,7 @@ def test_timegrouper_with_reg_groups(self): datetime(2013, 10, 2, 14, 0), ], } - ) - df_original["Date"] = df_original["Date"].astype("M8[ns]") - df_original = df_original.set_index("Date") + ).set_index("Date") df_sorted = df_original.sort_values(by="Quantity", ascending=False) for df in [df_original, df_sorted]: @@ -257,9 +248,7 @@ def test_timegrouper_with_reg_groups(self): datetime(2013, 10, 2, 0, 0), ], } - ) - expected["Date"] = expected["Date"].astype("M8[ns]") - expected = expected.set_index(["Date", "Buyer"]) + ).set_index(["Date", "Buyer"]) result = df.groupby([Grouper(freq="1D"), "Buyer"]).sum(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -275,9 +264,7 @@ def test_timegrouper_with_reg_groups(self): datetime(2013, 10, 31, 0, 0), ], } - ) - expected["Date"] = expected["Date"].astype("M8[ns]") - expected = expected.set_index(["Date", "Buyer"]) + ).set_index(["Date", "Buyer"]) tm.assert_frame_equal(result, expected) # passing the name @@ -320,9 +307,7 @@ def test_timegrouper_with_reg_groups(self): datetime(2013, 11, 30, 0, 0), ], } - ) - expected["Date"] = expected["Date"].astype("M8[ns]") - expected = expected.set_index(["Date", "Buyer"]) + ).set_index(["Date", "Buyer"]) tm.assert_frame_equal(result, expected) # error as we have both a level and a name! @@ -338,7 +323,7 @@ def test_timegrouper_with_reg_groups(self): columns=["Quantity"], index=DatetimeIndex( [datetime(2013, 10, 31, 0, 0)], freq=offsets.MonthEnd(), name="Date" - ).as_unit("ns"), + ), ) result = df.groupby(Grouper(freq="1ME")).sum(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -587,7 +572,7 @@ def test_groupby_groups_datetimeindex_tz(self): ], tz="US/Pacific", name="datetime", - ).as_unit("s") + ) exp_idx2 = Index(["a", "b"] * 3, name="label") exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) expected = DataFrame( diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index dbf62af0e67c4..1bb3539830900 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -113,7 +113,7 @@ def test_transform_fast2(): { "grouping": [0, 1, 1, 3], "f": [1.1, 2.1, 3.1, 4.5], - "d": date_range("2014-1-1", "2014-1-4", unit="s"), + "d": date_range("2014-1-1", "2014-1-4"), "i": [1, 2, 3, 4], }, columns=["grouping", "f", "i", "d"], diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 02c4cc0d0c7e2..daa5b346eb4ec 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -108,7 +108,7 @@ def test_drop_dst_boundary(self): False, False, ], - ).as_unit("ns") + ) result = index.drop(index[0]) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py index f6ce59b743ef1..7aea481b49221 100644 --- a/pandas/tests/indexes/interval/test_interval_range.py +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -168,14 +168,14 @@ def test_no_invalid_float_truncation(self, start, end, freq): "start, mid, end", [ ( - Timestamp("2018-03-10", tz="US/Eastern").as_unit("ns"), - Timestamp("2018-03-10 23:30:00", tz="US/Eastern").as_unit("ns"), - Timestamp("2018-03-12", 
tz="US/Eastern").as_unit("ns"), + Timestamp("2018-03-10", tz="US/Eastern"), + Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), + Timestamp("2018-03-12", tz="US/Eastern"), ), ( - Timestamp("2018-11-03", tz="US/Eastern").as_unit("ns"), - Timestamp("2018-11-04 00:30:00", tz="US/Eastern").as_unit("ns"), - Timestamp("2018-11-05", tz="US/Eastern").as_unit("ns"), + Timestamp("2018-11-03", tz="US/Eastern"), + Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), + Timestamp("2018-11-05", tz="US/Eastern"), ), ], ) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index ac555fd12af09..abf89c2b0d096 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -77,16 +77,10 @@ def test_setitem_multiindex2(self): def test_setitem_multiindex3(self): # GH#11372 idx = MultiIndex.from_product( - [ - ["A", "B", "C"], - date_range("2015-01-01", "2015-04-01", freq="MS", unit="s"), - ] + [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] ) cols = MultiIndex.from_product( - [ - ["foo", "bar"], - date_range("2016-01-01", "2016-02-01", freq="MS", unit="s"), - ] + [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] ) df = DataFrame( diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 158ff2a502cde..6a9d359c96697 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -260,8 +260,7 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): # GH 12396 # tz-naive - # FIXME: without as_unit we get a FutureWarning about all-NA - first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1).dt.as_unit("s") + first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1) second = DataFrame( [ [Timestamp("2015/01/01", tz=tz2)], diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 5990f7722f2fb..1262d9d630ba0 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -184,7 +184,6 @@ def test_parsing_timezone_offsets(dt_string, expected_tz): def test_parsing_non_iso_timezone_offset(): - # FIXME: Timestamp(dt_string).unit should be nanos, is seconds dt_string = "01-01-2013T00:00:00.000000000+0000" arr = np.array([dt_string], dtype=object) From 553e90c45454951d031a6fb48bd990f7584773bf Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 26 Nov 2023 08:51:47 -0800 Subject: [PATCH 060/105] update test --- pandas/tests/test_algos.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 057a5a627370e..7fcc77f6f7ac6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1280,11 +1280,17 @@ def test_value_counts_datetime_outofbounds(self, dtype): ], dtype=dtype, ) - res = ser.value_counts() + + warn = FutureWarning if dtype == object else None + msg = "The behavior of value_counts with object-dtype is deprecated" + with tm.assert_produces_warning(warn, match=msg): + res = ser.value_counts() exp_index = Index( [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], - dtype=dtype, + # TODO(3.0): once the value_counts inference deprecation is enforced, + # this will be `dtype=dtype` + dtype="M8[us]", ) exp = Series([3, 2, 1], index=exp_index, name="count") tm.assert_series_equal(res, exp) From 861ecb1e9d010cf793cf85a50d071a258ed457f5 Mon Sep 17 00:00:00 
2001 From: Brock Date: Mon, 27 Nov 2023 13:53:55 -0800 Subject: [PATCH 061/105] update tests --- .../indexes/datetimes/test_constructors.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/tools/test_to_datetime.py | 23 +++++++------------ 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 16ee2aac5a9fb..9f9c638df4534 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -960,7 +960,7 @@ def test_dti_tz_constructors(self, tzstr): start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr, unit="s" ) idx2 = idx2._with_freq(None) # the others all have freq=None - idx3 = DatetimeIndex(arr, tz=tzstr).as_unit("s") # .as_unit("ns") + idx3 = DatetimeIndex(arr, tz=tzstr).as_unit("s") idx4 = DatetimeIndex(np.array(arr), tz=tzstr).as_unit("s") tm.assert_index_equal(idx1, idx2) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 057af01d99569..a67d11c5c69c4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -970,7 +970,7 @@ def test_constructor_dtype_datetime64_10(self): # GH3414 related expected = Series(pydates, dtype="datetime64[ms]") - result = Series(Series(dates).view(np.int64) / 1000, dtype="M8[ms]") + result = Series(Series(dates).astype(np.int64) / 1000, dtype="M8[ms]") tm.assert_series_equal(result, expected) result = Series(dates, dtype="datetime64[ms]") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 1982404d3bd05..8b6c422dbae04 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -246,7 +246,6 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected): ) def test_to_datetime_with_NA(self, data, format, expected): # GH#42957 - # expected = expected.as_unit("ns") result = to_datetime(data, format=format) expected = DatetimeIndex(expected) tm.assert_index_equal(result, expected) @@ -442,7 +441,7 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache): def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): # GH 13486 result = to_datetime(dates, format=fmt) - expected = Index(expected_dates) # .as_unit("ns") + expected = Index(expected_dates) tm.assert_equal(result, expected) @pytest.mark.parametrize( @@ -500,7 +499,7 @@ def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): fmt = "%Y-%m-%d %H:%M:%S %z" result = to_datetime(dates, format=fmt, utc=True) - expected = DatetimeIndex(expected_dates) # .as_unit("ns") + expected = DatetimeIndex(expected_dates) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -526,9 +525,7 @@ def test_to_datetime_parse_timezone_keeps_name(self): fmt = "%Y-%m-%d %H:%M:%S %z" arg = Index(["2010-01-01 12:00:00 Z"], name="foo") result = to_datetime(arg, format=fmt) - expected = DatetimeIndex( - ["2010-01-01 12:00:00"], tz="UTC", name="foo" - ) # .as_unit("ns") + expected = DatetimeIndex(["2010-01-01 12:00:00"], tz="UTC", name="foo") tm.assert_index_equal(result, expected) @@ -557,9 +554,7 @@ def test_to_datetime_mixed_datetime_and_string(self): d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1))) d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1))) res = to_datetime(["2020-01-01 17:00 -0100", d2]) - expected = to_datetime([d1, 
d2]).tz_convert( - timezone(timedelta(minutes=-60)) - ) # .as_unit("ns") + expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60))) tm.assert_index_equal(res, expected) def test_to_datetime_mixed_string_and_numeric(self): @@ -1069,7 +1064,7 @@ def test_to_datetime_dt64s(self, cache, dt): def test_to_datetime_dt64s_and_str(self, arg, format): # https://github.com/pandas-dev/pandas/issues/50036 result = to_datetime([arg, np.datetime64("2020-01-01")], format=format) - expected = DatetimeIndex(["2001-01-01", "2020-01-01"]) # .as_unit("ns") + expected = DatetimeIndex(["2001-01-01", "2020-01-01"]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -1602,11 +1597,11 @@ def test_iso_8601_strings_with_same_offset(self): expected = Timestamp(ts_str) assert result == expected - expected = DatetimeIndex([Timestamp(ts_str)] * 2) # .as_unit("ns") + expected = DatetimeIndex([Timestamp(ts_str)] * 2) result = to_datetime([ts_str] * 2) tm.assert_index_equal(result, expected) - result = DatetimeIndex([ts_str] * 2) # .as_unit("ns") + result = DatetimeIndex([ts_str] * 2) tm.assert_index_equal(result, expected) def test_iso_8601_strings_with_different_offsets_removed(self): @@ -3446,9 +3441,7 @@ def test_to_datetime_format_f_parse_nanos(): def test_to_datetime_mixed_iso8601(): # https://github.com/pandas-dev/pandas/issues/50411 result = to_datetime(["2020-01-01", "2020-01-01 05:00:00"], format="ISO8601") - expected = DatetimeIndex( - ["2020-01-01 00:00:00", "2020-01-01 05:00:00"] - ) # .as_unit("ns") + expected = DatetimeIndex(["2020-01-01 00:00:00", "2020-01-01 05:00:00"]) tm.assert_index_equal(result, expected) From eedb25670d92d2bb0328ed44356a89d05c355658 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 28 Nov 2023 10:47:47 -0800 Subject: [PATCH 062/105] update tests --- pandas/tests/frame/methods/test_reset_index.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 1af995c2da6b0..448c57273f739 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -71,9 +71,7 @@ def test_reset_index_tz(self, tz_aware_fixture): # GH 3950 # reset_index with single level tz = tz_aware_fixture - idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx").as_unit( - "us" - ) + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) expected = DataFrame( @@ -478,12 +476,9 @@ def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicate def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 tz = tz_naive_fixture - idx1 = date_range( - "1/1/2011", periods=5, freq="D", tz=tz, name="idx1", unit="us" - ) + idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") idx2 = Index(range(5), name="idx2", dtype="int64") idx = MultiIndex.from_arrays([idx1, idx2]) - assert idx.levels[0].unit == "us" df = DataFrame( {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, index=idx, @@ -506,7 +501,7 @@ def test_reset_index_datetime2(self, tz_naive_fixture): idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") idx2 = Index(range(5), name="idx2", dtype="int64") idx3 = date_range( - "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3", unit="us" + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" ) idx = 
MultiIndex.from_arrays([idx1, idx2, idx3]) df = DataFrame( @@ -530,7 +525,7 @@ def test_reset_index_datetime2(self, tz_naive_fixture): def test_reset_index_datetime3(self, tz_naive_fixture): # GH#7793 tz = tz_naive_fixture - dti = date_range("20130101", periods=3, tz=tz, unit="us") + dti = date_range("20130101", periods=3, tz=tz) idx = MultiIndex.from_product([["a", "b"], dti]) df = DataFrame( np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx @@ -704,7 +699,7 @@ def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby( def test_reset_index_multiindex_nat(): # GH 11479 idx = range(3) - tstamp = date_range("2015-07-01", freq="D", periods=3, unit="s") + tstamp = date_range("2015-07-01", freq="D", periods=3) df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")}) df.loc[2, "tstamp"] = pd.NaT result = df.set_index(["id", "tstamp"]).reset_index("id") From 5e24887ab075e5bb6a6c9999919ac74fcc7825fe Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 28 Nov 2023 15:21:13 -0800 Subject: [PATCH 063/105] remove commented-out --- pandas/tests/io/json/test_pandas.py | 2 -- pandas/tests/io/parser/test_parse_dates.py | 1 - pandas/tests/tslibs/test_array_to_datetime.py | 13 ------------- 3 files changed, 16 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d7ca7366854fd..c64ff587bfab3 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -150,8 +150,6 @@ def test_frame_non_unique_columns(self, orient, data): elif orient == "split": expected = df expected.columns = ["x", "x.1"] - # if isinstance(data[0][0], Timestamp): - # # FIXME: in this case result is integer dtype instead of dt64 tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index f3e7ad6a68f4b..ee28895e54f0a 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1703,7 +1703,6 @@ def test_datetime_fractional_seconds(all_parsers, key, value, warn): ], columns=["ymdHMS", "a", "b"], ) - # expected["ymdHMS"] = expected["ymdHMS"].astype("M8[us]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 1262d9d630ba0..a491816a72f0a 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -229,19 +229,6 @@ def test_coerce_outside_ns_bounds(invalid_date, exp_unit, errors): expected = np.array([ts._value], dtype=f"M8[{exp_unit}]") tm.assert_numpy_array_equal(result, expected) - # FIXME: don't leave commented-out - # kwargs = {"values": arr, "errors": errors} - # if errors == "raise": - # msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" - - # with pytest.raises(ValueError, match=msg): - # tslib.array_to_datetime(**kwargs) - # else: # coerce. 
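# Context for deleting the commented-out expectations here (a sketch, not part
# of the patch; assumes non-nano Timestamp support as exercised throughout
# this series): parsing a value beyond the datetime64[ns] bounds (roughly
# 1677-09-21 to 2262-04-11) no longer has to raise or coerce to NaT -- it is
# simply parsed at a coarser resolution.
import pandas as pd

ts = pd.Timestamp("3000-01-01")  # out of nanosecond bounds
assert ts.unit == "s"            # inferred second resolution instead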
- # result, _, _ = tslib.array_to_datetime(**kwargs) - # expected = np.array([iNaT], dtype="M8[ns]") - # - # tm.assert_numpy_array_equal(result, expected) - def test_coerce_outside_ns_bounds_one_valid(): arr = np.array(["1/1/1000", "1/1/2000"], dtype=object) From deb4a17e5d7a339ec4e82ffefeed2e3ff1bf696d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 28 Nov 2023 15:44:53 -0800 Subject: [PATCH 064/105] revert no-longer-necessary --- pandas/tests/frame/test_constructors.py | 12 ---------- .../indexes/datetimes/test_constructors.py | 24 +++++++++---------- pandas/tests/series/indexing/test_setitem.py | 2 +- 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ea635f2d05558..467bff1136603 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2733,18 +2733,6 @@ def test_construction_datetime_resolution_inference(self, cons): res_dtype2 = tm.get_dtype(obj2) assert res_dtype2 == "M8[us, US/Pacific]", res_dtype2 - # FIXME: do not leave commented-out - # df = DataFrame({ - # "a": [1, 2, 3], - # "b": [ - # Timestamp("1970-01-01 00:00:00.000000001"), - # Timestamp("1970-01-01 00:00:00.000000002"), - # pd.NaT - # ], - # "c": [1, 2, 3], - # }) - # raise NotImplementedError("Write the test!") - class TestDataFrameConstructorIndexInference: def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self): diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 9f9c638df4534..c70fa4a43839e 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -432,10 +432,10 @@ def test_construction_dti_with_mixed_timezones(self): # no tz results in DatetimeIndex result = DatetimeIndex( [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" - ).as_unit("ns") + ) exp = DatetimeIndex( [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" - ).as_unit("ns") + ) tm.assert_index_equal(result, exp, exact=True) assert isinstance(result, DatetimeIndex) @@ -446,12 +446,12 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), ], name="idx", - ).as_unit("ns") + ) exp = DatetimeIndex( [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], tz="Asia/Tokyo", name="idx", - ).as_unit("ns") + ) tm.assert_index_equal(result, exp, exact=True) assert isinstance(result, DatetimeIndex) @@ -462,12 +462,12 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp("2011-08-01 10:00", tz="US/Eastern"), ], name="idx", - ).as_unit("ns") + ) exp = DatetimeIndex( [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], tz="US/Eastern", name="idx", - ).as_unit("ns") + ) tm.assert_index_equal(result, exp, exact=True) assert isinstance(result, DatetimeIndex) @@ -493,7 +493,7 @@ def test_construction_dti_with_mixed_timezones(self): ], tz="Asia/Tokyo", name="idx", - ).as_unit("ns") + ) expected = DatetimeIndex( [ Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), @@ -501,7 +501,7 @@ def test_construction_dti_with_mixed_timezones(self): ], tz="Asia/Tokyo", name="idx", - ).as_unit("ns") + ) tm.assert_index_equal(dti, expected) # pre-2.0 mixed-tz scalars raised even if a tz/dtype was specified. 
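# The hunks below pin the expected dtype as "M8[s, US/Eastern]". A minimal
# sketch (not part of the patch; assumes this series is applied) of the
# inference they rely on: second-resolution Timestamps propagate their unit to
# the resulting index.
import pandas as pd

dti = pd.DatetimeIndex(
    [
        pd.Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
        pd.Timestamp("2011-01-02 10:00", tz="US/Eastern"),
    ],
    tz="US/Eastern",
)
assert dti.dtype == "datetime64[s, US/Eastern]"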
@@ -513,7 +513,7 @@ def test_construction_dti_with_mixed_timezones(self): ], tz="US/Eastern", name="idx", - ).as_unit("ns") + ) expected = DatetimeIndex( [ Timestamp("2011-01-01 10:00", tz="Asia/Tokyo").tz_convert("US/Eastern"), @@ -521,7 +521,7 @@ def test_construction_dti_with_mixed_timezones(self): ], tz="US/Eastern", name="idx", - ).as_unit("ns") + ) tm.assert_index_equal(dti, expected) # same thing but pass dtype instead of tz @@ -530,7 +530,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), Timestamp("2011-01-02 10:00", tz="US/Eastern"), ], - dtype="M8[ns, US/Eastern]", + dtype="M8[s, US/Eastern]", name="idx", ) tm.assert_index_equal(dti, expected) @@ -783,7 +783,7 @@ def test_constructor_start_end_with_tz(self, tz): ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], dtype="M8[ns, America/Los_Angeles]", freq="D", - ).as_unit("ns") + ) tm.assert_index_equal(result, expected) # Especially assert that the timezone is consistent for pytz assert pytz.timezone("America/Los_Angeles") is result.tz diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 9da3d4e444019..6be325073bb67 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -506,7 +506,7 @@ def test_setitem_empty_series_datetimeindex_preserves_freq(self): # GH#33573 our index should retain its freq dti = DatetimeIndex([], freq="D", dtype="M8[ns]") series = Series([], index=dti, dtype=object) - key = Timestamp("2012-01-01").as_unit("ns") + key = Timestamp("2012-01-01") series[key] = 47 expected = Series(47, DatetimeIndex([key], freq="D").as_unit("ns")) tm.assert_series_equal(series, expected) From 1e0e47a1e56be06f1326fa26824b26564ae0c43a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 28 Nov 2023 15:51:31 -0800 Subject: [PATCH 065/105] as_unit->astype --- pandas/tests/io/parser/test_read_fwf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 2972f0fa3e897..7c9f5b7ca65d8 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -325,7 +325,7 @@ def test_fwf_regression(): parse_dates=True, date_format="%Y%j%H%M%S", ) - expected.index = expected.index.as_unit("s") + expected.index = expected.index.astype("M8[s]") tm.assert_frame_equal(result, expected) From 9576079c25b6f03c22aae6f47d5baa236addc844 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 30 Nov 2023 08:14:00 -0800 Subject: [PATCH 066/105] cleanup --- .../indexes/datetimes/test_constructors.py | 21 +++---------------- pandas/tests/io/excel/test_writers.py | 4 +++- pandas/tests/tools/test_to_datetime.py | 2 -- 3 files changed, 6 insertions(+), 21 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c70fa4a43839e..02bef9e69ee8e 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -557,26 +557,11 @@ def test_construction_outofbounds(self): tm.assert_index_equal(Index(dates).astype(object), exp) DatetimeIndex(dates) - # msg = ( - # "^Out of bounds nanosecond timestamp: 3000-01-01 00:00:00, " - # "at position 0$" - # ) - # with pytest.raises(OutOfBoundsDatetime, match=msg): - # # can't create DatetimeIndex - # DatetimeIndex(dates) @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]]) def 
test_dti_date_out_of_range(self, data): # GH#1475 - pass - # FIXME: don't leave commented-out - # dti = DatetimeIndex(data) - # msg = ( - # "^Out of bounds nanosecond timestamp: " - # "1400-01-01( 00:00:00)?, at position 0$" - # ) - # with pytest.raises(OutOfBoundsDatetime, match=msg): - # DatetimeIndex(data) + DatetimeIndex(data) def test_construction_with_ndarray(self): # GH 5152 @@ -584,8 +569,8 @@ def test_construction_with_ndarray(self): data = DatetimeIndex(dates, freq=offsets.BDay()).values result = DatetimeIndex(data, freq=offsets.BDay()) expected = DatetimeIndex( - ["2013-10-07", "2013-10-08", "2013-10-09"], freq="B" - ).as_unit("us") + ["2013-10-07", "2013-10-08", "2013-10-09"], dtype="M8[us]", freq="B" + ) tm.assert_index_equal(result, expected) def test_integer_values_and_tz_interpreted_as_utc(self): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 67e3668c48580..edfc533e6bc57 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -37,7 +37,9 @@ def get_exp_unit(path: str) -> str: - return "ns" + if path.endswith(".ods"): + return "s" + return "us" @pytest.fixture diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 8b6c422dbae04..2be2e1712d3e1 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1663,8 +1663,6 @@ def test_timestamp_utc_true(self, ts, expected): @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"]) def test_to_datetime_with_format_out_of_bounds(self, dt_str): # GH 9107 - # msg = "Out of bounds nanosecond timestamp" - # with pytest.raises(OutOfBoundsDatetime, match=msg): res = to_datetime(dt_str, format="%Y%m%d") dtobj = datetime.strptime(dt_str, "%Y%m%d") expected = Timestamp(dtobj).as_unit("s") From 2b427580c4cb9126dc8195f3b991ebcb825f5107 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 4 Dec 2023 12:14:41 -0800 Subject: [PATCH 067/105] merge fixup --- pandas/core/reshape/tile.py | 2 +- pandas/core/window/ewm.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index d6b9b6349cf34..82c697306edb2 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -392,12 +392,12 @@ def _nbins_to_bins(x_idx: Index, nbins: int, right: bool) -> Index: else: # adjust end points after binning if _is_dt_or_td(x_idx.dtype): # Use DatetimeArray/TimedeltaArray method instead of linspace + # error: Argument 1 to "dtype_to_unit" has incompatible type # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" unit = dtype_to_unit(x_idx.dtype) # type: ignore[arg-type] # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]" # has no attribute "_generate_range" - unit = dtype_to_unit(x_idx.dtype) bins = x_idx._values._generate_range( # type: ignore[union-attr] start=mn, end=mx, periods=nbins + 1, freq=None, unit=unit ) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index d58dc3efb1ba8..9b21a23b1aefe 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -7,7 +7,6 @@ import numpy as np -from pandas._libs import lib from pandas._libs.tslibs import Timedelta import pandas._libs.window.aggregations as window_aggregations from pandas.util._decorators import doc From c195475883b76d58a8715eddee8f3a49ac0d3d91 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 4 Dec 2023 12:15:25 -0800 Subject: [PATCH 068/105] revert bit --- 
pandas/tests/window/test_ewm.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 2c9f123f1f308..35c896dc0090b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -102,30 +102,6 @@ def test_ewma_with_times_equal_spacing(halflife_with_times, times, min_periods): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "unit", - [ - pytest.param( - "s", - marks=pytest.mark.xfail( - reason="ExponentialMovingWindow constructor raises on non-nano" - ), - ), - pytest.param( - "ms", - marks=pytest.mark.xfail( - reason="ExponentialMovingWindow constructor raises on non-nano" - ), - ), - pytest.param( - "us", - marks=pytest.mark.xfail( - reason="ExponentialMovingWindow constructor raises on non-nano" - ), - ), - "ns", - ], -) def test_ewma_with_times_variable_spacing(tz_aware_fixture, unit): tz = tz_aware_fixture halflife = "23 days" From 8e45823080a301141c0352cde2db887dd016caa1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 9 Dec 2023 10:01:15 -0800 Subject: [PATCH 069/105] revert no-longer-necessary, xfail --- pandas/_libs/tslibs/strptime.pyx | 5 ++- pandas/tests/extension/test_arrow.py | 2 +- pandas/tests/frame/methods/test_replace.py | 4 +- .../tests/frame/methods/test_reset_index.py | 4 +- pandas/tests/io/json/test_pandas.py | 7 ++- pandas/tests/reshape/test_cut.py | 43 +++++++------------ pandas/tests/reshape/test_qcut.py | 4 +- pandas/tests/tools/test_to_timedelta.py | 2 +- 8 files changed, 31 insertions(+), 40 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 760ec5fd975a2..955e0eb319b59 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -62,6 +62,7 @@ from pandas._libs.tslibs.nattype cimport ( ) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + NPY_FR_ns, get_datetime64_unit, import_pandas_datetime, npy_datetimestruct, @@ -467,7 +468,7 @@ def array_strptime( ival, tz, ambiguous="raise", nonexistent=None, creso=creso ) nsecs = (ival - iresult[i]) - if creso == NPY_DATETIMEUNIT.NPY_FR_ns: + if creso == NPY_FR_ns: nsecs = nsecs // 10**9 elif creso == NPY_DATETIMEUNIT.NPY_FR_us: nsecs = nsecs // 10**6 @@ -684,7 +685,7 @@ cdef tzinfo _parse_with_format( elif len(s) <= 6: item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us else: - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ns + item_reso[0] = NPY_FR_ns # Pad to always return nanoseconds s += "0" * (9 - len(s)) us = int(s) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 5d634c9aeb14f..7678c0073b97a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3440,7 +3440,7 @@ def test_arrow_floor_division_large_divisor(dtype): def test_string_to_datetime_parsing_cast(): # GH 56266 string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"] - result = pd.Series(string_dates, dtype="timestamp[ns][pyarrow]") + result = pd.Series(string_dates, dtype="timestamp[s][pyarrow]") expected = pd.Series( ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True)) ) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f7e6e42c19ebe..6ca6cbad02d51 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -817,9 +817,7 @@ def test_replace_for_new_dtypes(self, datetime_frame): ( DataFrame( { - "A": 
date_range( - "20130101", periods=3, tz="US/Eastern", unit="s" - ), + "A": date_range("20130101", periods=3, tz="US/Eastern"), "B": [0, np.nan, 2], } ), diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 448c57273f739..22ce091d4ed62 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -673,7 +673,7 @@ def test_reset_index_empty_frame_with_datetime64_multiindex(): expected = DataFrame( columns=list("abcd"), index=RangeIndex(start=0, stop=0, step=1) ) - expected["a"] = expected["a"].astype("datetime64[s]") + expected["a"] = expected["a"].astype("datetime64[ns]") expected["b"] = expected["b"].astype("int64") tm.assert_frame_equal(result, expected) @@ -689,7 +689,7 @@ def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby( expected = DataFrame( columns=["c2", "c3", "c1"], index=RangeIndex(start=0, stop=0, step=1) ) - expected["c3"] = expected["c3"].astype("datetime64[s]") + expected["c3"] = expected["c3"].astype("datetime64[ns]") expected["c1"] = expected["c1"].astype("float64") if using_infer_string: expected["c2"] = expected["c2"].astype("string[pyarrow_numpy]") diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c64ff587bfab3..aa7bd0999a38b 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -130,7 +130,12 @@ def test_frame_non_unique_index_raises(self, orient): [["a", "b"], ["c", "d"]], [[1.5, 2.5], [3.5, 4.5]], [[1, 2.5], [3, 4.5]], - [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], + pytest.param( + [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], + marks=pytest.mark.xfail( + reason="GH#GH#55827 non-nanosecond dt64 fails to round-trip" + ), + ), ], ) def test_frame_non_unique_columns(self, orient, data): diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 5b937ea667f98..0811c69859c0d 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -438,18 +438,11 @@ def test_datetime_bin(conv): data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")] bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"] - unit = Timestamp(conv(bin_data[0])).unit expected = Series( IntervalIndex( [ - Interval( - Timestamp(bin_data[0]).as_unit(unit), - Timestamp(bin_data[1]).as_unit(unit), - ), - Interval( - Timestamp(bin_data[1]).as_unit(unit), - Timestamp(bin_data[2]).as_unit(unit), - ), + Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), + Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])), ] ) ).astype(CategoricalDtype(ordered=True)) @@ -468,6 +461,10 @@ def test_datetime_cut(unit, box): data = box(data) result, _ = cut(data, 3, retbins=True) + if box is list: + # We don't (yet) do inference on these, so get nanos + unit = "ns" + if unit == "s": # See https://github.com/pandas-dev/pandas/pull/56101#discussion_r1405325425 # for why we round to 8 seconds instead of 7 @@ -517,18 +514,10 @@ def test_datetime_tz_cut_mismatched_tzawareness(box): [ 3, [ - Timestamp("2013-01-01 04:57:07.200000", tz="UTC") - .tz_convert("US/Eastern") - .as_unit("ns"), - Timestamp("2013-01-01 21:00:00", tz="UTC") - .tz_convert("US/Eastern") - .as_unit("ns"), - Timestamp("2013-01-02 13:00:00", tz="UTC") - .tz_convert("US/Eastern") - .as_unit("ns"), - Timestamp("2013-01-03 05:00:00", tz="UTC") - .tz_convert("US/Eastern") - .as_unit("ns"), + Timestamp("2013-01-01 04:57:07.200000", 
tz="UTC").tz_convert("US/Eastern"), + Timestamp("2013-01-01 21:00:00", tz="UTC").tz_convert("US/Eastern"), + Timestamp("2013-01-02 13:00:00", tz="UTC").tz_convert("US/Eastern"), + Timestamp("2013-01-03 05:00:00", tz="UTC").tz_convert("US/Eastern"), ], ], ) @@ -546,16 +535,16 @@ def test_datetime_tz_cut(bins, box): IntervalIndex( [ Interval( - Timestamp("2012-12-31 23:57:07.200000", tz=tz).as_unit("ns"), - Timestamp("2013-01-01 16:00:00", tz=tz).as_unit("ns"), + Timestamp("2012-12-31 23:57:07.200000", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz), ), Interval( - Timestamp("2013-01-01 16:00:00", tz=tz).as_unit("ns"), - Timestamp("2013-01-02 08:00:00", tz=tz).as_unit("ns"), + Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz), ), Interval( - Timestamp("2013-01-02 08:00:00", tz=tz).as_unit("ns"), - Timestamp("2013-01-03 00:00:00", tz=tz).as_unit("ns"), + Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-03 00:00:00", tz=tz), ), ] ) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 5f769db7f8acf..53af673e0f7b0 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -271,10 +271,8 @@ def test_datetime_tz_qcut(bins): ], ], ) -def test_date_like_qcut_bins(arg, expected_bins, unit): +def test_date_like_qcut_bins(arg, expected_bins): # see gh-19891 - arg = arg.as_unit(unit) - expected_bins = expected_bins.as_unit(unit) ser = Series(arg) result, result_bins = qcut(ser, 2, retbins=True) tm.assert_index_equal(result_bins, expected_bins) diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index c052ca58f5873..f31b349c62fa5 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -26,7 +26,7 @@ def test_to_timedelta_dt64_raises(self): # supported GH#29794 msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]" - ser = Series([pd.NaT]) + ser = Series([pd.NaT], dtype="M8[ns]") with pytest.raises(TypeError, match=msg): to_timedelta(ser) with pytest.raises(TypeError, match=msg): From f41da0ac1a066897139475dc6dd20abbe4dcd668 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 11 Dec 2023 18:27:31 -0800 Subject: [PATCH 070/105] update multithread test --- pandas/tests/io/parser/test_multi_thread.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index da9b9bddd30cd..be9d95151cf81 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -144,7 +144,8 @@ def test_multi_thread_path_multipart_read_csv(all_parsers): with tm.ensure_clean(file_name) as path: df.to_csv(path) - final_dataframe = _generate_multi_thread_dataframe( - parser, path, num_rows, num_tasks - ) - tm.assert_frame_equal(df, final_dataframe) + result = _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks) + + expected = df[:] + expected["date"] = expected["date"].astype("M8[s]") + tm.assert_frame_equal(result, expected) From 3737198b04bd662c329283086712f4d5070233cc Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 12 Dec 2023 10:33:55 -0800 Subject: [PATCH 071/105] update tests --- pandas/tests/frame/methods/test_to_csv.py | 15 ++++++++++++--- pandas/tests/reshape/test_qcut.py | 4 +++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 
c1d75ae44e741..4c7c32717acdf 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -230,12 +230,16 @@ def make_dtnat_arr(n, nnat=None): s1 = make_dtnat_arr(chunksize + 5) s2 = make_dtnat_arr(chunksize + 5, 0) + df = DataFrame({"a": s1, "b": s2}) with tm.ensure_clean("1.csv") as pth: - df = DataFrame({"a": s1, "b": s2}) df.to_csv(pth, chunksize=chunksize) - recons = self.read_csv(pth).apply(to_datetime) - tm.assert_frame_equal(df, recons, check_names=False) + result = self.read_csv(pth).apply(to_datetime) + + expected = df[:] + expected["a"] = expected["a"].astype("M8[s]") + expected["b"] = expected["b"].astype("M8[s]") + tm.assert_frame_equal(result, expected, check_names=False) def _return_result_expected( self, @@ -353,6 +357,7 @@ def test_to_csv_nrows(self, nrows): columns=Index(list("abcd"), dtype=object), ) result, expected = self._return_result_expected(df, 1000, "dt", "s") + expected.index = expected.index.astype("M8[ns]") tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow @@ -382,6 +387,10 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols): r_idx_type, c_idx_type, ) + if r_idx_type in ["dt", "p"]: + expected.index = expected.index.astype("M8[ns]") + if c_idx_type in ["dt", "p"]: + expected.columns = expected.columns.astype("M8[ns]") tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index 53af673e0f7b0..5f769db7f8acf 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -271,8 +271,10 @@ def test_datetime_tz_qcut(bins): ], ], ) -def test_date_like_qcut_bins(arg, expected_bins): +def test_date_like_qcut_bins(arg, expected_bins, unit): # see gh-19891 + arg = arg.as_unit(unit) + expected_bins = expected_bins.as_unit(unit) ser = Series(arg) result, result_bins = qcut(ser, 2, retbins=True) tm.assert_index_equal(result_bins, expected_bins) From 8e2ff655c42193d2b2ac559a1c2592393ce9a3e6 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Dec 2023 10:23:09 -0800 Subject: [PATCH 072/105] update doctest --- pandas/core/arrays/datetimelike.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 38d5205e6b7cb..f256a17845526 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1869,11 +1869,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.floor("2h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + dtype='datetime64[s, Europe/Amsterdam]', freq=None) >>> rng_tz.floor("2h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + dtype='datetime64[s, Europe/Amsterdam]', freq=None) """ _floor_example = """>>> rng.floor('h') @@ -1896,11 +1896,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.floor("2h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + dtype='datetime64[s, Europe/Amsterdam]', freq=None) >>> rng_tz.floor("2h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + dtype='datetime64[s, Europe/Amsterdam]', freq=None) """ _ceil_example = """>>> rng.ceil('h') @@ -1923,11 +1923,11 @@ 
def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.ceil("h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + dtype='datetime64[s, Europe/Amsterdam]', freq=None) >>> rng_tz.ceil("h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[ns, Europe/Amsterdam]', freq=None) + dtype='datetime64[s, Europe/Amsterdam]', freq=None) """ From 771d5f60258ad6022e7304f9598a7d64075ce4fa Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Dec 2023 10:58:17 -0800 Subject: [PATCH 073/105] update tests --- pandas/tests/io/test_sql.py | 26 ++++++---------- pandas/tests/reshape/test_cut.py | 52 ++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 65e04d7920f18..f97e6cf6db54c 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -19,10 +19,7 @@ import pytest from pandas._libs import lib -from pandas.compat import ( - pa_version_under13p0, - pa_version_under14p1, -) +from pandas.compat import pa_version_under14p1 from pandas.compat._optional import import_optional_dependency import pandas.util._test_decorators as td @@ -368,7 +365,7 @@ def create_and_load_postgres_datetz(conn): Timestamp("2000-01-01 08:00:00", tz="UTC"), Timestamp("2000-06-01 07:00:00", tz="UTC"), ] - return Series(expected_data, name="DateColWithTz") + return Series(expected_data, name="DateColWithTz").astype("M8[us, UTC]") def check_iris_frame(frame: DataFrame): @@ -1823,10 +1820,7 @@ def test_api_custom_dateparsing_error( } ) - if not pa_version_under13p0: - # TODO: is this astype safe? - expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") - + expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") tm.assert_frame_equal(result, expected) @@ -2807,7 +2801,9 @@ def test_datetime_with_timezone_table(conn, request): conn = request.getfixturevalue(conn) expected = create_and_load_postgres_datetz(conn) result = sql.read_sql_table("datetz", conn) - tm.assert_frame_equal(result, expected.to_frame()) + + exp_frame = expected.to_frame() + tm.assert_frame_equal(result, exp_frame) @pytest.mark.parametrize("conn", sqlalchemy_connectable) @@ -2922,9 +2918,7 @@ def test_datetime(conn, request): if "sqlite" in conn_name: assert isinstance(result.loc[0, "A"], str) result["A"] = to_datetime(result["A"]) - tm.assert_frame_equal(result, expected) - else: - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("conn", sqlalchemy_connectable) @@ -2948,9 +2942,8 @@ def test_datetime_NaT(conn, request): if "sqlite" in conn_name: assert isinstance(result.loc[0, "A"], str) result["A"] = to_datetime(result["A"], errors="coerce") - tm.assert_frame_equal(result, expected) - else: - tm.assert_frame_equal(result, df) + + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("conn", sqlalchemy_connectable) @@ -3942,6 +3935,7 @@ def test_self_join_date_columns(postgresql_psycopg2_engine): expected = DataFrame( [[1, Timestamp("2021", tz="UTC")] * 2], columns=["id", "created_dt"] * 2 ) + expected["created_dt"] = expected["created_dt"].astype("M8[us]") tm.assert_frame_equal(result, expected) # Cleanup diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 0811c69859c0d..8974c8138f508 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -1,3 +1,5 @@ +from datetime import datetime + 
import numpy as np import pytest @@ -445,10 +447,16 @@ def test_datetime_bin(conv): Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])), ] ) - ).astype(CategoricalDtype(ordered=True)) + ) bins = [conv(v) for v in bin_data] result = Series(cut(data, bins=bins)) + + if type(bins[0]) is datetime: + # The bins have microsecond dtype -> so does result + expected = expected.astype("interval[datetime64[us]]") + + expected = expected.astype(CategoricalDtype(ordered=True)) tm.assert_series_equal(result, expected) @@ -461,10 +469,6 @@ def test_datetime_cut(unit, box): data = box(data) result, _ = cut(data, 3, retbins=True) - if box is list: - # We don't (yet) do inference on these, so get nanos - unit = "ns" - if unit == "s": # See https://github.com/pandas-dev/pandas/pull/56101#discussion_r1405325425 # for why we round to 8 seconds instead of 7 @@ -531,24 +535,26 @@ def test_datetime_tz_cut(bins, box): bins = box(bins) result = cut(ser, bins) - expected = Series( - IntervalIndex( - [ - Interval( - Timestamp("2012-12-31 23:57:07.200000", tz=tz), - Timestamp("2013-01-01 16:00:00", tz=tz), - ), - Interval( - Timestamp("2013-01-01 16:00:00", tz=tz), - Timestamp("2013-01-02 08:00:00", tz=tz), - ), - Interval( - Timestamp("2013-01-02 08:00:00", tz=tz), - Timestamp("2013-01-03 00:00:00", tz=tz), - ), - ] - ) - ).astype(CategoricalDtype(ordered=True)) + ii = IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:57:07.200000", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-03 00:00:00", tz=tz), + ), + ] + ) + if isinstance(bins, int): + # the dtype is inferred from ser, which has nanosecond unit + ii = ii.astype("interval[datetime64[ns, US/Eastern]]") + expected = Series(ii).astype(CategoricalDtype(ordered=True)) tm.assert_series_equal(result, expected) From fe8b50f00ee2ef8de7ff498f6229fb681567bc0f Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 09:58:56 -0800 Subject: [PATCH 074/105] update doctests --- pandas/core/algorithms.py | 8 +++++--- pandas/core/arrays/datetimes.py | 10 +++++----- pandas/core/base.py | 2 +- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/dtypes/missing.py | 4 ++-- pandas/core/frame.py | 2 +- pandas/core/generic.py | 11 +++++------ pandas/core/indexes/base.py | 2 +- pandas/core/indexes/datetimes.py | 5 +++-- pandas/core/tools/datetimes.py | 14 +++++++------- 10 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3672cdb13d4a3..c6a83cdd9bdb6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -349,14 +349,15 @@ def unique(values): array([2, 1]) >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) - array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + array(['2016-01-01T00:00:00'], dtype='datetime64[s]') >>> pd.unique( ... pd.Series( ... [ ... pd.Timestamp("20160101", tz="US/Eastern"), ... pd.Timestamp("20160101", tz="US/Eastern"), - ... ] + ... ], + ... dtype="M8[ns, US/Eastern]", ... ) ... ) @@ -368,7 +369,8 @@ def unique(values): ... [ ... pd.Timestamp("20160101", tz="US/Eastern"), ... pd.Timestamp("20160101", tz="US/Eastern"), - ... ] + ... ], + ... dtype="M8[ns, US/Eastern]", ... ) ... 
) DatetimeIndex(['2016-01-01 00:00:00-05:00'], diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index faad71844659e..773c56db91c46 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -214,7 +214,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc] ... ) ['2023-01-01 00:00:00', '2023-01-02 00:00:00'] - Length: 2, dtype: datetime64[ns] + Length: 2, dtype: datetime64[s] """ _typ = "datetimearray" @@ -1064,21 +1064,21 @@ def tz_localize( or `'shift_backwards'`. >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', - ... '2015-03-29 03:30:00'])) + ... '2015-03-29 03:30:00'], dtype="M8[ns]")) >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 0 2015-03-29 03:00:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[s, Europe/Warsaw] + dtype: datetime64[ns, Europe/Warsaw] >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 0 2015-03-29 01:59:59.999999999+01:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[s, Europe/Warsaw] + dtype: datetime64[ns, Europe/Warsaw] >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) 0 2015-03-29 03:30:00+02:00 1 2015-03-29 03:30:00+02:00 - dtype: datetime64[s, Europe/Warsaw] + dtype: datetime64[ns, Europe/Warsaw] """ nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( diff --git a/pandas/core/base.py b/pandas/core/base.py index 7a3d6cb866ea5..a475024229a7c 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1274,7 +1274,7 @@ def factorize( 0 2000-03-11 1 2000-03-12 2 2000-03-13 - dtype: datetime64[ns] + dtype: datetime64[s] >>> ser.searchsorted('3/14/2000') 3 diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 68c7ab6cbdbd1..a7fc6de2d7ef7 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -201,7 +201,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): by providing an empty index. 
As follows, >>> pd.CategoricalDtype(pd.DatetimeIndex([])).categories.dtype - dtype(' bool | npt.NDArray[np.bool_] | NDFrame: >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], - dtype='datetime64[ns]', freq=None) + dtype='datetime64[s]', freq=None) >>> pd.isna(index) array([False, False, True, False]) @@ -365,7 +365,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], - dtype='datetime64[ns]', freq=None) + dtype='datetime64[s]', freq=None) >>> pd.notna(index) array([ True, True, False, True]) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index abb043361483c..36b939bc13d42 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12455,7 +12455,7 @@ def to_period( >>> idx DatetimeIndex(['2001-03-31', '2002-05-31', '2003-08-31'], - dtype='datetime64[ns]', freq=None) + dtype='datetime64[s]', freq=None) >>> idx.to_period("M") PeriodIndex(['2001-03', '2002-05', '2003-08'], dtype='period[M]') diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 668672ed05f72..4c01bdf028a7c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3161,7 +3161,7 @@ class (index) object 32B 'bird' 'bird' 'mammal' 'mammal' Dimensions: (date: 2, animal: 2) Coordinates: - * date (date) datetime64[ns] 2018-01-01 2018-01-02 + * date (date) datetime64[s] 2018-01-01 2018-01-02 * animal (animal) object 'falcon' 'parrot' Data variables: speed (date, animal) int64 350 18 361 15 @@ -6127,7 +6127,7 @@ def dtypes(self): >>> df.dtypes float float64 int int64 - datetime datetime64[ns] + datetime datetime64[s] string object dtype: object """ @@ -10749,10 +10749,9 @@ def tz_localize( dates forward or backward with a timedelta object or `'shift_forward'` or `'shift_backward'`. - >>> s = pd.Series( - ... range(2), - ... index=pd.DatetimeIndex(["2015-03-29 02:30:00", "2015-03-29 03:30:00"]), - ... ) + >>> dti = pd.DatetimeIndex(["2015-03-29 02:30:00", "2015-03-29 03:30:00"], + ... dtype="M8[ns]") + >>> s = pd.Series(range(2), index=dti) >>> s.tz_localize("Europe/Warsaw", nonexistent="shift_forward") 2015-03-29 03:00:00+02:00 0 2015-03-29 03:30:00+02:00 1 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c17e01b85fa84..3edc74b573568 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2501,7 +2501,7 @@ def isna(self) -> npt.NDArray[np.bool_]: ... ) >>> idx DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], - dtype='datetime64[ns]', freq=None) + dtype='datetime64[s]', freq=None) >>> idx.isna() array([False, True, True, True]) """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 282a11122211b..ad0f3e96f9cac 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -254,7 +254,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> idx DatetimeIndex(['2020-01-01 10:00:00+00:00', '2020-02-01 11:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) + dtype='datetime64[s, UTC]', freq=None) """ _typ = "datetimeindex" @@ -489,7 +489,8 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex: Examples -------- >>> idx = pd.DatetimeIndex( - ... ["2023-01-01", "2023-01-02", "2023-02-01", "2023-02-02"] + ... 
["2023-01-01", "2023-01-02", "2023-02-01", "2023-02-02"], + ... dtype="M8[ns]", ... ) >>> idx DatetimeIndex(['2023-01-01', '2023-01-02', '2023-02-01', '2023-02-02'], diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c785f0c3a6985..fdf23fa4c6e8c 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -871,7 +871,7 @@ def to_datetime( >>> pd.to_datetime(df) 0 2015-02-04 1 2016-03-05 - dtype: datetime64[ns] + dtype: datetime64[s] Using a unix epoch time @@ -902,7 +902,7 @@ def to_datetime( in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. >>> pd.to_datetime("13000101", format="%Y%m%d", errors="coerce") - NaT + Timestamp('1300-01-01 00:00:00') .. _to_datetime_tz_examples: @@ -914,14 +914,14 @@ def to_datetime( >>> pd.to_datetime(["2018-10-26 12:00:00", "2018-10-26 13:00:15"]) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], - dtype='datetime64[ns]', freq=None) + dtype='datetime64[s]', freq=None) - Timezone-aware inputs *with constant time offset* are converted to timezone-aware :class:`DatetimeIndex`: >>> pd.to_datetime(["2018-10-26 12:00 -0500", "2018-10-26 13:00 -0500"]) DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], - dtype='datetime64[ns, UTC-05:00]', freq=None) + dtype='datetime64[s, UTC-05:00]', freq=None) - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) @@ -961,21 +961,21 @@ def to_datetime( >>> pd.to_datetime(["2018-10-26 12:00", "2018-10-26 13:00"], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) + dtype='datetime64[s, UTC]', freq=None) - Timezone-aware inputs are *converted* to UTC (the output represents the exact same datetime, but viewed from the UTC time offset `+00:00`). 
>>> pd.to_datetime(["2018-10-26 12:00 -0530", "2018-10-26 12:00 -0500"], utc=True) DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) + dtype='datetime64[s, UTC]', freq=None) - Inputs can contain both string or datetime, the above rules still apply >>> pd.to_datetime(["2018-10-26 12:00", datetime(2020, 1, 1, 18)], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) """ if exact is not lib.no_default and format in {"mixed", "ISO8601"}: raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'") From 5bbe0100807bc65a3ecb5cf1b0553bc8ce544b3a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 10:24:48 -0800 Subject: [PATCH 075/105] update tests --- pandas/tests/arrays/test_array.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 4 ++-- pandas/tests/io/json/test_pandas.py | 15 ++++++++------- pandas/tests/io/test_sql.py | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index b2c0d30e4bcd4..3a51e9a825ab4 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -288,7 +288,7 @@ def test_array_copy(): # datetime ( [pd.Timestamp("2000"), pd.Timestamp("2001")], - DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"), ), ( [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 303cb7353a823..1b62929b10b21 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -791,8 +791,8 @@ def test_setitem_object_inferring(self): expected = DataFrame( { "a": [1], - "b": Series([Timestamp("2019-12-31")], dtype="datetime64[ns]"), - "c": Series([Timestamp("2019-12-31")], dtype="datetime64[ns]"), + "b": Series([Timestamp("2019-12-31")], dtype="datetime64[s]"), + "c": Series([Timestamp("2019-12-31")], dtype="datetime64[s]"), } ) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index aa7bd0999a38b..531cd0578fc7d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -130,15 +130,16 @@ def test_frame_non_unique_index_raises(self, orient): [["a", "b"], ["c", "d"]], [[1.5, 2.5], [3.5, 4.5]], [[1, 2.5], [3, 4.5]], - pytest.param( - [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], - marks=pytest.mark.xfail( - reason="GH#GH#55827 non-nanosecond dt64 fails to round-trip" - ), - ), + [[Timestamp("20130101"), 3.5], [Timestamp("20130102"), 4.5]], ], ) - def test_frame_non_unique_columns(self, orient, data): + def test_frame_non_unique_columns(self, orient, data, request): + if isinstance(data[0][0], Timestamp) and orient == "split": + mark = pytest.mark.xfail( + reason="GH#55827 non-nanosecond dt64 fails to round-trip" + ) + request.applymarker(mark) + df = DataFrame(data, index=[1, 2], columns=["x", "x"]) result = read_json( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f97e6cf6db54c..c68d2be3e7f42 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1820,7 +1820,7 @@ def test_api_custom_dateparsing_error( } ) - expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") 
+ expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") tm.assert_frame_equal(result, expected) From c797270c84b8a89f76c481dca3ba6af3516aec03 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 14:29:56 -0800 Subject: [PATCH 076/105] update db tests --- pandas/tests/io/test_sql.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index c68d2be3e7f42..518653548aab8 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1820,7 +1820,10 @@ def test_api_custom_dateparsing_error( } ) - expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") + if "postgres" in conn_name: + expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") + else: + expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") tm.assert_frame_equal(result, expected) @@ -3935,7 +3938,7 @@ def test_self_join_date_columns(postgresql_psycopg2_engine): expected = DataFrame( [[1, Timestamp("2021", tz="UTC")] * 2], columns=["id", "created_dt"] * 2 ) - expected["created_dt"] = expected["created_dt"].astype("M8[us]") + expected["created_dt"] = expected["created_dt"].astype("M8[us, UTC]") tm.assert_frame_equal(result, expected) # Cleanup From ad54a8c1eceebbe6033c05c82480b9ff17339b71 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 15:41:54 -0800 Subject: [PATCH 077/105] troubleshoot db tests --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 518653548aab8..5487f8b9ad201 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1820,7 +1820,7 @@ def test_api_custom_dateparsing_error( } ) - if "postgres" in conn_name: + if "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") From 0188ba90db0e19a0ac315cd3fac11921975900cf Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Dec 2023 16:47:39 -0800 Subject: [PATCH 078/105] update test --- pandas/tests/io/test_parquet.py | 11 ++++------- pandas/tests/io/test_sql.py | 5 ++++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6b8e1520b7a66..85c58a2202f97 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -985,16 +985,13 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full): if pa_version_under13p0: # pyarrow infers datetimes as us instead of ns expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]") - expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( - "timestamp[us][pyarrow]" - ) expected["datetime_tz"] = expected["datetime_tz"].astype( pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels")) ) - else: - expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( - "timestamp[ms][pyarrow]" - ) + + expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( + "timestamp[ms][pyarrow]" + ) check_round_trip( df, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 5487f8b9ad201..d79d8255c651d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1820,7 +1820,10 @@ def test_api_custom_dateparsing_error( } ) - if "postgres" in conn_name or "mysql" in conn_name: + if "adbc" in conn_name: + # keep nanos + pass + elif "postgres" in 
conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") From 7a8c3cf3720ec2f62656bab3dfe9349fbeb52d80 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 20 Dec 2023 13:40:42 -0800 Subject: [PATCH 079/105] troubleshoot sql tests --- pandas/tests/io/test_sql.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d79d8255c651d..5487f8b9ad201 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1820,10 +1820,7 @@ def test_api_custom_dateparsing_error( } ) - if "adbc" in conn_name: - # keep nanos - pass - elif "postgres" in conn_name or "mysql" in conn_name: + if "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") From c033795d806a0321ed87204cff1d91e2b63fd1b8 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 21 Dec 2023 07:33:45 -0800 Subject: [PATCH 080/105] update test --- pandas/tests/io/parser/test_parse_dates.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index ee28895e54f0a..6bf621a8623aa 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2023,9 +2023,7 @@ def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, co parse_dates=parse_spec, header=[0, 1], ) - expected = DataFrame( - {col_name: Timestamp("2019-12-31").as_unit("ns"), ("c", "3"): [6]} - ) + expected = DataFrame({col_name: Timestamp("2019-12-31"), ("c", "3"): [6]}) tm.assert_frame_equal(result, expected) From c63d7de67460a9f80177748fc4576ecf72cec85c Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Jan 2024 08:19:27 -0800 Subject: [PATCH 081/105] update tests --- pandas/tests/indexes/datetimes/test_date_range.py | 4 +++- pandas/tests/io/parser/test_parse_dates.py | 1 - pandas/tests/io/pytables/test_store.py | 12 ++++++++---- pandas/tests/scalar/test_nat.py | 4 +++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e26f35f4e8258..1dd3eb06354e7 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -826,7 +826,9 @@ def test_to_offset_with_lowercase_deprecated_freq(self) -> None: ) with tm.assert_produces_warning(FutureWarning, match=msg): result = date_range("2010-01-01", periods=2, freq="m") - expected = DatetimeIndex(["2010-01-31", "2010-02-28"], freq="ME") + expected = DatetimeIndex( + ["2010-01-31", "2010-02-28"], dtype="M8[ns]", freq="ME" + ) tm.assert_index_equal(result, expected) def test_date_range_bday(self): diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 6bf621a8623aa..bd5b784b67a77 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2352,7 +2352,6 @@ def test_parse_dates_arrow_engine(all_parsers): "b": 1, } ) - expected["a"] = expected["a"].astype("M8[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f94955fbec657..655d36e927e94 100644 --- a/pandas/tests/io/pytables/test_store.py +++ 
b/pandas/tests/io/pytables/test_store.py @@ -611,10 +611,14 @@ def test_store_index_name(setup_path): @pytest.mark.parametrize("table_format", ["table", "fixed"]) def test_store_index_name_numpy_str(tmp_path, table_format, setup_path, unit, tz): # GH #13492 - idx = DatetimeIndex( - [dt.date(2000, 1, 1), dt.date(2000, 1, 2)], - name="cols\u05d2", - ).tz_localize(tz).as_unit(unit) + idx = ( + DatetimeIndex( + [dt.date(2000, 1, 1), dt.date(2000, 1, 2)], + name="cols\u05d2", + ) + .tz_localize(tz) + .as_unit(unit) + ) idx1 = ( DatetimeIndex( [dt.date(2010, 1, 1), dt.date(2010, 1, 2)], diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 59b970afaec89..131be7a77f2e5 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -440,7 +440,9 @@ def test_nat_rfloordiv_timedelta(val, expected): "value", [ DatetimeIndex(["2011-01-01", "2011-01-02"], dtype="M8[ns]", name="x"), - DatetimeIndex(["2011-01-01", "2011-01-02"], dtype="M8[ns, US/Eastern]", name="x"), + DatetimeIndex( + ["2011-01-01", "2011-01-02"], dtype="M8[ns, US/Eastern]", name="x" + ), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], dtype="M8[ns]"), DatetimeArray._from_sequence( ["2011-01-01", "2011-01-02"], dtype=DatetimeTZDtype(tz="US/Pacific") From 4ec85b06de828ed4258a9c0ff502c692128c1c79 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Jan 2024 18:57:43 -0800 Subject: [PATCH 082/105] mypy fixup --- pandas/io/stata.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 347f9e40e37de..37ea940b3938a 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -284,14 +284,14 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: if fmt.startswith(("%tc", "tc")): # Delta ms relative to base td = np.timedelta64(stata_epoch - unix_epoch, "ms") - conv_dates = np.array(dates._values, dtype="M8[ms]") + td - return Series(conv_dates, index=dates.index) + res = np.array(dates._values, dtype="M8[ms]") + td + return Series(res, index=dates.index) elif fmt.startswith(("%td", "td", "%d", "d")): # Delta days relative to base td = np.timedelta64(stata_epoch - unix_epoch, "D") - conv_dates = np.array(dates._values, dtype="M8[D]") + td - return Series(conv_dates, index=dates.index) + res = np.array(dates._values, dtype="M8[D]") + td + return Series(res, index=dates.index) elif fmt.startswith(("%tm", "tm")): # Delta months relative to base @@ -342,8 +342,8 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: per_d = per_y.asfreq("D", how="S") per_d_shifted = per_d + days._values per_s = per_d_shifted.asfreq("s", how="S") - conv_dates = per_s.view("M8[s]") - conv_dates = Series(conv_dates, index=dates.index) + conv_dates_arr = per_s.view("M8[s]") + conv_dates = Series(conv_dates_arr, index=dates.index) else: raise ValueError(f"Date fmt {fmt} not understood") From 5b3d7693b71e705ac700801928e5dc59f36f2f10 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Feb 2024 17:04:15 -0800 Subject: [PATCH 083/105] Update test --- pandas/core/groupby/generic.py | 4 ++-- pandas/tests/resample/test_base.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c90ae4d590b45..9c7e246051111 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1199,7 +1199,7 @@ def idxmin(self, skipna: bool = True) -> Series: >>> ser.groupby(["a", "a", "b", "b"]).idxmin() a 2023-01-01 b 
2023-02-01 - dtype: datetime64[ns] + dtype: datetime64[s] """ return self._idxmax_idxmin("idxmin", skipna=skipna) @@ -1253,7 +1253,7 @@ def idxmax(self, skipna: bool = True) -> Series: >>> ser.groupby(["a", "a", "b", "b"]).idxmax() a 2023-01-15 b 2023-02-15 - dtype: datetime64[ns] + dtype: datetime64[s] """ return self._idxmax_idxmin("idxmax", skipna=skipna) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 9cd51b95d6efd..2e3064ae15091 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -484,7 +484,8 @@ def test_first_last_skipna(any_real_nullable_dtype, skipna, how): method = getattr(rs, how) result = method(skipna=skipna) - gb = df.groupby(df.shape[0] * [pd.to_datetime("2020-01-31")]) + ts = pd.to_datetime("2020-01-31").as_unit("ns") + gb = df.groupby(df.shape[0] * [ts]) expected = getattr(gb, how)(skipna=skipna) expected.index.freq = "ME" tm.assert_frame_equal(result, expected) From b0e9a25a5dd44c0ac9073c4626eb2193891e61dc Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 21 Feb 2024 15:31:51 -0800 Subject: [PATCH 084/105] kludge test --- pandas/tests/io/excel/test_writers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index edfc533e6bc57..3464a3bcb7f2d 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -754,6 +754,9 @@ def test_excel_date_datetime_format(self, ext, tmp_excel, tmp_path): with ExcelFile(filename2) as reader2: rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0) + # TODO: why do we get different units? + rs2 = rs2.astype(f"M8[{unit}]") + tm.assert_frame_equal(rs1, rs2) # Since the reader returns a datetime object for dates, From f71187a82490402490bbc5c34ca40863121c43f3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 7 Mar 2024 08:51:52 -0800 Subject: [PATCH 085/105] update test --- pandas/tests/interchange/test_impl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index e4fa6e4451a4c..74b138db9f056 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -447,7 +447,8 @@ def test_empty_dataframe(): ), ( pd.Series( - [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)] + [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)], + dtype="M8[ns]", ), (DtypeKind.DATETIME, 64, "tsn:", "="), (DtypeKind.INT, 64, ArrowCTypes.INT64, "="), From d78531edfa0ca84a7e262be32f705305f279dca7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 12 Mar 2024 10:08:00 -0700 Subject: [PATCH 086/105] update for min-version tests --- pandas/tests/io/test_parquet.py | 15 +++++++++++++-- pandas/tests/io/test_sql.py | 9 ++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 8919f713fe448..2a42212571451 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -705,7 +705,14 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): expected = df_full.copy() expected.loc[1, "string_with_nan"] = None - expected["datetime_with_nat"] = expected["datetime_with_nat"].astype("M8[ms]") + if pa_version_under11p0: + expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( + "M8[ns]" + ) + else: + expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( + "M8[ms]" + ) 
tm.assert_frame_equal(res, expected) def test_duplicate_columns(self, pa): @@ -959,7 +966,11 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): # they both implement datetime.tzinfo # they both wrap datetime.timedelta() # this use-case sets the resolution to 1 minute - check_round_trip(df, pa, check_dtype=False) + + expected = df[:] + if pa_version_under11p0: + expected.index = expected.index.as_unit("ns") + check_round_trip(df, pa, check_dtype=False, expected=expected) def test_filter_row_groups(self, pa): # https://github.com/pandas-dev/pandas/issues/26551 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 5487f8b9ad201..1cbda6690181e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -92,6 +92,11 @@ def sql_strings(): } +def is_adbc_080(): + adbc = import_optional_dependency("adbc_driver_manager").__version__ == "0.8.0" + return adbc.__version__ == "0.8.0" + + def iris_table_metadata(): import sqlalchemy from sqlalchemy import ( @@ -1820,7 +1825,9 @@ def test_api_custom_dateparsing_error( } ) - if "postgres" in conn_name or "mysql" in conn_name: + if conn_name == "postgresql_adbc_types" and is_adbc_080(): + expected["DateCol"] = expected["DateCol"].astype("datetime64[ns]") + elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") From ffe75ddeba28bb84e5d8ef8dbd1c4d320446ab68 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 13 Mar 2024 09:02:22 -0700 Subject: [PATCH 087/105] fix adbc check --- pandas/tests/io/test_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 1cbda6690181e..3c9bfa4c77f31 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -93,7 +93,9 @@ def sql_strings(): def is_adbc_080(): - adbc = import_optional_dependency("adbc_driver_manager").__version__ == "0.8.0" + adbc = import_optional_dependency("adbc_driver_manager") + if isinstance(adbc, bool): + return False return adbc.__version__ == "0.8.0" From 0d3eab03db2ddb48d2408abee0d2ee3b346dfe95 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 21 Mar 2024 19:56:01 -0700 Subject: [PATCH 088/105] troubleshoot minimum version deps --- ci/deps/actions-39-minimum_versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index 94cb21d1621b6..4399aa748af5c 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -60,6 +60,6 @@ dependencies: - zstandard=0.19.0 - pip: - - adbc-driver-postgresql==0.8.0 + - adbc-driver-postgresql==0.10.0 - adbc-driver-sqlite==0.8.0 - tzdata==2022.7 From 521c58a5bb785487329ff808206a8a18adbb0128 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 22 Mar 2024 07:42:12 -0700 Subject: [PATCH 089/105] troubleshoot --- ci/deps/actions-39-minimum_versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index 4399aa748af5c..b360a3e0566d1 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -60,6 +60,6 @@ dependencies: - zstandard=0.19.0 - pip: - - adbc-driver-postgresql==0.10.0 + - adbc-driver-postgresql==0.9.0 - adbc-driver-sqlite==0.8.0 - tzdata==2022.7 From 
6d8301b4ce395f01a9c5769105596b4077041431 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 22 Mar 2024 10:11:54 -0700 Subject: [PATCH 090/105] troubleshoot --- ci/deps/actions-39-minimum_versions.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index b360a3e0566d1..94cb21d1621b6 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -60,6 +60,6 @@ dependencies: - zstandard=0.19.0 - pip: - - adbc-driver-postgresql==0.9.0 + - adbc-driver-postgresql==0.8.0 - adbc-driver-sqlite==0.8.0 - tzdata==2022.7 From 003e9ddc1416933f3ac89b33367312ab39552c52 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 22 Mar 2024 10:12:45 -0700 Subject: [PATCH 091/105] troubleshoot --- pandas/tests/io/test_sql.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 3c9bfa4c77f31..5487f8b9ad201 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -92,13 +92,6 @@ def sql_strings(): } -def is_adbc_080(): - adbc = import_optional_dependency("adbc_driver_manager") - if isinstance(adbc, bool): - return False - return adbc.__version__ == "0.8.0" - - def iris_table_metadata(): import sqlalchemy from sqlalchemy import ( @@ -1827,9 +1820,7 @@ def test_api_custom_dateparsing_error( } ) - if conn_name == "postgresql_adbc_types" and is_adbc_080(): - expected["DateCol"] = expected["DateCol"].astype("datetime64[ns]") - elif "postgres" in conn_name or "mysql" in conn_name: + if "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") From 2e80e9b2cfebc829d0468be2b2cdfd8e1baff838 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 22 Mar 2024 11:45:32 -0700 Subject: [PATCH 092/105] whatsnew --- doc/source/whatsnew/v3.0.0.rst | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 10d5a518f686d..ab824319524e2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -113,6 +113,68 @@ notable_bug_fix2 Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_300.api_breaking.datetime_resolution_inference: + +Datetime resolution inference +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Converting a sequence of strings, ``datetime`` objects, or ``np.datetime64`` objects to +a ``datetime64`` dtype now performs inference on the appropriate resolution (AKA unit) for the output dtype. This affects :class:`Series`, :class:`DataFrame`, :class:`Index`, :class:`DatetimeIndex`, and :func:`to_datetime`. + +Previously, these would always give nanosecond resolution: + +.. 
code-block:: ipython + + In [1]: dt = pd.Timestamp("2024-03-22 11:36").to_pydatetime() + In [2]: pd.to_datetime([dt]).dtype + Out[2]: dtype('<M8[ns]') From: Brock Date: Sat, 23 Mar 2024 12:28:15 -0700 Subject: [PATCH 093/105] update adbc-driver-postgresql minimum version --- ci/deps/actions-39-minimum_versions.yaml | 2 +- doc/source/whatsnew/v3.0.0.rst | 12 +++++++----- pandas/compat/_optional.py | 2 +- pyproject.toml | 6 +++--- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index 94cb21d1621b6..4399aa748af5c 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -60,6 +60,6 @@ dependencies: - zstandard=0.19.0 - pip: - - adbc-driver-postgresql==0.8.0 + - adbc-driver-postgresql==0.10.0 - adbc-driver-sqlite==0.8.0 - tzdata==2022.7 diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0e127cf6c8e39..1f0fdcce640e5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -192,11 +192,13 @@ For `optional libraries =1.20.1', 'tables>=3.8.0'] spss = ['pyreadstat>=1.2.0'] -postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6', 'adbc-driver-postgresql>=0.8.0'] +postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6', 'adbc-driver-postgresql>=0.10.0'] mysql = ['SQLAlchemy>=2.0.0', 'pymysql>=1.0.2'] -sql-other = ['SQLAlchemy>=2.0.0', 'adbc-driver-postgresql>=0.8.0', 'adbc-driver-sqlite>=0.8.0'] +sql-other = ['SQLAlchemy>=2.0.0', 'adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0'] html = ['beautifulsoup4>=4.11.2', 'html5lib>=1.1', 'lxml>=4.9.2'] xml = ['lxml>=4.9.2'] plot = ['matplotlib>=3.6.3'] output-formatting = ['jinja2>=3.1.2', 'tabulate>=0.9.0'] clipboard = ['PyQt5>=5.15.9', 'qtpy>=2.3.0'] compression = ['zstandard>=0.19.0'] -all = ['adbc-driver-postgresql>=0.8.0', +all = ['adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0', 'beautifulsoup4>=4.11.2', # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) From 9c9998ea18d9ec4910187816ba48720df017e373 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 25 Mar 2024 13:49:21 -0700 Subject: [PATCH 094/105] update doctest --- pandas/core/tools/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a69deb5a26dce..4b2b3e58295cf 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -898,8 +898,8 @@ def to_datetime( Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`, in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`. - >>> pd.to_datetime("13000101", format="%Y%m%d", errors="coerce") - Timestamp('1300-01-01 00:00:00') + >>> pd.to_datetime("invalid for Ymd", format="%Y%m%d", errors="coerce") + NaT .. _to_datetime_tz_examples: From 2aae24fcda677f7c3017a5b8406bf92a89998369 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 29 Apr 2024 12:14:03 -0700 Subject: [PATCH 095/105] fix doc example --- doc/source/whatsnew/v3.0.0.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 8546aef552cad..d6e04d0875102 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -178,9 +178,10 @@ The inferred resolution now matches that of the input strings: In cases with mixed-resolution inputs, the highest resolution is used: -.. ipython:: python +..
code-block:: ipython - In [2]: pd.to_datetime(["2024-03-22 11:43:01", "2024-03-22 11:43:01.002"]).dtype + In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype + Out[2]: dtype('<M8[ms]') From: Brock Date: Tue, 30 Apr 2024 11:52:15 -0700 Subject: [PATCH 096/105] troubleshoot test_api_custom_dateparsing_error --- ci/deps/actions-39-minimum_versions.yaml | 2 +- pandas/compat/_optional.py | 2 +- pyproject.toml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/deps/actions-39-minimum_versions.yaml b/ci/deps/actions-39-minimum_versions.yaml index b760f27a3d4d3..bc067a38f442c 100644 --- a/ci/deps/actions-39-minimum_versions.yaml +++ b/ci/deps/actions-39-minimum_versions.yaml @@ -60,6 +60,6 @@ dependencies: - zstandard=0.19.0 - pip: - - adbc-driver-postgresql==0.10.0 + - adbc-driver-postgresql==0.11.0 - adbc-driver-sqlite==0.8.0 - tzdata==2022.7 diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index f4e717c26d6fd..fb41ee2f3cb0e 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -20,7 +20,7 @@ # deps_minimum.toml & pyproject.toml when updating versions!
VERSIONS = { - "adbc-driver-postgresql": "0.11.0", + "adbc-driver-postgresql": "0.10.0", "adbc-driver-sqlite": "0.8.0", "bs4": "4.11.2", "blosc": "1.21.3", diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index aaefd93a23b31..93bc55a764c9b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1841,10 +1841,11 @@ def test_api_custom_dateparsing_error( "IntDateCol": "int32", "IntDateOnlyCol": "int32", "IntCol": "int32", + "DateCol": "datetime64[ns]", } ) - if "postgres" in conn_name or "mysql" in conn_name: + elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") diff --git a/pyproject.toml b/pyproject.toml index f42de22ed0d3f..085c054f8241a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,16 +73,16 @@ hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/i #'blosc>=1.20.1', 'tables>=3.8.0'] spss = ['pyreadstat>=1.2.0'] -postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6', 'adbc-driver-postgresql>=0.11.0'] +postgresql = ['SQLAlchemy>=2.0.0', 'psycopg2>=2.9.6', 'adbc-driver-postgresql>=0.10.0'] mysql = ['SQLAlchemy>=2.0.0', 'pymysql>=1.0.2'] -sql-other = ['SQLAlchemy>=2.0.0', 'adbc-driver-postgresql>=0.11.0', 'adbc-driver-sqlite>=0.8.0'] +sql-other = ['SQLAlchemy>=2.0.0', 'adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0'] html = ['beautifulsoup4>=4.11.2', 'html5lib>=1.1', 'lxml>=4.9.2'] xml = ['lxml>=4.9.2'] plot = ['matplotlib>=3.6.3'] output-formatting = ['jinja2>=3.1.2', 'tabulate>=0.9.0'] clipboard = ['PyQt5>=5.15.9', 'qtpy>=2.3.0'] compression = ['zstandard>=0.19.0'] -all = ['adbc-driver-postgresql>=0.11.0', +all = ['adbc-driver-postgresql>=0.10.0', 'adbc-driver-sqlite>=0.8.0', 'beautifulsoup4>=4.11.2', # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) From 92c0df31e0b54c4a8b62ea7c5f538f7a9e46e048 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 May 2024 10:55:58 -0700 Subject: [PATCH 098/105] troubleshoot --- pandas/tests/io/test_sql.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 93bc55a764c9b..8323d4b2d13f9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1841,10 +1841,11 @@ def test_api_custom_dateparsing_error( "IntDateCol": "int32", "IntDateOnlyCol": "int32", "IntCol": "int32", - "DateCol": "datetime64[ns]", } ) + if conn_name == "postgresql_adbc_types" and is_adbc_010(): + pass elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: @@ -1852,6 +1853,13 @@ def test_api_custom_dateparsing_error( tm.assert_frame_equal(result, expected) +def is_adbc_010(): + adbc = import_optional_dependency("adbc_driver_manager") + if isinstance(adbc, bool): + return False + return adbc.__version__ == "0.10.0" + + @pytest.mark.parametrize("conn", all_connectable_types) def test_api_date_and_index(conn, request): # Test case where same column appears in parse_date and index_col From 124c3d971ff9ae261e284148e7703a13b468902a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 May 2024 08:52:48 -0700 Subject: [PATCH 099/105] troubleshoot --- pandas/tests/io/test_sql.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 
8323d4b2d13f9..ea48e90dc2978 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1844,8 +1844,9 @@ def test_api_custom_dateparsing_error( } ) - if conn_name == "postgresql_adbc_types" and is_adbc_010(): - pass + if conn_name == "postgresql_adbc_types": + adbc = import_optional_dependency("adbc_driver_manager") + assert adbc == "0.10.0", adbc elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: @@ -1853,13 +1854,6 @@ def test_api_custom_dateparsing_error( tm.assert_frame_equal(result, expected) -def is_adbc_010(): - adbc = import_optional_dependency("adbc_driver_manager") - if isinstance(adbc, bool): - return False - return adbc.__version__ == "0.10.0" - - @pytest.mark.parametrize("conn", all_connectable_types) def test_api_date_and_index(conn, request): # Test case where same column appears in parse_date and index_col From f9213161d63e01b5312e0a082d7691b6888a408d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 May 2024 12:05:16 -0700 Subject: [PATCH 100/105] troubleshoot --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index ea48e90dc2978..ea0caf2fa11cd 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1846,7 +1846,7 @@ def test_api_custom_dateparsing_error( if conn_name == "postgresql_adbc_types": adbc = import_optional_dependency("adbc_driver_manager") - assert adbc == "0.10.0", adbc + assert adbc.__version__ == "0.10.0", adbc.__version__ elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: From 8c3bb89556ec9c1310a9b4f4e09ce69aa5f4404e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 May 2024 13:09:46 -0700 Subject: [PATCH 101/105] troubleshoot --- pandas/tests/io/test_sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index ea0caf2fa11cd..af1bebfda664a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1846,7 +1846,9 @@ def test_api_custom_dateparsing_error( if conn_name == "postgresql_adbc_types": adbc = import_optional_dependency("adbc_driver_manager") - assert adbc.__version__ == "0.10.0", adbc.__version__ + assert adbc.__version__ == "0.11.0", adbc.__version__ + assert result["DateCol"].dtype == "M8[us]", result["DateCol"].dtype + expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: From 5327bb6dd49ca0965aece5b2469b41b620d3cfc7 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 May 2024 14:21:36 -0700 Subject: [PATCH 102/105] troubleshoot --- pandas/tests/io/test_sql.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index af1bebfda664a..2d86e08d3ff44 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1844,11 +1844,8 @@ def test_api_custom_dateparsing_error( } ) - if conn_name == "postgresql_adbc_types": - adbc = import_optional_dependency("adbc_driver_manager") - assert adbc.__version__ == "0.11.0", adbc.__version__ - assert result["DateCol"].dtype == "M8[us]", result["DateCol"].dtype - expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") + if conn_name == "postgresql_adbc_types" and pa_version_under14p1: 
+ expected["DateCol"] = expected["DateCol"].astype("datetime64[ns]") elif "postgres" in conn_name or "mysql" in conn_name: expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: From f322e2c5f3c98abf1181dbd9001c1fa718128397 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 May 2024 14:55:56 -0700 Subject: [PATCH 103/105] update exp instead of object cast --- pandas/tests/indexes/datetimes/test_constructors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 239f054f20683..43a7cdf63d9b9 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -540,9 +540,9 @@ def test_construction_outofbounds(self): datetime(5000, 1, 1), datetime(6000, 1, 1), ] - exp = Index(dates, dtype=object) - # coerces to object - tm.assert_index_equal(Index(dates).astype(object), exp) + exp = Index(dates, dtype="M8[us]") + res = Index(dates) + tm.assert_index_equal(res, exp) DatetimeIndex(dates) From e2ed6120b074994854ab09d0ea71650a5c13e3d4 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 May 2024 15:05:15 -0700 Subject: [PATCH 104/105] revert accidental --- pandas/tests/arithmetic/test_timedelta64.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 66f84a16b11d9..0ecb8f9bef468 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -494,6 +494,7 @@ def test_addition_ops(self): tdi + Index([1, 2, 3], dtype=np.int64) # this is a union! + # FIXME: don't leave commented-out # pytest.raises(TypeError, lambda : Index([1,2,3]) + tdi) result = tdi + dti # name will be reset From 4aa851306a84f2827a5e055a6feca04c5af2cf08 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 22 May 2024 11:12:09 -0700 Subject: [PATCH 105/105] simplify test --- pandas/tests/tools/test_to_datetime.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a699f7bcab5b2..cbbd018720bad 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2942,15 +2942,7 @@ def test_parsers(self, date_str, expected, cache): for res in [result1, result2]: assert res == expected - for res in [result3, result4]: - exp = DatetimeIndex([Timestamp(expected)]) - exp = exp.as_unit(reso) - tm.assert_index_equal(res, exp) - - for res in [result6, result8, result9]: - # These cases go through array_to_datetime, not array_to_strptime. - # This means that as of GH#??? they do resolution inference - # while the other cases do not. + for res in [result3, result4, result6, result8, result9]: exp = DatetimeIndex([Timestamp(expected)]).as_unit(reso) tm.assert_index_equal(res, exp)
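A minimal sketch of the resolution-inference behavior this series converges on, assuming pandas 3.0 (the whatsnew target of these patches); the expected dtypes are taken from the doctest and whatsnew updates in the diffs above, and earlier pandas versions return nanosecond resolution throughout:

    import datetime

    import pandas as pd

    # Strings infer the coarsest resolution that represents them exactly;
    # a second-level string gives a second-resolution dtype.
    assert pd.to_datetime(["2024-03-22 11:43:01"]).dtype == "M8[s]"

    # datetime.datetime objects map to microseconds, their native precision.
    dt = datetime.datetime(2024, 3, 22, 11, 36)
    assert pd.to_datetime([dt]).dtype == "M8[us]"

    # Mixed-resolution inputs take the highest resolution present
    # (seconds + milliseconds gives milliseconds).
    mixed = pd.to_datetime(
        [pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]
    )
    assert mixed.dtype == "M8[ms]"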