From d0067ba42e0f20814301c5bd1afe1bb1194944b6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 12:02:35 -0700 Subject: [PATCH 1/7] BUG: TimedeltaIndex.__repr__ with non-nano and round values --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/core/arrays/datetimelike.py | 26 +++++++++++++++++++ pandas/io/formats/format.py | 11 +------- .../tests/indexes/timedeltas/test_formats.py | 11 ++++++++ 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 4749ceec4a330..ea6539f93de86 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -296,7 +296,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- +- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values with non-nanosecond resolution entries that are all multiples of 24 hours failing to use the compact representation used in the nanosecond cases (:issue:`??`) - Timezones diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 52596f29ffc0c..594da1a0c0619 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -41,6 +41,7 @@ iNaT, ints_to_pydatetime, ints_to_pytimedelta, + periods_per_day, to_offset, ) from pandas._libs.tslibs.fields import ( @@ -2312,6 +2313,31 @@ def interpolate( return self return type(self)._simple_new(out_data, dtype=self.dtype) + # -------------------------------------------------------------- + # Unsorted + + @property + def _is_dates_only(self) -> bool: + """ + Check if we are round times at midnight (and no timezone), which will + be given a more compact __repr__ than other cases. For TimedeltaArray + we are checking for multiples of 24H. + """ + if not lib.is_np_dtype(self.dtype): + # i.e. we have a timezone + return False + + values_int = self.asi8 + consider_values = values_int != iNaT + dtype = cast(np.dtype, self.dtype) # since we checked tz above + reso = get_unit_from_dtype(dtype) + ppd = periods_per_day(reso) + + # TODO: can we reuse is_date_array_normalized? would need a skipna kwd + # (first attempt at this was less performant than this implementation) + even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 + return even_days + # ------------------------------------------------------------------- # Shared Constructor Helpers diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index cf9324cdbb9f2..ad7e9ed9bb847 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -106,7 +106,6 @@ SequenceNotStr, StorageOptions, WriteBuffer, - npt, ) from pandas import ( @@ -1814,15 +1813,7 @@ def get_format_timedelta64( If box, then show the return in quotes """ - values_int = values.view(np.int64) - values_int = cast("npt.NDArray[np.int64]", values_int) - - consider_values = values_int != iNaT - - one_day_nanos = 86400 * 10**9 - not_midnight = values_int % one_day_nanos != 0 - both = np.logical_and(consider_values, not_midnight) - even_days = both.sum() == 0 + even_days = values._is_dates_only if even_days: format = None diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 751f9e4cc9eee..fb652a7dbc14b 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -8,6 +8,17 @@ class TestTimedeltaIndexRendering: + def test_repr_round_days_non_nano(self): + # we should get "1 days", not "1 days 00:00:00" with non-nano + tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s") + result = repr(tdi) + expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='D')" + assert result == expected + + result2 = repr(Series(tdi)) + expected2 = "0 1 days\ndtype: timedelta64[s]" + assert result2 == expected2 + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) def test_representation(self, method): idx1 = TimedeltaIndex([], freq="D") From 9ee860a34ab32d24d01ed47ddc4da26bde73a0b3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 12:03:33 -0700 Subject: [PATCH 2/7] GH ref --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/tests/indexes/timedeltas/test_formats.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ea6539f93de86..7743f762d8898 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -296,7 +296,7 @@ Datetimelike Timedelta ^^^^^^^^^ -- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values with non-nanosecond resolution entries that are all multiples of 24 hours failing to use the compact representation used in the nanosecond cases (:issue:`??`) +- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values with non-nanosecond resolution entries that are all multiples of 24 hours failing to use the compact representation used in the nanosecond cases (:issue:`55405`) - Timezones diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index fb652a7dbc14b..ee090bd0aaf0a 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -9,6 +9,7 @@ class TestTimedeltaIndexRendering: def test_repr_round_days_non_nano(self): + # GH#55405 # we should get "1 days", not "1 days 00:00:00" with non-nano tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s") result = repr(tdi) From 5a2e17302215de2c7729faec65a918ca9c20c842 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 12:45:52 -0700 Subject: [PATCH 3/7] mypy fixup --- pandas/core/arrays/datetimelike.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 594da1a0c0619..a2960a2870882 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2329,8 +2329,7 @@ def _is_dates_only(self) -> bool: values_int = self.asi8 consider_values = values_int != iNaT - dtype = cast(np.dtype, self.dtype) # since we checked tz above - reso = get_unit_from_dtype(dtype) + reso = get_unit_from_dtype(self.dtype) ppd = periods_per_day(reso) # TODO: can we reuse is_date_array_normalized? would need a skipna kwd From e9e5f18d54e7cd6fb497983324e1e6b3002c2024 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 13:30:30 -0700 Subject: [PATCH 4/7] update doctest --- pandas/core/indexes/timedeltas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index cd6a4883946d2..5ce3dd33eee48 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -336,8 +336,7 @@ def timedelta_range( **Specify a unit** >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s") - TimedeltaIndex(['1 days 00:00:00', '100001 days 00:00:00', - '200001 days 00:00:00'], + TimedeltaIndex(['1 days', '100001 days', '200001 days'], dtype='timedelta64[s]', freq='100000D') """ if freq is None and com.any_none(periods, start, end): From 8ab0c6ad4603f83aef94d5f5795a5767b6a199c2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 13:58:37 -0700 Subject: [PATCH 5/7] REF: remove redundant _is_dates_only --- pandas/core/arrays/datetimes.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index fae42f170a6b6..9544a6163562f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -28,14 +28,12 @@ get_resolution, get_supported_reso, get_unit_from_dtype, - iNaT, ints_to_pydatetime, is_date_array_normalized, is_supported_unit, is_unitless, normalize_i8_timestamps, npy_unit_to_abbrev, - periods_per_day, timezones, to_offset, tz_convert_from_utc, @@ -745,25 +743,6 @@ def _format_native_types( self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso ) - @property - def _is_dates_only(self) -> bool: - """ - Check if we are round times at midnight (and no timezone), which will - be given a more compact __repr__ than other cases. - """ - if self.tz is not None: - return False - - values_int = self.asi8 - consider_values = values_int != iNaT - dtype = cast(np.dtype, self.dtype) # since we checked tz above - reso = get_unit_from_dtype(dtype) - ppd = periods_per_day(reso) - - # TODO: can we reuse is_date_array_normalized? would need a skipna kwd - even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0 - return even_days - # ----------------------------------------------------------------- # Comparison Methods From b2071313283b38733eaa474fb21b09884fde8871 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 14:03:47 -0700 Subject: [PATCH 6/7] Fix wrong types passed to formatters --- pandas/core/arrays/categorical.py | 6 ++--- pandas/tests/io/formats/test_format.py | 32 +++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5b4a2a524f600..5059f5d000ccd 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2167,11 +2167,11 @@ def _repr_categories(self) -> list[str]: ) if len(self.categories) > max_categories: num = max_categories // 2 - head = format_array(self.categories[:num]) - tail = format_array(self.categories[-num:]) + head = format_array(self.categories[:num]._values) + tail = format_array(self.categories[-num:]._values) category_strs = head + ["..."] + tail else: - category_strs = format_array(self.categories) + category_strs = format_array(self.categories._values) # Strip all leading spaces, which format_array adds for columns... category_strs = [x.strip() for x in category_strs] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 642ee6446e200..7e5737dfadca0 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3186,7 +3186,7 @@ def test_all(self): class TestTimedelta64Formatter: def test_days(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values result = fmt.Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'1 days'" @@ -3202,48 +3202,48 @@ def test_days(self): assert result[0].strip() == "1 days" def test_days_neg(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D") + x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values result = fmt.Timedelta64Formatter(-x, box=True).get_result() assert result[0].strip() == "'0 days'" assert result[1].strip() == "'-1 days'" def test_subdays(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values result = fmt.Timedelta64Formatter(y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'0 days 00:00:01'" def test_subdays_neg(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s") + y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values result = fmt.Timedelta64Formatter(-y, box=True).get_result() assert result[0].strip() == "'0 days 00:00:00'" assert result[1].strip() == "'-1 days +23:59:59'" def test_zero(self): - x = pd.to_timedelta(list(range(1)) + [NaT], unit="D") + x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values result = fmt.Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" - x = pd.to_timedelta(list(range(1)), unit="D") + x = pd.to_timedelta(list(range(1)), unit="D")._values result = fmt.Timedelta64Formatter(x, box=True).get_result() assert result[0].strip() == "'0 days'" class TestDatetime64Formatter: def test_mixed(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT]) + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])._values result = fmt.Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01 00:00:00" assert result[1].strip() == "2013-01-01 12:00:00" def test_dates(self): - x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT]) + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])._values result = fmt.Datetime64Formatter(x).get_result() assert result[0].strip() == "2013-01-01" assert result[1].strip() == "2013-01-02" def test_date_nanos(self): - x = Series([Timestamp(200)]) + x = Series([Timestamp(200)])._values result = fmt.Datetime64Formatter(x).get_result() assert result[0].strip() == "1970-01-01 00:00:00.000000200" @@ -3252,41 +3252,41 @@ def test_dates_display(self): # make sure that we are consistently display date formatting x = Series(date_range("20130101 09:00:00", periods=5, freq="D")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt.Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-05 09:00:00" x = Series(date_range("20130101 09:00:00", periods=5, freq="s")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt.Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:04" x = Series(date_range("20130101 09:00:00", periods=5, freq="ms")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt.Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.004" x = Series(date_range("20130101 09:00:00", periods=5, freq="us")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt.Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.000004" x = Series(date_range("20130101 09:00:00", periods=5, freq="ns")) x.iloc[1] = np.nan - result = fmt.Datetime64Formatter(x).get_result() + result = fmt.Datetime64Formatter(x._values).get_result() assert result[0].strip() == "2013-01-01 09:00:00.000000000" assert result[1].strip() == "NaT" assert result[4].strip() == "2013-01-01 09:00:00.000000004" def test_datetime64formatter_yearmonth(self): - x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)]) + x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])._values def format_func(x): return x.strftime("%Y-%m") @@ -3298,7 +3298,7 @@ def format_func(x): def test_datetime64formatter_hoursecond(self): x = Series( pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f") - ) + )._values def format_func(x): return x.strftime("%H:%M") From 3b91b0b2a4d83b3b595b04d1538b2f53b3f1ffa2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 4 Oct 2023 14:45:33 -0700 Subject: [PATCH 7/7] CLN: remove unused import --- pandas/io/formats/format.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index fc823e3a499d2..e17201a94a3d7 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -42,7 +42,6 @@ NaT, Timedelta, Timestamp, - iNaT, ) from pandas._libs.tslibs.nattype import NaTType