Skip to content

Commit e4b7174

Browse files
authored
BUG: TimedeltaIndex.__repr__ with non-nano and round values (#55405)
* BUG: TimedeltaIndex.__repr__ with non-nano and round values
* GH ref
* mypy fixup
* update doctest
* REF: remove redundant _is_dates_only
* Fix wrong types passed to formatters
* CLN: remove unused import
1 parent 6c58a21 commit e4b7174

File tree

8 files changed

+59
-54
lines changed

8 files changed

+59
-54
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ Datetimelike
296296

297297
Timedelta
298298
^^^^^^^^^
299-
-
299+
- Bug in rendering (``__repr__``) of :class:`TimedeltaIndex` and :class:`Series` with timedelta64 values of non-nanosecond resolution, where entries that are all multiples of 24 hours failed to use the compact representation used in the nanosecond case (:issue:`55405`)
300300
-
301301

302302
Timezones

pandas/core/arrays/categorical.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2167,11 +2167,11 @@ def _repr_categories(self) -> list[str]:
21672167
)
21682168
if len(self.categories) > max_categories:
21692169
num = max_categories // 2
2170-
head = format_array(self.categories[:num])
2171-
tail = format_array(self.categories[-num:])
2170+
head = format_array(self.categories[:num]._values)
2171+
tail = format_array(self.categories[-num:]._values)
21722172
category_strs = head + ["..."] + tail
21732173
else:
2174-
category_strs = format_array(self.categories)
2174+
category_strs = format_array(self.categories._values)
21752175

21762176
# Strip all leading spaces, which format_array adds for columns...
21772177
category_strs = [x.strip() for x in category_strs]

pandas/core/arrays/datetimelike.py

+25
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
iNaT,
4242
ints_to_pydatetime,
4343
ints_to_pytimedelta,
44+
periods_per_day,
4445
to_offset,
4546
)
4647
from pandas._libs.tslibs.fields import (
@@ -2312,6 +2313,30 @@ def interpolate(
23122313
return self
23132314
return type(self)._simple_new(out_data, dtype=self.dtype)
23142315

2316+
# --------------------------------------------------------------
2317+
# Unsorted
2318+
2319+
@property
2320+
def _is_dates_only(self) -> bool:
2321+
"""
2322+
Check if we are round times at midnight (and no timezone), which will
2323+
be given a more compact __repr__ than other cases. For TimedeltaArray
2324+
we are checking for multiples of 24H.
2325+
"""
2326+
if not lib.is_np_dtype(self.dtype):
2327+
# i.e. we have a timezone
2328+
return False
2329+
2330+
values_int = self.asi8
2331+
consider_values = values_int != iNaT
2332+
reso = get_unit_from_dtype(self.dtype)
2333+
ppd = periods_per_day(reso)
2334+
2335+
# TODO: can we reuse is_date_array_normalized? would need a skipna kwd
2336+
# (first attempt at this was less performant than this implementation)
2337+
even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
2338+
return even_days
2339+
23152340

23162341
# -------------------------------------------------------------------
23172342
# Shared Constructor Helpers

pandas/core/arrays/datetimes.py

-21
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,12 @@
2828
get_resolution,
2929
get_supported_reso,
3030
get_unit_from_dtype,
31-
iNaT,
3231
ints_to_pydatetime,
3332
is_date_array_normalized,
3433
is_supported_unit,
3534
is_unitless,
3635
normalize_i8_timestamps,
3736
npy_unit_to_abbrev,
38-
periods_per_day,
3937
timezones,
4038
to_offset,
4139
tz_convert_from_utc,
@@ -745,25 +743,6 @@ def _format_native_types(
745743
self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso
746744
)
747745

748-
@property
749-
def _is_dates_only(self) -> bool:
750-
"""
751-
Check if we are round times at midnight (and no timezone), which will
752-
be given a more compact __repr__ than other cases.
753-
"""
754-
if self.tz is not None:
755-
return False
756-
757-
values_int = self.asi8
758-
consider_values = values_int != iNaT
759-
dtype = cast(np.dtype, self.dtype) # since we checked tz above
760-
reso = get_unit_from_dtype(dtype)
761-
ppd = periods_per_day(reso)
762-
763-
# TODO: can we reuse is_date_array_normalized? would need a skipna kwd
764-
even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
765-
return even_days
766-
767746
# -----------------------------------------------------------------
768747
# Comparison Methods
769748

pandas/core/indexes/timedeltas.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,7 @@ def timedelta_range(
336336
**Specify a unit**
337337
338338
>>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s")
339-
TimedeltaIndex(['1 days 00:00:00', '100001 days 00:00:00',
340-
'200001 days 00:00:00'],
339+
TimedeltaIndex(['1 days', '100001 days', '200001 days'],
341340
dtype='timedelta64[s]', freq='100000D')
342341
"""
343342
if freq is None and com.any_none(periods, start, end):

pandas/io/formats/format.py

+1-11
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
NaT,
4242
Timedelta,
4343
Timestamp,
44-
iNaT,
4544
)
4645
from pandas._libs.tslibs.nattype import NaTType
4746

@@ -103,7 +102,6 @@
103102
SequenceNotStr,
104103
StorageOptions,
105104
WriteBuffer,
106-
npt,
107105
)
108106

109107
from pandas import (
@@ -1775,15 +1773,7 @@ def get_format_timedelta64(
17751773
17761774
If box, then show the return in quotes
17771775
"""
1778-
values_int = values.view(np.int64)
1779-
values_int = cast("npt.NDArray[np.int64]", values_int)
1780-
1781-
consider_values = values_int != iNaT
1782-
1783-
one_day_nanos = 86400 * 10**9
1784-
not_midnight = values_int % one_day_nanos != 0
1785-
both = np.logical_and(consider_values, not_midnight)
1786-
even_days = both.sum() == 0
1776+
even_days = values._is_dates_only
17871777

17881778
if even_days:
17891779
format = None

pandas/tests/indexes/timedeltas/test_formats.py

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,18 @@
88

99

1010
class TestTimedeltaIndexRendering:
11+
def test_repr_round_days_non_nano(self):
12+
# GH#55405
13+
# we should get "1 days", not "1 days 00:00:00" with non-nano
14+
tdi = TimedeltaIndex(["1 days"], freq="D").as_unit("s")
15+
result = repr(tdi)
16+
expected = "TimedeltaIndex(['1 days'], dtype='timedelta64[s]', freq='D')"
17+
assert result == expected
18+
19+
result2 = repr(Series(tdi))
20+
expected2 = "0 1 days\ndtype: timedelta64[s]"
21+
assert result2 == expected2
22+
1123
@pytest.mark.parametrize("method", ["__repr__", "__str__"])
1224
def test_representation(self, method):
1325
idx1 = TimedeltaIndex([], freq="D")

pandas/tests/io/formats/test_format.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -3186,7 +3186,7 @@ def test_all(self):
31863186

31873187
class TestTimedelta64Formatter:
31883188
def test_days(self):
3189-
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")
3189+
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
31903190
result = fmt._Timedelta64Formatter(x, box=True).get_result()
31913191
assert result[0].strip() == "'0 days'"
31923192
assert result[1].strip() == "'1 days'"
@@ -3202,48 +3202,48 @@ def test_days(self):
32023202
assert result[0].strip() == "1 days"
32033203

32043204
def test_days_neg(self):
3205-
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")
3205+
x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values
32063206
result = fmt._Timedelta64Formatter(-x, box=True).get_result()
32073207
assert result[0].strip() == "'0 days'"
32083208
assert result[1].strip() == "'-1 days'"
32093209

32103210
def test_subdays(self):
3211-
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")
3211+
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
32123212
result = fmt._Timedelta64Formatter(y, box=True).get_result()
32133213
assert result[0].strip() == "'0 days 00:00:00'"
32143214
assert result[1].strip() == "'0 days 00:00:01'"
32153215

32163216
def test_subdays_neg(self):
3217-
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")
3217+
y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values
32183218
result = fmt._Timedelta64Formatter(-y, box=True).get_result()
32193219
assert result[0].strip() == "'0 days 00:00:00'"
32203220
assert result[1].strip() == "'-1 days +23:59:59'"
32213221

32223222
def test_zero(self):
3223-
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")
3223+
x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values
32243224
result = fmt._Timedelta64Formatter(x, box=True).get_result()
32253225
assert result[0].strip() == "'0 days'"
32263226

3227-
x = pd.to_timedelta(list(range(1)), unit="D")
3227+
x = pd.to_timedelta(list(range(1)), unit="D")._values
32283228
result = fmt._Timedelta64Formatter(x, box=True).get_result()
32293229
assert result[0].strip() == "'0 days'"
32303230

32313231

32323232
class Test_Datetime64Formatter:
32333233
def test_mixed(self):
3234-
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])
3234+
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), NaT])._values
32353235
result = fmt._Datetime64Formatter(x).get_result()
32363236
assert result[0].strip() == "2013-01-01 00:00:00"
32373237
assert result[1].strip() == "2013-01-01 12:00:00"
32383238

32393239
def test_dates(self):
3240-
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])
3240+
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), NaT])._values
32413241
result = fmt._Datetime64Formatter(x).get_result()
32423242
assert result[0].strip() == "2013-01-01"
32433243
assert result[1].strip() == "2013-01-02"
32443244

32453245
def test_date_nanos(self):
3246-
x = Series([Timestamp(200)])
3246+
x = Series([Timestamp(200)])._values
32473247
result = fmt._Datetime64Formatter(x).get_result()
32483248
assert result[0].strip() == "1970-01-01 00:00:00.000000200"
32493249

@@ -3252,41 +3252,41 @@ def test_dates_display(self):
32523252
# make sure that we consistently display date formatting
32533253
x = Series(date_range("20130101 09:00:00", periods=5, freq="D"))
32543254
x.iloc[1] = np.nan
3255-
result = fmt._Datetime64Formatter(x).get_result()
3255+
result = fmt._Datetime64Formatter(x._values).get_result()
32563256
assert result[0].strip() == "2013-01-01 09:00:00"
32573257
assert result[1].strip() == "NaT"
32583258
assert result[4].strip() == "2013-01-05 09:00:00"
32593259

32603260
x = Series(date_range("20130101 09:00:00", periods=5, freq="s"))
32613261
x.iloc[1] = np.nan
3262-
result = fmt._Datetime64Formatter(x).get_result()
3262+
result = fmt._Datetime64Formatter(x._values).get_result()
32633263
assert result[0].strip() == "2013-01-01 09:00:00"
32643264
assert result[1].strip() == "NaT"
32653265
assert result[4].strip() == "2013-01-01 09:00:04"
32663266

32673267
x = Series(date_range("20130101 09:00:00", periods=5, freq="ms"))
32683268
x.iloc[1] = np.nan
3269-
result = fmt._Datetime64Formatter(x).get_result()
3269+
result = fmt._Datetime64Formatter(x._values).get_result()
32703270
assert result[0].strip() == "2013-01-01 09:00:00.000"
32713271
assert result[1].strip() == "NaT"
32723272
assert result[4].strip() == "2013-01-01 09:00:00.004"
32733273

32743274
x = Series(date_range("20130101 09:00:00", periods=5, freq="us"))
32753275
x.iloc[1] = np.nan
3276-
result = fmt._Datetime64Formatter(x).get_result()
3276+
result = fmt._Datetime64Formatter(x._values).get_result()
32773277
assert result[0].strip() == "2013-01-01 09:00:00.000000"
32783278
assert result[1].strip() == "NaT"
32793279
assert result[4].strip() == "2013-01-01 09:00:00.000004"
32803280

32813281
x = Series(date_range("20130101 09:00:00", periods=5, freq="ns"))
32823282
x.iloc[1] = np.nan
3283-
result = fmt._Datetime64Formatter(x).get_result()
3283+
result = fmt._Datetime64Formatter(x._values).get_result()
32843284
assert result[0].strip() == "2013-01-01 09:00:00.000000000"
32853285
assert result[1].strip() == "NaT"
32863286
assert result[4].strip() == "2013-01-01 09:00:00.000000004"
32873287

32883288
def test_datetime64formatter_yearmonth(self):
3289-
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])
3289+
x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)])._values
32903290

32913291
def format_func(x):
32923292
return x.strftime("%Y-%m")
@@ -3298,7 +3298,7 @@ def format_func(x):
32983298
def test_datetime64formatter_hoursecond(self):
32993299
x = Series(
33003300
pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")
3301-
)
3301+
)._values
33023302

33033303
def format_func(x):
33043304
return x.strftime("%H:%M")

0 commit comments

Comments
 (0)