Skip to content

Commit 8476683

Browse files
committed
BUG: fixed Series.dt methods in ArrowDtype class that were returning incorrect time values. (pandas-dev#57355)
1 parent eadc129 commit 8476683

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ Other
483483
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
484484
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
485485
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
486+
- Bug in :meth:`Series.dt` methods for :class:`ArrowDtype` with a ``pyarrow.duration`` type that were returning incorrect values. (:issue:`57355`)
486487
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
487488
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
488489
- Bug in DataFrame Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

pandas/core/arrays/arrow/array.py

+18-17
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
from pandas._libs import lib
2020
from pandas._libs.tslibs import (
21-
NaT,
2221
Timedelta,
2322
Timestamp,
2423
timezones,
@@ -2613,17 +2612,19 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
26132612
@property
26142613
def _dt_days(self) -> Self:
26152614
return type(self)(
2616-
pa.array(self._to_timedeltaarray().days, from_pandas=True, type=pa.int32())
2615+
pa.array(
2616+
self._to_timedeltaarray().components.days,
2617+
from_pandas=True,
2618+
type=pa.int32(),
2619+
)
26172620
)
26182621

26192622
@property
26202623
def _dt_hours(self) -> Self:
26212624
return type(self)(
26222625
pa.array(
2623-
[
2624-
td.components.hours if td is not NaT else None
2625-
for td in self._to_timedeltaarray()
2626-
],
2626+
self._to_timedeltaarray().components.hours,
2627+
from_pandas=True,
26272628
type=pa.int32(),
26282629
)
26292630
)
@@ -2632,10 +2633,8 @@ def _dt_hours(self) -> Self:
26322633
def _dt_minutes(self) -> Self:
26332634
return type(self)(
26342635
pa.array(
2635-
[
2636-
td.components.minutes if td is not NaT else None
2637-
for td in self._to_timedeltaarray()
2638-
],
2636+
self._to_timedeltaarray().components.minutes,
2637+
from_pandas=True,
26392638
type=pa.int32(),
26402639
)
26412640
)
@@ -2644,18 +2643,18 @@ def _dt_minutes(self) -> Self:
26442643
def _dt_seconds(self) -> Self:
26452644
return type(self)(
26462645
pa.array(
2647-
self._to_timedeltaarray().seconds, from_pandas=True, type=pa.int32()
2646+
self._to_timedeltaarray().components.seconds,
2647+
from_pandas=True,
2648+
type=pa.int32(),
26482649
)
26492650
)
26502651

26512652
@property
26522653
def _dt_milliseconds(self) -> Self:
26532654
return type(self)(
26542655
pa.array(
2655-
[
2656-
td.components.milliseconds if td is not NaT else None
2657-
for td in self._to_timedeltaarray()
2658-
],
2656+
self._to_timedeltaarray().components.milliseconds,
2657+
from_pandas=True,
26592658
type=pa.int32(),
26602659
)
26612660
)
@@ -2664,7 +2663,7 @@ def _dt_milliseconds(self) -> Self:
26642663
def _dt_microseconds(self) -> Self:
26652664
return type(self)(
26662665
pa.array(
2667-
self._to_timedeltaarray().microseconds,
2666+
self._to_timedeltaarray().components.microseconds,
26682667
from_pandas=True,
26692668
type=pa.int32(),
26702669
)
@@ -2674,7 +2673,9 @@ def _dt_microseconds(self) -> Self:
26742673
def _dt_nanoseconds(self) -> Self:
26752674
return type(self)(
26762675
pa.array(
2677-
self._to_timedeltaarray().nanoseconds, from_pandas=True, type=pa.int32()
2676+
self._to_timedeltaarray().components.nanoseconds,
2677+
from_pandas=True,
2678+
type=pa.int32(),
26782679
)
26792680
)
26802681

pandas/tests/extension/test_arrow.py

+25
Original file line numberDiff line numberDiff line change
@@ -2905,6 +2905,31 @@ def test_dt_components():
29052905
tm.assert_frame_equal(result, expected)
29062906

29072907

2908+
def test_dt_components_large_values():
2909+
ser = pd.Series(
2910+
[
2911+
pd.Timedelta("365 days 23:59:59.999000"),
2912+
None,
2913+
],
2914+
dtype=ArrowDtype(pa.duration("ns")),
2915+
)
2916+
result = ser.dt.components
2917+
expected = pd.DataFrame(
2918+
[[365, 23, 59, 59, 999, 0, 0], [None, None, None, None, None, None, None]],
2919+
columns=[
2920+
"days",
2921+
"hours",
2922+
"minutes",
2923+
"seconds",
2924+
"milliseconds",
2925+
"microseconds",
2926+
"nanoseconds",
2927+
],
2928+
dtype="int32[pyarrow]",
2929+
)
2930+
tm.assert_frame_equal(result, expected)
2931+
2932+
29082933
@pytest.mark.parametrize("skipna", [True, False])
29092934
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
29102935
# GH51624

0 commit comments

Comments
 (0)