Skip to content

Commit ddf075c

Browse files
committed
BUG: fixed Series.dt methods in ArrowDtype class that were returning incorrect time values. (pandas-dev#57355)
1 parent eadc129 commit ddf075c

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ Other
483483
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
484484
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
485485
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
486+
- Bug in :class:`ArrowDtype` duration :meth:`Series.dt` methods ``seconds``, ``microseconds`` and ``nanoseconds`` returning incorrect component values, e.g. in :attr:`Series.dt.components`. (:issue:`57355`)
486487
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
487488
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
488489
- Bug in DataFrame Interchange Protocol implementation returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

pandas/core/arrays/arrow/array.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -2644,7 +2644,11 @@ def _dt_minutes(self) -> Self:
26442644
def _dt_seconds(self) -> Self:
26452645
return type(self)(
26462646
pa.array(
2647-
self._to_timedeltaarray().seconds, from_pandas=True, type=pa.int32()
2647+
[
2648+
td.components.seconds if td is not NaT else None
2649+
for td in self._to_timedeltaarray()
2650+
],
2651+
type=pa.int32(),
26482652
)
26492653
)
26502654

@@ -2664,8 +2668,10 @@ def _dt_milliseconds(self) -> Self:
26642668
def _dt_microseconds(self) -> Self:
26652669
return type(self)(
26662670
pa.array(
2667-
self._to_timedeltaarray().microseconds,
2668-
from_pandas=True,
2671+
[
2672+
td.components.microseconds if td is not NaT else None
2673+
for td in self._to_timedeltaarray()
2674+
],
26692675
type=pa.int32(),
26702676
)
26712677
)
@@ -2674,7 +2680,11 @@ def _dt_microseconds(self) -> Self:
26742680
def _dt_nanoseconds(self) -> Self:
26752681
return type(self)(
26762682
pa.array(
2677-
self._to_timedeltaarray().nanoseconds, from_pandas=True, type=pa.int32()
2683+
[
2684+
td.components.nanoseconds if td is not NaT else None
2685+
for td in self._to_timedeltaarray()
2686+
],
2687+
type=pa.int32(),
26782688
)
26792689
)
26802690

pandas/tests/extension/test_arrow.py

+25
Original file line numberDiff line numberDiff line change
@@ -2905,6 +2905,31 @@ def test_dt_components():
29052905
tm.assert_frame_equal(result, expected)
29062906

29072907

2908+
def test_dt_components_large_values():
2909+
ser = pd.Series(
2910+
[
2911+
pd.Timedelta("365 days 23:59:59.999000"),
2912+
None,
2913+
],
2914+
dtype=ArrowDtype(pa.duration("ns")),
2915+
)
2916+
result = ser.dt.components
2917+
expected = pd.DataFrame(
2918+
[[365, 23, 59, 59, 999, 0, 0], [None, None, None, None, None, None, None]],
2919+
columns=[
2920+
"days",
2921+
"hours",
2922+
"minutes",
2923+
"seconds",
2924+
"milliseconds",
2925+
"microseconds",
2926+
"nanoseconds",
2927+
],
2928+
dtype="int32[pyarrow]",
2929+
)
2930+
tm.assert_frame_equal(result, expected)
2931+
2932+
29082933
@pytest.mark.parametrize("skipna", [True, False])
29092934
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
29102935
# GH51624

0 commit comments

Comments
 (0)