Skip to content

Commit f9f12de

Browse files
authored
BUG: fixed Series.dt methods in ArrowDtype class that were returning incorrect values #57355 (#58052)
BUG: fixed Series.dt methods in ArrowDtype class that were returning incorrect time values. (#57355)
1 parent bcbc5b2 commit f9f12de

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,7 @@ Other
608608
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would raise a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
609609
- Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
610610
- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
611+
- Bug in :meth:`Series.dt` timedelta component methods (e.g. ``Series.dt.components``) for :class:`ArrowDtype` with a ``pyarrow.duration`` type that were returning incorrect values. (:issue:`57355`)
611612
- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
612613
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
613614
- Bug in the DataFrame Interchange Protocol implementation where incorrect results were returned for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)

pandas/core/arrays/arrow/array.py

+18-17
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818

1919
from pandas._libs import lib
2020
from pandas._libs.tslibs import (
21-
NaT,
2221
Timedelta,
2322
Timestamp,
2423
timezones,
@@ -2612,17 +2611,19 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
26122611
@property
26132612
def _dt_days(self) -> Self:
26142613
return type(self)(
2615-
pa.array(self._to_timedeltaarray().days, from_pandas=True, type=pa.int32())
2614+
pa.array(
2615+
self._to_timedeltaarray().components.days,
2616+
from_pandas=True,
2617+
type=pa.int32(),
2618+
)
26162619
)
26172620

26182621
@property
26192622
def _dt_hours(self) -> Self:
26202623
return type(self)(
26212624
pa.array(
2622-
[
2623-
td.components.hours if td is not NaT else None
2624-
for td in self._to_timedeltaarray()
2625-
],
2625+
self._to_timedeltaarray().components.hours,
2626+
from_pandas=True,
26262627
type=pa.int32(),
26272628
)
26282629
)
@@ -2631,10 +2632,8 @@ def _dt_hours(self) -> Self:
26312632
def _dt_minutes(self) -> Self:
26322633
return type(self)(
26332634
pa.array(
2634-
[
2635-
td.components.minutes if td is not NaT else None
2636-
for td in self._to_timedeltaarray()
2637-
],
2635+
self._to_timedeltaarray().components.minutes,
2636+
from_pandas=True,
26382637
type=pa.int32(),
26392638
)
26402639
)
@@ -2643,18 +2642,18 @@ def _dt_minutes(self) -> Self:
26432642
def _dt_seconds(self) -> Self:
26442643
return type(self)(
26452644
pa.array(
2646-
self._to_timedeltaarray().seconds, from_pandas=True, type=pa.int32()
2645+
self._to_timedeltaarray().components.seconds,
2646+
from_pandas=True,
2647+
type=pa.int32(),
26472648
)
26482649
)
26492650

26502651
@property
26512652
def _dt_milliseconds(self) -> Self:
26522653
return type(self)(
26532654
pa.array(
2654-
[
2655-
td.components.milliseconds if td is not NaT else None
2656-
for td in self._to_timedeltaarray()
2657-
],
2655+
self._to_timedeltaarray().components.milliseconds,
2656+
from_pandas=True,
26582657
type=pa.int32(),
26592658
)
26602659
)
@@ -2663,7 +2662,7 @@ def _dt_milliseconds(self) -> Self:
26632662
def _dt_microseconds(self) -> Self:
26642663
return type(self)(
26652664
pa.array(
2666-
self._to_timedeltaarray().microseconds,
2665+
self._to_timedeltaarray().components.microseconds,
26672666
from_pandas=True,
26682667
type=pa.int32(),
26692668
)
@@ -2673,7 +2672,9 @@ def _dt_microseconds(self) -> Self:
26732672
def _dt_nanoseconds(self) -> Self:
26742673
return type(self)(
26752674
pa.array(
2676-
self._to_timedeltaarray().nanoseconds, from_pandas=True, type=pa.int32()
2675+
self._to_timedeltaarray().components.nanoseconds,
2676+
from_pandas=True,
2677+
type=pa.int32(),
26772678
)
26782679
)
26792680

pandas/tests/extension/test_arrow.py

+25
Original file line numberDiff line numberDiff line change
@@ -2905,6 +2905,31 @@ def test_dt_components():
29052905
tm.assert_frame_equal(result, expected)
29062906

29072907

2908+
def test_dt_components_large_values():
2909+
ser = pd.Series(
2910+
[
2911+
pd.Timedelta("365 days 23:59:59.999000"),
2912+
None,
2913+
],
2914+
dtype=ArrowDtype(pa.duration("ns")),
2915+
)
2916+
result = ser.dt.components
2917+
expected = pd.DataFrame(
2918+
[[365, 23, 59, 59, 999, 0, 0], [None, None, None, None, None, None, None]],
2919+
columns=[
2920+
"days",
2921+
"hours",
2922+
"minutes",
2923+
"seconds",
2924+
"milliseconds",
2925+
"microseconds",
2926+
"nanoseconds",
2927+
],
2928+
dtype="int32[pyarrow]",
2929+
)
2930+
tm.assert_frame_equal(result, expected)
2931+
2932+
29082933
@pytest.mark.parametrize("skipna", [True, False])
29092934
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
29102935
# GH51624

0 commit comments

Comments
 (0)