Skip to content

Commit 3cec943

Browse files
committed
BUG: fixed Series.dt methods in ArrowDtype class that were returning incorrect time values. (pandas-dev#57355)
1 parent 192db0d commit 3cec943

File tree

3 files changed

+46
-4
lines changed

3 files changed

+46
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ Bug fixes
324324
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
325325
- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
326326
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
327+
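- Fixed bug in the :class:`ArrowDtype` duration accessors ``Series.dt.seconds``, ``Series.dt.microseconds`` and ``Series.dt.nanoseconds`` (also used by ``Series.dt.components``) that were returning incorrect values. (:issue:`57355`)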
327328
- Fixed bug in :meth:`Series.rank` that didn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
328329
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
329330

pandas/core/arrays/arrow/array.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -2642,7 +2642,11 @@ def _dt_minutes(self) -> Self:
26422642
def _dt_seconds(self) -> Self:
26432643
return type(self)(
26442644
pa.array(
2645-
self._to_timedeltaarray().seconds, from_pandas=True, type=pa.int32()
2645+
[
2646+
td.components.seconds if td is not NaT else None
2647+
for td in self._to_timedeltaarray()
2648+
],
2649+
type=pa.int32(),
26462650
)
26472651
)
26482652

@@ -2662,8 +2666,10 @@ def _dt_milliseconds(self) -> Self:
26622666
def _dt_microseconds(self) -> Self:
26632667
return type(self)(
26642668
pa.array(
2665-
self._to_timedeltaarray().microseconds,
2666-
from_pandas=True,
2669+
[
2670+
td.components.microseconds if td is not NaT else None
2671+
for td in self._to_timedeltaarray()
2672+
],
26672673
type=pa.int32(),
26682674
)
26692675
)
@@ -2672,7 +2678,11 @@ def _dt_microseconds(self) -> Self:
26722678
def _dt_nanoseconds(self) -> Self:
26732679
return type(self)(
26742680
pa.array(
2675-
self._to_timedeltaarray().nanoseconds, from_pandas=True, type=pa.int32()
2681+
[
2682+
td.components.nanoseconds if td is not NaT else None
2683+
for td in self._to_timedeltaarray()
2684+
],
2685+
type=pa.int32(),
26762686
)
26772687
)
26782688

pandas/tests/extension/test_arrow.py

+31
Original file line numberDiff line numberDiff line change
@@ -2901,6 +2901,37 @@ def test_dt_components():
29012901
tm.assert_frame_equal(result, expected)
29022902

29032903

2904+
def test_duration_dt_components_overflow():
2905+
start_dates = pd.Series(
2906+
[
2907+
"2015-03-08",
2908+
],
2909+
dtype="datetime64[ns]",
2910+
).astype("timestamp[ns][pyarrow]")
2911+
2912+
end_dates = pd.Series(
2913+
[
2914+
"2020-05-28 23:59:59.999",
2915+
],
2916+
dtype="datetime64[ns]",
2917+
).astype("timestamp[ns][pyarrow]")
2918+
2919+
result = end_dates - start_dates
2920+
expected_days = pd.Series([1908]).astype("int32[pyarrow]").rename("days")
2921+
expected_hours = pd.Series([23]).astype("int32[pyarrow]").rename("hours")
2922+
expected_minutes = pd.Series([59]).astype("int32[pyarrow]").rename("minutes")
2923+
expected_seconds = pd.Series([59]).astype("int32[pyarrow]").rename("seconds")
2924+
expected_milliseconds = (
2925+
pd.Series([999]).astype("int32[pyarrow]").rename("milliseconds")
2926+
)
2927+
2928+
tm.assert_series_equal(result.dt.components.days, expected_days)
2929+
tm.assert_series_equal(result.dt.components.hours, expected_hours)
2930+
tm.assert_series_equal(result.dt.components.minutes, expected_minutes)
2931+
tm.assert_series_equal(result.dt.components.seconds, expected_seconds)
2932+
tm.assert_series_equal(result.dt.components.milliseconds, expected_milliseconds)
2933+
2934+
29042935
@pytest.mark.parametrize("skipna", [True, False])
29052936
def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
29062937
# GH51624

0 commit comments

Comments
 (0)