Skip to content

Commit 5c9b63c

Browse files
Backport PR #54707 on branch 2.1.x (BUG: ArrowExtensionArray.fillna with duration types) (#54711)
Backport PR #54707: BUG: ArrowExtensionArray.fillna with duration types Co-authored-by: Luke Manley <[email protected]>
1 parent 968b517 commit 5c9b63c

File tree

2 files changed

+18
-26
lines changed

2 files changed

+18
-26
lines changed

pandas/core/arrays/arrow/array.py

+8 -26
Original file line number | Diff line number | Diff line change
@@ -381,8 +381,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
381381
elif isna(value):
382382
pa_scalar = pa.scalar(None, type=pa_type)
383383
else:
384-
# GH 53171: pyarrow does not yet handle pandas non-nano correctly
385-
# see https://github.com/apache/arrow/issues/33321
384+
# Workaround https://github.com/apache/arrow/issues/37291
386385
if isinstance(value, Timedelta):
387386
if pa_type is None:
388387
pa_type = pa.duration(value.unit)
@@ -448,8 +447,7 @@ def _box_pa_array(
448447
and pa.types.is_duration(pa_type)
449448
and (not isinstance(value, np.ndarray) or value.dtype.kind not in "mi")
450449
):
451-
# GH 53171: pyarrow does not yet handle pandas non-nano correctly
452-
# see https://github.com/apache/arrow/issues/33321
450+
# Workaround https://github.com/apache/arrow/issues/37291
453451
from pandas.core.tools.timedeltas import to_timedelta
454452

455453
value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
@@ -462,8 +460,7 @@ def _box_pa_array(
462460
pa_array = pa.array(value, from_pandas=True)
463461

464462
if pa_type is None and pa.types.is_duration(pa_array.type):
465-
# GH 53171: pyarrow does not yet handle pandas non-nano correctly
466-
# see https://github.com/apache/arrow/issues/33321
463+
# Workaround https://github.com/apache/arrow/issues/37291
467464
from pandas.core.tools.timedeltas import to_timedelta
468465

469466
value = to_timedelta(value)
@@ -965,26 +962,11 @@ def fillna(
965962
f" expected {len(self)}"
966963
)
967964

968-
def convert_fill_value(value, pa_type, dtype):
969-
if value is None:
970-
return value
971-
if isinstance(value, (pa.Scalar, pa.Array, pa.ChunkedArray)):
972-
return value
973-
if isinstance(value, Timedelta) and value.unit in ("s", "ms"):
974-
# Workaround https://github.com/apache/arrow/issues/37291
975-
value = value.to_numpy()
976-
if is_array_like(value):
977-
pa_box = pa.array
978-
else:
979-
pa_box = pa.scalar
980-
try:
981-
value = pa_box(value, type=pa_type, from_pandas=True)
982-
except pa.ArrowTypeError as err:
983-
msg = f"Invalid value '{str(value)}' for dtype {dtype}"
984-
raise TypeError(msg) from err
985-
return value
986-
987-
fill_value = convert_fill_value(value, self._pa_array.type, self.dtype)
965+
try:
966+
fill_value = self._box_pa(value, pa_type=self._pa_array.type)
967+
except pa.ArrowTypeError as err:
968+
msg = f"Invalid value '{str(value)}' for dtype {self.dtype}"
969+
raise TypeError(msg) from err
988970

989971
try:
990972
if method is None:

pandas/tests/extension/test_arrow.py

+10
Original file line number | Diff line number | Diff line change
@@ -3049,3 +3049,13 @@ def test_arrowextensiondtype_dataframe_repr():
30493049
# pyarrow.ExtensionType values are displayed
30503050
expected = " col\n0 15340\n1 15341\n2 15342"
30513051
assert result == expected
3052+
3053+
3054+
@pytest.mark.parametrize("pa_type", tm.TIMEDELTA_PYARROW_DTYPES)
3055+
def test_duration_fillna_numpy(pa_type):
3056+
# GH 54707
3057+
ser1 = pd.Series([None, 2], dtype=ArrowDtype(pa_type))
3058+
ser2 = pd.Series(np.array([1, 3], dtype=f"m8[{pa_type.unit}]"))
3059+
result = ser1.fillna(ser2)
3060+
expected = pd.Series([1, 2], dtype=ArrowDtype(pa_type))
3061+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)