Skip to content

Commit 42273e9

Browse files
lukemanley authored and im-vinicius committed
BUG: pyarrow duration arrays constructed from data containing NaT can overflow (pandas-dev#52843)
1 parent bf84851 commit 42273e9

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

doc/source/whatsnew/v2.0.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ Bug fixes
2828
~~~~~~~~~
2929
- Bug in :attr:`Series.dt.days` that would overflow ``int32`` number of days (:issue:`52391`)
3030
- Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
31+
- Bug in :class:`~arrays.ArrowExtensionArray` with duration dtype overflowing when constructed from data containing numpy ``NaT`` (:issue:`52843`)
3132
- Bug in :func:`Series.dt.round` when passing a ``freq`` of equal or higher resolution compared to the :class:`Series` would raise a ``ZeroDivisionError`` (:issue:`52761`)
3233
- Bug in :func:`Series.median` with :class:`ArrowDtype` returning an approximate median (:issue:`52679`)
3334
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on categorical dtypes (:issue:`49889`)

pandas/core/arrays/arrow/array.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,13 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
258258
scalars = pa.array(scalars, from_pandas=True)
259259
if pa_dtype and scalars.type != pa_dtype:
260260
scalars = scalars.cast(pa_dtype)
261-
return cls(scalars)
261+
arr = cls(scalars)
262+
if pa.types.is_duration(scalars.type) and scalars.null_count > 0:
263+
# GH52843: upstream bug for duration types when originally
264+
# constructed with data containing numpy NaT.
265+
# https://github.com/apache/arrow/issues/35088
266+
arr = arr.fillna(arr.dtype.na_value)
267+
return arr
262268

263269
@classmethod
264270
def _from_sequence_of_strings(

pandas/tests/extension/test_arrow.py

+16
Original file line number | Diff line number | Diff line change
@@ -2830,3 +2830,19 @@ def test_date32_repr():
28302830
arrow_dt = pa.array([date.fromisoformat("2020-01-01")], type=pa.date32())
28312831
ser = pd.Series(arrow_dt, dtype=ArrowDtype(arrow_dt.type))
28322832
assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]"
2833+
2834+
2835+
@pytest.mark.xfail(
    pa_version_under8p0,
    reason="Function 'add_checked' has no kernel matching input types",
    raises=pa.ArrowNotImplementedError,
)
def test_duration_overflow_from_ndarray_containing_nat():
    """Regression test for GH52843.

    Builds an Arrow-backed timestamp Series and an Arrow-backed duration
    Series, each from pandas data holding one value and one missing entry,
    and checks that adding them gives ``[2, <NA>]`` as a timestamp Series.
    """
    timestamp_dtype = ArrowDtype(pa.timestamp("ns"))
    duration_dtype = ArrowDtype(pa.duration("ns"))

    # Both inputs go through pandas datetime/timedelta containers first, so
    # the missing entry reaches the Arrow constructor as NaT, not as None.
    timestamps = pd.Series(pd.to_datetime([1, None]), dtype=timestamp_dtype)
    durations = pd.Series(pd.to_timedelta([1, None]), dtype=duration_dtype)

    total = timestamps + durations

    tm.assert_series_equal(total, pd.Series([2, None], dtype=timestamp_dtype))

0 commit comments

Comments
 (0)