|
4 | 4 | TYPE_CHECKING,
|
5 | 5 | Any,
|
6 | 6 | TypeVar,
|
| 7 | + cast, |
7 | 8 | )
|
8 | 9 |
|
9 | 10 | import numpy as np
|
10 | 11 |
|
11 | 12 | from pandas._typing import (
|
| 13 | + ArrayLike, |
12 | 14 | Dtype,
|
| 15 | + FillnaOptions, |
13 | 16 | PositionalIndexer,
|
14 | 17 | SortKind,
|
15 | 18 | TakeIndexer,
|
|
20 | 23 | pa_version_under7p0,
|
21 | 24 | )
|
22 | 25 | from pandas.util._decorators import doc
|
| 26 | +from pandas.util._validators import validate_fillna_kwargs |
23 | 27 |
|
24 | 28 | from pandas.core.dtypes.common import (
|
25 | 29 | is_array_like,
|
@@ -521,6 +525,66 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
|
521 | 525 | else:
|
522 | 526 | return type(self)(pc.drop_null(self._data))
|
523 | 527 |
|
@doc(ExtensionArray.fillna)
def fillna(
    self: ArrowExtensionArrayT,
    value: object | ArrayLike | None = None,
    method: FillnaOptions | None = None,
    limit: int | None = None,
) -> ArrowExtensionArrayT:
    # The user-facing documentation comes from ``ExtensionArray.fillna`` via
    # the ``@doc`` decorator; the comments here only describe how the work is
    # routed between pyarrow compute kernels and the parent implementation.
    value, method = validate_fillna_kwargs(value, method)

    # Any call with a ``limit`` is delegated to the parent class.
    if limit is not None:
        return super().fillna(value=value, method=method, limit=limit)

    if pa_version_under7p0 and method is not None:
        # fill_null_{forward|backward} added in pyarrow 7.0
        fallback_performancewarning(version="7")
        return super().fillna(value=value, method=method, limit=limit)

    # An array-like fill value has to line up element-for-element with self.
    if is_array_like(value):
        value = cast(ArrayLike, value)
        if len(value) != len(self):
            raise ValueError(
                f"Length of 'value' does not match. Got ({len(value)}) "
                f" expected {len(self)}"
            )

    def _box_fill_value(value, arrow_type, dtype):
        """
        Box ``value`` as a pyarrow scalar or array of ``arrow_type``.

        ``None`` and objects that are already pyarrow scalars/arrays are
        returned untouched. A pyarrow type error is re-raised as TypeError.
        """
        already_usable = value is None or isinstance(
            value, (pa.Scalar, pa.Array, pa.ChunkedArray)
        )
        if already_usable:
            return value

        boxer = pa.array if is_array_like(value) else pa.scalar
        try:
            return boxer(value, type=arrow_type, from_pandas=True)
        except pa.ArrowTypeError as err:
            msg = f"Invalid value '{str(value)}' for dtype {dtype}"
            raise TypeError(msg) from err

    fill_value = _box_fill_value(value, self._data.type, self.dtype)

    try:
        # Pick the pyarrow kernel matching the requested fill strategy. The
        # kernels are only looked up on the branch that needs them.
        if method is None:
            filled = pc.fill_null(self._data, fill_value=fill_value)
        elif method == "pad":
            filled = pc.fill_null_forward(self._data)
        elif method == "backfill":
            filled = pc.fill_null_backward(self._data)
        else:
            filled = None

        if filled is not None:
            return type(self)(filled)
    except pa.ArrowNotImplementedError:
        # ArrowNotImplementedError: Function 'coalesce' has no kernel
        # matching input types (duration[ns], duration[ns])
        # TODO: remove try/except wrapper if/when pyarrow implements
        # a kernel for duration types.
        pass

    # No pyarrow kernel handled the request: use the parent implementation.
    return super().fillna(value=value, method=method, limit=limit)
| 587 | + |
524 | 588 | def isin(self, values) -> npt.NDArray[np.bool_]:
|
525 | 589 | # short-circuit to return all False array.
|
526 | 590 | if not len(values):
|
|
0 commit comments