Skip to content

Commit edc0870

Browse files
authored
ENH: EA.fillna copy=True (#53728)
1 parent 317290a commit edc0870

File tree

11 files changed

+192
-35
lines changed

11 files changed

+192
-35
lines changed

pandas/core/arrays/_mixins.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,9 @@ def _fill_mask_inplace(
296296
func(self._ndarray.T, limit=limit, mask=mask.T)
297297

298298
@doc(ExtensionArray.fillna)
299-
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
299+
def fillna(
300+
self, value=None, method=None, limit: int | None = None, copy: bool = True
301+
) -> Self:
300302
value, method = validate_fillna_kwargs(
301303
value, method, validate_scalar_dict_value=False
302304
)
@@ -313,7 +315,9 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
313315
# TODO: check value is None
314316
# (for now) when self.ndim == 2, we assume axis=0
315317
func = missing.get_fill_func(method, ndim=self.ndim)
316-
npvalues = self._ndarray.T.copy()
318+
npvalues = self._ndarray.T
319+
if copy:
320+
npvalues = npvalues.copy()
317321
func(npvalues, limit=limit, mask=mask.T)
318322
npvalues = npvalues.T
319323

@@ -322,14 +326,20 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
322326
new_values = self._from_backing_data(npvalues)
323327
else:
324328
# fill with value
325-
new_values = self.copy()
329+
if copy:
330+
new_values = self.copy()
331+
else:
332+
new_values = self[:]
326333
new_values[mask] = value
327334
else:
328335
# We validate the fill_value even if there is nothing to fill
329336
if value is not None:
330337
self._validate_setitem_value(value)
331338

332-
new_values = self.copy()
339+
if not copy:
340+
new_values = self[:]
341+
else:
342+
new_values = self.copy()
333343
return new_values
334344

335345
# ------------------------------------------------------------------------

pandas/core/arrays/arrow/array.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -905,18 +905,16 @@ def fillna(
905905
value: object | ArrayLike | None = None,
906906
method: FillnaOptions | None = None,
907907
limit: int | None = None,
908+
copy: bool = True,
908909
) -> Self:
909910
value, method = validate_fillna_kwargs(value, method)
910911

911912
if not self._hasna:
912913
# TODO(CoW): Not necessary anymore when CoW is the default
913914
return self.copy()
914915

915-
if limit is not None:
916-
return super().fillna(value=value, method=method, limit=limit)
917-
918-
if method is not None:
919-
return super().fillna(value=value, method=method, limit=limit)
916+
if limit is not None or method is not None:
917+
return super().fillna(value=value, method=method, limit=limit, copy=copy)
920918

921919
if isinstance(value, (np.ndarray, ExtensionArray)):
922920
# Similar to check_value_size, but we do not mask here since we may
@@ -959,7 +957,7 @@ def convert_fill_value(value, pa_type, dtype):
959957
# a kernel for duration types.
960958
pass
961959

962-
return super().fillna(value=value, method=method, limit=limit)
960+
return super().fillna(value=value, method=method, limit=limit, copy=copy)
963961

964962
def isin(self, values) -> npt.NDArray[np.bool_]:
965963
# short-circuit to return all False array.

pandas/core/arrays/base.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,7 @@ def fillna(
872872
value: object | ArrayLike | None = None,
873873
method: FillnaOptions | None = None,
874874
limit: int | None = None,
875+
copy: bool = True,
875876
) -> Self:
876877
"""
877878
Fill NA/NaN values using the specified method.
@@ -896,6 +897,14 @@ def fillna(
896897
maximum number of entries along the entire axis where NaNs will be
897898
filled.
898899
900+
copy : bool, default True
901+
Whether to make a copy of the data before filling. If False, then
902+
the original should be modified and no new memory should be allocated.
903+
For ExtensionArray subclasses that cannot do this, it is at the
904+
author's discretion whether to ignore "copy=False" or to raise.
905+
The base class implementation ignores the keyword in pad/backfill
906+
cases.
907+
899908
Returns
900909
-------
901910
ExtensionArray
@@ -932,10 +941,16 @@ def fillna(
932941
return self[::-1].take(indexer, allow_fill=True)
933942
else:
934943
# fill with value
935-
new_values = self.copy()
944+
if not copy:
945+
new_values = self[:]
946+
else:
947+
new_values = self.copy()
936948
new_values[mask] = value
937949
else:
938-
new_values = self.copy()
950+
if not copy:
951+
new_values = self[:]
952+
else:
953+
new_values = self.copy()
939954
return new_values
940955

941956
def dropna(self) -> Self:

pandas/core/arrays/interval.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -889,7 +889,9 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
889889
indexer = obj.argsort()[-1]
890890
return obj[indexer]
891891

892-
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
892+
def fillna(
893+
self, value=None, method=None, limit: int | None = None, copy: bool = True
894+
) -> Self:
893895
"""
894896
Fill NA/NaN values using the specified method.
895897
@@ -911,11 +913,18 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
911913
be partially filled. If method is not specified, this is the
912914
maximum number of entries along the entire axis where NaNs will be
913915
filled.
916+
copy : bool, default True
917+
Whether to make a copy of the data before filling. If False, then
918+
the original should be modified and no new memory should be allocated.
919+
For ExtensionArray subclasses that cannot do this, it is at the
920+
author's discretion whether to ignore "copy=False" or to raise.
914921
915922
Returns
916923
-------
917924
filled : IntervalArray with NA/NaN filled
918925
"""
926+
if copy is False:
927+
raise NotImplementedError
919928
if method is not None:
920929
return super().fillna(value=value, method=method, limit=limit)
921930

pandas/core/arrays/masked.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
185185
return self._simple_new(self._data[item], newmask)
186186

187187
@doc(ExtensionArray.fillna)
188-
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
188+
def fillna(
189+
self, value=None, method=None, limit: int | None = None, copy: bool = True
190+
) -> Self:
189191
value, method = validate_fillna_kwargs(value, method)
190192

191193
mask = self._mask
@@ -195,16 +197,25 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
195197
if mask.any():
196198
if method is not None:
197199
func = missing.get_fill_func(method, ndim=self.ndim)
198-
npvalues = self._data.copy().T
199-
new_mask = mask.copy().T
200+
npvalues = self._data.T
201+
new_mask = mask.T
202+
if copy:
203+
npvalues = npvalues.copy()
204+
new_mask = new_mask.copy()
200205
func(npvalues, limit=limit, mask=new_mask)
201206
return self._simple_new(npvalues.T, new_mask.T)
202207
else:
203208
# fill with value
204-
new_values = self.copy()
209+
if copy:
210+
new_values = self.copy()
211+
else:
212+
new_values = self[:]
205213
new_values[mask] = value
206214
else:
207-
new_values = self.copy()
215+
if copy:
216+
new_values = self.copy()
217+
else:
218+
new_values = self[:]
208219
return new_values
209220

210221
@classmethod

pandas/core/arrays/period.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -790,16 +790,18 @@ def searchsorted(
790790
m8arr = self._ndarray.view("M8[ns]")
791791
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
792792

793-
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
793+
def fillna(
794+
self, value=None, method=None, limit: int | None = None, copy: bool = True
795+
) -> Self:
794796
if method is not None:
795797
# view as dt64 so we get treated as timelike in core.missing,
796798
# similar to dtl._period_dispatch
797799
dta = self.view("M8[ns]")
798-
result = dta.fillna(value=value, method=method, limit=limit)
800+
result = dta.fillna(value=value, method=method, limit=limit, copy=copy)
799801
# error: Incompatible return value type (got "Union[ExtensionArray,
800802
# ndarray[Any, Any]]", expected "PeriodArray")
801803
return result.view(self.dtype) # type: ignore[return-value]
802-
return super().fillna(value=value, method=method, limit=limit)
804+
return super().fillna(value=value, method=method, limit=limit, copy=copy)
803805

804806
# ------------------------------------------------------------------
805807
# Arithmetic Methods

pandas/core/arrays/sparse/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@ def fillna(
717717
value=None,
718718
method: FillnaOptions | None = None,
719719
limit: int | None = None,
720+
copy: bool = True,
720721
) -> Self:
721722
"""
722723
Fill missing values with `value`.
@@ -734,6 +735,9 @@ def fillna(
734735
735736
limit : int, optional
736737
738+
copy : bool, default True
739+
Ignored for SparseArray.
740+
737741
Returns
738742
-------
739743
SparseArray

pandas/core/internals/blocks.py

+48-5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
cast,
1111
final,
1212
)
13+
import warnings
1314

1415
import numpy as np
1516

@@ -41,6 +42,7 @@
4142
)
4243
from pandas.errors import AbstractMethodError
4344
from pandas.util._decorators import cache_readonly
45+
from pandas.util._exceptions import find_stack_level
4446
from pandas.util._validators import validate_bool_kwarg
4547

4648
from pandas.core.dtypes.astype import (
@@ -1895,12 +1897,32 @@ def pad_or_backfill(
18951897
using_cow: bool = False,
18961898
) -> list[Block]:
18971899
values = self.values
1900+
copy, refs = self._get_refs_and_copy(using_cow, inplace)
1901+
18981902
if values.ndim == 2 and axis == 1:
18991903
# NDArrayBackedExtensionArray.fillna assumes axis=0
1900-
new_values = values.T.fillna(method=method, limit=limit).T
1904+
new_values = values.T.fillna(method=method, limit=limit, copy=copy).T
19011905
else:
1902-
new_values = values.fillna(method=method, limit=limit)
1903-
return [self.make_block_same_class(new_values)]
1906+
try:
1907+
new_values = values.fillna(method=method, limit=limit, copy=copy)
1908+
except TypeError:
1909+
# 3rd party EA that has not implemented copy keyword yet
1910+
refs = None
1911+
new_values = values.fillna(method=method, limit=limit)
1912+
# issue the warning *after* retrying, in case the TypeError
1913+
# was caused by an invalid fill_value
1914+
warnings.warn(
1915+
# GH#53278
1916+
"ExtensionArray.fillna added a 'copy' keyword in pandas "
1917+
"2.1.0. In a future version, ExtensionArray subclasses will "
1918+
"need to implement this keyword or an exception will be "
1919+
"raised. In the interim, the keyword is ignored by "
1920+
f"{type(self.values).__name__}.",
1921+
FutureWarning,
1922+
stacklevel=find_stack_level(),
1923+
)
1924+
1925+
return [self.make_block_same_class(new_values, refs=refs)]
19041926

19051927

19061928
class ExtensionBlock(libinternals.Block, EABackedBlock):
@@ -1938,8 +1960,29 @@ def fillna(
19381960
refs = self.refs
19391961
new_values = self.values
19401962
else:
1941-
refs = None
1942-
new_values = self.values.fillna(value=value, method=None, limit=limit)
1963+
copy, refs = self._get_refs_and_copy(using_cow, inplace)
1964+
1965+
try:
1966+
new_values = self.values.fillna(
1967+
value=value, method=None, limit=limit, copy=copy
1968+
)
1969+
except TypeError:
1970+
# 3rd party EA that has not implemented copy keyword yet
1971+
refs = None
1972+
new_values = self.values.fillna(value=value, method=None, limit=limit)
1973+
# issue the warning *after* retrying, in case the TypeError
1974+
# was caused by an invalid fill_value
1975+
warnings.warn(
1976+
# GH#53278
1977+
"ExtensionArray.fillna added a 'copy' keyword in pandas "
1978+
"2.1.0. In a future version, ExtensionArray subclasses will "
1979+
"need to implement this keyword or an exception will be "
1980+
"raised. In the interim, the keyword is ignored by "
1981+
f"{type(self.values).__name__}.",
1982+
FutureWarning,
1983+
stacklevel=find_stack_level(),
1984+
)
1985+
19431986
nb = self.make_block_same_class(new_values, refs=refs)
19441987
return nb._maybe_downcast([nb], downcast, using_cow=using_cow)
19451988

pandas/tests/copy_view/test_interp_fillna.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -331,19 +331,23 @@ def test_fillna_inplace_ea_noop_shares_memory(
331331
view = df[:]
332332
df.fillna(100, inplace=True)
333333

334-
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
334+
if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
335+
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
336+
else:
337+
# MaskedArray can actually respect inplace=True
338+
assert np.shares_memory(get_array(df, "a"), get_array(view, "a"))
335339

340+
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
336341
if using_copy_on_write:
337-
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
338342
assert not df._mgr._has_no_reference(1)
339343
assert not view._mgr._has_no_reference(1)
340-
elif isinstance(df.dtypes.iloc[0], ArrowDtype):
341-
# arrow is immutable, so no-ops do not need to copy underlying array
342-
assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
343-
else:
344-
assert not np.shares_memory(get_array(df, "b"), get_array(view, "b"))
344+
345345
df.iloc[0, 1] = 100
346-
tm.assert_frame_equal(df_orig, view)
346+
if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write:
347+
tm.assert_frame_equal(df_orig, view)
348+
else:
349+
# we actually have a view
350+
tm.assert_frame_equal(df, view)
347351

348352

349353
def test_fillna_chained_assignment(using_copy_on_write):

pandas/tests/extension/decimal/array.py

+11
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,17 @@ def convert_values(param):
281281
def value_counts(self, dropna: bool = True):
282282
return value_counts(self.to_numpy(), dropna=dropna)
283283

284+
# Simulate a 3rd-party EA that has not yet updated to include a "copy"
285+
# keyword in its fillna method.
286+
# error: Signature of "fillna" incompatible with supertype "ExtensionArray"
287+
def fillna( # type: ignore[override]
288+
self,
289+
value=None,
290+
method=None,
291+
limit: int | None = None,
292+
):
293+
return super().fillna(value=value, method=method, limit=limit, copy=True)
294+
284295

285296
def to_decimal(values, context=None):
286297
return DecimalArray([decimal.Decimal(x) for x in values], context=context)

0 commit comments

Comments
 (0)