Skip to content

Commit 900cd03

Browse files
jbrockmendel authored and Pingviinituutti committed
implement fillna from 24024, with fixes and tests (pandas-dev#24536)
1 parent e0f1260 commit 900cd03

File tree

5 files changed: +91 -41 lines changed

pandas/core/arrays/datetimelike.py

+49 -1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from pandas.errors import (
1717
AbstractMethodError, NullFrequencyError, PerformanceWarning)
1818
from pandas.util._decorators import Appender, Substitution
19+
from pandas.util._validators import validate_fillna_kwargs
1920

2021
from pandas.core.dtypes.common import (
2122
is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype,
@@ -25,9 +26,10 @@
2526
is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
2627
needs_i8_conversion, pandas_dtype)
2728
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
29+
from pandas.core.dtypes.inference import is_array_like
2830
from pandas.core.dtypes.missing import isna
2931

30-
from pandas.core import nanops
32+
from pandas.core import missing, nanops
3133
from pandas.core.algorithms import (
3234
checked_add_with_arr, take, unique1d, value_counts)
3335
import pandas.core.common as com
@@ -787,6 +789,52 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
787789
result[self._isnan] = fill_value
788790
return result
789791

792+
def fillna(self, value=None, method=None, limit=None):
793+
# TODO(GH-20300): remove this
794+
# Just overriding to ensure that we avoid an astype(object).
795+
# Either 20300 or a `_values_for_fillna` would avoid this duplication.
796+
if isinstance(value, ABCSeries):
797+
value = value.array
798+
799+
value, method = validate_fillna_kwargs(value, method)
800+
801+
mask = self.isna()
802+
803+
if is_array_like(value):
804+
if len(value) != len(self):
805+
raise ValueError("Length of 'value' does not match. Got ({}) "
806+
" expected {}".format(len(value), len(self)))
807+
value = value[mask]
808+
809+
if mask.any():
810+
if method is not None:
811+
if method == 'pad':
812+
func = missing.pad_1d
813+
else:
814+
func = missing.backfill_1d
815+
816+
values = self._data
817+
if not is_period_dtype(self):
818+
# For PeriodArray self._data is i8, which gets copied
819+
# by `func`. Otherwise we need to make a copy manually
820+
# to avoid modifying `self` in-place.
821+
values = values.copy()
822+
823+
new_values = func(values, limit=limit,
824+
mask=mask)
825+
if is_datetime64tz_dtype(self):
826+
# we need to pass int64 values to the constructor to avoid
827+
# re-localizing incorrectly
828+
new_values = new_values.view("i8")
829+
new_values = type(self)(new_values, dtype=self.dtype)
830+
else:
831+
# fill with value
832+
new_values = self.copy()
833+
new_values[mask] = value
834+
else:
835+
new_values = self.copy()
836+
return new_values
837+
790838
# ------------------------------------------------------------------
791839
# Frequency Properties/Methods
792840

pandas/core/arrays/period.py

+2 -39
Original file line number | Diff line number | Diff line change
@@ -12,19 +12,17 @@
1212
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
1313
import pandas.compat as compat
1414
from pandas.util._decorators import Appender, cache_readonly
15-
from pandas.util._validators import validate_fillna_kwargs
1615

1716
from pandas.core.dtypes.common import (
18-
_TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype,
19-
is_float_dtype, is_list_like, is_period_dtype, pandas_dtype)
17+
_TD_DTYPE, ensure_object, is_datetime64_dtype, is_float_dtype,
18+
is_list_like, is_period_dtype, pandas_dtype)
2019
from pandas.core.dtypes.dtypes import PeriodDtype
2120
from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries
2221
from pandas.core.dtypes.missing import isna, notna
2322

2423
import pandas.core.algorithms as algos
2524
from pandas.core.arrays import datetimelike as dtl
2625
import pandas.core.common as com
27-
from pandas.core.missing import backfill_1d, pad_1d
2826

2927
from pandas.tseries import frequencies
3028
from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick
@@ -381,41 +379,6 @@ def _validate_fill_value(self, fill_value):
381379
"Got '{got}'.".format(got=fill_value))
382380
return fill_value
383381

384-
def fillna(self, value=None, method=None, limit=None):
385-
# TODO(#20300)
386-
# To avoid converting to object, we re-implement here with the changes
387-
# 1. Passing `_data` to func instead of self.astype(object)
388-
# 2. Re-boxing output of 1.
389-
# #20300 should let us do this kind of logic on ExtensionArray.fillna
390-
# and we can use it.
391-
392-
if isinstance(value, ABCSeries):
393-
value = value._values
394-
395-
value, method = validate_fillna_kwargs(value, method)
396-
397-
mask = self.isna()
398-
399-
if is_array_like(value):
400-
if len(value) != len(self):
401-
raise ValueError("Length of 'value' does not match. Got ({}) "
402-
" expected {}".format(len(value), len(self)))
403-
value = value[mask]
404-
405-
if mask.any():
406-
if method is not None:
407-
func = pad_1d if method == 'pad' else backfill_1d
408-
new_values = func(self._data, limit=limit,
409-
mask=mask)
410-
new_values = type(self)(new_values, freq=self.freq)
411-
else:
412-
# fill with value
413-
new_values = self.copy()
414-
new_values[mask] = value
415-
else:
416-
new_values = self.copy()
417-
return new_values
418-
419382
# --------------------------------------------------------------------
420383

421384
def _time_shift(self, periods, freq=None):

pandas/core/missing.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from pandas.core.dtypes.common import (
1414
ensure_float64, is_datetime64_dtype, is_datetime64tz_dtype, is_float_dtype,
1515
is_integer, is_integer_dtype, is_numeric_v_string_like, is_scalar,
16-
needs_i8_conversion)
16+
is_timedelta64_dtype, needs_i8_conversion)
1717
from pandas.core.dtypes.missing import isna
1818

1919

@@ -481,6 +481,10 @@ def pad_1d(values, limit=None, mask=None, dtype=None):
481481
_method = algos.pad_inplace_float64
482482
elif values.dtype == np.object_:
483483
_method = algos.pad_inplace_object
484+
elif is_timedelta64_dtype(values):
485+
# NaTs are treated identically to datetime64, so we can dispatch
486+
# to that implementation
487+
_method = _pad_1d_datetime
484488

485489
if _method is None:
486490
raise ValueError('Invalid dtype for pad_1d [{name}]'
@@ -507,6 +511,10 @@ def backfill_1d(values, limit=None, mask=None, dtype=None):
507511
_method = algos.backfill_inplace_float64
508512
elif values.dtype == np.object_:
509513
_method = algos.backfill_inplace_object
514+
elif is_timedelta64_dtype(values):
515+
# NaTs are treated identically to datetime64, so we can dispatch
516+
# to that implementation
517+
_method = _backfill_1d_datetime
510518

511519
if _method is None:
512520
raise ValueError('Invalid dtype for backfill_1d [{name}]'

pandas/tests/arrays/test_datetimelike.py

+14
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,20 @@ def test_reduce_invalid(self):
164164
with pytest.raises(TypeError, match='cannot perform'):
165165
arr._reduce("not a method")
166166

167+
@pytest.mark.parametrize('method', ['pad', 'backfill'])
168+
def test_fillna_method_doesnt_change_orig(self, method):
169+
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
170+
arr = self.array_cls(data, freq='D')
171+
arr[4] = pd.NaT
172+
173+
fill_value = arr[3] if method == 'pad' else arr[5]
174+
175+
result = arr.fillna(method=method)
176+
assert result[4] == fill_value
177+
178+
# check that the original was not changed
179+
assert arr[4] is pd.NaT
180+
167181
def test_searchsorted(self):
168182
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
169183
arr = self.array_cls(data, freq='D')

pandas/tests/arrays/test_datetimes.py

+17
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,23 @@ def test_value_counts_preserves_tz(self):
138138
index=[pd.NaT, dti[0], dti[1]])
139139
tm.assert_series_equal(result, expected)
140140

141+
@pytest.mark.parametrize('method', ['pad', 'backfill'])
142+
def test_fillna_preserves_tz(self, method):
143+
dti = pd.date_range('2000-01-01', periods=5, freq='D', tz='US/Central')
144+
arr = DatetimeArray(dti, copy=True)
145+
arr[2] = pd.NaT
146+
147+
fill_val = dti[1] if method == 'pad' else dti[3]
148+
expected = DatetimeArray([dti[0], dti[1], fill_val, dti[3], dti[4]],
149+
freq=None, tz='US/Central')
150+
151+
result = arr.fillna(method=method)
152+
tm.assert_extension_array_equal(result, expected)
153+
154+
# assert that arr and dti were not modified in-place
155+
assert arr[2] is pd.NaT
156+
assert dti[2] == pd.Timestamp('2000-01-03', tz='US/Central')
157+
141158

142159
class TestSequenceToDT64NS(object):
143160

0 commit comments

Comments
 (0)