Skip to content

Commit 2540d5a

Browse files
nchmura4 authored and jreback committed
ENH: add fill_value to asfreq
closes #3715 closes #14791
1 parent a0eac6c commit 2540d5a

File tree

6 files changed

+164
-21
lines changed

6 files changed

+164
-21
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,13 @@ Other enhancements
135135
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
136136
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
137137
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
138-
139138
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
140139
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
140+
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
141+
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
141142

142143
.. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
143144

144-
145145
.. _whatsnew_0200.api_breaking:
146146

147147
Backwards incompatible API changes

pandas/core/generic.py

+68-5
Original file line numberDiff line numberDiff line change
@@ -4066,12 +4066,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
40664066
sort=sort, group_keys=group_keys, squeeze=squeeze,
40674067
**kwargs)
40684068

4069-
def asfreq(self, freq, method=None, how=None, normalize=False):
4069+
def asfreq(self, freq, method=None, how=None, normalize=False,
4070+
fill_value=None):
40704071
"""
40714072
Convert TimeSeries to specified frequency.
40724073
40734074
Optionally provide filling method to pad/backfill missing values.
40744075
4076+
Returns the original data conformed to a new index with the specified
4077+
frequency. ``resample`` is more appropriate if an operation, such as
4078+
summarization, is necessary to represent the data at the new frequency.
4079+
40754080
Parameters
40764081
----------
40774082
freq : DateOffset object, or string
@@ -4086,18 +4091,79 @@ def asfreq(self, freq, method=None, how=None, normalize=False):
40864091
For PeriodIndex only, see PeriodIndex.asfreq
40874092
normalize : bool, default False
40884093
Whether to reset output index to midnight
4094+
fill_value : scalar, optional
4095+
Value to use for missing values, applied during upsampling (note
4096+
this does not fill NaNs that were already present).
4097+
4098+
.. versionadded:: 0.20.0
40894099
40904100
Returns
40914101
-------
40924102
converted : type of caller
40934103
4104+
Examples
4105+
--------
4106+
4107+
Start by creating a series with four one-minute timestamps.
4108+
4109+
>>> index = pd.date_range('1/1/2000', periods=4, freq='T')
4110+
>>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
4111+
>>> df = pd.DataFrame({'s':series})
4112+
>>> df
4113+
s
4114+
2000-01-01 00:00:00 0.0
4115+
2000-01-01 00:01:00 NaN
4116+
2000-01-01 00:02:00 2.0
4117+
2000-01-01 00:03:00 3.0
4118+
4119+
Upsample the series into 30 second bins.
4120+
4121+
>>> df.asfreq(freq='30S')
4122+
s
4123+
2000-01-01 00:00:00 0.0
4124+
2000-01-01 00:00:30 NaN
4125+
2000-01-01 00:01:00 NaN
4126+
2000-01-01 00:01:30 NaN
4127+
2000-01-01 00:02:00 2.0
4128+
2000-01-01 00:02:30 NaN
4129+
2000-01-01 00:03:00 3.0
4130+
4131+
Upsample again, providing a ``fill_value``.
4132+
4133+
>>> df.asfreq(freq='30S', fill_value=9.0)
4134+
s
4135+
2000-01-01 00:00:00 0.0
4136+
2000-01-01 00:00:30 9.0
4137+
2000-01-01 00:01:00 NaN
4138+
2000-01-01 00:01:30 9.0
4139+
2000-01-01 00:02:00 2.0
4140+
2000-01-01 00:02:30 9.0
4141+
2000-01-01 00:03:00 3.0
4142+
4143+
Upsample again, providing a ``method``.
4144+
4145+
>>> df.asfreq(freq='30S', method='bfill')
4146+
s
4147+
2000-01-01 00:00:00 0.0
4148+
2000-01-01 00:00:30 NaN
4149+
2000-01-01 00:01:00 NaN
4150+
2000-01-01 00:01:30 2.0
4151+
2000-01-01 00:02:00 2.0
4152+
2000-01-01 00:02:30 3.0
4153+
2000-01-01 00:03:00 3.0
4154+
4155+
See Also
4156+
--------
4157+
reindex
4158+
40944159
Notes
40954160
-----
40964161
To learn more about the frequency strings, please see `this link
40974162
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
40984163
"""
40994164
from pandas.tseries.resample import asfreq
4100-
return asfreq(self, freq, method=method, how=how, normalize=normalize)
4165+
return asfreq(self, freq, method=method, how=how, normalize=normalize,
4166+
fill_value=fill_value)
41014167

41024168
def at_time(self, time, asof=False):
41034169
"""
@@ -4178,9 +4244,6 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
41784244
41794245
.. versionadded:: 0.19.0
41804246
4181-
Notes
4182-
-----
4183-
41844247
To learn more about the offset strings, please see `this link
41854248
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
41864249

pandas/tests/frame/test_timeseries.py

+20
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,26 @@ def test_asfreq_datetimeindex(self):
323323
ts = df['A'].asfreq('B')
324324
tm.assertIsInstance(ts.index, DatetimeIndex)
325325

326+
def test_asfreq_fillvalue(self):
327+
# test for fill value during upsampling, related to issue 3715
328+
329+
# setup
330+
rng = pd.date_range('1/1/2016', periods=10, freq='2S')
331+
ts = pd.Series(np.arange(len(rng)), index=rng)
332+
df = pd.DataFrame({'one': ts})
333+
334+
# insert pre-existing missing value
335+
df.loc['2016-01-01 00:00:08', 'one'] = None
336+
337+
actual_df = df.asfreq(freq='1S', fill_value=9.0)
338+
expected_df = df.asfreq(freq='1S').fillna(9.0)
339+
expected_df.loc['2016-01-01 00:00:08', 'one'] = None
340+
assert_frame_equal(expected_df, actual_df)
341+
342+
expected_series = ts.asfreq(freq='1S').fillna(9.0)
343+
actual_series = ts.asfreq(freq='1S', fill_value=9.0)
344+
assert_series_equal(expected_series, actual_series)
345+
326346
def test_first_last_valid(self):
327347
N = len(self.frame.index)
328348
mat = randn(N)

pandas/tests/series/test_timeseries.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -410,20 +410,26 @@ def test_asfreq(self):
410410

411411
daily_ts = ts.asfreq('B')
412412
monthly_ts = daily_ts.asfreq('BM')
413-
self.assert_series_equal(monthly_ts, ts)
413+
assert_series_equal(monthly_ts, ts)
414414

415415
daily_ts = ts.asfreq('B', method='pad')
416416
monthly_ts = daily_ts.asfreq('BM')
417-
self.assert_series_equal(monthly_ts, ts)
417+
assert_series_equal(monthly_ts, ts)
418418

419419
daily_ts = ts.asfreq(BDay())
420420
monthly_ts = daily_ts.asfreq(BMonthEnd())
421-
self.assert_series_equal(monthly_ts, ts)
421+
assert_series_equal(monthly_ts, ts)
422422

423423
result = ts[:0].asfreq('M')
424424
self.assertEqual(len(result), 0)
425425
self.assertIsNot(result, ts)
426426

427+
daily_ts = ts.asfreq('D', fill_value=-1)
428+
result = daily_ts.value_counts().sort_index()
429+
expected = Series([60, 1, 1, 1],
430+
index=[-1.0, 2.0, 1.0, 0.0]).sort_index()
431+
assert_series_equal(result, expected)
432+
427433
def test_diff(self):
428434
# Just run the function
429435
self.ts.diff()

pandas/tseries/resample.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def transform(self, arg, *args, **kwargs):
353353
def _downsample(self, f):
354354
raise AbstractMethodError(self)
355355

356-
def _upsample(self, f, limit=None):
356+
def _upsample(self, f, limit=None, fill_value=None):
357357
raise AbstractMethodError(self)
358358

359359
def _gotitem(self, key, ndim, subset=None):
@@ -509,12 +509,25 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
509509
limit_direction=limit_direction,
510510
downcast=downcast, **kwargs)
511511

512-
def asfreq(self):
512+
def asfreq(self, fill_value=None):
513513
"""
514514
return the values at the new freq,
515-
essentially a reindex with (no filling)
515+
essentially a reindex
516+
517+
Parameters
518+
----------
519+
fill_value : scalar, optional
520+
Value to use for missing values, applied during upsampling (note
521+
this does not fill NaNs that were already present).
522+
523+
.. versionadded:: 0.20.0
524+
525+
See Also
526+
--------
527+
Series.asfreq
528+
DataFrame.asfreq
516529
"""
517-
return self._upsample('asfreq')
530+
return self._upsample('asfreq', fill_value=fill_value)
518531

519532
def std(self, ddof=1, *args, **kwargs):
520533
"""
@@ -713,12 +726,14 @@ def _adjust_binner_for_upsample(self, binner):
713726
binner = binner[:-1]
714727
return binner
715728

716-
def _upsample(self, method, limit=None):
729+
def _upsample(self, method, limit=None, fill_value=None):
717730
"""
718731
method : string {'backfill', 'bfill', 'pad',
719732
'ffill', 'asfreq'} method for upsampling
720733
limit : int, default None
721734
Maximum size gap to fill when reindexing
735+
fill_value : scalar, default None
736+
Value to use for missing values
722737
723738
See also
724739
--------
@@ -745,7 +760,7 @@ def _upsample(self, method, limit=None):
745760
result.index = res_index
746761
else:
747762
result = obj.reindex(res_index, method=method,
748-
limit=limit)
763+
limit=limit, fill_value=fill_value)
749764

750765
return self._wrap_result(result)
751766

@@ -865,12 +880,14 @@ def _downsample(self, how, **kwargs):
865880
'Frequency {} cannot be resampled to {}, as they are not '
866881
'sub or super periods'.format(ax.freq, self.freq))
867882

868-
def _upsample(self, method, limit=None):
883+
def _upsample(self, method, limit=None, fill_value=None):
869884
"""
870885
method : string {'backfill', 'bfill', 'pad', 'ffill'}
871886
method for upsampling
872887
limit : int, default None
873888
Maximum size gap to fill when reindexing
889+
fill_value : scalar, default None
890+
Value to use for missing values
874891
875892
See also
876893
--------
@@ -884,8 +901,8 @@ def _upsample(self, method, limit=None):
884901
" datetime-like")
885902
# we may need to actually resample as if we are timestamps
886903
if self.kind == 'timestamp':
887-
return super(PeriodIndexResampler, self)._upsample(method,
888-
limit=limit)
904+
return super(PeriodIndexResampler, self)._upsample(
905+
method, limit=limit, fill_value=fill_value)
889906

890907
ax = self.ax
891908
obj = self.obj
@@ -1346,7 +1363,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
13461363
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
13471364

13481365

1349-
def asfreq(obj, freq, method=None, how=None, normalize=False):
1366+
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
13501367
"""
13511368
Utility frequency conversion method for Series/DataFrame
13521369
"""
@@ -1366,7 +1383,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False):
13661383
return obj.copy()
13671384
dti = date_range(obj.index[0], obj.index[-1], freq=freq)
13681385
dti.name = obj.index.name
1369-
rs = obj.reindex(dti, method=method)
1386+
rs = obj.reindex(dti, method=method, fill_value=fill_value)
13701387
if normalize:
13711388
rs.index = rs.index.normalize()
13721389
return rs

pandas/tseries/tests/test_resample.py

+37
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,24 @@ def test_asfreq_upsample(self):
693693
expected = frame.reindex(new_index)
694694
assert_frame_equal(result, expected)
695695

696+
def test_asfreq_fill_value(self):
697+
# test for fill value during resampling, issue 3715
698+
699+
s = self.create_series()
700+
701+
result = s.resample('1H').asfreq()
702+
new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
703+
expected = s.reindex(new_index)
704+
assert_series_equal(result, expected)
705+
706+
frame = s.to_frame('value')
707+
frame.iloc[1] = None
708+
result = frame.resample('1H').asfreq(fill_value=4.0)
709+
new_index = self.create_index(frame.index[0],
710+
frame.index[-1], freq='1H')
711+
expected = frame.reindex(new_index, fill_value=4.0)
712+
assert_frame_equal(result, expected)
713+
696714
def test_resample_interpolate(self):
697715
# # 12925
698716
df = self.create_series().to_frame('value')
@@ -2159,6 +2177,25 @@ def test_asfreq_upsample(self):
21592177
result = frame.resample('1H').asfreq()
21602178
assert_frame_equal(result, expected)
21612179

2180+
def test_asfreq_fill_value(self):
2181+
# test for fill value during resampling, issue 3715
2182+
2183+
s = self.create_series()
2184+
new_index = date_range(s.index[0].to_timestamp(how='start'),
2185+
(s.index[-1]).to_timestamp(how='start'),
2186+
freq='1H')
2187+
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
2188+
result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
2189+
assert_series_equal(result, expected)
2190+
2191+
frame = s.to_frame('value')
2192+
new_index = date_range(frame.index[0].to_timestamp(how='start'),
2193+
(frame.index[-1]).to_timestamp(how='start'),
2194+
freq='1H')
2195+
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
2196+
result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
2197+
assert_frame_equal(result, expected)
2198+
21622199
def test_selection(self):
21632200
index = self.create_series().index
21642201
# This is a bug, these should be implemented

0 commit comments

Comments
 (0)