Skip to content

Commit 45a9e8e

Browse files
committed
ENH: add fill_value to asfreq, see pandas-dev#3715
1 parent 0252385 commit 45a9e8e

File tree

5 files changed

+154
-19
lines changed

5 files changed

+154
-19
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,8 @@ Other enhancements
111111
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
112112
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
113113
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
114-
115114
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
116-
115+
- ``DataFrame.asfreq()`` now accepts a ``fill_value`` option to fill missing values during resampling (:issue:`3715`).
117116

118117
.. _whatsnew_0200.api_breaking:
119118

pandas/core/generic.py

+68-5
Original file line numberDiff line numberDiff line change
@@ -4054,12 +4054,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
40544054
sort=sort, group_keys=group_keys, squeeze=squeeze,
40554055
**kwargs)
40564056

4057-
def asfreq(self, freq, method=None, how=None, normalize=False):
4057+
def asfreq(self, freq, method=None, how=None, normalize=False,
4058+
fill_value=None):
40584059
"""
40594060
Convert TimeSeries to specified frequency.
40604061
40614062
Optionally provide filling method to pad/backfill missing values.
40624063
4064+
Returns the original data conformed to a new index with the specified
4065+
frequency. ``resample`` is more appropriate if an operation, such as
4066+
summarization, is necessary to represent the data at the new frequency.
4067+
40634068
Parameters
40644069
----------
40654070
freq : DateOffset object, or string
@@ -4074,18 +4079,79 @@ def asfreq(self, freq, method=None, how=None, normalize=False):
40744079
For PeriodIndex only, see PeriodIndex.asfreq
40754080
normalize : bool, default False
40764081
Whether to reset output index to midnight
4082+
fill_value : scalar, optional
4083+
Value to use for missing values, applied during upsampling (note
4084+
this does not fill NaNs that already were present).
4085+
4086+
.. versionadded:: 0.20.0
40774087
40784088
Returns
40794089
-------
40804090
converted : type of caller
40814091
4092+
Examples
4093+
--------
4094+
4095+
Start by creating a series with 4 one-minute timestamps.
4096+
4097+
>>> index = pd.date_range('1/1/2000', periods=4, freq='T')
4098+
>>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
4099+
>>> df = pd.DataFrame({'s':series})
4100+
>>> df
4101+
s
4102+
2000-01-01 00:00:00 0.0
4103+
2000-01-01 00:01:00 NaN
4104+
2000-01-01 00:02:00 2.0
4105+
2000-01-01 00:03:00 3.0
4106+
4107+
Upsample the series into 30-second bins.
4108+
4109+
>>> df.asfreq(freq='30S')
4110+
s
4111+
2000-01-01 00:00:00 0.0
4112+
2000-01-01 00:00:30 NaN
4113+
2000-01-01 00:01:00 NaN
4114+
2000-01-01 00:01:30 NaN
4115+
2000-01-01 00:02:00 2.0
4116+
2000-01-01 00:02:30 NaN
4117+
2000-01-01 00:03:00 3.0
4118+
4119+
Upsample again, providing a ``fill_value``.
4120+
4121+
>>> df.asfreq(freq='30S', fill_value=9.0)
4122+
s
4123+
2000-01-01 00:00:00 0.0
4124+
2000-01-01 00:00:30 9.0
4125+
2000-01-01 00:01:00 NaN
4126+
2000-01-01 00:01:30 9.0
4127+
2000-01-01 00:02:00 2.0
4128+
2000-01-01 00:02:30 9.0
4129+
2000-01-01 00:03:00 3.0
4130+
4131+
Upsample again, providing a ``method``.
4132+
4133+
>>> df.asfreq(freq='30S', method='bfill')
4134+
s
4135+
2000-01-01 00:00:00 0.0
4136+
2000-01-01 00:00:30 NaN
4137+
2000-01-01 00:01:00 NaN
4138+
2000-01-01 00:01:30 2.0
4139+
2000-01-01 00:02:00 2.0
4140+
2000-01-01 00:02:30 3.0
4141+
2000-01-01 00:03:00 3.0
4142+
4143+
See Also
4144+
--------
4145+
reindex
4146+
40824147
Notes
40834148
-----
40844149
To learn more about the frequency strings, please see `this link
40854150
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
40864151
"""
40874152
from pandas.tseries.resample import asfreq
4088-
return asfreq(self, freq, method=method, how=how, normalize=normalize)
4153+
return asfreq(self, freq, method=method, how=how, normalize=normalize,
4154+
fill_value=fill_value)
40894155

40904156
def at_time(self, time, asof=False):
40914157
"""
@@ -4166,9 +4232,6 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
41664232
41674233
.. versionadded:: 0.19.0
41684234
4169-
Notes
4170-
-----
4171-
41724235
To learn more about the offset strings, please see `this link
41734236
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
41744237

pandas/tests/frame/test_timeseries.py

+20
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,26 @@ def test_asfreq_datetimeindex(self):
323323
ts = df['A'].asfreq('B')
324324
tm.assertIsInstance(ts.index, DatetimeIndex)
325325

326+
def test_asfreq_fillvalue(self):
327+
# test for fill value during upsampling, related to issue 3715
328+
329+
# setup
330+
rng = pd.date_range('1/1/2016', periods=10, freq='2S')
331+
ts = pd.Series(np.arange(len(rng)), index=rng)
332+
df = pd.DataFrame({'one': ts})
333+
334+
# insert pre-existing missing value
335+
df.loc['2016-01-01 00:00:08', 'one'] = None
336+
337+
actual_df = df.asfreq(freq='1S', fill_value=9.0)
338+
expected_df = df.asfreq(freq='1S').fillna(9.0)
339+
expected_df.loc['2016-01-01 00:00:08', 'one'] = None
340+
assert_frame_equal(expected_df, actual_df)
341+
342+
expected_series = ts.asfreq(freq='1S').fillna(9.0)
343+
actual_series = ts.asfreq(freq='1S', fill_value=9.0)
344+
assert_series_equal(expected_series, actual_series)
345+
326346
def test_first_last_valid(self):
327347
N = len(self.frame.index)
328348
mat = randn(N)

pandas/tseries/resample.py

+28-12
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ def transform(self, arg, *args, **kwargs):
352352
def _downsample(self, f):
353353
raise AbstractMethodError(self)
354354

355-
def _upsample(self, f, limit=None):
355+
def _upsample(self, f, limit=None, fill_value=None):
356356
raise AbstractMethodError(self)
357357

358358
def _gotitem(self, key, ndim, subset=None):
@@ -499,12 +499,25 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
499499
limit_direction=limit_direction,
500500
downcast=downcast, **kwargs)
501501

502-
def asfreq(self):
502+
def asfreq(self, fill_value=None):
503503
"""
504504
return the values at the new freq,
505-
essentially a reindex with (no filling)
505+
essentially a reindex
506+
507+
Parameters
508+
----------
509+
fill_value : scalar, optional
510+
Value to use for missing values, applied during upsampling (note
511+
this does not fill NaNs that already were present).
512+
513+
.. versionadded:: 0.20.0
514+
515+
See Also
516+
--------
517+
Series.asfreq
518+
DataFrame.asfreq
506519
"""
507-
return self._upsample('asfreq')
520+
return self._upsample('asfreq', fill_value=fill_value)
508521

509522
def std(self, ddof=1, *args, **kwargs):
510523
"""
@@ -704,12 +717,14 @@ def _adjust_binner_for_upsample(self, binner):
704717
binner = binner[:-1]
705718
return binner
706719

707-
def _upsample(self, method, limit=None):
720+
def _upsample(self, method, limit=None, fill_value=None):
708721
"""
709722
method : string {'backfill', 'bfill', 'pad',
710723
'ffill', 'asfreq'} method for upsampling
711724
limit : int, default None
712725
Maximum size gap to fill when reindexing
726+
fill_value : scalar, default None
727+
Value to use for missing values
713728
714729
See also
715730
--------
@@ -736,7 +751,7 @@ def _upsample(self, method, limit=None):
736751
result.index = res_index
737752
else:
738753
result = obj.reindex(res_index, method=method,
739-
limit=limit)
754+
limit=limit, fill_value=fill_value)
740755

741756
return self._wrap_result(result)
742757

@@ -855,12 +870,14 @@ def _downsample(self, how, **kwargs):
855870
'Frequency {} cannot be resampled to {}, as they are not '
856871
'sub or super periods'.format(ax.freq, self.freq))
857872

858-
def _upsample(self, method, limit=None):
873+
def _upsample(self, method, limit=None, fill_value=None):
859874
"""
860875
method : string {'backfill', 'bfill', 'pad', 'ffill'}
861876
method for upsampling
862877
limit : int, default None
863878
Maximum size gap to fill when reindexing
879+
fill_value : scalar, default None
880+
Value to use for missing values
864881
865882
See also
866883
--------
@@ -874,8 +891,8 @@ def _upsample(self, method, limit=None):
874891
" datetime-like")
875892
# we may need to actually resample as if we are timestamps
876893
if self.kind == 'timestamp':
877-
return super(PeriodIndexResampler, self)._upsample(method,
878-
limit=limit)
894+
return super(PeriodIndexResampler, self)._upsample(
895+
method, limit=limit, fill_value=fill_value)
879896

880897
ax = self.ax
881898
obj = self.obj
@@ -889,7 +906,6 @@ def _upsample(self, method, limit=None):
889906
return self._wrap_result(_take_new_index(
890907
obj, indexer, new_index, axis=self.axis))
891908

892-
893909
class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler):
894910
"""
895911
Provides a resample of a groupby implementation
@@ -1333,7 +1349,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
13331349
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
13341350

13351351

1336-
def asfreq(obj, freq, method=None, how=None, normalize=False):
1352+
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
13371353
"""
13381354
Utility frequency conversion method for Series/DataFrame
13391355
"""
@@ -1353,7 +1369,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False):
13531369
return obj.copy()
13541370
dti = date_range(obj.index[0], obj.index[-1], freq=freq)
13551371
dti.name = obj.index.name
1356-
rs = obj.reindex(dti, method=method)
1372+
rs = obj.reindex(dti, method=method, fill_value=fill_value)
13571373
if normalize:
13581374
rs.index = rs.index.normalize()
13591375
return rs

pandas/tseries/tests/test_resample.py

+37
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,24 @@ def test_asfreq_upsample(self):
693693
expected = frame.reindex(new_index)
694694
assert_frame_equal(result, expected)
695695

696+
def test_asfreq_fill_value(self):
697+
# test for fill value during resampling, issue 3715
698+
699+
s = self.create_series()
700+
701+
result = s.resample('1H').asfreq()
702+
new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
703+
expected = s.reindex(new_index)
704+
assert_series_equal(result, expected)
705+
706+
frame = s.to_frame('value')
707+
frame.iloc[1] = None
708+
result = frame.resample('1H').asfreq(fill_value=4.0)
709+
new_index = self.create_index(frame.index[0],
710+
frame.index[-1], freq='1H')
711+
expected = frame.reindex(new_index, fill_value=4.0)
712+
assert_frame_equal(result, expected)
713+
696714
def test_resample_interpolate(self):
697715
# # 12925
698716
df = self.create_series().to_frame('value')
@@ -2122,6 +2140,25 @@ def test_asfreq_upsample(self):
21222140
result = frame.resample('1H').asfreq()
21232141
assert_frame_equal(result, expected)
21242142

2143+
def test_asfreq_fill_value(self):
2144+
# test for fill value during resampling, issue 3715
2145+
2146+
s = self.create_series()
2147+
new_index = date_range(s.index[0].to_timestamp(how='start'),
2148+
(s.index[-1]).to_timestamp(how='start'),
2149+
freq='1H')
2150+
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
2151+
result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
2152+
assert_series_equal(result, expected)
2153+
2154+
frame = s.to_frame('value')
2155+
new_index = date_range(frame.index[0].to_timestamp(how='start'),
2156+
(frame.index[-1]).to_timestamp(how='start'),
2157+
freq='1H')
2158+
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
2159+
result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
2160+
assert_frame_equal(result, expected)
2161+
21252162
def test_selection(self):
21262163
index = self.create_series().index
21272164
# This is a bug, these should be implemented

0 commit comments

Comments
 (0)