Skip to content

ENH: add fill_value to resample #14591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,8 @@ Other enhancements
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)

- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)

- ``Series.asfreq()`` and ``DataFrame.asfreq()`` now accept a ``fill_value`` option to fill the values introduced by upsampling; NaNs already present in the data are left untouched (:issue:`3715`).

.. _whatsnew_0200.api_breaking:

Expand Down
73 changes: 68 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4072,12 +4072,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
sort=sort, group_keys=group_keys, squeeze=squeeze,
**kwargs)

def asfreq(self, freq, method=None, how=None, normalize=False):
def asfreq(self, freq, method=None, how=None, normalize=False,
fill_value=None):
"""
Convert TimeSeries to specified frequency.

Optionally provide filling method to pad/backfill missing values.

Returns the original data conformed to a new index with the specified
frequency. ``resample`` is more appropriate if an operation, such as
summarization, is necessary to represent the data at the new frequency.

Parameters
----------
freq : DateOffset object, or string
Expand All @@ -4092,18 +4097,79 @@ def asfreq(self, freq, method=None, how=None, normalize=False):
For PeriodIndex only, see PeriodIndex.asfreq
normalize : bool, default False
Whether to reset output index to midnight
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a sentence or 2 on the use / diffs of .asfreq() / resample.

fill_value : scalar, optional
Value to use for missing values, applied during upsampling (note
this does not fill NaNs that were already present).

.. versionadded:: 0.20.0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add some Examples here

Returns
-------
converted : type of caller

Examples
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

--------

Start by creating a series with four one-minute timestamps.

>>> index = pd.date_range('1/1/2000', periods=4, freq='T')
>>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
>>> df = pd.DataFrame({'s':series})
>>> df
s
2000-01-01 00:00:00 0.0
2000-01-01 00:01:00 NaN
2000-01-01 00:02:00 2.0
2000-01-01 00:03:00 3.0

Upsample the series into 30-second bins.

>>> df.asfreq(freq='30S')
s
2000-01-01 00:00:00 0.0
2000-01-01 00:00:30 NaN
2000-01-01 00:01:00 NaN
2000-01-01 00:01:30 NaN
2000-01-01 00:02:00 2.0
2000-01-01 00:02:30 NaN
2000-01-01 00:03:00 3.0

Upsample again, providing a ``fill_value``.

>>> df.asfreq(freq='30S', fill_value=9.0)
s
2000-01-01 00:00:00 0.0
2000-01-01 00:00:30 9.0
2000-01-01 00:01:00 NaN
2000-01-01 00:01:30 9.0
2000-01-01 00:02:00 2.0
2000-01-01 00:02:30 9.0
2000-01-01 00:03:00 3.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really nice example, thanks!
Can you add the output using the existing method keyword (also for filling NaNs) with this example as well? (pick one of the possible values for the keyword)


Upsample again, providing a ``method``.

>>> df.asfreq(freq='30S', method='bfill')
s
2000-01-01 00:00:00 0.0
2000-01-01 00:00:30 NaN
2000-01-01 00:01:00 NaN
2000-01-01 00:01:30 2.0
2000-01-01 00:02:00 2.0
2000-01-01 00:02:30 3.0
2000-01-01 00:03:00 3.0

See Also
--------
reindex

Notes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a See Also to .reindex (which has fill_value)

-----
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
"""
from pandas.tseries.resample import asfreq
return asfreq(self, freq, method=method, how=how, normalize=normalize)
return asfreq(self, freq, method=method, how=how, normalize=normalize,
fill_value=fill_value)

def at_time(self, time, asof=False):
"""
Expand Down Expand Up @@ -4184,9 +4250,6 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,

.. versionadded:: 0.19.0

Notes
-----

To learn more about the offset strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,26 @@ def test_asfreq_datetimeindex(self):
ts = df['A'].asfreq('B')
tm.assertIsInstance(ts.index, DatetimeIndex)

def test_asfreq_fillvalue(self):
    # GH 3715: ``fill_value`` is applied to the slots created by
    # upsampling; a NaN that was already in the data must stay NaN.

    # ten points spaced two seconds apart
    index = pd.date_range('1/1/2016', periods=10, freq='2S')
    series = pd.Series(np.arange(len(index)), index=index)
    frame = pd.DataFrame({'one': series})

    # plant a missing value on the frame before upsampling
    frame.loc['2016-01-01 00:00:08', 'one'] = None

    # frame: build the expectation by filling every hole after a plain
    # upsample, then putting the planted NaN back
    result_frame = frame.asfreq(freq='1S', fill_value=9.0)
    upsampled = frame.asfreq(freq='1S')
    wanted_frame = upsampled.fillna(9.0)
    wanted_frame.loc['2016-01-01 00:00:08', 'one'] = None
    assert_frame_equal(wanted_frame, result_frame)

    # series: the NaN above was assigned on the frame, so the series
    # expectation is just a plain upsample followed by fillna
    wanted_series = series.asfreq(freq='1S').fillna(9.0)
    result_series = series.asfreq(freq='1S', fill_value=9.0)
    assert_series_equal(wanted_series, result_series)

def test_first_last_valid(self):
N = len(self.frame.index)
mat = randn(N)
Expand Down
39 changes: 28 additions & 11 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ def transform(self, arg, *args, **kwargs):
def _downsample(self, f):
raise AbstractMethodError(self)

def _upsample(self, f, limit=None):
def _upsample(self, f, limit=None, fill_value=None):
raise AbstractMethodError(self)

def _gotitem(self, key, ndim, subset=None):
Expand Down Expand Up @@ -509,12 +509,25 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
limit_direction=limit_direction,
downcast=downcast, **kwargs)

def asfreq(self):
def asfreq(self, fill_value=None):
"""
return the values at the new freq,
essentially a reindex with (no filling)
essentially a reindex

Parameters
----------
fill_value : scalar, optional
Value to use for missing values, applied during upsampling (note
this does not fill NaNs that were already present).

.. versionadded:: 0.20.0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a See Also to Series.asfreq/DataFrame.asfreq

See Also
--------
Series.asfreq
DataFrame.asfreq
"""
return self._upsample('asfreq')
return self._upsample('asfreq', fill_value=fill_value)

def std(self, ddof=1, *args, **kwargs):
"""
Expand Down Expand Up @@ -713,12 +726,14 @@ def _adjust_binner_for_upsample(self, binner):
binner = binner[:-1]
return binner

def _upsample(self, method, limit=None):
def _upsample(self, method, limit=None, fill_value=None):
"""
method : string {'backfill', 'bfill', 'pad',
'ffill', 'asfreq'} method for upsampling
limit : int, default None
Maximum size gap to fill when reindexing
fill_value : scalar, default None
Value to use for missing values

See also
--------
Expand All @@ -745,7 +760,7 @@ def _upsample(self, method, limit=None):
result.index = res_index
else:
result = obj.reindex(res_index, method=method,
limit=limit)
limit=limit, fill_value=fill_value)

return self._wrap_result(result)

Expand Down Expand Up @@ -865,12 +880,14 @@ def _downsample(self, how, **kwargs):
'Frequency {} cannot be resampled to {}, as they are not '
'sub or super periods'.format(ax.freq, self.freq))

def _upsample(self, method, limit=None):
def _upsample(self, method, limit=None, fill_value=None):
"""
method : string {'backfill', 'bfill', 'pad', 'ffill'}
method for upsampling
limit : int, default None
Maximum size gap to fill when reindexing
fill_value : scalar, default None
Value to use for missing values

See also
--------
Expand All @@ -884,8 +901,8 @@ def _upsample(self, method, limit=None):
" datetime-like")
# we may need to actually resample as if we are timestamps
if self.kind == 'timestamp':
return super(PeriodIndexResampler, self)._upsample(method,
limit=limit)
return super(PeriodIndexResampler, self)._upsample(
method, limit=limit, fill_value=fill_value)

ax = self.ax
obj = self.obj
Expand Down Expand Up @@ -1346,7 +1363,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))


def asfreq(obj, freq, method=None, how=None, normalize=False):
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
"""
Utility frequency conversion method for Series/DataFrame
"""
Expand All @@ -1366,7 +1383,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False):
return obj.copy()
dti = date_range(obj.index[0], obj.index[-1], freq=freq)
dti.name = obj.index.name
rs = obj.reindex(dti, method=method)
rs = obj.reindex(dti, method=method, fill_value=fill_value)
if normalize:
rs.index = rs.index.normalize()
return rs
37 changes: 37 additions & 0 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,24 @@ def test_asfreq_upsample(self):
expected = frame.reindex(new_index)
assert_frame_equal(result, expected)

def test_asfreq_fill_value(self):
    # GH 3715: ``fill_value`` support in ``Resampler.asfreq``

    series = self.create_series()

    # with no fill value, asfreq should equal a plain reindex onto
    # the hourly index
    plain = series.resample('1H').asfreq()
    hourly = self.create_index(series.index[0], series.index[-1],
                               freq='1H')
    assert_series_equal(plain, series.reindex(hourly))

    # frame whose second row is blanked out before resampling
    df = series.to_frame('value')
    df.iloc[1] = None
    filled = df.resample('1H').asfreq(fill_value=4.0)
    hourly = self.create_index(df.index[0], df.index[-1], freq='1H')
    assert_frame_equal(filled, df.reindex(hourly, fill_value=4.0))

def test_resample_interpolate(self):
# # 12925
df = self.create_series().to_frame('value')
Expand Down Expand Up @@ -2159,6 +2177,25 @@ def test_asfreq_upsample(self):
result = frame.resample('1H').asfreq()
assert_frame_equal(result, expected)

def test_asfreq_fill_value(self):
    # GH 3715: fill value during resampling, with the resample carried
    # out on timestamps (``kind='timestamp'``)

    series = self.create_series()

    # series: expected result is the timestamp-converted data reindexed
    # hourly between the start-of-period timestamps of both ends
    start = series.index[0].to_timestamp(how='start')
    stop = series.index[-1].to_timestamp(how='start')
    hourly = date_range(start, stop, freq='1H')
    wanted = series.to_timestamp().reindex(hourly, fill_value=4.0)
    got = series.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
    assert_series_equal(got, wanted)

    # frame: same check with a different fill value
    df = series.to_frame('value')
    start = df.index[0].to_timestamp(how='start')
    stop = df.index[-1].to_timestamp(how='start')
    hourly = date_range(start, stop, freq='1H')
    wanted = df.to_timestamp().reindex(hourly, fill_value=3.0)
    got = df.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
    assert_frame_equal(got, wanted)

def test_selection(self):
index = self.create_series().index
# This is a bug, these should be implemented
Expand Down