-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: add fill_value to resample #14591
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4072,12 +4072,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, | |
sort=sort, group_keys=group_keys, squeeze=squeeze, | ||
**kwargs) | ||
|
||
def asfreq(self, freq, method=None, how=None, normalize=False): | ||
def asfreq(self, freq, method=None, how=None, normalize=False, | ||
fill_value=None): | ||
""" | ||
Convert TimeSeries to specified frequency. | ||
|
||
Optionally provide filling method to pad/backfill missing values. | ||
|
||
Returns the original data conformed to a new index with the specified | ||
frequency. ``resample`` is more appropriate if an operation, such as | ||
summarization, is necessary to represent the data at the new frequency. | ||
|
||
Parameters | ||
---------- | ||
freq : DateOffset object, or string | ||
|
@@ -4092,18 +4097,79 @@ def asfreq(self, freq, method=None, how=None, normalize=False): | |
For PeriodIndex only, see PeriodIndex.asfreq | ||
normalize : bool, default False | ||
Whether to reset output index to midnight | ||
fill_value : scalar, optional | |
Value to use for missing values, applied during upsampling (note | |
this does not fill NaNs that were already present). | |
|
||
.. versionadded:: 0.20.0 | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add some Examples here? |
||
Returns | ||
------- | ||
converted : type of caller | ||
|
||
Examples | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice! |
||
-------- | ||
|
||
Start by creating a series with 4 one minute timestamps. | ||
|
||
>>> index = pd.date_range('1/1/2000', periods=4, freq='T') | ||
>>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) | ||
>>> df = pd.DataFrame({'s':series}) | ||
>>> df | ||
s | ||
2000-01-01 00:00:00 0.0 | ||
2000-01-01 00:01:00 NaN | ||
2000-01-01 00:02:00 2.0 | ||
2000-01-01 00:03:00 3.0 | ||
|
||
Upsample the series into 30 second bins. | ||
|
||
>>> df.asfreq(freq='30S') | ||
s | ||
2000-01-01 00:00:00 0.0 | ||
2000-01-01 00:00:30 NaN | ||
2000-01-01 00:01:00 NaN | ||
2000-01-01 00:01:30 NaN | ||
2000-01-01 00:02:00 2.0 | ||
2000-01-01 00:02:30 NaN | ||
2000-01-01 00:03:00 3.0 | ||
|
||
Upsample again, providing a ``fill_value``. | |
|
||
>>> df.asfreq(freq='30S', fill_value=9.0) | ||
s | ||
2000-01-01 00:00:00 0.0 | ||
2000-01-01 00:00:30 9.0 | ||
2000-01-01 00:01:00 NaN | ||
2000-01-01 00:01:30 9.0 | ||
2000-01-01 00:02:00 2.0 | ||
2000-01-01 00:02:30 9.0 | ||
2000-01-01 00:03:00 3.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Really nice example, thanks! |
||
|
||
Upsample again, providing a ``method``. | ||
|
||
>>> df.asfreq(freq='30S', method='bfill') | ||
s | ||
2000-01-01 00:00:00 0.0 | ||
2000-01-01 00:00:30 NaN | ||
2000-01-01 00:01:00 NaN | ||
2000-01-01 00:01:30 2.0 | ||
2000-01-01 00:02:00 2.0 | ||
2000-01-01 00:02:30 3.0 | ||
2000-01-01 00:03:00 3.0 | ||
|
||
See Also | ||
-------- | ||
reindex | ||
|
||
Notes | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a See Also to |
||
----- | ||
To learn more about the frequency strings, please see `this link | ||
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. | ||
""" | ||
from pandas.tseries.resample import asfreq | ||
return asfreq(self, freq, method=method, how=how, normalize=normalize) | ||
return asfreq(self, freq, method=method, how=how, normalize=normalize, | ||
fill_value=fill_value) | ||
|
||
def at_time(self, time, asof=False): | ||
""" | ||
|
@@ -4184,9 +4250,6 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, | |
|
||
.. versionadded:: 0.19.0 | ||
|
||
Notes | ||
----- | ||
|
||
To learn more about the offset strings, please see `this link | ||
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -353,7 +353,7 @@ def transform(self, arg, *args, **kwargs): | |
def _downsample(self, f): | ||
raise AbstractMethodError(self) | ||
|
||
def _upsample(self, f, limit=None): | ||
def _upsample(self, f, limit=None, fill_value=None): | ||
raise AbstractMethodError(self) | ||
|
||
def _gotitem(self, key, ndim, subset=None): | ||
|
@@ -509,12 +509,25 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, | |
limit_direction=limit_direction, | ||
downcast=downcast, **kwargs) | ||
|
||
def asfreq(self): | ||
def asfreq(self, fill_value=None): | ||
""" | ||
return the values at the new freq, | ||
essentially a reindex with (no filling) | ||
essentially a reindex | ||
|
||
Parameters | ||
---------- | ||
fill_value : scalar, optional | |
Value to use for missing values, applied during upsampling (note | |
this does not fill NaNs that were already present). | |
|
||
.. versionadded:: 0.20.0 | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a See Also to Series.asfreq / DataFrame.asfreq? |
||
See Also | ||
-------- | ||
Series.asfreq | ||
DataFrame.asfreq | ||
""" | ||
return self._upsample('asfreq') | ||
return self._upsample('asfreq', fill_value=fill_value) | ||
|
||
def std(self, ddof=1, *args, **kwargs): | ||
""" | ||
|
@@ -713,12 +726,14 @@ def _adjust_binner_for_upsample(self, binner): | |
binner = binner[:-1] | ||
return binner | ||
|
||
def _upsample(self, method, limit=None): | ||
def _upsample(self, method, limit=None, fill_value=None): | ||
""" | ||
method : string {'backfill', 'bfill', 'pad', | ||
'ffill', 'asfreq'} method for upsampling | ||
limit : int, default None | ||
Maximum size gap to fill when reindexing | ||
fill_value : scalar, default None | ||
Value to use for missing values | ||
|
||
See also | ||
-------- | ||
|
@@ -745,7 +760,7 @@ def _upsample(self, method, limit=None): | |
result.index = res_index | ||
else: | ||
result = obj.reindex(res_index, method=method, | ||
limit=limit) | ||
limit=limit, fill_value=fill_value) | ||
|
||
return self._wrap_result(result) | ||
|
||
|
@@ -865,12 +880,14 @@ def _downsample(self, how, **kwargs): | |
'Frequency {} cannot be resampled to {}, as they are not ' | ||
'sub or super periods'.format(ax.freq, self.freq)) | ||
|
||
def _upsample(self, method, limit=None): | ||
def _upsample(self, method, limit=None, fill_value=None): | ||
""" | ||
method : string {'backfill', 'bfill', 'pad', 'ffill'} | ||
method for upsampling | ||
limit : int, default None | ||
Maximum size gap to fill when reindexing | ||
fill_value : scalar, default None | ||
Value to use for missing values | ||
|
||
See also | ||
-------- | ||
|
@@ -884,8 +901,8 @@ def _upsample(self, method, limit=None): | |
" datetime-like") | ||
# we may need to actually resample as if we are timestamps | ||
if self.kind == 'timestamp': | ||
return super(PeriodIndexResampler, self)._upsample(method, | ||
limit=limit) | ||
return super(PeriodIndexResampler, self)._upsample( | ||
method, limit=limit, fill_value=fill_value) | ||
|
||
ax = self.ax | ||
obj = self.obj | ||
|
@@ -1346,7 +1363,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): | |
Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst)) | ||
|
||
|
||
def asfreq(obj, freq, method=None, how=None, normalize=False): | ||
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): | ||
""" | ||
Utility frequency conversion method for Series/DataFrame | ||
""" | ||
|
@@ -1366,7 +1383,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False): | |
return obj.copy() | ||
dti = date_range(obj.index[0], obj.index[-1], freq=freq) | ||
dti.name = obj.index.name | ||
rs = obj.reindex(dti, method=method) | ||
rs = obj.reindex(dti, method=method, fill_value=fill_value) | ||
if normalize: | ||
rs.index = rs.index.normalize() | ||
return rs |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a sentence or two on the uses of, and differences between, .asfreq() and .resample()?