diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 4ce7e6d2cd3b8..f123cb9071b4a 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -133,9 +133,8 @@ Other enhancements - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`) - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) - - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`) - +- ``DataFrame.asfreq()`` now accepts a ``fill_value`` option to fill missing values during resampling (:issue:`3715`). .. _whatsnew_0200.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0b5767da74cad..071dcf7a46517 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4072,12 +4072,17 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, sort=sort, group_keys=group_keys, squeeze=squeeze, **kwargs) - def asfreq(self, freq, method=None, how=None, normalize=False): + def asfreq(self, freq, method=None, how=None, normalize=False, + fill_value=None): """ Convert TimeSeries to specified frequency. Optionally provide filling method to pad/backfill missing values. + Returns the original data conformed to a new index with the specified + frequency. ``resample`` is more appropriate if an operation, such as + summarization, is necessary to represent the data at the new frequency. 
+ Parameters ---------- freq : DateOffset object, or string @@ -4092,18 +4097,79 @@ def asfreq(self, freq, method=None, how=None, normalize=False): For PeriodIndex only, see PeriodIndex.asfreq normalize : bool, default False Whether to reset output index to midnight + fill_value : scalar, optional + Value to use for missing values, applied during upsampling (note + this does not fill NaNs that already were present). + + .. versionadded:: 0.20.0 Returns ------- converted : type of caller + Examples + -------- + + Start by creating a series with 4 one minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=4, freq='T') + >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) + >>> df = pd.DataFrame({'s':series}) + >>> df + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:03:00 3.0 + + Upsample the series into 30 second bins. + + >>> df.asfreq(freq='30S') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 NaN + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``fill_value``. + + >>> df.asfreq(freq='30S', fill_value=9.0) + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 9.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 9.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 9.0 + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``method``. + + >>> df.asfreq(freq='30S', method='bfill') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 2.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 3.0 + 2000-01-01 00:03:00 3.0 + + See Also + -------- + reindex + Notes ----- To learn more about the frequency strings, please see `this link `__. 
""" from pandas.tseries.resample import asfreq - return asfreq(self, freq, method=method, how=how, normalize=normalize) + return asfreq(self, freq, method=method, how=how, normalize=normalize, + fill_value=fill_value) def at_time(self, time, asof=False): """ @@ -4184,9 +4250,6 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None, .. versionadded:: 0.19.0 - Notes - ----- - To learn more about the offset strings, please see `this link `__. diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index c6c3b4f43b55a..889d31c39820f 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -323,6 +323,26 @@ def test_asfreq_datetimeindex(self): ts = df['A'].asfreq('B') tm.assertIsInstance(ts.index, DatetimeIndex) + def test_asfreq_fillvalue(self): + # test for fill value during upsampling, related to issue 3715 + + # setup + rng = pd.date_range('1/1/2016', periods=10, freq='2S') + ts = pd.Series(np.arange(len(rng)), index=rng) + df = pd.DataFrame({'one': ts}) + + # insert pre-existing missing value + df.loc['2016-01-01 00:00:08', 'one'] = None + + actual_df = df.asfreq(freq='1S', fill_value=9.0) + expected_df = df.asfreq(freq='1S').fillna(9.0) + expected_df.loc['2016-01-01 00:00:08', 'one'] = None + assert_frame_equal(expected_df, actual_df) + + expected_series = ts.asfreq(freq='1S').fillna(9.0) + actual_series = ts.asfreq(freq='1S', fill_value=9.0) + assert_series_equal(expected_series, actual_series) + def test_first_last_valid(self): N = len(self.frame.index) mat = randn(N) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index e6d500144fa44..e93e5637099c1 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -353,7 +353,7 @@ def transform(self, arg, *args, **kwargs): def _downsample(self, f): raise AbstractMethodError(self) - def _upsample(self, f, limit=None): + def _upsample(self, f, limit=None, fill_value=None): raise 
AbstractMethodError(self) def _gotitem(self, key, ndim, subset=None): @@ -509,12 +509,25 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, limit_direction=limit_direction, downcast=downcast, **kwargs) - def asfreq(self): + def asfreq(self, fill_value=None): """ return the values at the new freq, - essentially a reindex with (no filling) + essentially a reindex + + Parameters + ---------- + fill_value: scalar, optional + Value to use for missing values, applied during upsampling (note + this does not fill NaNs that already were present). + + .. versionadded:: 0.20.0 + + See Also + -------- + Series.asfreq + DataFrame.asfreq """ - return self._upsample('asfreq') + return self._upsample('asfreq', fill_value=fill_value) def std(self, ddof=1, *args, **kwargs): """ @@ -713,12 +726,14 @@ def _adjust_binner_for_upsample(self, binner): binner = binner[:-1] return binner - def _upsample(self, method, limit=None): + def _upsample(self, method, limit=None, fill_value=None): """ method : string {'backfill', 'bfill', 'pad', 'ffill', 'asfreq'} method for upsampling limit : int, default None Maximum size gap to fill when reindexing + fill_value : scalar, default None + Value to use for missing values See also -------- @@ -745,7 +760,7 @@ def _upsample(self, method, limit=None): result.index = res_index else: result = obj.reindex(res_index, method=method, - limit=limit) + limit=limit, fill_value=fill_value) return self._wrap_result(result) @@ -865,12 +880,14 @@ def _downsample(self, how, **kwargs): 'Frequency {} cannot be resampled to {}, as they are not ' 'sub or super periods'.format(ax.freq, self.freq)) - def _upsample(self, method, limit=None): + def _upsample(self, method, limit=None, fill_value=None): """ method : string {'backfill', 'bfill', 'pad', 'ffill'} method for upsampling limit : int, default None Maximum size gap to fill when reindexing + fill_value : scalar, default None + Value to use for missing values See also -------- @@ -884,8 +901,8 
@@ def _upsample(self, method, limit=None): " datetime-like") # we may need to actually resample as if we are timestamps if self.kind == 'timestamp': - return super(PeriodIndexResampler, self)._upsample(method, - limit=limit) + return super(PeriodIndexResampler, self)._upsample( + method, limit=limit, fill_value=fill_value) ax = self.ax obj = self.obj @@ -1346,7 +1363,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst)) -def asfreq(obj, freq, method=None, how=None, normalize=False): +def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): """ Utility frequency conversion method for Series/DataFrame """ @@ -1366,7 +1383,7 @@ def asfreq(obj, freq, method=None, how=None, normalize=False): return obj.copy() dti = date_range(obj.index[0], obj.index[-1], freq=freq) dti.name = obj.index.name - rs = obj.reindex(dti, method=method) + rs = obj.reindex(dti, method=method, fill_value=fill_value) if normalize: rs.index = rs.index.normalize() return rs diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 26c311b4a72f8..05e372c79968f 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -693,6 +693,24 @@ def test_asfreq_upsample(self): expected = frame.reindex(new_index) assert_frame_equal(result, expected) + def test_asfreq_fill_value(self): + # test for fill value during resampling, issue 3715 + + s = self.create_series() + + result = s.resample('1H').asfreq() + new_index = self.create_index(s.index[0], s.index[-1], freq='1H') + expected = s.reindex(new_index) + assert_series_equal(result, expected) + + frame = s.to_frame('value') + frame.iloc[1] = None + result = frame.resample('1H').asfreq(fill_value=4.0) + new_index = self.create_index(frame.index[0], + frame.index[-1], freq='1H') + expected = frame.reindex(new_index, fill_value=4.0) + assert_frame_equal(result, expected) + 
def test_resample_interpolate(self): # # 12925 df = self.create_series().to_frame('value') @@ -2159,6 +2177,25 @@ def test_asfreq_upsample(self): result = frame.resample('1H').asfreq() assert_frame_equal(result, expected) + def test_asfreq_fill_value(self): + # test for fill value during resampling, issue 3715 + + s = self.create_series() + new_index = date_range(s.index[0].to_timestamp(how='start'), + (s.index[-1]).to_timestamp(how='start'), + freq='1H') + expected = s.to_timestamp().reindex(new_index, fill_value=4.0) + result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0) + assert_series_equal(result, expected) + + frame = s.to_frame('value') + new_index = date_range(frame.index[0].to_timestamp(how='start'), + (frame.index[-1]).to_timestamp(how='start'), + freq='1H') + expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) + result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0) + assert_frame_equal(result, expected) + def test_selection(self): index = self.create_series().index # This is a bug, these should be implemented