diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6ca8f6731bbb8..a0886eb431882 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6495,40 +6495,98 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, def asof(self, where, subset=None): """ - The last row without any NaN is taken (or the last row without - NaN considering only the subset of columns in the case of a DataFrame) + Return the last row(s) without any `NaN`s before `where`. + + The last row (for each element in `where`, if list) without any + `NaN` is taken. + In case of a :class:`~pandas.DataFrame`, the last row without `NaN` + is taken, considering only the subset of columns (if not `None`). .. versionadded:: 0.19.0 For DataFrame - If there is no good value, NaN is returned for a Series + If there is no good value, `NaN` is returned for a Series or a Series of NaN values for a DataFrame Parameters ---------- - where : date or array of dates - subset : string or list of strings, default None - if not None use these columns for NaN propagation + where : date or array-like of dates + Date(s) before which the last row(s) are returned. + subset : str or array-like of str, default `None` + For DataFrame, if not `None`, only use these columns to + check for `NaN`s. Notes ----- - Dates are assumed to be sorted - Raises if this is not the case + Dates are assumed to be sorted. Raises if this is not the case. Returns ------- - where is scalar - - - value or NaN if input is Series - - Series if input is DataFrame + scalar, Series, or DataFrame - where is Index: same shape object as input + * scalar : when `self` is a Series and `where` is a scalar + * Series : when `self` is a Series and `where` is an array-like, + or when `self` is a DataFrame and `where` is a scalar + * DataFrame : when `self` is a DataFrame and `where` is an + array-like See Also -------- - merge_asof + merge_asof : Perform an asof merge. Similar to left join. 
- """ + Examples + -------- + A Series and a scalar `where`. + + >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) + >>> s + 10 1.0 + 20 2.0 + 30 NaN + 40 4.0 + dtype: float64 + + >>> s.asof(20) + 2.0 + For a sequence `where`, a Series is returned. The first value is + ``NaN``, because the first element of `where` is before the first + index value. + + >>> s.asof([5, 20]) + 5 NaN + 20 2.0 + dtype: float64 + + Missing values are not considered. The following is ``2.0``, not + ``NaN``, even though ``NaN`` is at the index location for ``30``. + + >>> s.asof(30) + 2.0 + + Take all columns into consideration + + >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], + ... 'b': [None, None, None, None, 500]}, + ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', + ... '2018-02-27 09:02:00', + ... '2018-02-27 09:03:00', + ... '2018-02-27 09:04:00', + ... '2018-02-27 09:05:00'])) + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30'])) + a b + 2018-02-27 09:03:30 NaN NaN + 2018-02-27 09:04:30 NaN NaN + + Take a single column into consideration + + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30']), + ... subset=['a']) + a b + 2018-02-27 09:03:30 30.0 NaN + 2018-02-27 09:04:30 40.0 NaN + """ if isinstance(where, compat.string_types): from pandas import to_datetime where = to_datetime(where)