diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index ce63cb2473bc4..1f5948649c5e2 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1061,7 +1061,7 @@ Indexing - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) - Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) - Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) - +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9626079660771..35f3a7c20e270 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5015,31 +5015,6 @@ def update(self, other, join='left', overwrite=True, filter_func=None, self[col] = expressions.where(mask, this, that) - # ---------------------------------------------------------------------- - # Misc methods - - def _get_valid_indices(self): - is_valid = self.count(1) > 0 - return self.index[is_valid] - - @Appender(_shared_docs['valid_index'] % { - 'position': 'first', 'klass': 'DataFrame'}) - def first_valid_index(self): - if len(self) == 0: - return None - - valid_indices = self._get_valid_indices() - return valid_indices[0] if len(valid_indices) else None - - @Appender(_shared_docs['valid_index'] % { - 'position': 'last', 'klass': 'DataFrame'}) - def last_valid_index(self): - if len(self) == 0: - return None - - valid_indices = self._get_valid_indices() - return valid_indices[-1] if len(valid_indices) else None - # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d5cd22732f0a9..1931875799c73 100644 --- a/pandas/core/generic.py 
+++ b/pandas/core/generic.py
@@ -8763,6 +8763,63 @@ def transform(self, func, *args, **kwargs):
         scalar : type of index
         """
 
+    def _find_valid_index(self, how):
+        """
+        Retrieve the index label of the first or last valid entry.
+
+        An entry is valid when it is not NA; for 2-dimensional objects a
+        row is valid as soon as one of its values is not NA.
+
+        Parameters
+        ----------
+        how : {'first', 'last'}
+            Whether to look for the first or for the last valid entry.
+
+        Returns
+        -------
+        idx_valid : type of index
+            ``None`` if the object is empty or has no valid entry.
+
+        Raises
+        ------
+        ValueError
+            If `how` is neither 'first' nor 'last'.
+        """
+        if how not in ('first', 'last'):
+            raise ValueError("how must be 'first' or 'last', "
+                             "got {how!r}".format(how=how))
+
+        if len(self) == 0:  # early stop
+            return None
+
+        is_valid = ~self.isna()
+        if self.ndim == 2:
+            is_valid = is_valid.any(axis=1)  # reduce axis 1
+        is_valid = is_valid.values
+
+        # Look entries up by position, not by label: with duplicated
+        # index labels a label lookup returns several values, which
+        # cannot be evaluated as a single boolean.
+        if how == 'first':
+            pos = is_valid.argmax()
+        else:
+            pos = len(self) - 1 - is_valid[::-1].argmax()
+
+        # argmax returns a position even when no entry is valid
+        if not is_valid[pos]:
+            return None
+        return self.index[pos]
+
+    @Appender(_shared_docs['valid_index'] % {'position': 'first',
+                                             'klass': 'NDFrame'})
+    def first_valid_index(self):
+        return self._find_valid_index('first')
+
+    @Appender(_shared_docs['valid_index'] % {'position': 'last',
+                                             'klass': 'NDFrame'})
+    def last_valid_index(self):
+        return self._find_valid_index('last')
+
 
 def _doc_parms(cls):
     """Return a tuple of the doc parms."""
diff --git a/pandas/core/series.py b/pandas/core/series.py
index f3630dc43fbd1..808ac5e721fc8 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3887,32 +3887,6 @@ def valid(self, inplace=False, **kwargs):
                       "Use .dropna instead.", FutureWarning, stacklevel=2)
         return self.dropna(inplace=inplace, **kwargs)
 
-    @Appender(generic._shared_docs['valid_index'] % {
-        'position': 'first', 'klass': 'Series'})
-    def first_valid_index(self):
-        if len(self) == 0:
-            return None
-
-        mask = isna(self._values)
-        i = mask.argmin()
-        if mask[i]:
-            return None
-        else:
-            return self.index[i]
-
-    @Appender(generic._shared_docs['valid_index'] % {
-        'position': 'last', 'klass': 'Series'})
-    def last_valid_index(self):
-        if len(self) == 0:
-            return None
-
-        mask = isna(self._values[::-1])
-        i = mask.argmin()
-        if mask[i]:
-            return None
-        else:
-            return self.index[len(self) - i - 1]
-
     # ----------------------------------------------------------------------
     # Time series-oriented methods
 
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index ceb6c942c81b1..277c3c9bc5c23 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -530,6 +530,15 @@ def test_first_last_valid(self):
         assert frame.last_valid_index() is None
         assert frame.first_valid_index() is None
 
+        # GH20499: it preserves freq with holes
+        frame.index = date_range("20110101", periods=N, freq="B")
+        frame.iloc[1] = 1
+        frame.iloc[-2] = 1
+        assert frame.first_valid_index() == frame.index[1]
+        assert frame.last_valid_index() == frame.index[-2]
+        assert frame.first_valid_index().freq == frame.index.freq
+        assert frame.last_valid_index().freq == frame.index.freq
+
     def test_at_time_frame(self):
         rng = date_range('1/1/2000', '1/5/2000', freq='5min')
         ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index baf2619c7b022..8e537b137baaf 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -432,6 +432,15 @@ def test_first_last_valid(self):
         assert empty.last_valid_index() is None
         assert empty.first_valid_index() is None
 
+        # GH20499: it preserves freq with holes
+        ts.index = date_range("20110101", periods=len(ts), freq="B")
+        ts.iloc[1] = 1
+        ts.iloc[-2] = 1
+        assert ts.first_valid_index() == ts.index[1]
+        assert ts.last_valid_index() == ts.index[-2]
+        assert ts.first_valid_index().freq == ts.index.freq
+        assert ts.last_valid_index().freq == ts.index.freq
+
     def test_mpl_compat_hack(self):
         result = self.ts[:, np.newaxis]
         expected = self.ts.values[:, np.newaxis]