From 144de64656aaba304e7ab24b5942b68c265cd2a6 Mon Sep 17 00:00:00 2001 From: Shota Kawabuchi Date: Sun, 10 Sep 2017 14:39:23 +0900 Subject: [PATCH 1/2] BUG: DataFrame.first_valid_index() fails if there is no valid entry. Also fixed same issue on DataFrame.last_valid_index(). Add docstrings to both methods of DataFrame and Series. Closes #17400 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 82 +++++++++++++++++++++++++-- pandas/core/series.py | 62 +++++++++++++++++++- pandas/tests/frame/test_timeseries.py | 5 ++ 4 files changed, 144 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f50052347cfb5..53bd6a153b5c4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -399,6 +399,7 @@ Indexing - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) +- Bug in ``DataFrame.first_valid_index`` and ``DataFrame.last_valid_index`` when no valid entry (:issue:`17400`) I/O ^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5991ec825c841..15e81ac46aa28 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4069,23 +4069,97 @@ def update(self, other, join='left', overwrite=True, filter_func=None, # ---------------------------------------------------------------------- # Misc methods + def _get_valid_indices(self): + is_valid = self.count(1) > 0 + return self.index[is_valid] + def first_valid_index(self): """ - Return label for first non-NA/null value + Return index for first non-NA/null value. + If all elements are non-NA/null, returns None. + Also returns None for empty DataFrame. 
+ + Examples + -------- + + When no null value in a DataFrame, returns first index. + + >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) + >>> df.first_valid_index() + 0 + + When all elements in first row are null, returns second index. + + >>> df = DataFrame({'A': [None, 2, 3], 'B': [None, 'b', 'f']}) + >>> df.first_valid_index() + 1 + + When only part of elements in first row are null, recognized as valid. + + >>> df = DataFrame({'A': [1, 2, 3], 'B': [None, 'b', 'f']}) + >>> df.first_valid_index() + 0 + + When all elements in a dataframe are null, returns None + + >>> df = DataFrame({'A': [None, None, None], 'B': [None, None, None]}) + >>> df.first_valid_index() + + Returns None for empty DataFrame + + >>> df = DataFrame() + >>> df.first_valid_index() + """ if len(self) == 0: return None - return self.index[self.count(1) > 0][0] + valid_indices = self._get_valid_indices() + return valid_indices[0] if len(valid_indices) else None def last_valid_index(self): """ - Return label for last non-NA/null value + Return index for last non-NA/null value. + If all elements are non-NA/null, returns None. + Also returns None for empty DataFrame. + + Examples + -------- + + When no null value in a DataFrame, returns last index. + + >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) + >>> df.last_valid_index() + 2 + + When all elements in last row are null, returns second from last index. + + >>> df = DataFrame({'A': [1, 2, None], 'B': ['a', 'b', None]}) + >>> df.last_valid_index() + 1 + + When only part of elements in last row are null, recognized as valid. 
+ + >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', None]}) + >>> df.last_valid_index() + 2 + + When all elements in a dataframe are null, returns None + + >>> df = DataFrame({'A': [None, None, None], 'B': [None, None, None]}) + >>> df.last_valid_index() + + Returns None for empty DataFrame + + >>> df = DataFrame() + >>> df.last_valid_index() + """ if len(self) == 0: return None - return self.index[self.count(1) > 0][-1] + valid_indices = self._get_valid_indices() + return valid_indices[-1] if len(valid_indices) else None # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/core/series.py b/pandas/core/series.py index 6905fc1aced74..cd53ca67b0e75 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2840,7 +2840,36 @@ def dropna(self, axis=0, inplace=False, **kwargs): def first_valid_index(self): """ - Return label for first non-NA/null value + Return index for first non-NA/null value. + If all elements are non-NA/null, returns None. + Also returns None for empty Series. + + Examples + -------- + + When no null value in a Series, returns first index. + + >>> s = Series([3, 4, 5]) + >>> s.first_valid_index() + 0 + + When first element is null, returns second index. + + >>> s = Series([None, 4, 5]) + >>> s.first_valid_index() + 1 + + When all elements are null, returns None + + >>> s = Series([None, None, None]) + >>> s.first_valid_index() + + + Returns None for empty Series + + >>> s = Series() + >>> s.first_valid_index() + """ if len(self) == 0: return None @@ -2854,7 +2883,36 @@ def first_valid_index(self): def last_valid_index(self): """ - Return label for last non-NA/null value + Return index for last non-NA/null value + If all elements are non-NA/null, returns None. + Also returns None for empty Series. + + Examples + -------- + + When no null value in a Series, returns last index. 
+ + >>> s = Series([3, 4, 5]) + >>> s.last_valid_index() + 2 + + When last element is null, returns second from the last index. + + >>> s = Series([3, 4, None]) + >>> s.last_valid_index() + 1 + + When all elements are null, returns None + + >>> s = Series([None, None, None]) + >>> s.last_valid_index() + + + Returns None for empty Series + + >>> s = Series() + >>> s.last_valid_index() + """ if len(self) == 0: return None diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 19fbf854256c6..8f2501dd5222a 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -440,6 +440,11 @@ def test_first_last_valid(self): assert empty.last_valid_index() is None assert empty.first_valid_index() is None + # GH17400 + frame[:] = nan + assert frame.last_valid_index() is None + assert frame.first_valid_index() is None + def test_at_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) From 6e974cb51bbc700ce7576ad3cb53a0ebd890a7d9 Mon Sep 17 00:00:00 2001 From: Shota Kawabuchi Date: Mon, 11 Sep 2017 22:59:09 +0900 Subject: [PATCH 2/2] Modified documents. This commit will be squashed to previous commit. 
--- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/frame.py | 78 ++------------------------- pandas/core/generic.py | 16 ++++++ pandas/core/series.py | 68 ++--------------------- pandas/tests/frame/test_timeseries.py | 2 +- 5 files changed, 26 insertions(+), 140 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 53bd6a153b5c4..d5195cb1a6bf3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -399,7 +399,7 @@ Indexing - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) -- Bug in ``DataFrame.first_valid_index`` and ``DataFrame.last_valid_index`` when no valid entry (:issue:`17400`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) I/O ^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 15e81ac46aa28..2318f6133fc19 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4073,88 +4073,18 @@ def _get_valid_indices(self): is_valid = self.count(1) > 0 return self.index[is_valid] + @Appender(_shared_docs['valid_index'] % { + 'position': 'first', 'klass': 'DataFrame'}) def first_valid_index(self): - """ - Return index for first non-NA/null value. - If all elements are non-NA/null, returns None. - Also returns None for empty DataFrame. - - Examples - -------- - - When no null value in a DataFrame, returns first index. - - >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> df.first_valid_index() - 0 - - When all elements in first row are null, returns second index. 
- - >>> df = DataFrame({'A': [None, 2, 3], 'B': [None, 'b', 'f']}) - >>> df.first_valid_index() - 1 - - When only part of elements in first row are null, recognized as valid. - - >>> df = DataFrame({'A': [1, 2, 3], 'B': [None, 'b', 'f']}) - >>> df.first_valid_index() - 0 - - When all elements in a dataframe are null, returns None - - >>> df = DataFrame({'A': [None, None, None], 'B': [None, None, None]}) - >>> df.first_valid_index() - - Returns None for empty DataFrame - - >>> df = DataFrame() - >>> df.first_valid_index() - - """ if len(self) == 0: return None valid_indices = self._get_valid_indices() return valid_indices[0] if len(valid_indices) else None + @Appender(_shared_docs['valid_index'] % { + 'position': 'last', 'klass': 'DataFrame'}) def last_valid_index(self): - """ - Return index for last non-NA/null value. - If all elements are non-NA/null, returns None. - Also returns None for empty DataFrame. - - Examples - -------- - - When no null value in a DataFrame, returns last index. - - >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']}) - >>> df.last_valid_index() - 2 - - When all elements in last row are null, returns second from last index. - - >>> df = DataFrame({'A': [1, 2, None], 'B': ['a', 'b', None]}) - >>> df.last_valid_index() - 1 - - When only part of elements in last row are null, recognized as valid. 
- - >>> df = DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', None]}) - >>> df.last_valid_index() - 2 - - When all elements in a dataframe are null, returns None - - >>> df = DataFrame({'A': [None, None, None], 'B': [None, None, None]}) - >>> df.last_valid_index() - - Returns None for empty DataFrame - - >>> df = DataFrame() - >>> df.last_valid_index() - - """ if len(self) == 0: return None diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d16b079ba2c8..1d2fc29c2ea7b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6763,6 +6763,22 @@ def transform(self, func, *args, **kwargs): cls.transform = transform + # ---------------------------------------------------------------------- + # Misc methods + + _shared_docs['valid_index'] = """ + Return index for %(position)s non-NA/null value. + + Notes + ----- + If all elements are NA/null, returns None. + Also returns None for empty %(klass)s. + + Returns + ------- + scalar : type of index + """ + def _doc_parms(cls): """Return a tuple of the doc parms.""" diff --git a/pandas/core/series.py b/pandas/core/series.py index cd53ca67b0e75..a2a16af0d37e3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2838,39 +2838,9 @@ def dropna(self, axis=0, inplace=False, **kwargs): valid = lambda self, inplace=False, **kwargs: self.dropna(inplace=inplace, **kwargs) + @Appender(generic._shared_docs['valid_index'] % { + 'position': 'first', 'klass': 'Series'}) def first_valid_index(self): - """ - Return index for first non-NA/null value. - If all elements are non-NA/null, returns None. - Also returns None for empty Series. - - Examples - -------- - - When no null value in a Series, returns first index. - - >>> s = Series([3, 4, 5]) - >>> s.first_valid_index() - 0 - - When first element is null, returns second index. 
- - >>> s = Series([None, 4, 5]) - >>> s.first_valid_index() - 1 - - When all elements are null, returns None - - >>> s = Series([None, None, None]) - >>> s.first_valid_index() - - - Returns None for empty Series - - >>> s = Series() - >>> s.first_valid_index() - - """ if len(self) == 0: return None @@ -2881,39 +2851,9 @@ def first_valid_index(self): else: return self.index[i] + @Appender(generic._shared_docs['valid_index'] % { + 'position': 'last', 'klass': 'Series'}) def last_valid_index(self): - """ - Return index for last non-NA/null value - If all elements are non-NA/null, returns None. - Also returns None for empty Series. - - Examples - -------- - - When no null value in a Series, returns last index. - - >>> s = Series([3, 4, 5]) - >>> s.last_valid_index() - 2 - - When last element is null, returns second from the last index. - - >>> s = Series([3, 4, None]) - >>> s.last_valid_index() - 1 - - When all elements are null, returns None - - >>> s = Series([None, None, None]) - >>> s.last_valid_index() - - - Returns None for empty Series - - >>> s = Series() - >>> s.last_valid_index() - - """ if len(self) == 0: return None diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 8f2501dd5222a..26a2c6f9a5045 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -440,7 +440,7 @@ def test_first_last_valid(self): assert empty.last_valid_index() is None assert empty.first_valid_index() is None - # GH17400 + # GH17400: no valid entries frame[:] = nan assert frame.last_valid_index() is None assert frame.first_valid_index() is None