Skip to content

Commit 5d1f5ab

Browse files
Maximiliano Greco, jreback
Maximiliano Greco
authored and committed
BUG: Fix first_last_valid_index, now preserves the frequency. (pandas-dev#20569)
1 parent d2ab407 commit 5d1f5ab

File tree

6 files changed

+64
-52
lines changed

6 files changed

+64
-52
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ Indexing
10611061
- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`)
10621062
- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`)
10631063
- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`)
1064-
1064+
- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`).
10651065

10661066
MultiIndex
10671067
^^^^^^^^^^

pandas/core/frame.py

-25
Original file line numberDiff line numberDiff line change
@@ -5015,31 +5015,6 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
50155015

50165016
self[col] = expressions.where(mask, this, that)
50175017

5018-
# ----------------------------------------------------------------------
5019-
# Misc methods
5020-
5021-
# Removed helper (note the "-" gutter lines): builds a boolean mask that is
# True for rows where ``self.count(1)`` is positive and uses it to select
# the matching labels from ``self.index``.
def _get_valid_indices(self):
5022-
is_valid = self.count(1) > 0
5023-
return self.index[is_valid]
5024-
5025-
# Removed DataFrame implementation: an empty frame yields None; otherwise
# the first label returned by ``_get_valid_indices()`` is used, or None
# when that selection is empty.
@Appender(_shared_docs['valid_index'] % {
5026-
'position': 'first', 'klass': 'DataFrame'})
5027-
def first_valid_index(self):
5028-
if len(self) == 0:
5029-
return None
5030-
5031-
valid_indices = self._get_valid_indices()
5032-
return valid_indices[0] if len(valid_indices) else None
5033-
5034-
# Removed DataFrame implementation: an empty frame yields None; otherwise
# the last label returned by ``_get_valid_indices()`` is used, or None
# when that selection is empty.
@Appender(_shared_docs['valid_index'] % {
5035-
'position': 'last', 'klass': 'DataFrame'})
5036-
def last_valid_index(self):
5037-
if len(self) == 0:
5038-
return None
5039-
5040-
valid_indices = self._get_valid_indices()
5041-
return valid_indices[-1] if len(valid_indices) else None
5042-
50435018
# ----------------------------------------------------------------------
50445019
# Data reshaping
50455020

pandas/core/generic.py

+45
Original file line numberDiff line numberDiff line change
@@ -8763,6 +8763,51 @@ def transform(self, func, *args, **kwargs):
87638763
scalar : type of index
87648764
"""
87658765

8766+
def _find_valid_index(self, how):
    """Retrieve the index label of the first or last valid value.

    A value is valid when it is not NA; for 2-dimensional objects a row
    is valid when at least one of its values is not NA.

    Parameters
    ----------
    how : {'first', 'last'}
        Use this parameter to change between the first or last valid index.

    Returns
    -------
    idx_valid : type of index
        Label of the first/last valid entry, or None when the object is
        empty or holds no valid value.
    """
    assert how in ['first', 'last']

    if len(self) == 0:  # early stop
        return None
    is_valid = ~self.isna()

    if self.ndim == 2:
        is_valid = is_valid.any(axis=1)  # reduce axis 1

    # Work with integer positions rather than labels: a label lookup such
    # as ``is_valid[label]`` returns several values when the index holds
    # duplicates, which cannot be used as a single truth value.
    if how == 'first':
        pos = is_valid.values.argmax()
    else:
        # Scan from the end, then map the reversed position back.
        pos = len(self) - 1 - is_valid.values[::-1].argmax()

    # argmax also returns the first scanned position when nothing is True,
    # so confirm that the position found really holds a valid value.
    if not is_valid.iat[pos]:
        return None
    return self.index[pos]
8800+
8801+
# Public accessor that delegates to ``_find_valid_index('first')``.
# It carries no docstring of its own; it is decorated with ``Appender``
# using the shared 'valid_index' template filled with position='first'
# and klass='NDFrame'.
@Appender(_shared_docs['valid_index'] % {'position': 'first',
8802+
'klass': 'NDFrame'})
8803+
def first_valid_index(self):
8804+
return self._find_valid_index('first')
8805+
8806+
# Public accessor that delegates to ``_find_valid_index('last')``.
# It carries no docstring of its own; it is decorated with ``Appender``
# using the shared 'valid_index' template filled with position='last'
# and klass='NDFrame'.
@Appender(_shared_docs['valid_index'] % {'position': 'last',
8807+
'klass': 'NDFrame'})
8808+
def last_valid_index(self):
8809+
return self._find_valid_index('last')
8810+
87668811

87678812
def _doc_parms(cls):
87688813
"""Return a tuple of the doc parms."""

pandas/core/series.py

-26
Original file line numberDiff line numberDiff line change
@@ -3887,32 +3887,6 @@ def valid(self, inplace=False, **kwargs):
38873887
"Use .dropna instead.", FutureWarning, stacklevel=2)
38883888
return self.dropna(inplace=inplace, **kwargs)
38893889

3890-
# Removed Series implementation: an empty Series yields None. Otherwise
# ``argmin`` over the NA mask gives the position of the first non-NA
# value; if the mask is still True there, every value is NA and None is
# returned, else the label at that position.
@Appender(generic._shared_docs['valid_index'] % {
3891-
'position': 'first', 'klass': 'Series'})
3892-
def first_valid_index(self):
3893-
if len(self) == 0:
3894-
return None
3895-
3896-
mask = isna(self._values)
3897-
i = mask.argmin()
3898-
if mask[i]:
3899-
return None
3900-
else:
3901-
return self.index[i]
3902-
3903-
# Removed Series implementation: an empty Series yields None. Otherwise
# the NA mask is computed over the reversed values, so ``argmin`` gives
# the offset of the last non-NA value counted from the end; if the mask
# is still True there, every value is NA and None is returned, else the
# offset is mapped back with ``len(self) - i - 1`` to pick the label.
@Appender(generic._shared_docs['valid_index'] % {
3904-
'position': 'last', 'klass': 'Series'})
3905-
def last_valid_index(self):
3906-
if len(self) == 0:
3907-
return None
3908-
3909-
mask = isna(self._values[::-1])
3910-
i = mask.argmin()
3911-
if mask[i]:
3912-
return None
3913-
else:
3914-
return self.index[len(self) - i - 1]
3915-
39163890
# ----------------------------------------------------------------------
39173891
# Time series-oriented methods
39183892

pandas/tests/frame/test_timeseries.py

+9
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,15 @@ def test_first_last_valid(self):
530530
assert frame.last_valid_index() is None
531531
assert frame.first_valid_index() is None
532532

533+
# GH20499: it preserves freq with holes
534+
frame.index = date_range("20110101", periods=N, freq="B")
535+
frame.iloc[1] = 1
536+
frame.iloc[-2] = 1
537+
assert frame.first_valid_index() == frame.index[1]
538+
assert frame.last_valid_index() == frame.index[-2]
539+
assert frame.first_valid_index().freq == frame.index.freq
540+
assert frame.last_valid_index().freq == frame.index.freq
541+
533542
def test_at_time_frame(self):
534543
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
535544
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

pandas/tests/series/test_timeseries.py

+9
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,15 @@ def test_first_last_valid(self):
432432
assert empty.last_valid_index() is None
433433
assert empty.first_valid_index() is None
434434

435+
# GH20499: it preserves freq with holes
436+
ts.index = date_range("20110101", periods=len(ts), freq="B")
437+
ts.iloc[1] = 1
438+
ts.iloc[-2] = 1
439+
assert ts.first_valid_index() == ts.index[1]
440+
assert ts.last_valid_index() == ts.index[-2]
441+
assert ts.first_valid_index().freq == ts.index.freq
442+
assert ts.last_valid_index().freq == ts.index.freq
443+
435444
def test_mpl_compat_hack(self):
436445
result = self.ts[:, np.newaxis]
437446
expected = self.ts.values[:, np.newaxis]

0 commit comments

Comments
 (0)