Skip to content

Commit ec20207

Browse files
KalyanGokhale authored and jreback committed
REGR: Fixes first_valid_index when DataFrame or Series has duplicate row index (GH21441) (#21497)
1 parent 89874d3 commit ec20207

File tree

3 files changed

+27
-14
lines changed

3 files changed

+27
-14
lines changed

doc/source/whatsnew/v0.23.2.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ Fixed Regressions
1717
~~~~~~~~~~~~~~~~~
1818

1919
- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
20-
-
20+
- Fixed regression in :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raising for a row index having duplicate values (:issue:`21441`)
21+
-
2122

2223
.. _whatsnew_0232.performance:
2324

pandas/core/generic.py

+11-12
Original file line numberDiff line numberDiff line change
@@ -8968,18 +8968,17 @@ def _find_valid_index(self, how):
89688968
is_valid = is_valid.any(1) # reduce axis 1
89698969

89708970
if how == 'first':
8971-
# First valid value case
8972-
i = is_valid.idxmax()
8973-
if not is_valid[i]:
8974-
return None
8975-
return i
8976-
8977-
elif how == 'last':
8978-
# Last valid value case
8979-
i = is_valid.values[::-1].argmax()
8980-
if not is_valid.iat[len(self) - i - 1]:
8981-
return None
8982-
return self.index[len(self) - i - 1]
8971+
idxpos = is_valid.values[::].argmax()
8972+
8973+
if how == 'last':
8974+
idxpos = len(self) - 1 - is_valid.values[::-1].argmax()
8975+
8976+
chk_notna = is_valid.iat[idxpos]
8977+
idx = self.index[idxpos]
8978+
8979+
if not chk_notna:
8980+
return None
8981+
return idx
89838982

89848983
@Appender(_shared_docs['valid_index'] % {'position': 'first',
89858984
'klass': 'NDFrame'})

pandas/tests/frame/test_timeseries.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,15 @@ def test_asfreq_fillvalue(self):
506506
actual_series = ts.asfreq(freq='1S', fill_value=9.0)
507507
assert_series_equal(expected_series, actual_series)
508508

509-
def test_first_last_valid(self):
509+
@pytest.mark.parametrize("data,idx,expected_first,expected_last", [
510+
({'A': [1, 2, 3]}, [1, 1, 2], 1, 2),
511+
({'A': [1, 2, 3]}, [1, 2, 2], 1, 2),
512+
({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'),
513+
({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2),
514+
({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
515+
({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)])
516+
def test_first_last_valid(self, data, idx,
517+
expected_first, expected_last):
510518
N = len(self.frame.index)
511519
mat = randn(N)
512520
mat[:5] = nan
@@ -539,6 +547,11 @@ def test_first_last_valid(self):
539547
assert frame.first_valid_index().freq == frame.index.freq
540548
assert frame.last_valid_index().freq == frame.index.freq
541549

550+
# GH 21441
551+
df = DataFrame(data, index=idx)
552+
assert expected_first == df.first_valid_index()
553+
assert expected_last == df.last_valid_index()
554+
542555
def test_first_subset(self):
543556
ts = tm.makeTimeDataFrame(freq='12h')
544557
result = ts.first('10d')

0 commit comments

Comments
 (0)