Skip to content

Commit a920f8f

Browse files
committed
Fix performance regression in Series.asof by avoiding pre-computing nulls and returning value by indexing the underlying ndarray.
1 parent 233d51d commit a920f8f

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

pandas/core/generic.py

+22 -12
Original file line numberDiff line numberDiff line change
@@ -3735,10 +3735,10 @@ def asof(self, where, subset=None):
37353735
if not self.index.is_monotonic:
37363736
raise ValueError("asof requires a sorted index")
37373737

3738-
if isinstance(self, ABCSeries):
3738+
is_series = isinstance(self, ABCSeries)
3739+
if is_series:
37393740
if subset is not None:
37403741
raise ValueError("subset is not valid for Series")
3741-
nulls = self.isnull()
37423742
elif self.ndim > 2:
37433743
raise NotImplementedError("asof is not implemented "
37443744
"for {type}".format(type(self)))
@@ -3747,9 +3747,9 @@ def asof(self, where, subset=None):
37473747
subset = self.columns
37483748
if not is_list_like(subset):
37493749
subset = [subset]
3750-
nulls = self[subset].isnull().any(1)
37513750

3752-
if not is_list_like(where):
3751+
is_list = is_list_like(where)
3752+
if not is_list:
37533753
start = self.index[0]
37543754
if isinstance(self.index, PeriodIndex):
37553755
where = Period(where, freq=self.index.freq).ordinal
@@ -3758,24 +3758,34 @@ def asof(self, where, subset=None):
37583758
if where < start:
37593759
return np.nan
37603760

3761-
loc = self.index.searchsorted(where, side='right')
3762-
if loc > 0:
3763-
loc -= 1
3764-
while nulls[loc] and loc > 0:
3765-
loc -= 1
3766-
return self.iloc[loc]
3761+
# It's always much faster to use a *while* loop here for
3762+
# Series than pre-computing all the NAs. However a
3763+
# *while* loop is extremely expensive for DataFrame
3764+
# so we later pre-compute all the NAs and use the same
3765+
# code path whether *where* is a scalar or list.
3766+
# See PR: https://github.com/pandas-dev/pandas/pull/14476
3767+
if is_series:
3768+
loc = self.index.searchsorted(where, side='right')
3769+
if loc > 0:
3770+
loc -= 1
3771+
3772+
values = self._values
3773+
while loc > 0 and isnull(values[loc]):
3774+
loc -= 1
3775+
return values[loc]
37673776

37683777
if not isinstance(where, Index):
3769-
where = Index(where)
3778+
where = Index(where) if is_list else Index([where])
37703779

3780+
nulls = self.isnull() if is_series else self[subset].isnull().any(1)
37713781
locs = self.index.asof_locs(where, ~(nulls.values))
37723782

37733783
# mask the missing
37743784
missing = locs == -1
37753785
data = self.take(locs, is_copy=False)
37763786
data.index = where
37773787
data.loc[missing] = np.nan
3778-
return data
3788+
return data if is_list else data.iloc[-1]
37793789

37803790
# ----------------------------------------------------------------------
37813791
# Action Methods

0 commit comments

Comments
 (0)