Skip to content

Commit 4d5f6d7

Browse files
committed
Fix performance regression in Series.asof: for a scalar `where`, avoid pre-computing the null mask for the whole Series, and return the value by indexing the underlying ndarray directly.
1 parent 233d51d commit 4d5f6d7

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

pandas/core/generic.py

+20-12
Original file line numberDiff line numberDiff line change
@@ -3735,10 +3735,10 @@ def asof(self, where, subset=None):
37353735
if not self.index.is_monotonic:
37363736
raise ValueError("asof requires a sorted index")
37373737

3738-
if isinstance(self, ABCSeries):
3738+
is_series = isinstance(self, ABCSeries)
3739+
if is_series:
37393740
if subset is not None:
37403741
raise ValueError("subset is not valid for Series")
3741-
nulls = self.isnull()
37423742
elif self.ndim > 2:
37433743
raise NotImplementedError("asof is not implemented "
37443744
"for {type}".format(type(self)))
@@ -3747,9 +3747,9 @@ def asof(self, where, subset=None):
37473747
subset = self.columns
37483748
if not is_list_like(subset):
37493749
subset = [subset]
3750-
nulls = self[subset].isnull().any(1)
37513750

3752-
if not is_list_like(where):
3751+
is_list = is_list_like(where)
3752+
if not is_list:
37533753
start = self.index[0]
37543754
if isinstance(self.index, PeriodIndex):
37553755
where = Period(where, freq=self.index.freq).ordinal
@@ -3758,24 +3758,32 @@ def asof(self, where, subset=None):
37583758
if where < start:
37593759
return np.nan
37603760

3761-
loc = self.index.searchsorted(where, side='right')
3762-
if loc > 0:
3763-
loc -= 1
3764-
while nulls[loc] and loc > 0:
3765-
loc -= 1
3766-
return self.iloc[loc]
3761+
# A *while* loop test is extremely expensive for DataFrame
3762+
# so we later pre-compute all the NAs and use the same
3763+
# code path whether *where* is a scalar or list.
3764+
# See PR: https://github.com/pandas-dev/pandas/pull/14476
3765+
if is_series:
3766+
loc = self.index.searchsorted(where, side='right')
3767+
if loc > 0:
3768+
loc -= 1
3769+
3770+
values = self._values
3771+
while loc > 0 and isnull(values[loc]):
3772+
loc -= 1
3773+
return values[loc]
37673774

37683775
if not isinstance(where, Index):
3769-
where = Index(where)
3776+
where = Index(where) if is_list else Index([where])
37703777

3778+
nulls = self.isnull() if is_series else self[subset].isnull().any(1)
37713779
locs = self.index.asof_locs(where, ~(nulls.values))
37723780

37733781
# mask the missing
37743782
missing = locs == -1
37753783
data = self.take(locs, is_copy=False)
37763784
data.index = where
37773785
data.loc[missing] = np.nan
3778-
return data
3786+
return data if is_list else data.iloc[-1]
37793787

37803788
# ----------------------------------------------------------------------
37813789
# Action Methods

0 commit comments

Comments
 (0)