Skip to content

Commit 99f2ccb

Browse files
authored
PERF: remove large-array-creating path in fast_xs (#33032)
1 parent 2bf54a8 commit 99f2ccb

File tree

3 files changed

+37
-17
lines changed

3 files changed

+37
-17
lines changed

asv_bench/benchmarks/indexing.py

+25
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,31 @@ def time_frame_getitem_single_column_int(self):
308308
self.df_int_col[0]
309309

310310

311+
class IndexSingleRow:
    """
    Benchmark selecting a single row from a DataFrame with mixed dtypes.

    The benchmark is parametrized on ``unique_cols``: whether the column
    labels of the frame are unique or contain a duplicate.
    """

    params = [True, False]
    param_names = ["unique_cols"]

    def setup(self, unique_cols):
        """
        Build the frame used by the timing methods and store it on ``self.df``.

        Parameters
        ----------
        unique_cols : bool
            If False, the first two column labels are both replaced by "A".
        """
        frame = DataFrame(np.arange(10 ** 7).reshape(-1, 10))

        # Cast every one of the ten columns to a different dtype.
        column_dtypes = ["u1", "u2", "u4", "u8", "i1", "i2", "i4", "i8", "f8", "f4"]
        for position, dtype_code in enumerate(column_dtypes):
            frame[position] = frame[position].astype(dtype_code)

        if not unique_cols:
            # GH#33032 single-row lookups with non-unique columns were
            # 15x slower than with unique columns
            frame.columns = ["A", "A", *frame.columns[2:]]

        self.df = frame

    def time_iloc_row(self, unique_cols):
        """Time a positional single-row lookup."""
        self.df.iloc[10000]

    def time_loc_row(self, unique_cols):
        """Time a label-based single-row lookup."""
        self.df.loc[10000]
334+
335+
311336
class AssignTimeseriesIndex:
312337
def setup(self):
313338
N = 100000

asv_bench/benchmarks/sparse.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
import scipy.sparse
33

44
import pandas as pd
5-
from pandas import MultiIndex, Series, SparseArray, date_range
5+
from pandas import MultiIndex, Series, date_range
6+
from pandas.arrays import SparseArray
67

78

89
def make_array(size, dense_proportion, fill_value, dtype):

pandas/core/internals/managers.py

+10-16
Original file line numberDiff line numberDiff line change
@@ -901,30 +901,24 @@ def to_dict(self, copy: bool = True):
901901

902902
return {dtype: self.combine(blocks, copy=copy) for dtype, blocks in bd.items()}
903903

904-
def fast_xs(self, loc: int):
904+
def fast_xs(self, loc: int) -> ArrayLike:
905905
"""
906-
get a cross sectional for a given location in the
907-
items ; handle dups
906+
Return the array corresponding to `frame.iloc[loc]`.
908907
909-
return the result, is *could* be a view in the case of a
910-
single block
908+
Parameters
909+
----------
910+
loc : int
911+
912+
Returns
913+
-------
914+
np.ndarray or ExtensionArray
911915
"""
912916
if len(self.blocks) == 1:
913917
return self.blocks[0].iget((slice(None), loc))
914918

915-
items = self.items
916-
917-
# non-unique (GH4726)
918-
if not items.is_unique:
919-
result = self._interleave()
920-
if self.ndim == 2:
921-
result = result.T
922-
return result[loc]
923-
924-
# unique
925919
dtype = _interleaved_dtype(self.blocks)
926920

927-
n = len(items)
921+
n = len(self)
928922
if is_extension_array_dtype(dtype):
929923
# we'll eventually construct an ExtensionArray.
930924
result = np.empty(n, dtype=object)

0 commit comments

Comments
 (0)