Skip to content

Commit 6e05fa5

Browse files
topper-123 and ukarroum
authored and committed
PERF: faster dir() calls (pandas-dev#37450)
1 parent 8b1f996 commit 6e05fa5

File tree

8 files changed

+28
-14
lines changed

8 files changed

+28
-14
lines changed

doc/source/whatsnew/v1.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ Performance improvements
346346
- Performance improvement in :meth:`RollingGroupby.count` (:issue:`35625`)
347347
- Small performance decrease to :meth:`Rolling.min` and :meth:`Rolling.max` for fixed windows (:issue:`36567`)
348348
- Reduced peak memory usage in :meth:`DataFrame.to_pickle` when using ``protocol=5`` in python 3.8+ (:issue:`34244`)
349+
- Faster ``dir`` calls when an object has many index labels, e.g. ``dir(ser)`` (:issue:`37450`)
349350
- Performance improvement in :class:`ExpandingGroupby` (:issue:`37064`)
350351

351352
.. ---------------------------------------------------------------------------

pandas/core/accessor.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,7 @@ def _dir_additions(self) -> Set[str]:
2424
"""
2525
Add additional __dir__ for this object.
2626
"""
27-
rv = set()
28-
for accessor in self._accessors:
29-
try:
30-
getattr(self, accessor)
31-
rv.add(accessor)
32-
except AttributeError:
33-
pass
34-
return rv
27+
return {accessor for accessor in self._accessors if hasattr(self, accessor)}
3528

3629
def __dir__(self) -> List[str]:
3730
"""

pandas/core/generic.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -5439,12 +5439,10 @@ def _dir_additions(self) -> Set[str]:
54395439
add the string-like attributes from the info_axis.
54405440
If info_axis is a MultiIndex, its first level values are used.
54415441
"""
5442-
additions = {
5443-
c
5444-
for c in self._info_axis.unique(level=0)[:100]
5445-
if isinstance(c, str) and c.isidentifier()
5446-
}
5447-
return super()._dir_additions().union(additions)
5442+
additions = super()._dir_additions()
5443+
if self._info_axis._can_hold_strings:
5444+
additions.update(self._info_axis._dir_additions_for_owner)
5445+
return additions
54485446

54495447
# ----------------------------------------------------------------------
54505448
# Consolidation of internals

pandas/core/indexes/base.py

+15
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
NewType,
1313
Optional,
1414
Sequence,
15+
Set,
1516
Tuple,
1617
TypeVar,
1718
Union,
@@ -230,6 +231,7 @@ def _outer_indexer(self, left, right):
230231
_attributes = ["name"]
231232
_is_numeric_dtype = False
232233
_can_hold_na = True
234+
_can_hold_strings = True
233235

234236
# would we like our indexing holder to defer to us
235237
_defer_to_indexing = False
@@ -568,6 +570,19 @@ def _engine(self):
568570
target_values = self._get_engine_target()
569571
return self._engine_type(lambda: target_values, len(self))
570572

573+
@cache_readonly
574+
def _dir_additions_for_owner(self) -> Set[str_t]:
575+
"""
576+
Add the string-like labels to the owner dataframe/series dir output.
577+
578+
If this is a MultiIndex, its first level values are used.
579+
"""
580+
return {
581+
c
582+
for c in self.unique(level=0)[:100]
583+
if isinstance(c, str) and c.isidentifier()
584+
}
585+
571586
# --------------------------------------------------------------------
572587
# Array-Like Methods
573588

pandas/core/indexes/category.py

+4
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate):
161161

162162
_typ = "categoricalindex"
163163

164+
@property
165+
def _can_hold_strings(self):
166+
return self.categories._can_hold_strings
167+
164168
codes: np.ndarray
165169
categories: Index
166170
_data: Categorical

pandas/core/indexes/datetimelike.py

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class DatetimeIndexOpsMixin(ExtensionIndex):
8888
Common ops mixin to support a unified interface datetimelike Index.
8989
"""
9090

91+
_can_hold_strings = False
9192
_data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
9293
freq: Optional[BaseOffset]
9394
freqstr: Optional[str]

pandas/core/indexes/interval.py

+1
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ class IntervalIndex(IntervalMixin, ExtensionIndex):
193193

194194
_data: IntervalArray
195195
_values: IntervalArray
196+
_can_hold_strings = False
196197

197198
# --------------------------------------------------------------------
198199
# Constructors

pandas/core/indexes/numeric.py

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class NumericIndex(Index):
4242
_default_dtype: np.dtype
4343

4444
_is_numeric_dtype = True
45+
_can_hold_strings = False
4546

4647
def __new__(cls, data=None, dtype=None, copy=False, name=None):
4748
cls._validate_dtype(dtype)

0 commit comments

Comments
 (0)