Skip to content

Commit f86b129

Browse files
Backport PR #35712: PERF: RangeIndex.format performance (#35904)
Co-authored-by: Terji Petersen <[email protected]>
1 parent c9fb752 commit f86b129

File tree

10 files changed

+52
-12
lines changed

10 files changed

+52
-12
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397
540540

541541
.. ipython:: python
542542
543-
df.describe()
543+
df.describe()
544544
545545
``__str__`` methods now call ``__repr__`` rather than vice versa
546546
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

doc/source/whatsnew/v1.1.2.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
18-
-
18+
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
1919
-
2020

2121
.. ---------------------------------------------------------------------------
@@ -25,7 +25,7 @@ Fixed regressions
2525
Bug fixes
2626
~~~~~~~~~
2727
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
28-
-
28+
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
2929
-
3030

3131
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,9 @@ def format(
924924

925925
return self._format_with_header(header, na_rep=na_rep)
926926

927-
def _format_with_header(self, header, na_rep="NaN") -> List[str_t]:
927+
def _format_with_header(
928+
self, header: List[str_t], na_rep: str_t = "NaN"
929+
) -> List[str_t]:
928930
from pandas.io.formats.format import format_array
929931

930932
values = self._values

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ def _format_attrs(self):
347347
attrs.append(("length", len(self)))
348348
return attrs
349349

350-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
350+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
351351
from pandas.io.formats.printing import pprint_thing
352352

353353
result = [

pandas/core/indexes/datetimelike.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -350,15 +350,20 @@ def format(
350350
"""
351351
header = []
352352
if name:
353-
fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
354-
header.append(fmt_name)
353+
header.append(
354+
ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
355+
if self.name is not None
356+
else ""
357+
)
355358

356359
if formatter is not None:
357360
return header + list(self.map(formatter))
358361

359362
return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
360363

361-
def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]:
364+
def _format_with_header(
365+
self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
366+
) -> List[str]:
362367
return header + list(
363368
self._format_native_types(na_rep=na_rep, date_format=date_format)
364369
)

pandas/core/indexes/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
948948
# Rendering Methods
949949
# __repr__ associated methods are based on MultiIndex
950950

951-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
951+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
952952
return header + list(self._format_native_types(na_rep=na_rep))
953953

954954
def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs):

pandas/core/indexes/range.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4-
from typing import Any, Optional
4+
from typing import Any, List, Optional
55
import warnings
66

77
import numpy as np
@@ -195,6 +195,15 @@ def _format_data(self, name=None):
195195
# we are formatting thru the attributes
196196
return None
197197

198+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
199+
if not len(self._range):
200+
return header
201+
first_val_str = str(self._range[0])
202+
last_val_str = str(self._range[-1])
203+
max_length = max(len(first_val_str), len(last_val_str))
204+
205+
return header + [f"{x:<{max_length}}" for x in self._range]
206+
198207
# --------------------------------------------------------------------
199208
_deprecation_message = (
200209
"RangeIndex.{} is deprecated and will be "

pandas/tests/indexes/common.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import gc
2-
from typing import Optional, Type
2+
from typing import Type
33

44
import numpy as np
55
import pytest
@@ -33,7 +33,7 @@
3333
class Base:
3434
""" base class for index sub-class tests """
3535

36-
_holder: Optional[Type[Index]] = None
36+
_holder: Type[Index]
3737
_compat_props = ["shape", "ndim", "size", "nbytes"]
3838

3939
def create_index(self) -> Index:
@@ -648,6 +648,12 @@ def test_format(self):
648648
expected = [str(x) for x in idx]
649649
assert idx.format() == expected
650650

651+
def test_format_empty(self):
652+
# GH35712
653+
empty_idx = self._holder([])
654+
assert empty_idx.format() == []
655+
assert empty_idx.format(name=True) == [""]
656+
651657
def test_hasnans_isnans(self, index):
652658
# GH 11343, added tests for hasnans / isnans
653659
if isinstance(index, MultiIndex):

pandas/tests/indexes/period/test_period.py

+6
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,12 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
536536
with pytest.raises(KeyError, match=msg):
537537
df.loc[key]
538538

539+
def test_format_empty(self):
540+
# GH35712
541+
empty_idx = self._holder([], freq="A")
542+
assert empty_idx.format() == []
543+
assert empty_idx.format(name=True) == [""]
544+
539545

540546
def test_maybe_convert_timedelta():
541547
pi = PeriodIndex(["2000", "2001"], freq="D")

pandas/tests/indexes/ranges/test_range.py

+12
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,14 @@ def test_cached_data(self):
166166
idx.any()
167167
assert idx._cached_data is None
168168

169+
idx.format()
170+
assert idx._cache == {}
171+
169172
df = pd.DataFrame({"a": range(10)}, index=idx)
170173

174+
str(df)
175+
assert idx._cache == {}
176+
171177
df.loc[50]
172178
assert idx._cached_data is None
173179

@@ -506,3 +512,9 @@ def test_engineless_lookup(self):
506512
idx.get_loc("a")
507513

508514
assert "_engine" not in idx._cache
515+
516+
def test_format_empty(self):
517+
# GH35712
518+
empty_idx = self._holder(0)
519+
assert empty_idx.format() == []
520+
assert empty_idx.format(name=True) == [""]

0 commit comments

Comments
 (0)