Skip to content

Commit b7a31eb

Browse files
authored
PERF: RangeIndex.format performance (pandas-dev#35712)
1 parent d90b73b commit b7a31eb

File tree

10 files changed

+53
-12
lines changed

10 files changed

+53
-12
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397
540540

541541
.. ipython:: python
542542
543-
df.describe()
543+
df.describe()
544544
545545
``__str__`` methods now call ``__repr__`` rather than vice versa
546546
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

doc/source/whatsnew/v1.1.2.rst

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
18+
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
1819
-
19-
-
20+
2021

2122
.. ---------------------------------------------------------------------------
2223
@@ -26,7 +27,7 @@ Bug fixes
2627
~~~~~~~~~
2728
- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`)
2829
- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
29-
-
30+
- Bug in :meth:`DatetimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`)
3031
-
3132

3233
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,9 @@ def format(
933933

934934
return self._format_with_header(header, na_rep=na_rep)
935935

936-
def _format_with_header(self, header, na_rep="NaN") -> List[str_t]:
936+
def _format_with_header(
937+
self, header: List[str_t], na_rep: str_t = "NaN"
938+
) -> List[str_t]:
937939
from pandas.io.formats.format import format_array
938940

939941
values = self._values

pandas/core/indexes/category.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ def _format_attrs(self):
347347
attrs.append(("length", len(self)))
348348
return attrs
349349

350-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
350+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
351351
from pandas.io.formats.printing import pprint_thing
352352

353353
result = [

pandas/core/indexes/datetimelike.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -354,15 +354,20 @@ def format(
354354
"""
355355
header = []
356356
if name:
357-
fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
358-
header.append(fmt_name)
357+
header.append(
358+
ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
359+
if self.name is not None
360+
else ""
361+
)
359362

360363
if formatter is not None:
361364
return header + list(self.map(formatter))
362365

363366
return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
364367

365-
def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]:
368+
def _format_with_header(
369+
self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
370+
) -> List[str]:
366371
return header + list(
367372
self._format_native_types(na_rep=na_rep, date_format=date_format)
368373
)

pandas/core/indexes/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
948948
# Rendering Methods
949949
# __repr__ associated methods are based on MultiIndex
950950

951-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
951+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
952952
return header + list(self._format_native_types(na_rep=na_rep))
953953

954954
def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs):

pandas/core/indexes/range.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4-
from typing import Any
4+
from typing import Any, List
55
import warnings
66

77
import numpy as np
@@ -187,6 +187,15 @@ def _format_data(self, name=None):
187187
# we are formatting thru the attributes
188188
return None
189189

190+
def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]:
191+
if not len(self._range):
192+
return header
193+
first_val_str = str(self._range[0])
194+
last_val_str = str(self._range[-1])
195+
max_length = max(len(first_val_str), len(last_val_str))
196+
197+
return header + [f"{x:<{max_length}}" for x in self._range]
198+
190199
# --------------------------------------------------------------------
191200
_deprecation_message = (
192201
"RangeIndex.{} is deprecated and will be "

pandas/tests/indexes/common.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import gc
2-
from typing import Optional, Type
2+
from typing import Type
33

44
import numpy as np
55
import pytest
@@ -33,7 +33,7 @@
3333
class Base:
3434
""" base class for index sub-class tests """
3535

36-
_holder: Optional[Type[Index]] = None
36+
_holder: Type[Index]
3737
_compat_props = ["shape", "ndim", "size", "nbytes"]
3838

3939
def create_index(self) -> Index:
@@ -686,6 +686,12 @@ def test_format(self):
686686
expected = [str(x) for x in idx]
687687
assert idx.format() == expected
688688

689+
def test_format_empty(self):
690+
# GH35712
691+
empty_idx = self._holder([])
692+
assert empty_idx.format() == []
693+
assert empty_idx.format(name=True) == [""]
694+
689695
def test_hasnans_isnans(self, index):
690696
# GH 11343, added tests for hasnans / isnans
691697
if isinstance(index, MultiIndex):

pandas/tests/indexes/period/test_period.py

+6
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,12 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
536536
with pytest.raises(KeyError, match=msg):
537537
df.loc[key]
538538

539+
def test_format_empty(self):
540+
# GH35712
541+
empty_idx = self._holder([], freq="A")
542+
assert empty_idx.format() == []
543+
assert empty_idx.format(name=True) == [""]
544+
539545

540546
def test_maybe_convert_timedelta():
541547
pi = PeriodIndex(["2000", "2001"], freq="D")

pandas/tests/indexes/ranges/test_range.py

+12
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,14 @@ def test_cache(self):
171171
pass
172172
assert idx._cache == {}
173173

174+
idx.format()
175+
assert idx._cache == {}
176+
174177
df = pd.DataFrame({"a": range(10)}, index=idx)
175178

179+
str(df)
180+
assert idx._cache == {}
181+
176182
df.loc[50]
177183
assert idx._cache == {}
178184

@@ -515,3 +521,9 @@ def test_engineless_lookup(self):
515521
idx.get_loc("a")
516522

517523
assert "_engine" not in idx._cache
524+
525+
def test_format_empty(self):
526+
# GH35712
527+
empty_idx = self._holder(0)
528+
assert empty_idx.format() == []
529+
assert empty_idx.format(name=True) == [""]

0 commit comments

Comments
 (0)