From 731b38ed904b7f0fb7a9fad4fe1a53a1a67acd1d Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 26 Aug 2020 12:12:55 +0100 Subject: [PATCH 1/2] Backport PR #35712: PERF: RangeIndex.format performance --- doc/source/whatsnew/v0.25.0.rst | 2 +- doc/source/whatsnew/v1.1.2.rst | 8 +++++--- pandas/core/indexes/base.py | 4 +++- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 11 ++++++++--- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/range.py | 11 ++++++++++- pandas/tests/indexes/common.py | 10 ++++++++-- pandas/tests/indexes/period/test_period.py | 6 ++++++ pandas/tests/indexes/ranges/test_range.py | 12 ++++++++++++ 10 files changed, 55 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3cd920158f774..0f0f009307c75 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -540,7 +540,7 @@ with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397 .. ipython:: python - df.describe() + df.describe() ``__str__`` methods now call ``__repr__`` rather than vice versa ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 97bd4dccdcd84..7739a483e3d38 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -15,8 +15,9 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) +- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - -- + .. --------------------------------------------------------------------------- @@ -24,8 +25,9 @@ Fixed regressions Bug fixes ~~~~~~~~~ - -- +- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) +- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) +- Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1be381e38b157..32bbdf425acab 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -924,7 +924,9 @@ def format( return self._format_with_header(header, na_rep=na_rep) - def _format_with_header(self, header, na_rep="NaN") -> List[str_t]: + def _format_with_header( + self, header: List[str_t], na_rep: str_t = "NaN" + ) -> List[str_t]: from pandas.io.formats.format import format_array values = self._values diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 74b235655e345..8af6ee555306a 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -347,7 +347,7 @@ def _format_attrs(self): attrs.append(("length", len(self))) return attrs - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: from pandas.io.formats.printing import pprint_thing result = [ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ab0b3a394446d..9b57a25f1b0e9 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -350,15 +350,20 @@ def format( """ header = [] if name: - fmt_name = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) - header.append(fmt_name) + header.append( + ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) + if self.name is not None + else "" + ) if formatter is not None: return header + list(self.map(formatter)) return self._format_with_header(header, na_rep=na_rep, date_format=date_format) - def _format_with_header(self, header, na_rep="NaT", date_format=None) -> List[str]: + def _format_with_header( + self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None + ) -> List[str]: return header + list( self._format_native_types(na_rep=na_rep, date_format=date_format) ) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9548ebbd9c3b2..446e57d58a779 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -948,7 +948,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): # Rendering Methods # __repr__ associated methods are based on MultiIndex - def _format_with_header(self, header, na_rep="NaN") -> List[str]: + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: return header + list(self._format_native_types(na_rep=na_rep)) def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index eee610681087d..dcc0bdd86a98b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,7 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import Any, Optional +from typing import Any, List, Optional import warnings import numpy as np @@ -195,6 +195,15 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None + def _format_with_header(self, header: List[str], na_rep: str = "NaN") -> List[str]: + if not len(self._range): + return header + first_val_str = str(self._range[0]) + last_val_str = str(self._range[-1]) + max_length = max(len(first_val_str), len(last_val_str)) + + return header + [f"{x:<{max_length}}" for x in self._range] + # -------------------------------------------------------------------- _deprecation_message = ( "RangeIndex.{} is deprecated and will be " diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3b41c4bfacf73..5f82203d92dc3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -1,5 +1,5 @@ import gc -from typing import Optional, Type +from typing import Type import numpy as np import pytest @@ -33,7 +33,7 @@ class Base: """ base class for index sub-class tests """ - _holder: Optional[Type[Index]] = None + _holder: Type[Index] _compat_props = ["shape", "ndim", "size", "nbytes"] def create_index(self) -> Index: @@ -648,6 +648,12 @@ def test_format(self): expected = [str(x) for x in idx] assert idx.format() == expected + def test_format_empty(self): + # GH35712 + empty_idx = self._holder([]) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + def test_hasnans_isnans(self, index): # GH 11343, added tests for hasnans / isnans if isinstance(index, MultiIndex): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 15a88ab3819ce..085d41aaa5b76 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -536,6 +536,12 @@ def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): with pytest.raises(KeyError, match=msg): df.loc[key] + def test_format_empty(self): + # GH35712 + empty_idx = self._holder([], freq="A") + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] + def test_maybe_convert_timedelta(): pi = PeriodIndex(["2000", "2001"], freq="D") diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 5b6f9cb358b7d..3bd3f6cc09db7 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -166,8 +166,14 @@ def test_cached_data(self): idx.any() assert idx._cached_data is None + idx.format() + assert idx._cache == {} + df = pd.DataFrame({"a": range(10)}, index=idx) + str(df) + assert idx._cache == {} + df.loc[50] assert idx._cached_data is None @@ -506,3 +512,9 @@ def test_engineless_lookup(self): idx.get_loc("a") assert "_engine" not in idx._cache + + def test_format_empty(self): + # GH35712 + empty_idx = self._holder(0) + assert empty_idx.format() == [] + assert empty_idx.format(name=True) == [""] From b012ddcaf0f6b9b5bb839f04909068378fc7232a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 26 Aug 2020 12:30:04 +0100 Subject: [PATCH 2/2] fix whatsnew --- doc/source/whatsnew/v1.1.2.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 7739a483e3d38..5cbd97f65bbf5 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -18,15 +18,12 @@ Fixed regressions - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - - .. --------------------------------------------------------------------------- .. _whatsnew_112.bug_fixes: Bug fixes ~~~~~~~~~ -- Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`) -- Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`) -