diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index 443589308ad4c..815ce2c4c2905 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -26,6 +26,13 @@ Fixed regressions Bug fixes ~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`CategoricalIndex.format` where, when stringified scalars had different lengths, the shorter string would be right-filled with spaces, so it had the same length as the longest string (:issue:`35439`) + + **Datetimelike** - diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b0b008de69a94..74b235655e345 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -20,7 +20,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna from pandas.core import accessor from pandas.core.algorithms import take_1d @@ -348,12 +348,12 @@ def _format_attrs(self): return attrs def _format_with_header(self, header, na_rep="NaN") -> List[str]: - from pandas.io.formats.format import format_array + from pandas.io.formats.printing import pprint_thing - formatted_values = format_array( - self._values, formatter=None, na_rep=na_rep, justify="left" - ) - result = ibase.trim_front(formatted_values) + result = [ + pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep + for x in self._values + ] return header + result # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e5e98039ff77b..eee610681087d 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,7 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import Any, List, Optional +from typing import Any, Optional import warnings import numpy as np @@ -33,8 +33,6 @@ from pandas.core.indexes.numeric import Int64Index from pandas.core.ops.common import unpack_zerodim_and_defer -from pandas.io.formats.printing import pprint_thing - _empty_range = range(0) @@ -197,9 +195,6 @@ def _format_data(self, name=None): # we are formatting thru the attributes return None - def _format_with_header(self, header, na_rep="NaN") -> List[str]: - return header + [pprint_thing(x) for x in self._range] - # -------------------------------------------------------------------- _deprecation_message = ( "RangeIndex.{} is deprecated and will be " diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 7f30a77872bc1..8af26eef504fc 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -478,3 +478,9 @@ def test_reindex_base(self): def test_map_str(self): # See test_map.py pass + + def test_format_different_scalar_lengths(self): + # GH35439 + idx = CategoricalIndex(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + assert idx.format() == expected diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index f5b9f4a401e60..3b41c4bfacf73 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -642,6 +642,12 @@ def test_equals_op(self): tm.assert_numpy_array_equal(index_a == item, expected3) tm.assert_series_equal(series_a == item, Series(expected3)) + def test_format(self): + # GH35439 + idx = self.create_index() + expected = [str(x) for x in idx] + assert idx.format() == expected + def test_hasnans_isnans(self, index): # GH 11343, added tests for hasnans / isnans if isinstance(index, MultiIndex): diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 7345ae3032463..a5abf2946feda 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -20,6 +20,12 @@ def index(self, request): def create_index(self) -> DatetimeIndex: return date_range("20130101", periods=5) + def test_format(self): + # GH35439 + idx = self.create_index() + expected = [f"{x:%Y-%m-%d}" for x in idx] + assert idx.format() == expected + def test_shift(self): pass # handled in test_ops diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index eaf48421dc071..59ee88117a984 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1171,8 +1171,11 @@ def test_summary_bug(self): assert "~:{range}:0" in result assert "{other}%s" in result - def test_format(self, index): - self._check_method_works(Index.format, index) + def test_format_different_scalar_lengths(self): + # GH35439 + idx = Index(["aaaaaaaaa", "b"]) + expected = ["aaaaaaaaa", "b"] + assert idx.format() == expected def test_format_bug(self): # GH 14626 diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index a7c5734ef9b02..bfcac5d433d2c 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -21,6 +21,13 @@ def test_can_hold_identifiers(self): key = idx[0] assert idx._can_hold_identifiers_and_holds_name(key) is False + def test_format(self): + # GH35439 + idx = self.create_index() + max_width = max(len(str(x)) for x in idx) + expected = [str(x).ljust(max_width) for x in idx] + assert idx.format() == expected + def test_numeric_compat(self): pass # override Base method diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index e236b3da73c69..84805d06df4a8 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2141,6 +2141,15 @@ def test_dict_entries(self): assert "'a': 1" in val assert "'b': 2" in val + def test_categorical_columns(self): + # GH35439 + data = [[4, 2], [3, 2], [4, 3]] + cols = ["aaaaaaaaa", "b"] + df = pd.DataFrame(data, columns=cols) + df_cat_cols = pd.DataFrame(data, columns=pd.CategoricalIndex(cols)) + + assert df.to_string() == df_cat_cols.to_string() + def test_period(self): # GH 12615 df = pd.DataFrame(