Skip to content

Commit 1ab59a8

Browse files
Backport PR #35440: BUG: CategoricalIndex.format (#35539)
Co-authored-by: Terji Petersen <[email protected]>
1 parent 69165d1 commit 1ab59a8

File tree

9 files changed

+53
-14
lines changed

9 files changed

+53
-14
lines changed

doc/source/whatsnew/v1.1.1.rst

+7
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,13 @@ Fixed regressions
2626
Bug fixes
2727
~~~~~~~~~
2828

29+
30+
Categorical
31+
^^^^^^^^^^^
32+
33+
- Bug in :meth:`CategoricalIndex.format` where, when the stringified scalars had different lengths, the shorter strings were right-padded with spaces to match the length of the longest string (:issue:`35439`)
34+
35+
2936
**Datetimelike**
3037

3138
-

pandas/core/indexes/category.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
pandas_dtype,
2121
)
2222
from pandas.core.dtypes.dtypes import CategoricalDtype
23-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
23+
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
2424

2525
from pandas.core import accessor
2626
from pandas.core.algorithms import take_1d
@@ -348,12 +348,12 @@ def _format_attrs(self):
348348
return attrs
349349

350350
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
351-
from pandas.io.formats.format import format_array
351+
from pandas.io.formats.printing import pprint_thing
352352

353-
formatted_values = format_array(
354-
self._values, formatter=None, na_rep=na_rep, justify="left"
355-
)
356-
result = ibase.trim_front(formatted_values)
353+
result = [
354+
pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep
355+
for x in self._values
356+
]
357357
return header + result
358358

359359
# --------------------------------------------------------------------

pandas/core/indexes/range.py

+1-6
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
import operator
33
from sys import getsizeof
4-
from typing import Any, List, Optional
4+
from typing import Any, Optional
55
import warnings
66

77
import numpy as np
@@ -33,8 +33,6 @@
3333
from pandas.core.indexes.numeric import Int64Index
3434
from pandas.core.ops.common import unpack_zerodim_and_defer
3535

36-
from pandas.io.formats.printing import pprint_thing
37-
3836
_empty_range = range(0)
3937

4038

@@ -197,9 +195,6 @@ def _format_data(self, name=None):
197195
# we are formatting thru the attributes
198196
return None
199197

200-
def _format_with_header(self, header, na_rep="NaN") -> List[str]:
201-
return header + [pprint_thing(x) for x in self._range]
202-
203198
# --------------------------------------------------------------------
204199
_deprecation_message = (
205200
"RangeIndex.{} is deprecated and will be "

pandas/tests/indexes/categorical/test_category.py

+6
Original file line number | Diff line number | Diff line change
@@ -478,3 +478,9 @@ def test_reindex_base(self):
478478
def test_map_str(self):
479479
# See test_map.py
480480
pass
481+
482+
def test_format_different_scalar_lengths(self):
483+
# GH35439
484+
idx = CategoricalIndex(["aaaaaaaaa", "b"])
485+
expected = ["aaaaaaaaa", "b"]
486+
assert idx.format() == expected

pandas/tests/indexes/common.py

+6
Original file line number | Diff line number | Diff line change
@@ -642,6 +642,12 @@ def test_equals_op(self):
642642
tm.assert_numpy_array_equal(index_a == item, expected3)
643643
tm.assert_series_equal(series_a == item, Series(expected3))
644644

645+
def test_format(self):
646+
# GH35439
647+
idx = self.create_index()
648+
expected = [str(x) for x in idx]
649+
assert idx.format() == expected
650+
645651
def test_hasnans_isnans(self, index):
646652
# GH 11343, added tests for hasnans / isnans
647653
if isinstance(index, MultiIndex):

pandas/tests/indexes/datetimes/test_datetimelike.py

+6
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,12 @@ def index(self, request):
2020
def create_index(self) -> DatetimeIndex:
2121
return date_range("20130101", periods=5)
2222

23+
def test_format(self):
24+
# GH35439
25+
idx = self.create_index()
26+
expected = [f"{x:%Y-%m-%d}" for x in idx]
27+
assert idx.format() == expected
28+
2329
def test_shift(self):
2430
pass # handled in test_ops
2531

pandas/tests/indexes/test_base.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -1171,8 +1171,11 @@ def test_summary_bug(self):
11711171
assert "~:{range}:0" in result
11721172
assert "{other}%s" in result
11731173

1174-
def test_format(self, index):
1175-
self._check_method_works(Index.format, index)
1174+
def test_format_different_scalar_lengths(self):
1175+
# GH35439
1176+
idx = Index(["aaaaaaaaa", "b"])
1177+
expected = ["aaaaaaaaa", "b"]
1178+
assert idx.format() == expected
11761179

11771180
def test_format_bug(self):
11781181
# GH 14626

pandas/tests/indexes/test_numeric.py

+7
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,13 @@ def test_can_hold_identifiers(self):
2121
key = idx[0]
2222
assert idx._can_hold_identifiers_and_holds_name(key) is False
2323

24+
def test_format(self):
25+
# GH35439
26+
idx = self.create_index()
27+
max_width = max(len(str(x)) for x in idx)
28+
expected = [str(x).ljust(max_width) for x in idx]
29+
assert idx.format() == expected
30+
2431
def test_numeric_compat(self):
2532
pass # override Base method
2633

pandas/tests/io/formats/test_format.py

+9
Original file line number | Diff line number | Diff line change
@@ -2141,6 +2141,15 @@ def test_dict_entries(self):
21412141
assert "'a': 1" in val
21422142
assert "'b': 2" in val
21432143

2144+
def test_categorical_columns(self):
2145+
# GH35439
2146+
data = [[4, 2], [3, 2], [4, 3]]
2147+
cols = ["aaaaaaaaa", "b"]
2148+
df = pd.DataFrame(data, columns=cols)
2149+
df_cat_cols = pd.DataFrame(data, columns=pd.CategoricalIndex(cols))
2150+
2151+
assert df.to_string() == df_cat_cols.to_string()
2152+
21442153
def test_period(self):
21452154
# GH 12615
21462155
df = pd.DataFrame(

0 commit comments

Comments
 (0)