Skip to content

BUG: CategoricalIndex.format #35440

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/source/whatsnew/v1.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ Fixed regressions
Bug fixes
~~~~~~~~~


Categorical
^^^^^^^^^^^

- Bug in :meth:`CategoricalIndex.format` where stringified scalars of different lengths were right-padded with spaces, so that shorter strings had the same length as the longest string (:issue:`35439`)


**Datetimelike**

-
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna

from pandas.core import accessor
from pandas.core.algorithms import take_1d
Expand Down Expand Up @@ -348,12 +348,12 @@ def _format_attrs(self):
return attrs

def _format_with_header(self, header, na_rep="NaN") -> List[str]:
from pandas.io.formats.format import format_array
from pandas.io.formats.printing import pprint_thing

formatted_values = format_array(
self._values, formatter=None, na_rep=na_rep, justify="left"
)
result = ibase.trim_front(formatted_values)
result = [
pprint_thing(x, escape_chars=("\t", "\r", "\n")) if notna(x) else na_rep
for x in self._values
]
return header + result

# --------------------------------------------------------------------
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import timedelta
import operator
from sys import getsizeof
from typing import Any, List, Optional
from typing import Any, Optional
import warnings

import numpy as np
Expand Down Expand Up @@ -33,8 +33,6 @@
from pandas.core.indexes.numeric import Int64Index
from pandas.core.ops.common import unpack_zerodim_and_defer

from pandas.io.formats.printing import pprint_thing

_empty_range = range(0)


Expand Down Expand Up @@ -197,9 +195,6 @@ def _format_data(self, name=None):
# we are formatting thru the attributes
return None

def _format_with_header(self, header, na_rep="NaN") -> List[str]:
return header + [pprint_thing(x) for x in self._range]

Copy link
Contributor Author

@topper-123 topper-123 Aug 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The added tests revealed that, on master, this method made the output of RangeIndex.format differ from that of Int64Index.format:

>>> pd.RangeIndex(0, 18, 2).format()
['0', '2', '4', '6', '8', '10', '12', '14', '16']
>>> pd.Int64Index(range(0, 18, 2)).format()
 ['0 ', '2 ', '4 ', '6 ', '8 ', '10', '12', '14', '16']

Notice the extra space for one-digit scalars in the Int64Index case. The outputs from the two methods are identical after merging this PR.

# --------------------------------------------------------------------
_deprecation_message = (
"RangeIndex.{} is deprecated and will be "
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,3 +478,9 @@ def test_reindex_base(self):
def test_map_str(self):
# See test_map.py
pass

def test_format_different_scalar_lengths(self):
# GH35439
idx = CategoricalIndex(["aaaaaaaaa", "b"])
expected = ["aaaaaaaaa", "b"]
assert idx.format() == expected
6 changes: 6 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,12 @@ def test_equals_op(self):
tm.assert_numpy_array_equal(index_a == item, expected3)
tm.assert_series_equal(series_a == item, Series(expected3))

def test_format(self):
# GH35439
idx = self.create_index()
expected = [str(x) for x in idx]
assert idx.format() == expected

def test_hasnans_isnans(self, index):
# GH 11343, added tests for hasnans / isnans
if isinstance(index, MultiIndex):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/indexes/datetimes/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ def index(self, request):
def create_index(self) -> DatetimeIndex:
return date_range("20130101", periods=5)

def test_format(self):
# GH35439
idx = self.create_index()
expected = [f"{x:%Y-%m-%d}" for x in idx]
assert idx.format() == expected

def test_shift(self):
pass # handled in test_ops

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,8 +1171,11 @@ def test_summary_bug(self):
assert "~:{range}:0" in result
assert "{other}%s" in result

def test_format(self, index):
self._check_method_works(Index.format, index)
def test_format_different_scalar_lengths(self):
# GH35439
idx = Index(["aaaaaaaaa", "b"])
expected = ["aaaaaaaaa", "b"]
assert idx.format() == expected

def test_format_bug(self):
# GH 14626
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ def test_can_hold_identifiers(self):
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is False

def test_format(self):
# GH35439
idx = self.create_index()
max_width = max(len(str(x)) for x in idx)
expected = [str(x).ljust(max_width) for x in idx]
assert idx.format() == expected

def test_numeric_compat(self):
pass # override Base method

Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2141,6 +2141,15 @@ def test_dict_entries(self):
assert "'a': 1" in val
assert "'b': 2" in val

def test_categorical_columns(self):
# GH35439
data = [[4, 2], [3, 2], [4, 3]]
cols = ["aaaaaaaaa", "b"]
df = pd.DataFrame(data, columns=cols)
df_cat_cols = pd.DataFrame(data, columns=pd.CategoricalIndex(cols))

assert df.to_string() == df_cat_cols.to_string()

def test_period(self):
# GH 12615
df = pd.DataFrame(
Expand Down