Skip to content

Fix repr of DataFrame with IntervalIndex #24134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,10 +1015,11 @@ def _format_with_header(self, header, **kwargs):

def _format_native_types(self, na_rep='', quoting=None, **kwargs):
""" actually format my specific types """
from pandas.io.formats.format import IntervalArrayFormatter
return IntervalArrayFormatter(values=self,
na_rep=na_rep,
justify='all').get_result()
from pandas.io.formats.format import ExtensionArrayFormatter
return ExtensionArrayFormatter(values=self,
na_rep=na_rep,
justify='all',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we need this new arg? just change the output tests, which are incorrect

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In 0.23.4, we didn't have the leading space for indexes.

In [2]: df = pd.Series(1, index=pd.IntervalIndex.from_breaks([1, 2, 3, 4])).to_frame()

In [3]: df
Out[3]:
        0
(1, 2]  1
(2, 3]  1
(3, 4]  1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn’t this the same issue you recently adjusted for DTI? These keywords just promote inconsistency.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No I don't think so.

AFAICT, this is happening because IntervalIndex and Series[Interval] are now both using GenericArrayFormatter to format the values. Series need a leading space, but indexes don't. So I think things should be more consistent. If you want I can remove the keyword and go back to the old implementation which just did the formatting on its own, but I suspect you don't want that :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok i guess

leading_space=False).get_result()

def _format_data(self, name=None):

Expand Down
50 changes: 44 additions & 6 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,34 @@ def _get_column_name_list(self):


def format_array(values, formatter, float_format=None, na_rep='NaN',
digits=None, space=None, justify='right', decimal='.'):
digits=None, space=None, justify='right', decimal='.',
leading_space=None):
"""
Format an array for printing.

Parameters
----------
values
formatter
float_format
na_rep
digits
space
justify
decimal
leading_space : bool, optional
Whether the array should be formatted with a leading space.
When an array is formatted as a column of a Series or DataFrame,
we do want the leading space to pad between columns.

When formatting an Index subclass
(e.g. IntervalIndex._format_native_types), we don't want the
leading space since it should be left-aligned.

Returns
-------
List[str]
"""

if is_datetime64_dtype(values.dtype):
fmt_klass = Datetime64Formatter
Expand Down Expand Up @@ -868,7 +895,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',

fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
float_format=float_format, formatter=formatter,
space=space, justify=justify, decimal=decimal)
space=space, justify=justify, decimal=decimal,
leading_space=leading_space)

return fmt_obj.get_result()

Expand All @@ -877,7 +905,7 @@ class GenericArrayFormatter(object):

def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
space=12, float_format=None, justify='right', decimal='.',
quoting=None, fixed_width=True):
quoting=None, fixed_width=True, leading_space=None):
self.values = values
self.digits = digits
self.na_rep = na_rep
Expand All @@ -888,6 +916,7 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
self.decimal = decimal
self.quoting = quoting
self.fixed_width = fixed_width
self.leading_space = leading_space

def get_result(self):
fmt_values = self._format_strings()
Expand Down Expand Up @@ -927,7 +956,9 @@ def _format(x):
vals = vals.values

is_float_type = lib.map_infer(vals, is_float) & notna(vals)
leading_space = is_float_type.any()
leading_space = self.leading_space
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you document when this is set?

if leading_space is None:
leading_space = is_float_type.any()

fmt_values = []
for i, v in enumerate(vals):
Expand All @@ -936,7 +967,13 @@ def _format(x):
elif is_float_type[i]:
fmt_values.append(float_format(v))
else:
fmt_values.append(u' {v}'.format(v=_format(v)))
if leading_space is False:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is so hacky, we should not be doing this

# False specifically, so that the default is
# to include a space if we get here.
tpl = u'{v}'
else:
tpl = u' {v}'
fmt_values.append(tpl.format(v=_format(v)))

return fmt_values

Expand Down Expand Up @@ -1135,7 +1172,8 @@ def _format_strings(self):
formatter,
float_format=self.float_format,
na_rep=self.na_rep, digits=self.digits,
space=self.space, justify=self.justify)
space=self.space, justify=self.justify,
leading_space=self.leading_space)
return fmt_values


Expand Down
16 changes: 15 additions & 1 deletion pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,21 @@ def test_repr_max_seq_item_setting(self):
def test_repr_roundtrip(self):
super(TestIntervalIndex, self).test_repr_roundtrip()

# TODO: check this behavior is consistent with test_interval_new.py
def test_frame_repr(self):
# https://github.com/pandas-dev/pandas/pull/24134/files
df = pd.DataFrame({'A': [1, 2, 3, 4]},
index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]))
result = repr(df)
expected = (
' A\n'
'(0, 1] 1\n'
'(1, 2] 2\n'
'(2, 3] 3\n'
'(3, 4] 4'
)
assert result == expected

# TODO: check this behavior is consistent with test_interval_new.py
def test_get_item(self, closed):
i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
closed=closed)
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/period/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ def test_to_native_types():


class TestPeriodIndexRendering(object):

def test_frame_repr(self):
df = pd.DataFrame({"A": [1, 2, 3]},
index=pd.date_range('2000', periods=3))
result = repr(df)
expected = (
' A\n'
'2000-01-01 1\n'
'2000-01-02 2\n'
'2000-01-03 3')
assert result == expected

@pytest.mark.parametrize('method', ['__repr__', '__unicode__', '__str__'])
def test_representation(self, method):
# GH#7601
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,13 @@ def test_equals_categoridcal_unordered(self):
assert not a.equals(c)
assert not b.equals(c)

def test_frame_repr(self):
df = pd.DataFrame({"A": [1, 2, 3]},
index=pd.CategoricalIndex(['a', 'b', 'c']))
result = repr(df)
expected = ' A\na 1\nb 2\nc 3'
assert result == expected

def test_string_categorical_index_repr(self):
# short
idx = pd.CategoricalIndex(['a', 'bb', 'ccc'])
Expand Down