Skip to content

Commit b2eec25

Browse files
authored
CLN: move common printing utilities to pandas.io.formats.printing (pandas-dev#21234)
1 parent b64e9d5 commit b2eec25

File tree

2 files changed

+160
-117
lines changed

2 files changed

+160
-117
lines changed

pandas/core/indexes/base.py

+6-117
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,11 @@
5757
import pandas.core.missing as missing
5858
import pandas.core.algorithms as algos
5959
import pandas.core.sorting as sorting
60-
from pandas.io.formats.printing import pprint_thing
60+
from pandas.io.formats.printing import (
61+
pprint_thing, default_pprint, format_object_summary, format_object_attrs)
6162
from pandas.core.ops import make_invalid_op
62-
from pandas.core.config import get_option
6363
from pandas.core.strings import StringMethods
6464

65-
66-
# simplify
67-
default_pprint = lambda x, max_seq_items=None: \
68-
pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True,
69-
max_seq_items=max_seq_items)
70-
7165
__all__ = ['Index']
7266

7367
_unsortable_types = frozenset(('mixed', 'mixed-integer'))
@@ -1034,133 +1028,28 @@ def _format_space(self):
10341028
@property
def _formatter_func(self):
    """
    Return the callable used to format a single element for display
    """
    return default_pprint
10401034

10411035
def _format_data(self, name=None):
    """
    Build the string summary of the index values.

    The work is delegated to ``format_object_summary``; this method only
    decides whether the values should be justified.
    """
    # justify only non-object data: string-like values and categoricals
    # with object-dtype categories are left unjustified
    kind = self.inferred_type
    if kind in ('string', 'unicode'):
        is_justify = False
    elif kind == 'categorical' and is_object_dtype(self.categories):
        is_justify = False
    else:
        is_justify = True

    return format_object_summary(self, self._formatter_func,
                                 is_justify=is_justify, name=name)
11511047

11521048
def _format_attrs(self):
    """
    Return the (attr, formatted_value) tuples for this object, as
    produced by ``format_object_attrs``
    """
    return format_object_attrs(self)
11641053

11651054
def to_series(self, index=None, name=None):
11661055
"""

pandas/io/formats/printing.py

+154
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,157 @@ class TableSchemaFormatter(BaseFormatter):
261261
# unregister tableschema mime-type
262262
if mimetype in formatters:
263263
formatters[mimetype].enabled = False
264+
265+
266+
def default_pprint(x, max_seq_items=None):
    """
    Pretty-print ``x`` through ``pprint_thing`` with string quoting
    enabled and tab, carriage return and newline passed as escape_chars.

    Parameters
    ----------
    x : object
        the thing to format
    max_seq_items : int, optional
        forwarded unchanged to ``pprint_thing``

    Returns
    -------
    the value returned by ``pprint_thing``
    """
    return pprint_thing(x, escape_chars=('\t', '\r', '\n'),
                        quote_strings=True, max_seq_items=max_seq_items)
269+
270+
271+
def format_object_summary(obj, formatter, is_justify=True, name=None):
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __len__ and __getitem__
        (integer and slice indexing)
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj

    Returns
    -------
    summary string

    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option('display.width') or 80
    if name is None:
        name = obj.__class__.__name__

    # continuation prefixes: space2 starts every wrapped line of values,
    # space1 is appended after '],' when the summary exceeds display_width
    space1 = "\n%s" % (' ' * (len(name) + 1))
    space2 = "\n%s" % (' ' * (len(name) + 2))

    n = len(obj)
    sep = ','
    max_seq_items = get_option('display.max_seq_items') or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(s, line, value, display_width, next_line_prefix):
        # flush the current line into ``s`` and start a fresh one when
        # appending ``value`` would reach the display width
        if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                display_width):
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values):
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    if n == 0:
        summary = '[], '
    elif n == 1:
        first = formatter(obj[0])
        summary = '[%s], ' % first
    elif n == 2:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = '[%s, %s], ' % (first, last)
    else:

        if max_seq_items == 1:
            # max_seq_items // 2 == 0 here, and obj[-0:] would be the whole
            # sequence, so show only the last element after the '...'
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif is_truncated:
            num_shown = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:num_shown]]
            tail = [formatter(x) for x in obj[-num_shown:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:

            # however, if we are not truncated and we are only a single
            # line, then don't justify
            if (is_truncated or
                    not (len(', '.join(head)) < display_width and
                         len(', '.join(tail)) < display_width)):
                max_len = max(best_len(head), best_len(tail))
                head = [x.rjust(max_len) for x in head]
                tail = [x.rjust(max_len) for x in tail]

        summary = ""
        line = space2

        for value in head:
            word = value + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + '...'
            line = space2

        for value in tail[:-1]:
            word = value + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1],
                                     display_width - 2, space2)
        summary += line
        summary += '],'

        if len(summary) > (display_width):
            summary += space1
        else:  # one row
            summary += ' '

        # remove initial space
        summary = '[' + summary[len(space2):]

    return summary
392+
393+
394+
def format_object_attrs(obj):
    """
    Collect (attr, formatted_value) tuples describing obj.

    Entries are added in the order dtype, name, length: dtype when obj
    has that attribute, name when it is present and not None, and length
    only when len(obj) exceeds the 'display.max_seq_items' option.

    Parameters
    ----------
    obj : object
        must support len()

    Returns
    -------
    list

    """
    result = []

    if hasattr(obj, 'dtype'):
        result.append(('dtype', "'{}'".format(obj.dtype)))

    label = getattr(obj, 'name', None)
    if label is not None:
        result.append(('name', default_pprint(label)))

    size = len(obj)
    # an unset (falsy) option means "no limit", so fall back to the size
    limit = get_option('display.max_seq_items') or size
    if size > limit:
        result.append(('length', size))

    return result

0 commit comments

Comments
 (0)