Skip to content

ENH: Removing unnecessary whitespace when formatting to a HTML table. #5012

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 39 additions & 28 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
-------
formatted : string (or unicode, depending on data and options)"""


class CategoricalFormatter(object):
def __init__(self, categorical, buf=None, length=True,
na_rep='NaN', name=False, footer=True):
Expand Down Expand Up @@ -140,7 +141,7 @@ def __init__(self, series, buf=None, header=True, length=True,
if float_format is None:
float_format = get_option("display.float_format")
self.float_format = float_format
self.dtype = dtype
self.dtype = dtype

def _get_footer(self):
footer = u('')
Expand All @@ -163,7 +164,7 @@ def _get_footer(self):
footer += 'Length: %d' % len(self.series)

if self.dtype:
if getattr(self.series.dtype,'name',None):
if getattr(self.series.dtype, 'name', None):
if footer:
footer += ', '
footer += 'dtype: %s' % com.pprint_thing(self.series.dtype.name)
Expand Down Expand Up @@ -213,6 +214,7 @@ def to_string(self):

return compat.text_type(u('\n').join(result))


def _strlen_func():
if compat.PY3: # pragma: no cover
_strlen = len
Expand Down Expand Up @@ -304,32 +306,31 @@ def _to_str_columns(self):

for i, c in enumerate(self.columns):
if self.header:
fmt_values = self._format_col(i)
cheader = str_columns[i]

max_colwidth = max(self.col_space or 0,
*(_strlen(x) for x in cheader))

fmt_values = _make_fixed_width(fmt_values, self.justify,
minimum=max_colwidth)
fmt_values = self._format_col(i, justify=self.justify,
minimum=max_colwidth)

max_len = max(np.max([_strlen(x) for x in fmt_values]),
max_colwidth)
if self.justify == 'left':
cheader = [x.ljust(max_len) for x in cheader]
else:
elif self.justify == 'right':
cheader = [x.rjust(max_len) for x in cheader]
elif self.justify == 'center':
cheader = [x.center(max_len) for x in cheader]
else:
cheader = [x.strip() for x in cheader]

stringified.append(cheader + fmt_values)
else:
stringified = [_make_fixed_width(self._format_col(i),
self.justify)
stringified = [self._format_col(i, justify=self.justify)
for i, c in enumerate(self.columns)]

strcols = stringified
if self.index:
strcols.insert(0, str_index)

return strcols

def to_string(self, force_unicode=None):
Expand Down Expand Up @@ -450,12 +451,14 @@ def write(buf, frame, column_format, strcols):
raise TypeError('buf is not a file name and it has no write '
'method')

def _format_col(self, i):
def _format_col(self, i, justify='right', minimum=None):
formatter = self._get_formatter(i)
return format_array(self.frame.icol(i).get_values(), formatter,
float_format=self.float_format,
na_rep=self.na_rep,
space=self.col_space)
space=self.col_space,
justify=justify,
minimum=minimum)

def to_html(self, classes=None):
"""
Expand Down Expand Up @@ -735,7 +738,7 @@ def _write_body(self, indent):

fmt_values = {}
for i in range(len(self.columns)):
fmt_values[i] = self.fmt._format_col(i)
fmt_values[i] = self.fmt._format_col(i, justify=None)

# write values
if self.fmt.index:
Expand Down Expand Up @@ -1485,7 +1488,7 @@ def get_formatted_cells(self):


def format_array(values, formatter, float_format=None, na_rep='NaN',
digits=None, space=None, justify='right'):
digits=None, space=None, justify='right', minimum=None):
if com.is_float_dtype(values.dtype):
fmt_klass = FloatArrayFormatter
elif com.is_integer_dtype(values.dtype):
Expand All @@ -1509,26 +1512,27 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
float_format=float_format,
formatter=formatter, space=space,
justify=justify)
justify=justify, minimum=minimum)

return fmt_obj.get_result()


class GenericArrayFormatter(object):

def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
space=12, float_format=None, justify='right'):
space=12, float_format=None, justify='right', minimum=None):
self.values = values
self.digits = digits
self.na_rep = na_rep
self.space = space
self.formatter = formatter
self.float_format = float_format
self.justify = justify
self.minimum = minimum

def get_result(self):
fmt_values = self._format_strings()
return _make_fixed_width(fmt_values, self.justify)
return _make_fixed_width(fmt_values, self.justify, self.minimum)

def _format_strings(self):
if self.float_format is None:
Expand Down Expand Up @@ -1584,19 +1588,19 @@ def __init__(self, *args, **kwargs):
def _format_with(self, fmt_str):
def _val(x, threshold):
if notnull(x):
if threshold is None or abs(x) > get_option("display.chop_threshold"):
if threshold is None or abs(x) > get_option("display.chop_threshold"):
return fmt_str % x
else:
if fmt_str.endswith("e"): # engineering format
return "0"
if fmt_str.endswith("e"): # engineering format
return "0"
else:
return fmt_str % 0
return fmt_str % 0
else:

return self.na_rep

threshold = get_option("display.chop_threshold")
fmt_values = [ _val(x, threshold) for x in self.values]
fmt_values = [_val(x, threshold) for x in self.values]
return _trim_zeros(fmt_values, self.na_rep)

def get_result(self):
Expand Down Expand Up @@ -1627,7 +1631,7 @@ def get_result(self):
fmt_str = '%% .%de' % (self.digits - 1)
fmt_values = self._format_with(fmt_str)

return _make_fixed_width(fmt_values, self.justify)
return _make_fixed_width(fmt_values, self.justify, self.minimum)


class IntArrayFormatter(GenericArrayFormatter):
Expand All @@ -1640,7 +1644,7 @@ def get_result(self):

fmt_values = [formatter(x) for x in self.values]

return _make_fixed_width(fmt_values, self.justify)
return _make_fixed_width(fmt_values, self.justify, self.minimum)


class Datetime64Formatter(GenericArrayFormatter):
Expand All @@ -1652,7 +1656,8 @@ def get_result(self):
formatter = _format_datetime64

fmt_values = [formatter(x) for x in self.values]
return _make_fixed_width(fmt_values, self.justify)
return _make_fixed_width(fmt_values, self.justify, self.minimum)


def _format_datetime64(x, tz=None):
if isnull(x):
Expand All @@ -1672,14 +1677,16 @@ def get_result(self):
formatter = _format_timedelta64

fmt_values = [formatter(x) for x in self.values]
return _make_fixed_width(fmt_values, self.justify)
return _make_fixed_width(fmt_values, self.justify, self.minimum)


def _format_timedelta64(x):
    """Return the display string for a single timedelta64 value.

    Values that ``isnull`` reports as missing are rendered as ``'NaT'``;
    everything else is delegated to ``lib.repr_timedelta64``.
    """
    return 'NaT' if isnull(x) else lib.repr_timedelta64(x)


def _make_fixed_width(strings, justify='right', minimum=None):
if len(strings) == 0:
return strings
Expand All @@ -1697,8 +1704,12 @@ def _make_fixed_width(strings, justify='right', minimum=None):

if justify == 'left':
justfunc = lambda self, x: self.ljust(x)
else:
elif justify == 'right':
justfunc = lambda self, x: self.rjust(x)
elif justify == 'center':
justfunc = lambda self, x: self.center(x)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these be of the form `str.center`, `str.rjust`, etc.?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, because the string being justified can also be a `unicode` object, not only a `str`.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I meant `compat.text_type.center` ... or are we allowing both `str` and `unicode`? If we are allowing both, then disregard my suggestion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`compat.text_type` is just `unicode` in Python 2, and it will raise an error if it receives a `str`:

>>> pd.compat.text_type.center('test', 30)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: descriptor 'center' requires a 'unicode' object but received a 'str'

And, as I said in my previous comment, the same applies the other way round:

>>> pd.compat.binary_type.center(u'test', 30)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: descriptor 'center' requires a 'str' object but received a 'unicode'

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

okay forget it then :)

else:
justfunc = lambda self, _: self.strip()

def just(x):
eff_len = max_len
Expand Down
Loading