Skip to content

to_html formatter not called for float values in a mixed-type column (2) #26000

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
71e8b31
to_html formatter not called for float values in a mixed-type column
simonjayhawkins Apr 3, 2019
2a2bb57
changes to test as requested
simonjayhawkins Apr 4, 2019
1e5615b
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Apr 4, 2019
4ca48a1
Merge branch 'master' into GenericArrayFormatter
simonjayhawkins Apr 4, 2019
4ef3149
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Apr 5, 2019
d7a8510
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Apr 5, 2019
4b60e4b
shortcut format_array
simonjayhawkins Apr 5, 2019
5ac441b
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Apr 5, 2019
8a64459
add shortcut parameter to format_array
simonjayhawkins Apr 5, 2019
9c1354c
add whatsnew for #26002
simonjayhawkins Apr 5, 2019
d0df1d6
remove shortcut parameter from format_array
simonjayhawkins Apr 6, 2019
c74b0aa
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Apr 6, 2019
4262113
remove whatsnew for #26002
simonjayhawkins Apr 6, 2019
f0cf9b7
defer to GenericArrayFormatter for IntervalArray
simonjayhawkins Apr 7, 2019
d6bee41
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Jun 13, 2019
1c535a1
Merge remote-tracking branch 'upstream/master' into GenericArrayFormatter
simonjayhawkins Jun 18, 2019
5ecf91a
pre-format instead of shortcut
simonjayhawkins Jun 18, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,8 @@ I/O
^^^

- Bug in :meth:`DataFrame.to_html` where values were truncated using display options instead of outputting the full content (:issue:`17004`)
- Bug in :meth:`DataFrame.to_html` that would ignore ``formatters`` argument for float values in a column with ``dtype=object`` (:issue:`13021`)
- Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` resulting in incorrect column spacing using ``formatters`` on a column with ``dtype=object`` (:issue:`26002`)
- Bug in :meth:`to_clipboard` where text was missing when copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
- Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1973,11 +1973,11 @@ def _repr_categories(self):
from pandas.io.formats import format as fmt
if len(self.categories) > max_categories:
num = max_categories // 2
head = fmt.format_array(self.categories[:num], None)
tail = fmt.format_array(self.categories[-num:], None)
head = fmt.format_array(self.categories[:num])
tail = fmt.format_array(self.categories[-num:])
category_strs = head + ["..."] + tail
else:
category_strs = fmt.format_array(self.categories, None)
category_strs = fmt.format_array(self.categories)

# Strip all leading spaces, which format_array adds for columns...
category_strs = [x.strip() for x in category_strs]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def _from_factorized(cls, values, original):
def _formatter(self, boxed=False):
def fmt(x):
if isna(x):
return 'NaN'
return np.nan if boxed else 'NaN'
return str(x)
return fmt

Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,10 @@ def value_counts(self, dropna=True):

# Formatting

def _formatter(self, boxed=False):
# Defer to GenericArrayFormatter's formatter.
return None

def _format_data(self):

# TODO: integrate with categorical and make generic
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs):
result = result.tolist()

else:
result = _trim_front(format_array(values, None, justify='left'))
result = _trim_front(format_array(values, justify='left'))
return header + result

def to_native_types(self, slicer=None, **kwargs):
Expand Down
50 changes: 24 additions & 26 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def _get_footer(self):
return str(footer)

def _get_formatted_values(self):
return format_array(self.categorical.get_values(), None,
return format_array(self.categorical.get_values(),
float_format=None, na_rep=self.na_rep)

def to_string(self):
Expand Down Expand Up @@ -249,7 +249,7 @@ def _get_formatted_index(self):

def _get_formatted_values(self):
values_to_format = self.tr_series._formatting_values()
return format_array(values_to_format, None,
return format_array(values_to_format,
float_format=self.float_format, na_rep=self.na_rep)

def to_string(self):
Expand Down Expand Up @@ -853,7 +853,7 @@ def _get_column_name_list(self):
# Array formatters


def format_array(values, formatter, float_format=None, na_rep='NaN',
def format_array(values, formatter=None, float_format=None, na_rep='NaN',
digits=None, space=None, justify='right', decimal='.',
leading_space=None):
"""
Expand Down Expand Up @@ -883,6 +883,17 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
List[str]
"""

if is_datetime64_dtype(values.dtype):
if not isinstance(values, DatetimeIndex):
values = DatetimeIndex(values)

# We apply the optional formatter function passed as a parameter before
# applying the additional formatting parameters. This allows an
# ExtensionArray (EA) to control formatting while still honouring the
# additional formatting options.
# The pre-formatted values are stored with dtype=object so that they
# dispatch to GenericArrayFormatter.
if formatter is not None and callable(formatter):
values = np.array([formatter(x) for x in values], dtype=object)

if is_datetime64_dtype(values.dtype):
fmt_klass = Datetime64Formatter
elif is_datetime64tz_dtype(values):
Expand All @@ -908,7 +919,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
digits = get_option("display.precision")

fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
float_format=float_format, formatter=formatter,
float_format=float_format,
space=space, justify=justify, decimal=decimal,
leading_space=leading_space)

Expand All @@ -917,14 +928,13 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',

class GenericArrayFormatter:

def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
def __init__(self, values, digits=7, na_rep='NaN',
space=12, float_format=None, justify='right', decimal='.',
quoting=None, fixed_width=True, leading_space=None):
self.values = values
self.digits = digits
self.na_rep = na_rep
self.space = space
self.formatter = formatter
self.float_format = float_format
self.justify = justify
self.decimal = decimal
Expand All @@ -946,9 +956,7 @@ def _format_strings(self):
else:
float_format = self.float_format

formatter = (
self.formatter if self.formatter is not None else
(lambda x: pprint_thing(x, escape_chars=('\t', '\r', '\n'))))
formatter = lambda x: pprint_thing(x, escape_chars=('\t', '\r', '\n'))

def _format(x):
if self.na_rep is not None and is_scalar(x) and isna(x):
Expand Down Expand Up @@ -1004,7 +1012,8 @@ class FloatArrayFormatter(GenericArrayFormatter):
"""

def __init__(self, *args, **kwargs):
GenericArrayFormatter.__init__(self, *args, **kwargs)
super().__init__(*args, **kwargs)
self.formatter = None

# float_format is expected to be a string
# formatter should be used to pass a function
Expand Down Expand Up @@ -1062,9 +1071,6 @@ def get_result_as_array(self):
the parameters given at initialisation, as a numpy array
"""

if self.formatter is not None:
return np.array([self.formatter(x) for x in self.values])

if self.fixed_width:
threshold = get_option("display.chop_threshold")
else:
Expand Down Expand Up @@ -1154,7 +1160,7 @@ def _format_strings(self):
class IntArrayFormatter(GenericArrayFormatter):

def _format_strings(self):
formatter = self.formatter or (lambda x: '{x: d}'.format(x=x))
formatter = lambda x: '{x: d}'.format(x=x)
fmt_values = [formatter(x) for x in self.values]
return fmt_values

Expand All @@ -1171,12 +1177,6 @@ def _format_strings(self):

values = self.values

if not isinstance(values, DatetimeIndex):
values = DatetimeIndex(values)

if self.formatter is not None and callable(self.formatter):
return [self.formatter(x) for x in values]

fmt_values = format_array_from_datetime(
values.asi8.ravel(),
format=_get_format_datetime64_from_values(values,
Expand Down Expand Up @@ -1337,9 +1337,8 @@ def _format_strings(self):

values = self.values.astype(object)
is_dates_only = _is_dates_only(values)
formatter = (self.formatter or
_get_format_datetime64(is_dates_only,
date_format=self.date_format))
formatter = _get_format_datetime64(is_dates_only,
date_format=self.date_format)
fmt_values = [formatter(x) for x in values]

return fmt_values
Expand All @@ -1353,9 +1352,8 @@ def __init__(self, values, nat_rep='NaT', box=False, **kwargs):
self.box = box

def _format_strings(self):
formatter = (self.formatter or
_get_format_timedelta64(self.values, nat_rep=self.nat_rep,
box=self.box))
formatter = _get_format_timedelta64(self.values, nat_rep=self.nat_rep,
box=self.box)
fmt_values = np.array([formatter(x) for x in self.values])
return fmt_values

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/arrays/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,14 @@ def test_frame_repr(data_missing):
assert result == expected


def test_frame_to_string_na_rep(data_missing):

df = pd.DataFrame({'A': data_missing})
result = df.to_string(na_rep='foo')
expected = ' A\n0 foo\n1 1'
assert result == expected


def test_conversions(data_missing):

# astype to object series
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/io/formats/data/html/gh13021_expected_output.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>x</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>a</td>
</tr>
<tr>
<th>1</th>
<td>$0</td>
</tr>
<tr>
<th>2</th>
<td>$10</td>
</tr>
<tr>
<th>3</th>
<td>$3</td>
</tr>
</tbody>
</table>
38 changes: 18 additions & 20 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,10 +498,10 @@ def test_to_string_with_formatters(self):
('object', lambda x: '-{x!s}-'.format(x=x))]
result = df.to_string(formatters=dict(formatters))
result2 = df.to_string(formatters=list(zip(*formatters))[1])
assert result == (' int float object\n'
'0 0x1 [ 1.0] -(1, 2)-\n'
'1 0x2 [ 2.0] -True-\n'
'2 0x3 [ 3.0] -False-')
assert result == (' int float object\n'
'0 0x1 [ 1.0] -(1, 2)-\n'
'1 0x2 [ 2.0] -True-\n'
'2 0x3 [ 3.0] -False-')
assert result == result2

def test_to_string_with_datetime64_monthformatter(self):
Expand All @@ -511,7 +511,7 @@ def test_to_string_with_datetime64_monthformatter(self):
def format_func(x):
return x.strftime('%Y-%m')
result = x.to_string(formatters={'months': format_func})
expected = 'months\n0 2016-01\n1 2016-02'
expected = 'months\n0 2016-01\n1 2016-02'
assert result.strip() == expected

def test_to_string_with_datetime64_hourformatter(self):
Expand All @@ -523,7 +523,7 @@ def format_func(x):
return x.strftime('%H:%M')

result = x.to_string(formatters={'hod': format_func})
expected = 'hod\n0 10:10\n1 12:12'
expected = 'hod\n0 10:10\n1 12:12'
assert result.strip() == expected

def test_to_string_with_formatters_unicode(self):
Expand Down Expand Up @@ -2547,19 +2547,19 @@ class TestDatetime64Formatter:

def test_mixed(self):
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT])
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01 00:00:00"
assert result[1].strip() == "2013-01-01 12:00:00"

def test_dates(self):
x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT])
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01"
assert result[1].strip() == "2013-01-02"

def test_date_nanos(self):
x = Series([Timestamp(200)])
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "1970-01-01 00:00:00.000000200"

def test_dates_display(self):
Expand All @@ -2568,35 +2568,35 @@ def test_dates_display(self):
# make sure that we consistently display date formatting
x = Series(date_range('20130101 09:00:00', periods=5, freq='D'))
x.iloc[1] = np.nan
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-05 09:00:00"

x = Series(date_range('20130101 09:00:00', periods=5, freq='s'))
x.iloc[1] = np.nan
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01 09:00:00"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:04"

x = Series(date_range('20130101 09:00:00', periods=5, freq='ms'))
x.iloc[1] = np.nan
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01 09:00:00.000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.004"

x = Series(date_range('20130101 09:00:00', periods=5, freq='us'))
x.iloc[1] = np.nan
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01 09:00:00.000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000004"

x = Series(date_range('20130101 09:00:00', periods=5, freq='N'))
x.iloc[1] = np.nan
result = fmt.Datetime64Formatter(x).get_result()
result = fmt.format_array(x)
assert result[0].strip() == "2013-01-01 09:00:00.000000000"
assert result[1].strip() == "NaT"
assert result[4].strip() == "2013-01-01 09:00:00.000000004"
Expand All @@ -2607,9 +2607,8 @@ def test_datetime64formatter_yearmonth(self):
def format_func(x):
return x.strftime('%Y-%m')

formatter = fmt.Datetime64Formatter(x, formatter=format_func)
result = formatter.get_result()
assert result == ['2016-01', '2016-02']
result = fmt.format_array(x, formatter=format_func)
assert result == [' 2016-01', ' 2016-02']

def test_datetime64formatter_hoursecond(self):

Expand All @@ -2619,9 +2618,8 @@ def test_datetime64formatter_hoursecond(self):
def format_func(x):
return x.strftime('%H:%M')

formatter = fmt.Datetime64Formatter(x, formatter=format_func)
result = formatter.get_result()
assert result == ['10:10', '12:12']
result = fmt.format_array(x, formatter=format_func)
assert result == [' 10:10', ' 12:12']


class TestNaTFormatting:
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/io/formats/test_to_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,17 @@ def test_to_html_invalid_classes_type(classes):
df.to_html(classes=classes)


def test_to_html_formatters_object_type(datapath):
# GH 13021
def f(x):
return x if isinstance(x, str) else '${:,.0f}'.format(x)

df = pd.DataFrame([['a'], [0], [10.4], [3]], columns=['x'])
result = df.to_html(formatters=dict(x=f))
expected = expected_html(datapath, 'gh13021_expected_output')
assert result == expected


def test_to_html_round_column_headers():
# GH 17280
df = DataFrame([1], columns=[0.55555])
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ def test_to_latex_with_formatters(self):

expected = r"""\begin{tabular}{llrrl}
\toprule
{} & datetime64 & float & int & object \\
{} & datetime64 & float & int & object \\
\midrule
index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\
index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\
index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\
index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\
index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\
index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\
\bottomrule
\end{tabular}
"""
Expand Down