From 7c571101907a2b38dfab6a5e7f4a87879bb9e63a Mon Sep 17 00:00:00 2001 From: Nicolas Bonnotte Date: Fri, 12 Feb 2016 22:30:09 +0100 Subject: [PATCH] CLN FloatArrayFormatter, #12164 closes #12164 closes #12194 --- pandas/core/format.py | 184 +++++++++++++++++++++++--------------- pandas/core/internals.py | 5 +- pandas/indexes/numeric.py | 5 +- 3 files changed, 118 insertions(+), 76 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index d7f3a669de9f4..adaf462c08479 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -2008,7 +2008,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', class GenericArrayFormatter(object): def __init__(self, values, digits=7, formatter=None, na_rep='NaN', space=12, float_format=None, justify='right', decimal='.', - quoting=None): + quoting=None, fixed_width=True): self.values = values self.digits = digits self.na_rep = na_rep @@ -2018,6 +2018,7 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN', self.justify = justify self.decimal = decimal self.quoting = quoting + self.fixed_width = fixed_width def get_result(self): fmt_values = self._format_strings() @@ -2076,96 +2077,135 @@ class FloatArrayFormatter(GenericArrayFormatter): def __init__(self, *args, **kwargs): GenericArrayFormatter.__init__(self, *args, **kwargs) + # float_format is expected to be a string + # formatter should be used to pass a function if self.float_format is not None and self.formatter is None: - self.formatter = self.float_format - - def _format_with(self, fmt_str): - def _val(x, threshold): - if notnull(x): - if (threshold is None or - abs(x) > get_option("display.chop_threshold")): - return fmt_str % x + if callable(self.float_format): + self.formatter = self.float_format + self.float_format = None + + def _value_formatter(self, float_format=None, threshold=None): + """Returns a function to be applied on each value to format it + """ + + # the float_format parameter supersedes 
self.float_format + if float_format is None: + float_format = self.float_format + + # we are going to compose different functions, to first convert to + # a string, then replace the decimal symbol, and finally chop according + # to the threshold + + # when there is no float_format, we use str instead of '%g' + # because str(0.0) = '0.0' while '%g' % 0.0 = '0' + if float_format: + def base_formatter(v): + return (float_format % v) if notnull(v) else self.na_rep + else: + def base_formatter(v): + return str(v) if notnull(v) else self.na_rep + + if self.decimal != '.': + def decimal_formatter(v): + return base_formatter(v).replace('.', self.decimal, 1) + else: + decimal_formatter = base_formatter + + if threshold is None: + return decimal_formatter + + def formatter(value): + if notnull(value): + if abs(value) > threshold: + return decimal_formatter(value) else: - if fmt_str.endswith("e"): # engineering format - return "0" - else: - return fmt_str % 0 + return decimal_formatter(0.0) else: - return self.na_rep - threshold = get_option("display.chop_threshold") - fmt_values = [_val(x, threshold) for x in self.values] - return _trim_zeros(fmt_values, self.na_rep) + return formatter + + def _format_values_as_array(self): + """Returns a numpy array containing the formatted values + """ - def _format_strings(self): if self.formatter is not None: - fmt_values = [self.formatter(x) for x in self.values] + return np.array([self.formatter(x) for x in self.values]) + + if self.fixed_width: + threshold = get_option("display.chop_threshold") else: - fmt_str = '%% .%df' % self.digits - fmt_values = self._format_with(fmt_str) + threshold = None - if len(fmt_values) > 0: - maxlen = max(len(x) for x in fmt_values) - else: - maxlen = 0 + # if we have a fixed_width, we'll need to try different float_format + def format_values_with(float_format): + formatter = self._value_formatter(float_format, threshold) - too_long = maxlen > self.digits + 6 + # separate the wheat from the chaff + 
values = self.values + mask = isnull(values) + if hasattr(values, 'to_dense'): # sparse numpy ndarray + values = values.to_dense() + values = np.array(values, dtype='object') + values[mask] = self.na_rep + imask = (~mask).ravel() + values.flat[imask] = np.array([formatter(val) + for val in values.ravel()[imask]]) - abs_vals = np.abs(self.values) + if self.fixed_width: + return _trim_zeros(values, self.na_rep) - # this is pretty arbitrary for now - # large values: more that 8 characters including decimal symbol - # and first digit, hence > 1e6 - has_large_values = (abs_vals > 1e6).any() - has_small_values = ((abs_vals < 10**(-self.digits)) & - (abs_vals > 0)).any() + return values - if too_long and has_large_values: - fmt_str = '%% .%de' % self.digits - fmt_values = self._format_with(fmt_str) - elif has_small_values: - fmt_str = '%% .%de' % self.digits - fmt_values = self._format_with(fmt_str) + # There is a special default string when we are fixed-width + # The default is otherwise to use str instead of a formatting string + if self.float_format is None and self.fixed_width: + float_format = '%% .%df' % self.digits + else: + float_format = self.float_format - return fmt_values + formatted_values = format_values_with(float_format) - def get_formatted_data(self): - """Returns the array with its float values converted into strings using - the parameters given at initalisation. + if not self.fixed_width: + return formatted_values - Note: the method `.get_result()` does something similar, but with a - fixed-width output suitable for screen printing. The output here is not - fixed-width. 
- """ - values = self.values - mask = isnull(values) - - # the following variable is to be applied on each value to format it - # according to the string containing the float format, - # self.float_format and the character to use as decimal separator, - # self.decimal - formatter = None - if self.float_format and self.decimal != '.': - formatter = lambda v: ( - (self.float_format % v).replace('.', self.decimal, 1)) - elif self.decimal != '.': # no float format - formatter = lambda v: str(v).replace('.', self.decimal, 1) - elif self.float_format: # no special decimal separator - formatter = lambda v: self.float_format % v - - if formatter is None and not self.quoting: - values = values.astype(str) + # we need to convert to engineering format if some values are too small + # and would appear as 0, or if some values are too big and take too + # much space + + if len(formatted_values) > 0: + maxlen = max(len(x) for x in formatted_values) + too_long = maxlen > self.digits + 6 else: - values = np.array(values, dtype='object') + too_long = False - values[mask] = self.na_rep - if formatter: - imask = (~mask).ravel() - values.flat[imask] = np.array([formatter(val) - for val in values.ravel()[imask]]) + abs_vals = np.abs(self.values) + + # this is pretty arbitrary for now + # large values: more than 8 characters including decimal symbol + # and first digit, hence > 1e6 + has_large_values = (abs_vals > 1e6).any() + has_small_values = ((abs_vals < 10**(-self.digits)) & + (abs_vals > 0)).any() + + if has_small_values or (too_long and has_large_values): + float_format = '%% .%de' % self.digits + formatted_values = format_values_with(float_format) + + return formatted_values - return values + def _format_strings(self): + # shortcut + if self.formatter is not None: + return [self.formatter(x) for x in self.values] + + return list(self._format_values_as_array()) + + def get_result_as_array(self): + """Returns the float values converted into strings using + the parameters given at 
initialisation, as a numpy array + """ + return self._format_values_as_array() class IntArrayFormatter(GenericArrayFormatter): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 10053d33d6b51..8973ea025e611 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1380,8 +1380,9 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, from pandas.core.format import FloatArrayFormatter formatter = FloatArrayFormatter(values, na_rep=na_rep, float_format=float_format, - decimal=decimal, quoting=quoting) - return formatter.get_formatted_data() + decimal=decimal, quoting=quoting, + fixed_width=False) + return formatter.get_result_as_array() def should_store(self, value): # when inserting a column should not coerce integers to floats diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 61d93284adbbb..fa707056ff2b7 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -272,8 +272,9 @@ def _format_native_types(self, na_rep='', float_format=None, decimal='.', from pandas.core.format import FloatArrayFormatter formatter = FloatArrayFormatter(self.values, na_rep=na_rep, float_format=float_format, - decimal=decimal, quoting=quoting) - return formatter.get_formatted_data() + decimal=decimal, quoting=quoting, + fixed_width=False) + return formatter.get_result_as_array() def get_value(self, series, key): """ we always want to get an index value, never a value """