Skip to content

Commit 2d51b33

Browse files
Nicolas Bonnotte, jreback
Nicolas Bonnotte
authored and committed
CLN: FloatArrayFormatter cleanup
closes pandas-dev#12164 closes pandas-dev#12194
1 parent 76cb401 commit 2d51b33

File tree

3 files changed

+114
-76
lines changed

3 files changed

+114
-76
lines changed

pandas/core/format.py

+108-72
Original file line numberDiff line numberDiff line change
@@ -2008,7 +2008,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
20082008
class GenericArrayFormatter(object):
20092009
def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
20102010
space=12, float_format=None, justify='right', decimal='.',
2011-
quoting=None):
2011+
quoting=None, fixed_width=True):
20122012
self.values = values
20132013
self.digits = digits
20142014
self.na_rep = na_rep
@@ -2018,6 +2018,7 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
20182018
self.justify = justify
20192019
self.decimal = decimal
20202020
self.quoting = quoting
2021+
self.fixed_width = fixed_width
20212022

20222023
def get_result(self):
20232024
fmt_values = self._format_strings()
@@ -2076,96 +2077,131 @@ class FloatArrayFormatter(GenericArrayFormatter):
20762077
def __init__(self, *args, **kwargs):
20772078
GenericArrayFormatter.__init__(self, *args, **kwargs)
20782079

2080+
# float_format is expected to be a string
2081+
# formatter should be used to pass a function
20792082
if self.float_format is not None and self.formatter is None:
2080-
self.formatter = self.float_format
2081-
2082-
def _format_with(self, fmt_str):
2083-
def _val(x, threshold):
2084-
if notnull(x):
2085-
if (threshold is None or
2086-
abs(x) > get_option("display.chop_threshold")):
2087-
return fmt_str % x
2083+
if callable(self.float_format):
2084+
self.formatter = self.float_format
2085+
self.float_format = None
2086+
2087+
def _value_formatter(self, float_format=None, threshold=None):
2088+
"""Returns a function to be applied on each value to format it
2089+
"""
2090+
2091+
# the float_format parameter supersedes self.float_format
2092+
if float_format is None:
2093+
float_format = self.float_format
2094+
2095+
# we are going to compose different functions, to first convert to
2096+
# a string, then replace the decimal symbol, and finally chop according
2097+
# to the threshold
2098+
2099+
# when there is no float_format, we use str instead of '%g'
2100+
# because str(0.0) = '0.0' while '%g' % 0.0 = '0'
2101+
if float_format:
2102+
def base_formatter(v):
2103+
return (float_format % v) if notnull(v) else self.na_rep
2104+
else:
2105+
def base_formatter(v):
2106+
return str(v) if notnull(v) else self.na_rep
2107+
2108+
if self.decimal != '.':
2109+
def decimal_formatter(v):
2110+
return base_formatter(v).replace('.', self.decimal, 1)
2111+
else:
2112+
decimal_formatter = base_formatter
2113+
2114+
if threshold is None:
2115+
return decimal_formatter
2116+
2117+
def formatter(value):
2118+
if notnull(value):
2119+
if abs(value) > threshold:
2120+
return decimal_formatter(value)
20882121
else:
2089-
if fmt_str.endswith("e"): # engineering format
2090-
return "0"
2091-
else:
2092-
return fmt_str % 0
2122+
return decimal_formatter(0.0)
20932123
else:
2094-
20952124
return self.na_rep
20962125

2097-
threshold = get_option("display.chop_threshold")
2098-
fmt_values = [_val(x, threshold) for x in self.values]
2099-
return _trim_zeros(fmt_values, self.na_rep)
2126+
return formatter
2127+
2128+
def get_result_as_array(self):
2129+
"""
2130+
Returns the float values converted into strings using
2131+
the parameters given at initialisation, as a numpy array
2132+
"""
21002133

2101-
def _format_strings(self):
21022134
if self.formatter is not None:
2103-
fmt_values = [self.formatter(x) for x in self.values]
2135+
return np.array([self.formatter(x) for x in self.values])
2136+
2137+
if self.fixed_width:
2138+
threshold = get_option("display.chop_threshold")
21042139
else:
2105-
fmt_str = '%% .%df' % self.digits
2106-
fmt_values = self._format_with(fmt_str)
2140+
threshold = None
21072141

2108-
if len(fmt_values) > 0:
2109-
maxlen = max(len(x) for x in fmt_values)
2110-
else:
2111-
maxlen = 0
2142+
# if we have a fixed_width, we'll need to try different float_format
2143+
def format_values_with(float_format):
2144+
formatter = self._value_formatter(float_format, threshold)
21122145

2113-
too_long = maxlen > self.digits + 6
2146+
# separate the wheat from the chaff
2147+
values = self.values
2148+
mask = isnull(values)
2149+
if hasattr(values, 'to_dense'): # sparse numpy ndarray
2150+
values = values.to_dense()
2151+
values = np.array(values, dtype='object')
2152+
values[mask] = self.na_rep
2153+
imask = (~mask).ravel()
2154+
values.flat[imask] = np.array([formatter(val)
2155+
for val in values.ravel()[imask]])
21142156

2115-
abs_vals = np.abs(self.values)
2157+
if self.fixed_width:
2158+
return _trim_zeros(values, self.na_rep)
21162159

2117-
# this is pretty arbitrary for now
2118-
# large values: more that 8 characters including decimal symbol
2119-
# and first digit, hence > 1e6
2120-
has_large_values = (abs_vals > 1e6).any()
2121-
has_small_values = ((abs_vals < 10**(-self.digits)) &
2122-
(abs_vals > 0)).any()
2160+
return values
21232161

2124-
if too_long and has_large_values:
2125-
fmt_str = '%% .%de' % self.digits
2126-
fmt_values = self._format_with(fmt_str)
2127-
elif has_small_values:
2128-
fmt_str = '%% .%de' % self.digits
2129-
fmt_values = self._format_with(fmt_str)
2162+
# There is a special default string when we are fixed-width
2163+
# The default is otherwise to use str instead of a formatting string
2164+
if self.float_format is None and self.fixed_width:
2165+
float_format = '%% .%df' % self.digits
2166+
else:
2167+
float_format = self.float_format
21302168

2131-
return fmt_values
2169+
formatted_values = format_values_with(float_format)
21322170

2133-
def get_formatted_data(self):
2134-
"""Returns the array with its float values converted into strings using
2135-
the parameters given at initalisation.
2171+
if not self.fixed_width:
2172+
return formatted_values
21362173

2137-
Note: the method `.get_result()` does something similar, but with a
2138-
fixed-width output suitable for screen printing. The output here is not
2139-
fixed-width.
2140-
"""
2141-
values = self.values
2142-
mask = isnull(values)
2143-
2144-
# the following variable is to be applied on each value to format it
2145-
# according to the string containing the float format,
2146-
# self.float_format and the character to use as decimal separator,
2147-
# self.decimal
2148-
formatter = None
2149-
if self.float_format and self.decimal != '.':
2150-
formatter = lambda v: (
2151-
(self.float_format % v).replace('.', self.decimal, 1))
2152-
elif self.decimal != '.': # no float format
2153-
formatter = lambda v: str(v).replace('.', self.decimal, 1)
2154-
elif self.float_format: # no special decimal separator
2155-
formatter = lambda v: self.float_format % v
2156-
2157-
if formatter is None and not self.quoting:
2158-
values = values.astype(str)
2174+
# we need to convert to engineering format if some values are too small
2175+
# and would appear as 0, or if some values are too big and take too
2176+
# much space
2177+
2178+
if len(formatted_values) > 0:
2179+
maxlen = max(len(x) for x in formatted_values)
2180+
too_long = maxlen > self.digits + 6
21592181
else:
2160-
values = np.array(values, dtype='object')
2182+
too_long = False
21612183

2162-
values[mask] = self.na_rep
2163-
if formatter:
2164-
imask = (~mask).ravel()
2165-
values.flat[imask] = np.array([formatter(val)
2166-
for val in values.ravel()[imask]])
2184+
abs_vals = np.abs(self.values)
2185+
2186+
# this is pretty arbitrary for now
2187+
# large values: more than 8 characters including decimal symbol
2188+
# and first digit, hence > 1e6
2189+
has_large_values = (abs_vals > 1e6).any()
2190+
has_small_values = ((abs_vals < 10**(-self.digits)) &
2191+
(abs_vals > 0)).any()
2192+
2193+
if has_small_values or (too_long and has_large_values):
2194+
float_format = '%% .%de' % self.digits
2195+
formatted_values = format_values_with(float_format)
2196+
2197+
return formatted_values
2198+
2199+
def _format_strings(self):
2200+
# shortcut
2201+
if self.formatter is not None:
2202+
return [self.formatter(x) for x in self.values]
21672203

2168-
return values
2204+
return list(self.get_result_as_array())
21692205

21702206

21712207
class IntArrayFormatter(GenericArrayFormatter):

pandas/core/internals.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1380,8 +1380,9 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
13801380
from pandas.core.format import FloatArrayFormatter
13811381
formatter = FloatArrayFormatter(values, na_rep=na_rep,
13821382
float_format=float_format,
1383-
decimal=decimal, quoting=quoting)
1384-
return formatter.get_formatted_data()
1383+
decimal=decimal, quoting=quoting,
1384+
fixed_width=False)
1385+
return formatter.get_result_as_array()
13851386

13861387
def should_store(self, value):
13871388
# when inserting a column should not coerce integers to floats

pandas/indexes/numeric.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,9 @@ def _format_native_types(self, na_rep='', float_format=None, decimal='.',
289289
from pandas.core.format import FloatArrayFormatter
290290
formatter = FloatArrayFormatter(self.values, na_rep=na_rep,
291291
float_format=float_format,
292-
decimal=decimal, quoting=quoting)
293-
return formatter.get_formatted_data()
292+
decimal=decimal, quoting=quoting,
293+
fixed_width=False)
294+
return formatter.get_result_as_array()
294295

295296
def get_value(self, series, key):
296297
""" we always want to get an index value, never a value """

0 commit comments

Comments
 (0)