From 4817541398d910e0e4ea3dd47c92e0a7635d867b Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 28 Dec 2020 15:59:36 -0500 Subject: [PATCH 1/2] BUG: float-like string, trailing 0 truncation --- doc/source/whatsnew/v1.2.1.rst | 1 + pandas/io/formats/format.py | 31 +++++++++++++++++++++---------- pandas/tests/series/test_repr.py | 12 ++++++++++++ 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index c630fc26a93a2..c6f36938ddf6c 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Bug in float-like strings having trailing 0's truncated (:issue:`38708`) - - diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2620c562aefeb..4034854e749c3 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1310,7 +1310,9 @@ def _format(x): tpl = " {v}" fmt_values.append(tpl.format(v=_format(v))) - fmt_values = _trim_zeros_float(str_floats=fmt_values, decimal=".") + fmt_values = _trim_zeros_float( + str_floats=fmt_values, decimal=".", is_float_type=is_float_type + ) return fmt_values @@ -1828,35 +1830,44 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> List[s def _trim_zeros_float( - str_floats: Union[np.ndarray, List[str]], decimal: str = "." + str_floats: Union[np.ndarray, List[str]], + decimal: str = ".", + is_float_type: Optional[np.ndarray] = None, ) -> List[str]: """ Trims zeros, leaving just one before the decimal points if need be. """ trimmed = str_floats number_regex = re.compile(fr"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") + float_locs = ( + is_float_type + if is_float_type is not None + else np.ones(len(trimmed), dtype=bool) + ) - def is_number_with_decimal(x): - return re.match(number_regex, x) is not None + def is_number_with_decimal(x, i): + return re.match(number_regex, x) is not None and float_locs[i] def should_trim(values: Union[np.ndarray, List[str]]) -> bool: """ Determine if an array of strings should be trimmed. Returns True if all numbers containing decimals (defined by the - above regular expression) within the array end in a zero, otherwise - returns False. + above regular expression and the specification of float locations + `float_locs`) within the array end in a zero, otherwise returns False. """ - numbers = [x for x in values if is_number_with_decimal(x)] + numbers = [x for i, x in enumerate(values) if is_number_with_decimal(x, i)] return len(numbers) > 0 and all(x.endswith("0") for x in numbers) while should_trim(trimmed): - trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] + trimmed = [ + x[:-1] if is_number_with_decimal(x, i) else x for i, x in enumerate(trimmed) + ] # leave one 0 after the decimal points if need be. result = [ - x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x - for x in trimmed + x + "0" if is_number_with_decimal(x, i) and x.endswith(decimal) else x + for i, x in enumerate(trimmed) ] return result diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 026f6bd2d453d..9de81a4d71d52 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -237,6 +237,18 @@ def test_series_repr_nat(self): ) assert result == expected + def test_series_repr_float_like_object_no_truncate(self): + series = Series(["3.50"]) + result = repr(series) + expected = "0 3.50\ndtype: object" + assert result == expected + + def test_mixed_series_repr_float_like_object_no_truncate(self): + series = Series([1.20, "1.00"]) + result = repr(series) + expected = "0 1.2\n1 1.00\ndtype: object" + assert result == expected + class TestCategoricalRepr: def test_categorical_repr_unicode(self): From d1d2be18a9de4314d90ac2caa17cb035bf80e183 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Mon, 28 Dec 2020 18:44:09 -0500 Subject: [PATCH 2/2] Don't use _trim_zeros_float --- doc/source/whatsnew/v1.2.1.rst | 2 +- pandas/io/formats/format.py | 49 ++++++++++++++++---------------- pandas/tests/series/test_repr.py | 24 ++++++++++------ 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 0c14152a49bb3..4ca84b53cde2e 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -15,7 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - The deprecated attributes ``_AXIS_NAMES`` and ``_AXIS_NUMBERS`` of :class:`DataFrame` and :class:`Series` will no longer show up in ``dir`` or ``inspect.getmembers`` calls (:issue:`38740`) -- Bug in float-like strings having trailing 0's truncated (:issue:`38708`) +- Bug in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4034854e749c3..b3c2411304f6b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1300,7 +1300,7 @@ def _format(x): if not is_float_type[i] and leading_space: fmt_values.append(f" {_format(v)}") elif is_float_type[i]: - fmt_values.append(float_format(v)) + fmt_values.append(_trim_zeros_single_float(float_format(v))) else: if leading_space is False: # False specifically, so that the default is @@ -1310,10 +1310,6 @@ def _format(x): tpl = " {v}" fmt_values.append(tpl.format(v=_format(v))) - fmt_values = _trim_zeros_float( - str_floats=fmt_values, decimal=".", is_float_type=is_float_type - ) - return fmt_values @@ -1829,45 +1825,50 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> List[s return padded +def _trim_zeros_single_float(str_float: str) -> str: + """ + Trims trailing zeros after a decimal point, + leaving just one if necessary. + """ + str_float = str_float.rstrip("0") + if str_float.endswith("."): + str_float += "0" + + return str_float + + def _trim_zeros_float( - str_floats: Union[np.ndarray, List[str]], - decimal: str = ".", - is_float_type: Optional[np.ndarray] = None, + str_floats: Union[np.ndarray, List[str]], decimal: str = "." ) -> List[str]: """ - Trims zeros, leaving just one before the decimal points if need be. + Trims the maximum number of trailing zeros equally from + all numbers containing decimals, leaving just one if + necessary. """ trimmed = str_floats number_regex = re.compile(fr"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") - float_locs = ( - is_float_type - if is_float_type is not None - else np.ones(len(trimmed), dtype=bool) - ) - def is_number_with_decimal(x, i): - return re.match(number_regex, x) is not None and float_locs[i] + def is_number_with_decimal(x): + return re.match(number_regex, x) is not None def should_trim(values: Union[np.ndarray, List[str]]) -> bool: """ Determine if an array of strings should be trimmed. Returns True if all numbers containing decimals (defined by the - above regular expression and the specification of float locations - `float_locs`) within the array end in a zero, otherwise returns False. + above regular expression) within the array end in a zero, otherwise + returns False. """ - numbers = [x for i, x in enumerate(values) if is_number_with_decimal(x, i)] + numbers = [x for x in values if is_number_with_decimal(x)] return len(numbers) > 0 and all(x.endswith("0") for x in numbers) while should_trim(trimmed): - trimmed = [ - x[:-1] if is_number_with_decimal(x, i) else x for i, x in enumerate(trimmed) - ] + trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] # leave one 0 after the decimal points if need be. result = [ - x + "0" if is_number_with_decimal(x, i) and x.endswith(decimal) else x - for i, x in enumerate(trimmed) + x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x + for x in trimmed ] return result diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 9de81a4d71d52..4ae3b8ddf3920 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -237,16 +237,22 @@ def test_series_repr_nat(self): ) assert result == expected - def test_series_repr_float_like_object_no_truncate(self): - series = Series(["3.50"]) + @pytest.mark.parametrize( + "data, expected", + [ + (["3.50"], "0 3.50\ndtype: object"), + ([1.20, "1.00"], "0 1.2\n1 1.00\ndtype: object"), + ([np.nan], "0 NaN\ndtype: float64"), + ([None], "0 None\ndtype: object"), + (["3.50", np.nan], "0 3.50\n1 NaN\ndtype: object"), + ([3.50, np.nan], "0 3.5\n1 NaN\ndtype: float64"), + ([3.50, np.nan, "3.50"], "0 3.5\n1 NaN\n2 3.50\ndtype: object"), + ([3.50, None, "3.50"], "0 3.5\n1 None\n2 3.50\ndtype: object"), + ], + ) + def test_repr_str_float_truncation(self, data, expected): + series = Series(data) result = repr(series) - expected = "0 3.50\ndtype: object" - assert result == expected - - def test_mixed_series_repr_float_like_object_no_truncate(self): - series = Series([1.20, "1.00"]) - result = repr(series) - expected = "0 1.2\n1 1.00\ndtype: object" assert result == expected