-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: float-like string, trailing 0 truncation #38759
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1310,7 +1310,9 @@ def _format(x): | |
tpl = " {v}" | ||
fmt_values.append(tpl.format(v=_format(v))) | ||
|
||
fmt_values = _trim_zeros_float(str_floats=fmt_values, decimal=".") | ||
fmt_values = _trim_zeros_float( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a larger issue to address first. these are not floats at all. these are object dtypes that are reprering. why are they hitting this path is the issue. we likley do not want to touch the formatters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there are likley getting inferred as floats and that is why the formatter is picked. this is incorrect. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the
which should hit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, sure but a string should not be truncated. that's where the issue is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i see the issue here. instead of _trim_zeros_float being a function that accepts a ndarray/list of strings, just have it process a single value, then call it when needed in the loop on L1299. this makes the code much simpler. i am not entirely sure why it was done this way. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the reason why it was done that way is that floats are formatted with fixed-width, e.g.
gives
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok that sounds fine, tests are pretty sensitive here so you can refactor. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the help here, this is much cleaner! |
||
str_floats=fmt_values, decimal=".", is_float_type=is_float_type | ||
) | ||
|
||
return fmt_values | ||
|
||
|
@@ -1828,35 +1830,44 @@ def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> List[s | |
|
||
|
||
def _trim_zeros_float( | ||
str_floats: Union[np.ndarray, List[str]], decimal: str = "." | ||
str_floats: Union[np.ndarray, List[str]], | ||
decimal: str = ".", | ||
is_float_type: Optional[np.ndarray] = None, | ||
) -> List[str]: | ||
""" | ||
Trims zeros, leaving just one before the decimal points if need be. | ||
""" | ||
trimmed = str_floats | ||
number_regex = re.compile(fr"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$") | ||
float_locs = ( | ||
is_float_type | ||
if is_float_type is not None | ||
else np.ones(len(trimmed), dtype=bool) | ||
) | ||
|
||
def is_number_with_decimal(x): | ||
return re.match(number_regex, x) is not None | ||
def is_number_with_decimal(x, i): | ||
return re.match(number_regex, x) is not None and float_locs[i] | ||
|
||
def should_trim(values: Union[np.ndarray, List[str]]) -> bool: | ||
""" | ||
Determine if an array of strings should be trimmed. | ||
|
||
Returns True if all numbers containing decimals (defined by the | ||
above regular expression) within the array end in a zero, otherwise | ||
returns False. | ||
above regular expression and the specification of float locations | ||
`float_locs`) within the array end in a zero, otherwise returns False. | ||
""" | ||
numbers = [x for x in values if is_number_with_decimal(x)] | ||
numbers = [x for i, x in enumerate(values) if is_number_with_decimal(x, i)] | ||
return len(numbers) > 0 and all(x.endswith("0") for x in numbers) | ||
|
||
while should_trim(trimmed): | ||
trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed] | ||
trimmed = [ | ||
x[:-1] if is_number_with_decimal(x, i) else x for i, x in enumerate(trimmed) | ||
] | ||
|
||
# leave one 0 after the decimal points if need be. | ||
result = [ | ||
x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x | ||
for x in trimmed | ||
x + "0" if is_number_with_decimal(x, i) and x.endswith(decimal) else x | ||
for i, x in enumerate(trimmed) | ||
] | ||
return result | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,6 +237,18 @@ def test_series_repr_nat(self): | |
) | ||
assert result == expected | ||
|
||
def test_series_repr_float_like_object_no_truncate(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you parametrize/combine these two tests using the parameters There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
series = Series(["3.50"]) | ||
result = repr(series) | ||
expected = "0 3.50\ndtype: object" | ||
assert result == expected | ||
|
||
def test_mixed_series_repr_float_like_object_no_truncate(self): | ||
series = Series([1.20, "1.00"]) | ||
result = repr(series) | ||
expected = "0 1.2\n1 1.00\ndtype: object" | ||
assert result == expected | ||
|
||
|
||
class TestCategoricalRepr: | ||
def test_categorical_repr_unicode(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add in "repr of" and "after the decimal" here for clarity that it is just a bug in regards to presentation (which is still important!).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these are not floats, rather strings in an
object
dtypeThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done