-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: extra leading space in to_string when index=False #29670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
7e461a1
1314059
8bcb313
0f1d700
7d96cd3
41bceb9
2e46bb4
fe63d83
af9d721
b9affd0
17941b6
e5fa13e
eaccb40
e027044
c63cc82
5e6a730
d75b4da
67d4ae8
fb88243
9b8b8f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -337,11 +337,17 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: | |
return fmt_index, have_header | ||
|
||
def _get_formatted_values(self) -> List[str]: | ||
if self.index: | ||
leading_space = "compat" | ||
else: | ||
leading_space = False | ||
|
||
return format_array( | ||
self.tr_series._values, | ||
None, | ||
float_format=self.float_format, | ||
na_rep=self.na_rep, | ||
leading_space=leading_space, | ||
) | ||
|
||
def to_string(self) -> str: | ||
|
@@ -937,13 +943,18 @@ def to_latex( | |
def _format_col(self, i: int) -> List[str]: | ||
frame = self.tr_frame | ||
formatter = self._get_formatter(i) | ||
if self.index: | ||
leading_space = "compat" | ||
else: | ||
leading_space = False | ||
return format_array( | ||
frame.iloc[:, i]._values, | ||
formatter, | ||
float_format=self.float_format, | ||
na_rep=self.na_rep, | ||
space=self.col_space, | ||
decimal=self.decimal, | ||
leading_space=leading_space, | ||
) | ||
|
||
def to_html( | ||
|
@@ -1095,7 +1106,7 @@ def format_array( | |
space: Optional[Union[str, int]] = None, | ||
justify: str = "right", | ||
decimal: str = ".", | ||
leading_space: Optional[bool] = None, | ||
leading_space: bool = "compat", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't a boolean. Can you run mypy on your changes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, yeah, I didn't see your comment when I committed my latest change. I've changed it already. |
||
) -> List[str]: | ||
""" | ||
Format an array for printing. | ||
|
@@ -1110,7 +1121,7 @@ def format_array( | |
space | ||
justify | ||
decimal | ||
leading_space : bool, optional | ||
leading_space : bool, default is 'compat' | ||
Whether the array should be formatted with a leading space. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment as the previous one here: what is the intended type of this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I didn't notice this, so I just copy-pasted the change from my previous PR. My bad, changed. |
||
When an array is formatted as a column of a Series or DataFrame, we do want | ||
the leading space to pad between columns. | ||
|
@@ -1176,7 +1187,7 @@ def __init__( | |
decimal: str = ".", | ||
quoting: Optional[int] = None, | ||
fixed_width: bool = True, | ||
leading_space: Optional[bool] = None, | ||
leading_space: bool = "compat", | ||
): | ||
self.values = values | ||
self.digits = digits | ||
|
@@ -1238,7 +1249,7 @@ def _format(x): | |
|
||
is_float_type = lib.map_infer(vals, is_float) & notna(vals) | ||
leading_space = self.leading_space | ||
if leading_space is None: | ||
if leading_space == "compat": | ||
leading_space = is_float_type.any() | ||
|
||
fmt_values = [] | ||
|
@@ -1377,8 +1388,12 @@ def format_values_with(float_format): | |
# The default is otherwise to use str instead of a formatting string | ||
if self.float_format is None: | ||
if self.fixed_width: | ||
if self.leading_space is not False: | ||
fmt_str = "{value: .{digits:d}f}" | ||
else: | ||
fmt_str = "{value:.{digits:d}f}" | ||
float_format = partial( | ||
"{value: .{digits:d}f}".format, digits=self.digits | ||
fmt_str.format, digits=self.digits | ||
) # type: Optional[float_format_type] | ||
else: | ||
float_format = self.float_format | ||
|
@@ -1411,7 +1426,11 @@ def format_values_with(float_format): | |
).any() | ||
|
||
if has_small_values or (too_long and has_large_values): | ||
float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) | ||
if self.leading_space is not False: | ||
fmt_str = "{value: .{digits:d}e}" | ||
else: | ||
fmt_str = "{value:.{digits:d}e}" | ||
float_format = partial(fmt_str.format, digits=self.digits) | ||
formatted_values = format_values_with(float_format) | ||
|
||
return formatted_values | ||
|
@@ -1426,7 +1445,11 @@ def _format_strings(self) -> List[str]: | |
|
||
class IntArrayFormatter(GenericArrayFormatter): | ||
def _format_strings(self) -> List[str]: | ||
formatter = self.formatter or (lambda x: "{x: d}".format(x=x)) | ||
if self.leading_space is False: | ||
fmt_str = "{x:d}" | ||
else: | ||
fmt_str = "{x: d}" | ||
formatter = self.formatter or (lambda x: fmt_str.format(x=x)) | ||
fmt_values = [formatter(x) for x in self.values] | ||
return fmt_values | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1503,11 +1503,11 @@ def test_to_string_no_index(self): | |
|
||
df_s = df.to_string(index=False) | ||
# Leading space is expected for positive numbers. | ||
expected = " x y z\n 11 33 AAA\n 22 -44 " | ||
expected = " x y z\n11 33 AAA\n22 -44 " | ||
assert df_s == expected | ||
|
||
df_s = df[["y", "x", "z"]].to_string(index=False) | ||
expected = " y x z\n 33 11 AAA\n-44 22 " | ||
expected = " y x z\n 33 11 AAA\n-44 22 " | ||
assert df_s == expected | ||
|
||
def test_to_string_line_width_no_index(self): | ||
|
@@ -1522,7 +1522,7 @@ def test_to_string_line_width_no_index(self): | |
df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) | ||
|
||
df_s = df.to_string(line_width=1, index=False) | ||
expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " | ||
expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " | ||
|
||
assert df_s == expected | ||
|
||
|
@@ -2219,7 +2219,7 @@ def test_to_string_without_index(self): | |
# GH 11729 Test index=False option | ||
s = Series([1, 2, 3, 4]) | ||
result = s.to_string(index=False) | ||
expected = " 1\n" + " 2\n" + " 3\n" + " 4" | ||
expected = "1\n" + "2\n" + "3\n" + "4" | ||
assert result == expected | ||
|
||
def test_unicode_name_in_footer(self): | ||
|
@@ -3272,3 +3272,43 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): | |
msg = "buf is not a file name and it has no write method" | ||
with pytest.raises(TypeError, match=msg): | ||
getattr(float_frame, method)(buf=object()) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"inputs, expected", | ||
[([" a", " b"], " a\n b"), ([".1", "1"], ".1\n 1"), (["10", "-10"], " 10\n-10")], | ||
) | ||
def test_to_string_index_false_corner_case(inputs, expected): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think these cases should just be added as params to your test method below. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, these were just some corner cases that were found during the previous PR. Will move. |
||
s = pd.Series(inputs).to_string(index=False) | ||
assert s == expected | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"input_array, expected", | ||
[ | ||
("a", "a"), | ||
(["a", "b"], "a\nb"), | ||
([1, "a"], "1\na"), | ||
(1, "1"), | ||
([0, -1], " 0\n-1"), | ||
(1.0, "1.0"), | ||
], | ||
) | ||
def test_format_remove_leading_space_series(input_array, expected): | ||
# GH: 24980 | ||
s = pd.Series(input_array).to_string(index=False) | ||
assert s == expected | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"input_array, expected", | ||
[ | ||
({"A": ["a"]}, "A\na"), | ||
({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), | ||
({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), | ||
], | ||
) | ||
def test_format_remove_leading_space_dataframe(input_array, expected): | ||
# GH: 24980 | ||
df = pd.DataFrame(input_array).to_string(index=False) | ||
assert df == expected |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like there was already some discussion around this here: https://github.com/pandas-dev/pandas/pull/25000/files#r252237505
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I think I addressed it already in this PR?