Skip to content

BUG: extra leading space in to_string when index=False #29670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ I/O
- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`)
- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`)
-

Plotting
Expand Down
37 changes: 30 additions & 7 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,11 +337,17 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]:
return fmt_index, have_header

def _get_formatted_values(self) -> List[str]:
if self.index:
leading_space = "compat"
else:
leading_space = False

return format_array(
self.tr_series._values,
None,
float_format=self.float_format,
na_rep=self.na_rep,
leading_space=leading_space,
)

def to_string(self) -> str:
Expand Down Expand Up @@ -937,13 +943,18 @@ def to_latex(
def _format_col(self, i: int) -> List[str]:
frame = self.tr_frame
formatter = self._get_formatter(i)
if self.index:
leading_space = "compat"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like there was already some discussion around this here https://github.com/pandas-dev/pandas/pull/25000/files#r252237505 -

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think I already addressed that in this PR?

else:
leading_space = False
return format_array(
frame.iloc[:, i]._values,
formatter,
float_format=self.float_format,
na_rep=self.na_rep,
space=self.col_space,
decimal=self.decimal,
leading_space=leading_space,
)

def to_html(
Expand Down Expand Up @@ -1095,7 +1106,7 @@ def format_array(
space: Optional[Union[str, int]] = None,
justify: str = "right",
decimal: str = ".",
leading_space: Optional[bool] = None,
leading_space: bool = "compat",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't a boolean, is it? Can you run mypy on your changes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, yeah, I didn't see your comment when I committed my latest change. I have already changed it.

) -> List[str]:
"""
Format an array for printing.
Expand All @@ -1110,7 +1121,7 @@ def format_array(
space
justify
decimal
leading_space : bool, optional
leading_space : bool, default is 'compat'
Whether the array should be formatted with a leading space.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as the previous one here: what is the intended type of this?

Copy link
Member Author

@charlesdong1991 charlesdong1991 Nov 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I didn't notice this, so I just copy-pasted the change from my previous PR. My bad, changed.

When an array as a column of a Series or DataFrame, we do want
the leading space to pad between columns.
Expand Down Expand Up @@ -1176,7 +1187,7 @@ def __init__(
decimal: str = ".",
quoting: Optional[int] = None,
fixed_width: bool = True,
leading_space: Optional[bool] = None,
leading_space: bool = "compat",
):
self.values = values
self.digits = digits
Expand Down Expand Up @@ -1238,7 +1249,7 @@ def _format(x):

is_float_type = lib.map_infer(vals, is_float) & notna(vals)
leading_space = self.leading_space
if leading_space is None:
if leading_space == "compat":
leading_space = is_float_type.any()

fmt_values = []
Expand Down Expand Up @@ -1377,8 +1388,12 @@ def format_values_with(float_format):
# The default is otherwise to use str instead of a formatting string
if self.float_format is None:
if self.fixed_width:
if self.leading_space is not False:
fmt_str = "{value: .{digits:d}f}"
else:
fmt_str = "{value:.{digits:d}f}"
float_format = partial(
"{value: .{digits:d}f}".format, digits=self.digits
fmt_str.format, digits=self.digits
) # type: Optional[float_format_type]
else:
float_format = self.float_format
Expand Down Expand Up @@ -1411,7 +1426,11 @@ def format_values_with(float_format):
).any()

if has_small_values or (too_long and has_large_values):
float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
if self.leading_space is not False:
fmt_str = "{value: .{digits:d}e}"
else:
fmt_str = "{value:.{digits:d}e}"
float_format = partial(fmt_str.format, digits=self.digits)
formatted_values = format_values_with(float_format)

return formatted_values
Expand All @@ -1426,7 +1445,11 @@ def _format_strings(self) -> List[str]:

class IntArrayFormatter(GenericArrayFormatter):
def _format_strings(self) -> List[str]:
formatter = self.formatter or (lambda x: "{x: d}".format(x=x))
if self.leading_space is False:
fmt_str = "{x:d}"
else:
fmt_str = "{x: d}"
formatter = self.formatter or (lambda x: fmt_str.format(x=x))
fmt_values = [formatter(x) for x in self.values]
return fmt_values

Expand Down
48 changes: 44 additions & 4 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1503,11 +1503,11 @@ def test_to_string_no_index(self):

df_s = df.to_string(index=False)
# Leading space is expected for positive numbers.
expected = " x y z\n 11 33 AAA\n 22 -44 "
expected = " x y z\n11 33 AAA\n22 -44 "
assert df_s == expected

df_s = df[["y", "x", "z"]].to_string(index=False)
expected = " y x z\n 33 11 AAA\n-44 22 "
expected = " y x z\n 33 11 AAA\n-44 22 "
assert df_s == expected

def test_to_string_line_width_no_index(self):
Expand All @@ -1522,7 +1522,7 @@ def test_to_string_line_width_no_index(self):
df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]})

df_s = df.to_string(line_width=1, index=False)
expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 "
expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 "

assert df_s == expected

Expand Down Expand Up @@ -2219,7 +2219,7 @@ def test_to_string_without_index(self):
# GH 11729 Test index=False option
s = Series([1, 2, 3, 4])
result = s.to_string(index=False)
expected = " 1\n" + " 2\n" + " 3\n" + " 4"
expected = "1\n" + "2\n" + "3\n" + "4"
assert result == expected

def test_unicode_name_in_footer(self):
Expand Down Expand Up @@ -3272,3 +3272,43 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
msg = "buf is not a file name and it has no write method"
with pytest.raises(TypeError, match=msg):
getattr(float_frame, method)(buf=object())


@pytest.mark.parametrize(
"inputs, expected",
[([" a", " b"], " a\n b"), ([".1", "1"], ".1\n 1"), (["10", "-10"], " 10\n-10")],
)
def test_to_string_index_false_corner_case(inputs, expected):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these cases should just be added as params to your test method below, test_format_remove_leading_space_series.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, these were just some corner cases that were found during the previous PR. Will move.

s = pd.Series(inputs).to_string(index=False)
assert s == expected


# Checks the exact text produced by Series.to_string(index=False).
# Each case pairs a value passed to pd.Series with the full expected string;
# rows are joined by "\n".
@pytest.mark.parametrize(
"input_array, expected",
[
("a", "a"),
(["a", "b"], "a\nb"),
([1, "a"], "1\na"),
(1, "1"),
# A leading space is still expected on " 0" when a negative value is present.
([0, -1], " 0\n-1"),
(1.0, "1.0"),
],
)
def test_format_remove_leading_space_series(input_array, expected):
# GH: 24980
# Render without the index and compare against the expected text verbatim.
s = pd.Series(input_array).to_string(index=False)
assert s == expected


# Checks the exact text produced by DataFrame.to_string(index=False).
# Each case pairs a dict passed to pd.DataFrame with the full expected string
# (header row first, rows joined by "\n").
# NOTE(review): runs of spaces inside these expected strings may have been
# collapsed when this page was extracted -- confirm the column padding against
# the repository before relying on them.
@pytest.mark.parametrize(
"input_array, expected",
[
({"A": ["a"]}, "A\na"),
({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
],
)
def test_format_remove_leading_space_dataframe(input_array, expected):
# GH: 24980
# Render without the index and compare against the expected text verbatim.
df = pd.DataFrame(input_array).to_string(index=False)
assert df == expected
22 changes: 11 additions & 11 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame):
withoutindex_result = df.to_latex(index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
a & b \\
a & b \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down Expand Up @@ -413,7 +413,7 @@ def test_to_latex_longtable(self):
withoutindex_result = df.to_latex(index=False, longtable=True)
withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
a & b \\
a & b \\
\midrule
\endhead
\midrule
Expand All @@ -423,8 +423,8 @@ def test_to_latex_longtable(self):

\bottomrule
\endlastfoot
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\end{longtable}
"""

Expand Down Expand Up @@ -614,8 +614,8 @@ def test_to_latex_no_header(self):
withoutindex_result = df.to_latex(index=False, header=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand All @@ -641,10 +641,10 @@ def test_to_latex_specified_header(self):
withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down