Skip to content

BUG: extra leading space in to_string when index=False #36094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,6 @@ Performance improvements

Bug fixes
~~~~~~~~~
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-

Categorical
^^^^^^^^^^^
Expand Down Expand Up @@ -257,7 +255,7 @@ Conversion

Strings
^^^^^^^

- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also that this would affect DataFrame.to_latex and DataFrame.to_string

Copy link
Contributor Author

@onshek onshek Sep 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also that this would affect DataFrame.to_latex and DataFrame.to_string

"Also" what? Do you mean adding a note here, like:

Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` and this would affect DataFrame.to_latex and DataFrame.to_string (:issue:`24980`)

Copy link
Member

@charlesdong1991 charlesdong1991 Sep 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`)
- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)

ping if you could make this change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated :)

-
-

Expand Down Expand Up @@ -313,6 +311,7 @@ Groupby/resample/rolling
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
- Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
- Bug in :meth:`DataFrame.groupby` not always maintaining the column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
-

Reshaping
Expand Down
26 changes: 19 additions & 7 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]:
None,
float_format=self.float_format,
na_rep=self.na_rep,
leading_space=self.index,
)

def to_string(self) -> str:
Expand Down Expand Up @@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]:
na_rep=self.na_rep,
space=self.col_space.get(frame.columns[i]),
decimal=self.decimal,
leading_space=self.index,
)

def to_html(
Expand Down Expand Up @@ -1111,7 +1113,7 @@ def format_array(
space: Optional[Union[str, int]] = None,
justify: str = "right",
decimal: str = ".",
leading_space: Optional[bool] = None,
leading_space: Optional[bool] = True,
quoting: Optional[int] = None,
) -> List[str]:
"""
Expand Down Expand Up @@ -1194,7 +1196,7 @@ def __init__(
decimal: str = ".",
quoting: Optional[int] = None,
fixed_width: bool = True,
leading_space: Optional[bool] = None,
leading_space: Optional[bool] = True,
):
self.values = values
self.digits = digits
Expand Down Expand Up @@ -1395,9 +1397,11 @@ def format_values_with(float_format):
float_format: Optional[FloatFormatType]
if self.float_format is None:
if self.fixed_width:
float_format = partial(
"{value: .{digits:d}f}".format, digits=self.digits
)
if self.leading_space is True:
fmt_str = "{value: .{digits:d}f}"
else:
fmt_str = "{value:.{digits:d}f}"
float_format = partial(fmt_str.format, digits=self.digits)
else:
float_format = self.float_format
else:
Expand Down Expand Up @@ -1429,7 +1433,11 @@ def format_values_with(float_format):
).any()

if has_small_values or (too_long and has_large_values):
float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
if self.leading_space is True:
fmt_str = "{value: .{digits:d}e}"
else:
fmt_str = "{value:.{digits:d}e}"
float_format = partial(fmt_str.format, digits=self.digits)
formatted_values = format_values_with(float_format)

return formatted_values
Expand All @@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]:

class IntArrayFormatter(GenericArrayFormatter):
def _format_strings(self) -> List[str]:
formatter = self.formatter or (lambda x: f"{x: d}")
if self.leading_space is False:
formatter_str = lambda x: f"{x:d}".format(x=x)
else:
formatter_str = lambda x: f"{x: d}".format(x=x)
formatter = self.formatter or formatter_str
fmt_values = [formatter(x) for x in self.values]
return fmt_values

Expand Down
42 changes: 38 additions & 4 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1546,11 +1546,11 @@ def test_to_string_no_index(self):

df_s = df.to_string(index=False)
# Leading space is expected for positive numbers.
expected = " x y z\n 11 33 AAA\n 22 -44 "
expected = " x y z\n11 33 AAA\n22 -44 "
assert df_s == expected

df_s = df[["y", "x", "z"]].to_string(index=False)
expected = " y x z\n 33 11 AAA\n-44 22 "
expected = " y x z\n 33 11 AAA\n-44 22 "
assert df_s == expected

def test_to_string_line_width_no_index(self):
Expand All @@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self):
df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]})

df_s = df.to_string(line_width=1, index=False)
expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 "
expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 "

assert df_s == expected

Expand Down Expand Up @@ -2269,7 +2269,7 @@ def test_to_string_without_index(self):
# GH 11729 Test index=False option
s = Series([1, 2, 3, 4])
result = s.to_string(index=False)
expected = " 1\n" + " 2\n" + " 3\n" + " 4"
expected = "1\n" + "2\n" + "3\n" + "4"
assert result == expected

def test_unicode_name_in_footer(self):
Expand Down Expand Up @@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
msg = "buf is not a file name and it has no write method"
with pytest.raises(TypeError, match=msg):
getattr(float_frame, method)(buf=object())


@pytest.mark.parametrize(
"input_array, expected",
[
("a", "a"),
(["a", "b"], "a\nb"),
([1, "a"], "1\na"),
(1, "1"),
([0, -1], " 0\n-1"),
(1.0, "1.0"),
([" a", " b"], " a\n b"),
([".1", "1"], ".1\n 1"),
(["10", "-10"], " 10\n-10"),
],
)
def test_format_remove_leading_space_series(input_array, expected):
# GH: 24980
s = pd.Series(input_array).to_string(index=False)
assert s == expected


@pytest.mark.parametrize(
"input_array, expected",
[
({"A": ["a"]}, "A\na"),
({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
],
)
def test_format_remove_leading_space_dataframe(input_array, expected):
# GH: 24980
df = pd.DataFrame(input_array).to_string(index=False)
assert df == expected
22 changes: 11 additions & 11 deletions pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame):
withoutindex_result = df.to_latex(index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
a & b \\
a & b \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down Expand Up @@ -413,7 +413,7 @@ def test_to_latex_longtable(self):
withoutindex_result = df.to_latex(index=False, longtable=True)
withoutindex_expected = r"""\begin{longtable}{rl}
\toprule
a & b \\
a & b \\
\midrule
\endhead
\midrule
Expand All @@ -423,8 +423,8 @@ def test_to_latex_longtable(self):

\bottomrule
\endlastfoot
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\end{longtable}
"""

Expand Down Expand Up @@ -663,8 +663,8 @@ def test_to_latex_no_header(self):
withoutindex_result = df.to_latex(index=False, header=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand All @@ -690,10 +690,10 @@ def test_to_latex_specified_header(self):
withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
withoutindex_expected = r"""\begin{tabular}{rl}
\toprule
AA & BB \\
AA & BB \\
\midrule
1 & b1 \\
2 & b2 \\
1 & b1 \\
2 & b2 \\
\bottomrule
\end{tabular}
"""
Expand Down