Skip to content

Commit aca77f7

Browse files
authored
BUG: extra leading space in to_string when index=False (#36094)
1 parent ba552ec commit aca77f7

File tree

4 files changed

+71
-26
lines changed

4 files changed

+71
-26
lines changed

doc/source/whatsnew/v1.2.0.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,6 @@ Performance improvements
214214

215215
Bug fixes
216216
~~~~~~~~~
217-
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
218-
-
219217

220218
Categorical
221219
^^^^^^^^^^^
@@ -257,7 +255,7 @@ Conversion
257255

258256
Strings
259257
^^^^^^^
260-
258+
- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string`, and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
261259
-
262260
-
263261

@@ -315,6 +313,7 @@ Groupby/resample/rolling
315313
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
316314
- Bug in :meth:`DataFrameGroupBy.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
317315
- Bug in :meth:`DataFrame.groupby` where the column index name was not always maintained for ``any``, ``all``, ``bfill``, ``ffill``, and ``shift`` (:issue:`29764`)
316+
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
318317
-
319318

320319
Reshaping

pandas/io/formats/format.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ def _get_formatted_values(self) -> List[str]:
345345
None,
346346
float_format=self.float_format,
347347
na_rep=self.na_rep,
348+
leading_space=self.index,
348349
)
349350

350351
def to_string(self) -> str:
@@ -960,6 +961,7 @@ def _format_col(self, i: int) -> List[str]:
960961
na_rep=self.na_rep,
961962
space=self.col_space.get(frame.columns[i]),
962963
decimal=self.decimal,
964+
leading_space=self.index,
963965
)
964966

965967
def to_html(
@@ -1111,7 +1113,7 @@ def format_array(
11111113
space: Optional[Union[str, int]] = None,
11121114
justify: str = "right",
11131115
decimal: str = ".",
1114-
leading_space: Optional[bool] = None,
1116+
leading_space: Optional[bool] = True,
11151117
quoting: Optional[int] = None,
11161118
) -> List[str]:
11171119
"""
@@ -1127,7 +1129,7 @@ def format_array(
11271129
space
11281130
justify
11291131
decimal
1130-
leading_space : bool, optional
1132+
leading_space : bool, optional, default True
11311133
Whether the array should be formatted with a leading space.
11321134
When an array is formatted as a column of a Series or DataFrame, we do want
11331135
the leading space to pad between columns.
@@ -1194,7 +1196,7 @@ def __init__(
11941196
decimal: str = ".",
11951197
quoting: Optional[int] = None,
11961198
fixed_width: bool = True,
1197-
leading_space: Optional[bool] = None,
1199+
leading_space: Optional[bool] = True,
11981200
):
11991201
self.values = values
12001202
self.digits = digits
@@ -1395,9 +1397,11 @@ def format_values_with(float_format):
13951397
float_format: Optional[FloatFormatType]
13961398
if self.float_format is None:
13971399
if self.fixed_width:
1398-
float_format = partial(
1399-
"{value: .{digits:d}f}".format, digits=self.digits
1400-
)
1400+
if self.leading_space is True:
1401+
fmt_str = "{value: .{digits:d}f}"
1402+
else:
1403+
fmt_str = "{value:.{digits:d}f}"
1404+
float_format = partial(fmt_str.format, digits=self.digits)
14011405
else:
14021406
float_format = self.float_format
14031407
else:
@@ -1429,7 +1433,11 @@ def format_values_with(float_format):
14291433
).any()
14301434

14311435
if has_small_values or (too_long and has_large_values):
1432-
float_format = partial("{value: .{digits:d}e}".format, digits=self.digits)
1436+
if self.leading_space is True:
1437+
fmt_str = "{value: .{digits:d}e}"
1438+
else:
1439+
fmt_str = "{value:.{digits:d}e}"
1440+
float_format = partial(fmt_str.format, digits=self.digits)
14331441
formatted_values = format_values_with(float_format)
14341442

14351443
return formatted_values
@@ -1444,7 +1452,11 @@ def _format_strings(self) -> List[str]:
14441452

14451453
class IntArrayFormatter(GenericArrayFormatter):
14461454
def _format_strings(self) -> List[str]:
1447-
formatter = self.formatter or (lambda x: f"{x: d}")
1455+
if self.leading_space is False:
1456+
formatter_str = lambda x: f"{x:d}".format(x=x)
1457+
else:
1458+
formatter_str = lambda x: f"{x: d}".format(x=x)
1459+
formatter = self.formatter or formatter_str
14481460
fmt_values = [formatter(x) for x in self.values]
14491461
return fmt_values
14501462

pandas/tests/io/formats/test_format.py

+38-4
Original file line numberDiff line numberDiff line change
@@ -1546,11 +1546,11 @@ def test_to_string_no_index(self):
15461546

15471547
df_s = df.to_string(index=False)
15481548
# Leading space is expected for positive numbers.
1549-
expected = " x y z\n 11 33 AAA\n 22 -44 "
1549+
expected = " x y z\n11 33 AAA\n22 -44 "
15501550
assert df_s == expected
15511551

15521552
df_s = df[["y", "x", "z"]].to_string(index=False)
1553-
expected = " y x z\n 33 11 AAA\n-44 22 "
1553+
expected = " y x z\n 33 11 AAA\n-44 22 "
15541554
assert df_s == expected
15551555

15561556
def test_to_string_line_width_no_index(self):
@@ -1565,7 +1565,7 @@ def test_to_string_line_width_no_index(self):
15651565
df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]})
15661566

15671567
df_s = df.to_string(line_width=1, index=False)
1568-
expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 "
1568+
expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 "
15691569

15701570
assert df_s == expected
15711571

@@ -2269,7 +2269,7 @@ def test_to_string_without_index(self):
22692269
# GH 11729 Test index=False option
22702270
s = Series([1, 2, 3, 4])
22712271
result = s.to_string(index=False)
2272-
expected = " 1\n" + " 2\n" + " 3\n" + " 4"
2272+
expected = "1\n" + "2\n" + "3\n" + "4"
22732273
assert result == expected
22742274

22752275
def test_unicode_name_in_footer(self):
@@ -3391,3 +3391,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method):
33913391
msg = "buf is not a file name and it has no write method"
33923392
with pytest.raises(TypeError, match=msg):
33933393
getattr(float_frame, method)(buf=object())
3394+
3395+
3396+
@pytest.mark.parametrize(
3397+
"input_array, expected",
3398+
[
3399+
("a", "a"),
3400+
(["a", "b"], "a\nb"),
3401+
([1, "a"], "1\na"),
3402+
(1, "1"),
3403+
([0, -1], " 0\n-1"),
3404+
(1.0, "1.0"),
3405+
([" a", " b"], " a\n b"),
3406+
([".1", "1"], ".1\n 1"),
3407+
(["10", "-10"], " 10\n-10"),
3408+
],
3409+
)
3410+
def test_format_remove_leading_space_series(input_array, expected):
3411+
# GH: 24980
3412+
s = pd.Series(input_array).to_string(index=False)
3413+
assert s == expected
3414+
3415+
3416+
@pytest.mark.parametrize(
3417+
"input_array, expected",
3418+
[
3419+
({"A": ["a"]}, "A\na"),
3420+
({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
3421+
({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
3422+
],
3423+
)
3424+
def test_format_remove_leading_space_dataframe(input_array, expected):
3425+
# GH: 24980
3426+
df = pd.DataFrame(input_array).to_string(index=False)
3427+
assert df == expected

pandas/tests/io/formats/test_to_latex.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,10 @@ def test_to_latex(self, float_frame):
5050
withoutindex_result = df.to_latex(index=False)
5151
withoutindex_expected = r"""\begin{tabular}{rl}
5252
\toprule
53-
a & b \\
53+
a & b \\
5454
\midrule
55-
1 & b1 \\
56-
2 & b2 \\
55+
1 & b1 \\
56+
2 & b2 \\
5757
\bottomrule
5858
\end{tabular}
5959
"""
@@ -413,7 +413,7 @@ def test_to_latex_longtable(self):
413413
withoutindex_result = df.to_latex(index=False, longtable=True)
414414
withoutindex_expected = r"""\begin{longtable}{rl}
415415
\toprule
416-
a & b \\
416+
a & b \\
417417
\midrule
418418
\endhead
419419
\midrule
@@ -423,8 +423,8 @@ def test_to_latex_longtable(self):
423423
424424
\bottomrule
425425
\endlastfoot
426-
1 & b1 \\
427-
2 & b2 \\
426+
1 & b1 \\
427+
2 & b2 \\
428428
\end{longtable}
429429
"""
430430

@@ -663,8 +663,8 @@ def test_to_latex_no_header(self):
663663
withoutindex_result = df.to_latex(index=False, header=False)
664664
withoutindex_expected = r"""\begin{tabular}{rl}
665665
\toprule
666-
1 & b1 \\
667-
2 & b2 \\
666+
1 & b1 \\
667+
2 & b2 \\
668668
\bottomrule
669669
\end{tabular}
670670
"""
@@ -690,10 +690,10 @@ def test_to_latex_specified_header(self):
690690
withoutindex_result = df.to_latex(header=["AA", "BB"], index=False)
691691
withoutindex_expected = r"""\begin{tabular}{rl}
692692
\toprule
693-
AA & BB \\
693+
AA & BB \\
694694
\midrule
695-
1 & b1 \\
696-
2 & b2 \\
695+
1 & b1 \\
696+
2 & b2 \\
697697
\bottomrule
698698
\end{tabular}
699699
"""

0 commit comments

Comments
 (0)