diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 330510c2c883c..17ad81ed5f42b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -940,6 +940,7 @@ I/O - Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) - Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) - :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. (:issue:`29857`) +- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) - Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`) - :func:`read_excel` now accepts binary data (:issue:`15914`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 5c4b7d103d271..fd366b2c28bf8 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -341,11 +341,18 @@ def _get_formatted_index(self) -> Tuple[List[str], bool]: return fmt_index, have_header def _get_formatted_values(self) -> List[str]: + leading_space: Union[bool, str] + if self.index: + leading_space = "compat" + else: + leading_space = False + return format_array( self.tr_series._values, None, float_format=self.float_format, na_rep=self.na_rep, + leading_space=leading_space, ) def to_string(self) -> str: @@ -947,6 +954,12 @@ def to_latex( def _format_col(self, i: int) -> List[str]: frame = self.tr_frame formatter = self._get_formatter(i) + + leading_space: Union[bool, str] + if self.index: + leading_space = "compat" + else: + leading_space = False return format_array( frame.iloc[:, i]._values, formatter, @@ -954,6 +967,7 @@ def _format_col(self, i: int) -> List[str]: na_rep=self.na_rep, space=self.col_space, decimal=self.decimal, + leading_space=leading_space, ) def to_html( @@ -1105,7 +1119,7 @@ def format_array( space: Optional[Union[str, int]] = None, justify: str = "right", decimal: str = ".", - leading_space: Optional[bool] = None, + leading_space: Union[str, bool] = "compat", ) -> List[str]: """ Format an array for printing. @@ -1120,7 +1134,7 @@ def format_array( space justify decimal - leading_space : bool, optional + leading_space : bool or 'compat', default is 'compat' Whether the array should be formatted with a leading space. When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. @@ -1187,7 +1201,7 @@ def __init__( decimal: str = ".", quoting: Optional[int] = None, fixed_width: bool = True, - leading_space: Optional[bool] = None, + leading_space: Union[str, bool] = "compat", ): self.values = values self.digits = digits @@ -1251,7 +1265,7 @@ def _format(x): is_float_type = lib.map_infer(vals, is_float) & notna(vals) leading_space = self.leading_space - if leading_space is None: + if leading_space == "compat": leading_space = is_float_type.any() fmt_values = [] @@ -1391,9 +1405,11 @@ def format_values_with(float_format): float_format: Optional[float_format_type] if self.float_format is None: if self.fixed_width: - float_format = partial( - "{value: .{digits:d}f}".format, digits=self.digits - ) + if self.leading_space is not False: + fmt_str = "{value: .{digits:d}f}" + else: + fmt_str = "{value:.{digits:d}f}" + float_format = partial(fmt_str.format, digits=self.digits) else: float_format = self.float_format else: @@ -1425,7 +1441,11 @@ def format_values_with(float_format): ).any() if has_small_values or (too_long and has_large_values): - float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) + if self.leading_space is not False: + fmt_str = "{value: .{digits:d}e}" + else: + fmt_str = "{value:.{digits:d}e}" + float_format = partial(fmt_str.format, digits=self.digits) formatted_values = format_values_with(float_format) return formatted_values @@ -1440,7 +1460,11 @@ def _format_strings(self) -> List[str]: class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self) -> List[str]: - formatter = self.formatter or (lambda x: "{x: d}".format(x=x)) + if self.leading_space is False: + fmt_str = "{x:d}" + else: + fmt_str = "{x: d}" + formatter = self.formatter or (lambda x: fmt_str.format(x=x)) fmt_values = [formatter(x) for x in self.values] return fmt_values diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index b70a006ca3603..7a29dcbee435a 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1508,11 +1508,11 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) # Leading space is expected for positive numbers. - expected = " x y z\n 11 33 AAA\n 22 -44 " + expected = " x y z\n11 33 AAA\n22 -44 " assert df_s == expected df_s = df[["y", "x", "z"]].to_string(index=False) - expected = " y x z\n 33 11 AAA\n-44 22 " + expected = " y x z\n 33 11 AAA\n-44 22 " assert df_s == expected def test_to_string_line_width_no_index(self): @@ -1527,7 +1527,7 @@ def test_to_string_line_width_no_index(self): df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected @@ -2222,7 +2222,7 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = " 1\n" + " 2\n" + " 3\n" + " 4" + expected = "1\n" + "2\n" + "3\n" + "4" assert result == expected def test_unicode_name_in_footer(self): @@ -3277,3 +3277,37 @@ def test_filepath_or_buffer_bad_arg_raises(float_frame, method): msg = "buf is not a file name and it has no write method" with pytest.raises(TypeError, match=msg): getattr(float_frame, method)(buf=object()) + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, "1.0"), + ([" a", " b"], " a\n b"), + ([".1", "1"], ".1\n 1"), + (["10", "-10"], " 10\n-10"), + ], +) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = pd.Series(input_array).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize( + "input_array, expected", + [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"), + ], +) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = pd.DataFrame(input_array).to_string(index=False) + assert df == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index bd681032f155d..3e73f3c0e972b 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -50,10 +50,10 @@ def test_to_latex(self, float_frame): withoutindex_result = df.to_latex(index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - a & b \\ + a & b \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ @@ -413,7 +413,7 @@ def test_to_latex_longtable(self): withoutindex_result = df.to_latex(index=False, longtable=True) withoutindex_expected = r"""\begin{longtable}{rl} \toprule - a & b \\ + a & b \\ \midrule \endhead \midrule @@ -423,8 +423,8 @@ def test_to_latex_longtable(self): \bottomrule \endlastfoot - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \end{longtable} """ @@ -614,8 +614,8 @@ def test_to_latex_no_header(self): withoutindex_result = df.to_latex(index=False, header=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - 1 & b1 \\ - 2 & b2 \\ +1 & b1 \\ +2 & b2 \\ \bottomrule \end{tabular} """ @@ -641,10 +641,10 @@ def test_to_latex_specified_header(self): withoutindex_result = df.to_latex(header=["AA", "BB"], index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule -AA & BB \\ +AA & BB \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """