From 5a6b8679cf1b3a4b510fa4060c06f95aae75a05a Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Wed, 12 Jun 2019 21:47:51 +0200 Subject: [PATCH] merge master --- doc/source/whatsnew/v0.25.0.rst | 2 + pandas/core/generic.py | 6 +-- pandas/io/formats/format.py | 44 ++++++++++++++++----- pandas/tests/io/formats/test_format.py | 50 ++++++++++++++++++++---- pandas/tests/io/formats/test_to_latex.py | 22 +++++------ 5 files changed, 93 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 76ee21b4c9a50..0203392d9fc5a 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -647,6 +647,7 @@ I/O - Bug in :func:`DataFrame.to_html()` where values were truncated using display options instead of outputting the full content (:issue:`17004`) - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) +- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`) - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) @@ -671,6 +672,7 @@ I/O - :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) - Bug while selecting from :class:`HDFStore` with ``where=''`` specified 
(:issue:`26610`). + Plotting ^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 903fd7ffe706a..fa36723324ea2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2859,9 +2859,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, ... 'mask': ['red', 'purple'], ... 'weapon': ['sai', 'bo staff']}) >>> df.to_latex(index=False) # doctest: +NORMALIZE_WHITESPACE - '\\begin{tabular}{lll}\n\\toprule\n name & mask & weapon - \\\\\n\\midrule\n Raphael & red & sai \\\\\n Donatello & - purple & bo staff \\\\\n\\bottomrule\n\\end{tabular}\n' + '\\begin{tabular}{lll}\n\\toprule\n name & mask & weapon + \\\\\n\\midrule\n Raphael & red & sai \\\\\nDonatello & + purple & bo staff \\\\\n\\bottomrule\n\\end{tabular}\n' """ # Get defaults from the pandas config if self.ndim == 1: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f632bc13a5b24..83b7b03b7254b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -247,8 +247,15 @@ def _get_formatted_index(self): def _get_formatted_values(self): values_to_format = self.tr_series._formatting_values() + + if self.index: + leading_space = 'compat' + else: + leading_space = False return format_array(values_to_format, None, - float_format=self.float_format, na_rep=self.na_rep) + float_format=self.float_format, + na_rep=self.na_rep, + leading_space=leading_space) def to_string(self): series = self.tr_series @@ -712,9 +719,15 @@ def _format_col(self, i): frame = self.tr_frame formatter = self._get_formatter(i) values_to_format = frame.iloc[:, i]._formatting_values() + + if self.index: + leading_space = 'compat' + else: + leading_space = False return format_array(values_to_format, formatter, float_format=self.float_format, na_rep=self.na_rep, - space=self.col_space, decimal=self.decimal) + space=self.col_space, decimal=self.decimal, + leading_space=leading_space) def to_html(self, classes=None, notebook=False, border=None): """ @@ 
-851,7 +864,7 @@ def _get_column_name_list(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right', decimal='.', - leading_space=None): + leading_space='compat'): """ Format an array for printing. @@ -865,7 +878,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN', space justify decimal - leading_space : bool, optional + leading_space : bool or 'compat', default 'compat' Whether the array should be formatted with a leading space. When an array as a column of a Series or DataFrame, we do want the leading space to pad between columns. @@ -915,7 +928,7 @@ class GenericArrayFormatter: def __init__(self, values, digits=7, formatter=None, na_rep='NaN', space=12, float_format=None, justify='right', decimal='.', - quoting=None, fixed_width=True, leading_space=None): + quoting=None, fixed_width=True, leading_space='compat'): self.values = values self.digits = digits self.na_rep = na_rep @@ -973,7 +986,7 @@ def _format(x): is_float_type = lib.map_infer(vals, is_float) & notna(vals) leading_space = self.leading_space - if leading_space is None: + if leading_space == 'compat': leading_space = is_float_type.any() fmt_values = [] @@ -1101,7 +1114,11 @@ def format_values_with(float_format): # The default is otherwise to use str instead of a formatting string if self.float_format is None: if self.fixed_width: - float_format = partial('{value: .{digits:d}f}'.format, + if self.leading_space is not False: + fmt_str = '{value: .{digits:d}f}' + else: + fmt_str = '{value:.{digits:d}f}' + float_format = partial(fmt_str.format, digits=self.digits) else: float_format = self.float_format @@ -1133,7 +1150,11 @@ def format_values_with(float_format): (abs_vals > 0)).any() if has_small_values or (too_long and has_large_values): - float_format = partial('{value: .{digits:d}e}'.format, + if self.leading_space is not False: + fmt_str = '{value: .{digits:d}e}' + else: + fmt_str = '{value:.{digits:d}e}' + float_format = 
partial(fmt_str.format, digits=self.digits) formatted_values = format_values_with(float_format) @@ -1150,7 +1171,12 @@ def _format_strings(self): class IntArrayFormatter(GenericArrayFormatter): def _format_strings(self): - formatter = self.formatter or (lambda x: '{x: d}'.format(x=x)) + if self.leading_space is False: + fmt_str = '{x:d}' + else: + fmt_str = '{x: d}' + formatter = self.formatter or (lambda x: fmt_str.format(x=x)) +# formatter = self.formatter or (lambda x: '{x: d}'.format(x=x)) fmt_values = [formatter(x) for x in self.values] return fmt_values diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index edb7c2136825d..5c63be92b9226 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1232,15 +1232,15 @@ def test_to_string_no_index(self): df_s = df.to_string(index=False) # Leading space is expected for positive numbers. - expected = (" x y z\n" - " 11 33 AAA\n" - " 22 -44 ") + expected = (" x y z\n" + "11 33 AAA\n" + "22 -44 ") assert df_s == expected df_s = df[['y', 'x', 'z']].to_string(index=False) - expected = (" y x z\n" - " 33 11 AAA\n" - "-44 22 ") + expected = (" y x z\n" + " 33 11 AAA\n" + "-44 22 ") assert df_s == expected def test_to_string_line_width_no_index(self): @@ -1255,7 +1255,7 @@ def test_to_string_line_width_no_index(self): df = DataFrame({'x': [11, 22, 33], 'y': [4, 5, 6]}) df_s = df.to_string(line_width=1, index=False) - expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + expected = " x \\\n11 \n22 \n33 \n\n y \n 4 \n 5 \n 6 " assert df_s == expected @@ -1844,7 +1844,7 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = (' 1\n' + ' 2\n' + ' 3\n' + ' 4') + expected = ('1\n' + '2\n' + '3\n' + '4') assert result == expected def test_unicode_name_in_footer(self): @@ -2332,6 +2332,15 @@ def test_to_string_header(self): exp = '0 0\n ..\n9 9' 
assert res == exp + @pytest.mark.parametrize("inputs, expected", [ + ([' a', ' b'], ' a\n b'), + (['.1', '1'], '.1\n 1'), + (['10', '-10'], ' 10\n-10') + ]) + def test_to_string_index_false_corner_case(self, inputs, expected): + s = pd.Series(inputs).to_string(index=False) + assert s == expected + def test_to_string_multindex_header(self): # GH 16718 df = (pd.DataFrame({'a': [0], 'b': [1], 'c': [2], 'd': [3]}) @@ -2740,6 +2749,31 @@ def test_format_percentiles(): fmt.format_percentiles([0.1, 0.5, 'a']) +@pytest.mark.parametrize("input_array, expected", [ + ("a", "a"), + (["a", "b"], "a\nb"), + ([1, "a"], "1\na"), + (1, "1"), + ([0, -1], " 0\n-1"), + (1.0, '1.0') +]) +def test_format_remove_leading_space_series(input_array, expected): + # GH: 24980 + s = pd.Series(input_array).to_string(index=False) + assert s == expected + + +@pytest.mark.parametrize("input_array, expected", [ + ({"A": ["a"]}, "A\na"), + ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"), + ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1") +]) +def test_format_remove_leading_space_dataframe(input_array, expected): + # GH: 24980 + df = pd.DataFrame(input_array).to_string(index=False) + assert df == expected + + def test_format_percentiles_integer_idx(): # Issue #26660 result = fmt.format_percentiles(np.linspace(0, 1, 10 + 1)) diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index b9f28ec36d021..0d2e12c051725 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -51,10 +51,10 @@ def test_to_latex(self, float_frame): withoutindex_result = df.to_latex(index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - a & b \\ + a & b \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """ @@ -410,7 +410,7 @@ def test_to_latex_longtable(self, float_frame): withoutindex_result = df.to_latex(index=False, longtable=True) withoutindex_expected = 
r"""\begin{longtable}{rl} \toprule - a & b \\ + a & b \\ \midrule \endhead \midrule @@ -420,8 +420,8 @@ def test_to_latex_longtable(self, float_frame): \bottomrule \endlastfoot - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \end{longtable} """ @@ -477,8 +477,8 @@ def test_to_latex_no_header(self): withoutindex_result = df.to_latex(index=False, header=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule - 1 & b1 \\ - 2 & b2 \\ +1 & b1 \\ +2 & b2 \\ \bottomrule \end{tabular} """ @@ -504,10 +504,10 @@ def test_to_latex_specified_header(self): withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False) withoutindex_expected = r"""\begin{tabular}{rl} \toprule -AA & BB \\ +AA & BB \\ \midrule - 1 & b1 \\ - 2 & b2 \\ + 1 & b1 \\ + 2 & b2 \\ \bottomrule \end{tabular} """