merge master

charlesdong1991 · charlesdong1991 · commit 439fd186e0fd · 2019-04-12T14:44:41.000+02:00
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -349,6 +349,7 @@ I/O
 
 - Bug in :func:`DataFrame.to_html()` where values were truncated using display options instead of outputting the full content (:issue:`17004`)
 - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
+- Bug in :meth:`Series.to_string` adding a leading space when ``index=False`` (:issue:`24980`)
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to Timestamp, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
@@ -364,6 +365,7 @@ I/O
 - Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
 - Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
 
+
 Plotting
 ^^^^^^^^
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2880,9 +2880,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
         ...                    'mask': ['red', 'purple'],
         ...                    'weapon': ['sai', 'bo staff']})
         >>> df.to_latex(index=False) # doctest: +NORMALIZE_WHITESPACE
-        '\\begin{tabular}{lll}\n\\toprule\n      name &    mask &    weapon
-        \\\\\n\\midrule\n   Raphael &     red &       sai \\\\\n Donatello &
-         purple &  bo staff \\\\\n\\bottomrule\n\\end{tabular}\n'
+        '\\begin{tabular}{lll}\n\\toprule\n     name &   mask &   weapon
+        \\\\\n\\midrule\n  Raphael &    red &      sai \\\\\nDonatello &
+        purple & bo staff \\\\\n\\bottomrule\n\\end{tabular}\n'
         """
         # Get defaults from the pandas config
         if self.ndim == 1:
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -252,8 +252,15 @@ def _get_formatted_index(self):
 
     def _get_formatted_values(self):
         values_to_format = self.tr_series._formatting_values()
+
+        if self.index:
+            leading_space = 'compat'
+        else:
+            leading_space = False
         return format_array(values_to_format, None,
-                            float_format=self.float_format, na_rep=self.na_rep)
+                            float_format=self.float_format,
+                            na_rep=self.na_rep,
+                            leading_space=leading_space)
 
     def to_string(self):
         series = self.tr_series
@@ -717,9 +724,15 @@ def _format_col(self, i):
         frame = self.tr_frame
         formatter = self._get_formatter(i)
         values_to_format = frame.iloc[:, i]._formatting_values()
+
+        if self.index:
+            leading_space = 'compat'
+        else:
+            leading_space = False
         return format_array(values_to_format, formatter,
                             float_format=self.float_format, na_rep=self.na_rep,
-                            space=self.col_space, decimal=self.decimal)
+                            space=self.col_space, decimal=self.decimal,
+                            leading_space=leading_space)
 
     def to_html(self, classes=None, notebook=False, border=None):
         """
@@ -856,7 +869,7 @@ def _get_column_name_list(self):
 
 def format_array(values, formatter, float_format=None, na_rep='NaN',
                  digits=None, space=None, justify='right', decimal='.',
-                 leading_space=None):
+                 leading_space='compat'):
     """
     Format an array for printing.
 
@@ -870,7 +883,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
     space
     justify
     decimal
-    leading_space : bool, optional
+    leading_space : bool or 'compat', default 'compat'
         Whether the array should be formatted with a leading space.
         When an array as a column of a Series or DataFrame, we do want
         the leading space to pad between columns.
@@ -920,7 +933,7 @@ class GenericArrayFormatter(object):
 
     def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
                  space=12, float_format=None, justify='right', decimal='.',
-                 quoting=None, fixed_width=True, leading_space=None):
+                 quoting=None, fixed_width=True, leading_space='compat'):
         self.values = values
         self.digits = digits
         self.na_rep = na_rep
@@ -978,7 +991,7 @@ def _format(x):
 
         is_float_type = lib.map_infer(vals, is_float) & notna(vals)
         leading_space = self.leading_space
-        if leading_space is None:
+        if leading_space == 'compat':
             leading_space = is_float_type.any()
 
         fmt_values = []
@@ -1102,7 +1115,11 @@ def format_values_with(float_format):
         # The default is otherwise to use str instead of a formatting string
         if self.float_format is None:
             if self.fixed_width:
-                float_format = partial('{value: .{digits:d}f}'.format,
+                if self.leading_space is not False:
+                    fmt_str = '{value: .{digits:d}f}'
+                else:
+                    fmt_str = '{value:.{digits:d}f}'
+                float_format = partial(fmt_str.format,
                                        digits=self.digits)
             else:
                 float_format = self.float_format
@@ -1134,7 +1151,11 @@ def format_values_with(float_format):
                                 (abs_vals > 0)).any()
 
         if has_small_values or (too_long and has_large_values):
-            float_format = partial('{value: .{digits:d}e}'.format,
+            if self.leading_space is not False:
+                fmt_str = '{value: .{digits:d}e}'
+            else:
+                fmt_str = '{value:.{digits:d}e}'
+            float_format = partial(fmt_str.format,
                                    digits=self.digits)
             formatted_values = format_values_with(float_format)
 
@@ -1151,7 +1172,11 @@ def _format_strings(self):
 class IntArrayFormatter(GenericArrayFormatter):
 
     def _format_strings(self):
-        formatter = self.formatter or (lambda x: '{x: d}'.format(x=x))
+        if self.leading_space is False:
+            fmt_str = '{x:d}'
+        else:
+            fmt_str = '{x: d}'
+        formatter = self.formatter or (lambda x: fmt_str.format(x=x))
         fmt_values = [formatter(x) for x in self.values]
         return fmt_values
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -1245,15 +1245,15 @@ def test_to_string_no_index(self):
 
         df_s = df.to_string(index=False)
-        # Leading space is expected for positive numbers.
+        # Leading space only appears where needed to align with a minus sign.
-        expected = ("  x   y    z\n"
-                    " 11  33  AAA\n"
-                    " 22 -44     ")
+        expected = (" x   y   z\n"
+                    "11  33 AAA\n"
+                    "22 -44    ")
         assert df_s == expected
 
         df_s = df[['y', 'x', 'z']].to_string(index=False)
-        expected = ("  y   x    z\n"
-                    " 33  11  AAA\n"
-                    "-44  22     ")
+        expected = ("  y  x   z\n"
+                    " 33 11 AAA\n"
+                    "-44 22    ")
         assert df_s == expected
 
     def test_to_string_line_width_no_index(self):
@@ -1268,7 +1268,7 @@ def test_to_string_line_width_no_index(self):
         df = DataFrame({'x': [11, 22, 33], 'y': [4, 5, 6]})
 
         df_s = df.to_string(line_width=1, index=False)
-        expected = "  x  \\\n 11   \n 22   \n 33   \n\n y  \n 4  \n 5  \n 6  "
+        expected = " x  \\\n11   \n22   \n33   \n\n y  \n 4  \n 5  \n 6  "
 
         assert df_s == expected
 
@@ -2329,6 +2329,15 @@ def test_to_string_header(self):
         exp = '0    0\n    ..\n9    9'
         assert res == exp
 
+    @pytest.mark.parametrize("inputs, expected", [
+        ([' a', ' b'], ' a\n b'),
+        (['.1', '1'], '.1\n 1'),
+        (['10', '-10'], ' 10\n-10')
+    ])
+    def test_to_string_index_false_corner_case(self, inputs, expected):
+        s = pd.Series(inputs).to_string(index=False)
+        assert s == expected
+
     def test_to_string_multindex_header(self):
         # GH 16718
         df = (pd.DataFrame({'a': [0], 'b': [1], 'c': [2], 'd': [3]})
@@ -2737,6 +2746,31 @@ def test_format_percentiles():
         fmt.format_percentiles([0.1, 0.5, 'a'])
 
 
+@pytest.mark.parametrize("input_array, expected", [
+    ("a", "a"),
+    (["a", "b"], "a\nb"),
+    ([1, "a"], "1\na"),
+    (1, "1"),
+    ([0, -1], " 0\n-1"),
+    (1.0, '1.0')
+])
+def test_format_remove_leading_space_series(input_array, expected):
+    # GH: 24980
+    s = pd.Series(input_array).to_string(index=False)
+    assert s == expected
+
+
+@pytest.mark.parametrize("input_array, expected", [
+    ({"A": ["a"]}, "A\na"),
+    ({"A": ["a", "b"], "B": ["c", "dd"]}, "A  B\na  c\nb dd"),
+    ({"A": ["a", 1], "B": ["aa", 1]}, "A  B\na aa\n1  1")
+])
+def test_format_remove_leading_space_dataframe(input_array, expected):
+    # GH: 24980
+    df = pd.DataFrame(input_array).to_string(index=False)
+    assert df == expected
+
+
 def test_repr_html_ipython_config(ip):
     code = textwrap.dedent("""\
     import pandas as pd
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -56,10 +56,10 @@ def test_to_latex(self, frame):
         withoutindex_result = df.to_latex(index=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
- a &   b \\
+ a &  b \\
 \midrule
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \bottomrule
 \end{tabular}
 """
@@ -415,7 +415,7 @@ def test_to_latex_longtable(self, frame):
         withoutindex_result = df.to_latex(index=False, longtable=True)
         withoutindex_expected = r"""\begin{longtable}{rl}
 \toprule
- a &   b \\
+ a &  b \\
 \midrule
 \endhead
 \midrule
@@ -425,8 +425,8 @@ def test_to_latex_longtable(self, frame):
 
 \bottomrule
 \endlastfoot
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \end{longtable}
 """
 
@@ -482,8 +482,8 @@ def test_to_latex_no_header(self):
         withoutindex_result = df.to_latex(index=False, header=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
- 1 &  b1 \\
- 2 &  b2 \\
+1 & b1 \\
+2 & b2 \\
 \bottomrule
 \end{tabular}
 """
@@ -509,10 +509,10 @@ def test_to_latex_specified_header(self):
         withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
-AA &  BB \\
+AA & BB \\
 \midrule
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \bottomrule
 \end{tabular}
 """