merge master

charlesdong1991 · charlesdong1991 · commit 5a6b8679cf1b · 2019-06-12T21:47:51.000+02:00
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -647,6 +647,7 @@ I/O
 
 - Bug in :func:`DataFrame.to_html()` where values were truncated using display options instead of outputting the full content (:issue:`17004`)
 - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
+- Bug in :meth:`Series.to_string`, :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` adding a leading space when ``index=False`` (:issue:`24980`)
 - Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`)
 - Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`)
 - Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`)
@@ -671,6 +672,7 @@ I/O
 - :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`)
 - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`).
 
+
 Plotting
 ^^^^^^^^
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2859,9 +2859,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
         ...                    'mask': ['red', 'purple'],
         ...                    'weapon': ['sai', 'bo staff']})
         >>> df.to_latex(index=False) # doctest: +NORMALIZE_WHITESPACE
-        '\\begin{tabular}{lll}\n\\toprule\n      name &    mask &    weapon
-        \\\\\n\\midrule\n   Raphael &     red &       sai \\\\\n Donatello &
-         purple &  bo staff \\\\\n\\bottomrule\n\\end{tabular}\n'
+        '\\begin{tabular}{lll}\n\\toprule\n     name &   mask &   weapon
+        \\\\\n\\midrule\n  Raphael &    red &      sai \\\\\nDonatello &
+        purple & bo staff \\\\\n\\bottomrule\n\\end{tabular}\n'
         """
         # Get defaults from the pandas config
         if self.ndim == 1:
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -247,8 +247,15 @@ def _get_formatted_index(self):
 
     def _get_formatted_values(self):
         values_to_format = self.tr_series._formatting_values()
+
+        if self.index:
+            leading_space = 'compat'
+        else:
+            leading_space = False
         return format_array(values_to_format, None,
-                            float_format=self.float_format, na_rep=self.na_rep)
+                            float_format=self.float_format,
+                            na_rep=self.na_rep,
+                            leading_space=leading_space)
 
     def to_string(self):
         series = self.tr_series
@@ -712,9 +719,15 @@ def _format_col(self, i):
         frame = self.tr_frame
         formatter = self._get_formatter(i)
         values_to_format = frame.iloc[:, i]._formatting_values()
+
+        if self.index:
+            leading_space = 'compat'
+        else:
+            leading_space = False
         return format_array(values_to_format, formatter,
                             float_format=self.float_format, na_rep=self.na_rep,
-                            space=self.col_space, decimal=self.decimal)
+                            space=self.col_space, decimal=self.decimal,
+                            leading_space=leading_space)
 
     def to_html(self, classes=None, notebook=False, border=None):
         """
@@ -851,7 +864,7 @@ def _get_column_name_list(self):
 
 def format_array(values, formatter, float_format=None, na_rep='NaN',
                  digits=None, space=None, justify='right', decimal='.',
-                 leading_space=None):
+                 leading_space='compat'):
     """
     Format an array for printing.
 
@@ -865,7 +878,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
     space
     justify
     decimal
-    leading_space : bool, optional
+    leading_space : bool or 'compat', default 'compat'
         Whether the array should be formatted with a leading space.
         When an array as a column of a Series or DataFrame, we do want
         the leading space to pad between columns.
@@ -915,7 +928,7 @@ class GenericArrayFormatter:
 
     def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
                  space=12, float_format=None, justify='right', decimal='.',
-                 quoting=None, fixed_width=True, leading_space=None):
+                 quoting=None, fixed_width=True, leading_space='compat'):
         self.values = values
         self.digits = digits
         self.na_rep = na_rep
@@ -973,7 +986,7 @@ def _format(x):
 
         is_float_type = lib.map_infer(vals, is_float) & notna(vals)
         leading_space = self.leading_space
-        if leading_space is None:
+        if leading_space == 'compat':
             leading_space = is_float_type.any()
 
         fmt_values = []
@@ -1101,7 +1114,11 @@ def format_values_with(float_format):
         # The default is otherwise to use str instead of a formatting string
         if self.float_format is None:
             if self.fixed_width:
-                float_format = partial('{value: .{digits:d}f}'.format,
+                if self.leading_space is not False:
+                    fmt_str = '{value: .{digits:d}f}'
+                else:
+                    fmt_str = '{value:.{digits:d}f}'
+                float_format = partial(fmt_str.format,
                                        digits=self.digits)
             else:
                 float_format = self.float_format
@@ -1133,7 +1150,11 @@ def format_values_with(float_format):
                                 (abs_vals > 0)).any()
 
         if has_small_values or (too_long and has_large_values):
-            float_format = partial('{value: .{digits:d}e}'.format,
+            if self.leading_space is not False:
+                fmt_str = '{value: .{digits:d}e}'
+            else:
+                fmt_str = '{value:.{digits:d}e}'
+            float_format = partial(fmt_str.format,
                                    digits=self.digits)
             formatted_values = format_values_with(float_format)
 
@@ -1150,7 +1171,11 @@ def _format_strings(self):
 class IntArrayFormatter(GenericArrayFormatter):
 
     def _format_strings(self):
-        formatter = self.formatter or (lambda x: '{x: d}'.format(x=x))
+        if self.leading_space is False:
+            fmt_str = '{x:d}'
+        else:
+            fmt_str = '{x: d}'
+        formatter = self.formatter or (lambda x: fmt_str.format(x=x))
         fmt_values = [formatter(x) for x in self.values]
         return fmt_values
 
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
@@ -1232,15 +1232,15 @@ def test_to_string_no_index(self):
 
         df_s = df.to_string(index=False)
         # Leading space is expected for positive numbers.
-        expected = ("  x   y    z\n"
-                    " 11  33  AAA\n"
-                    " 22 -44     ")
+        expected = (" x   y   z\n"
+                    "11  33 AAA\n"
+                    "22 -44    ")
         assert df_s == expected
 
         df_s = df[['y', 'x', 'z']].to_string(index=False)
-        expected = ("  y   x    z\n"
-                    " 33  11  AAA\n"
-                    "-44  22     ")
+        expected = ("  y  x   z\n"
+                    " 33 11 AAA\n"
+                    "-44 22    ")
         assert df_s == expected
 
     def test_to_string_line_width_no_index(self):
@@ -1255,7 +1255,7 @@ def test_to_string_line_width_no_index(self):
         df = DataFrame({'x': [11, 22, 33], 'y': [4, 5, 6]})
 
         df_s = df.to_string(line_width=1, index=False)
-        expected = "  x  \\\n 11   \n 22   \n 33   \n\n y  \n 4  \n 5  \n 6  "
+        expected = " x  \\\n11   \n22   \n33   \n\n y  \n 4  \n 5  \n 6  "
 
         assert df_s == expected
 
@@ -1844,7 +1844,7 @@ def test_to_string_without_index(self):
         # GH 11729 Test index=False option
         s = Series([1, 2, 3, 4])
         result = s.to_string(index=False)
-        expected = (' 1\n' + ' 2\n' + ' 3\n' + ' 4')
+        expected = ('1\n' + '2\n' + '3\n' + '4')
         assert result == expected
 
     def test_unicode_name_in_footer(self):
@@ -2332,6 +2332,15 @@ def test_to_string_header(self):
         exp = '0    0\n    ..\n9    9'
         assert res == exp
 
+    @pytest.mark.parametrize("inputs, expected", [
+        ([' a', ' b'], ' a\n b'),
+        (['.1', '1'], '.1\n 1'),
+        (['10', '-10'], ' 10\n-10')
+    ])
+    def test_to_string_index_false_corner_case(self, inputs, expected):
+        s = pd.Series(inputs).to_string(index=False)
+        assert s == expected
+
     def test_to_string_multindex_header(self):
         # GH 16718
         df = (pd.DataFrame({'a': [0], 'b': [1], 'c': [2], 'd': [3]})
@@ -2740,6 +2749,31 @@ def test_format_percentiles():
         fmt.format_percentiles([0.1, 0.5, 'a'])
 
 
+@pytest.mark.parametrize("input_array, expected", [
+    ("a", "a"),
+    (["a", "b"], "a\nb"),
+    ([1, "a"], "1\na"),
+    (1, "1"),
+    ([0, -1], " 0\n-1"),
+    (1.0, '1.0')
+])
+def test_format_remove_leading_space_series(input_array, expected):
+    # GH: 24980
+    s = pd.Series(input_array).to_string(index=False)
+    assert s == expected
+
+
+@pytest.mark.parametrize("input_array, expected", [
+    ({"A": ["a"]}, "A\na"),
+    ({"A": ["a", "b"], "B": ["c", "dd"]}, "A  B\na  c\nb dd"),
+    ({"A": ["a", 1], "B": ["aa", 1]}, "A  B\na aa\n1  1")
+])
+def test_format_remove_leading_space_dataframe(input_array, expected):
+    # GH: 24980
+    df = pd.DataFrame(input_array).to_string(index=False)
+    assert df == expected
+
+
 def test_format_percentiles_integer_idx():
     # Issue #26660
     result = fmt.format_percentiles(np.linspace(0, 1, 10 + 1))
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -51,10 +51,10 @@ def test_to_latex(self, float_frame):
         withoutindex_result = df.to_latex(index=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
- a &   b \\
+ a &  b \\
 \midrule
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \bottomrule
 \end{tabular}
 """
@@ -410,7 +410,7 @@ def test_to_latex_longtable(self, float_frame):
         withoutindex_result = df.to_latex(index=False, longtable=True)
         withoutindex_expected = r"""\begin{longtable}{rl}
 \toprule
- a &   b \\
+ a &  b \\
 \midrule
 \endhead
 \midrule
@@ -420,8 +420,8 @@ def test_to_latex_longtable(self, float_frame):
 
 \bottomrule
 \endlastfoot
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \end{longtable}
 """
 
@@ -477,8 +477,8 @@ def test_to_latex_no_header(self):
         withoutindex_result = df.to_latex(index=False, header=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
- 1 &  b1 \\
- 2 &  b2 \\
+1 & b1 \\
+2 & b2 \\
 \bottomrule
 \end{tabular}
 """
@@ -504,10 +504,10 @@ def test_to_latex_specified_header(self):
         withoutindex_result = df.to_latex(header=['AA', 'BB'], index=False)
         withoutindex_expected = r"""\begin{tabular}{rl}
 \toprule
-AA &  BB \\
+AA & BB \\
 \midrule
- 1 &  b1 \\
- 2 &  b2 \\
+ 1 & b1 \\
+ 2 & b2 \\
 \bottomrule
 \end{tabular}
 """