pandas-dev · adamklein · Jan 5, 2012 · Jan 5, 2012 · Jan 5, 2012 · Jan 5, 2012
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -49,6 +49,7 @@ pandas 0.7.0
   - Add attribute-based item access to ``Panel`` and add IPython completion (PR
     #554)
   - Add ``logy`` option to ``Series.plot`` for log-scaling on the Y axis
+  - Add ``index`` and ``header`` options to ``DataFrame.to_string`` (GH #570)
 
 **API Changes**
 
@@ -59,6 +60,8 @@ pandas 0.7.0
     prevent accidentally modifying the data source (GH #316)
   - Refactor to remove deprecated ``LongPanel`` class (PR #552)
   - Deprecated ``Panel.to_long``, renamed to ``to_frame``
+  - Deprecated ``colSpace`` argument in ``DataFrame.to_string``, renamed to
+    ``col_space``
 
 **Improvements to existing features**
 
@@ -137,6 +140,9 @@ pandas 0.7.0
   - Handle non-string index name passed to DataFrame.from_records
   - DataFrame.insert caused the columns name(s) field to be discarded (GH #527)
   - Fix erroneous in monotonic many-to-one left joins
+  - Fix DataFrame.to_string to remove extra column white space (GH #571)
+  - Format floats in a column to the same number of digits (GH #395)
+  - Added decorator to copy docstring from one function to another (GH #449)
 
 Thanks
 ------

diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional:
 
   - ``buf`` default None, for example a StringIO object
   - ``columns`` default None, which columns to write
-  - ``colSpace`` default None, number of spaces to write between columns
+  - ``col_space`` default None, number of spaces to write between columns
   - ``na_rep`` default ``NaN``, representation of NA value
   - ``formatters`` default None, a dictionary (by column) of functions each of
     which takes a single argument and returns a formatted string
@@ -288,6 +288,8 @@ over the string representation of the object. All arguments are optional:
   - ``sparsify`` default True, set to False for a DataFrame with a hierarchical
     index to print every multiindex key at each row.
   - ``index_names`` default True, will print the names of the indices
+  - ``index`` default True, will print the index (i.e., row labels)
+  - ``header`` default True, will print the column labels
 
 The Series object also has a ``to_string`` method, but with only the ``buf``,
 ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument

diff --git a/doc/source/whatsnew/v0.7.0.txt b/doc/source/whatsnew/v0.7.0.txt
@@ -50,6 +50,9 @@ New features
 - :ref:`Add <visualization.basic>` ``logy`` option to ``Series.plot`` for
   log-scaling on the Y axis
 
+- :ref:`Add <io.formatting>` ``index`` and ``header`` options to
+  ``DataFrame.to_string``
+
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -360,8 +360,8 @@ def _try_sort(iterable):
     except Exception:
         return listed
 
-def set_printoptions(precision=None, column_space=None, max_rows=None,
-        max_columns=None):
+def set_printoptions(precision=None, column_space=None, max_rows=None, 
+                     max_columns=None):
     """
     Alter default behavior of DataFrame.toString
 
@@ -491,64 +491,48 @@ def set_eng_float_format(precision=3, use_eng_prefix=False):
     _float_format = EngFormatter(precision, use_eng_prefix)
     _column_space = max(12, precision + 9)
 
-_float_format = lambda x: '%.4g' % x
+_float_format = lambda x: '% .4g' % x
 _column_space = 12
 _max_rows = 500
 _max_columns = 0
 
-def _pfixed(s, space, na_rep=None, float_format=None):
-    if isinstance(s, float):
-        if na_rep is not None and isnull(s):
-            if np.isnan(s):
-                s = na_rep
-            return (' %s' % s).ljust(space)
-
-        if float_format:
-            formatted = float_format(s)
-        else:
-            is_neg = s < 0
-            formatted = _float_format(np.abs(s))
-
-            if is_neg:
-                formatted = '-' + formatted
-            else:
-                formatted = ' ' + formatted
-
-        return formatted.ljust(space)
-    else:
-        stringified = _stringify(s)
-        return (' %s' % stringified)[:space].ljust(space)
-
 def _stringify(col):
     # unicode workaround
     if isinstance(col, tuple):
         return str(col)
     else:
         return '%s' % col
 
-def _format(s, na_rep=None, float_format=None):
+def _format(s, space=None, na_rep=None, float_format=None, col_width=None):
+    def _just_help(x):
+        if space is None:
+            return x
+        return x[:space].ljust(space)
+
     if isinstance(s, float):
         if na_rep is not None and isnull(s):
             if np.isnan(s):
                 s = na_rep
-            return ' %s' % s
+            return _just_help('%s' % s)
 
         if float_format:
             formatted = float_format(s)
         else:
-            is_neg = s < 0
-            formatted = _float_format(np.abs(s))
+            formatted = _float_format(s)
 
-            if is_neg:
-                formatted = '-' + formatted
-            else:
-                formatted = ' ' + formatted
+        # if we pass col_width, pad-zero the floats so all are same in column
+        if col_width is not None and formatted != ' 0':
+            padzeros = col_width - len(formatted)
+            if padzeros > 0:
+                formatted = formatted + ('0' * padzeros)
 
-        return formatted
+        return _just_help(formatted)
+    elif isinstance(s, int):
+        return _just_help('% d' % s)
     else:
-        return ' %s' % _stringify(s)
+        return _just_help('%s' % _stringify(s))
 
-#-------------------------------------------------------------------------------
+#------------------------------------------------------------------------------
 # miscellaneous python tools
 
 def rands(n):

diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -1,19 +1,52 @@
 from StringIO import StringIO
-from pandas.core.common import adjoin, _pfixed
+from pandas.core.common import adjoin
 from pandas.core.index import MultiIndex, _ensure_index
 
+import numpy as np
+
+docstring_to_string = """
+    Parameters
+    ----------
+    frame : DataFrame
+        object to render
+    buf : StringIO-like, optional
+        buffer to write to
+    columns : sequence, optional
+        the subset of columns to write; default None writes all columns
+    col_space : int, optional
+        the width of each column
+    header : bool, optional
+        whether to print column labels, default True
+    index : bool, optional
+        whether to print index (row) labels, default True
+    na_rep : string, optional
+        string representation of NaN to use, default 'NaN'
+    formatters : list or dict of one-parameter functions, optional
+        formatter functions to apply to columns' elements by position or name,
+        default None
+    float_format : one-parameter function, optional
+        formatter function to apply to columns' elements if they are floats
+        default None
+    sparsify : bool, optional
+        Set to False for a DataFrame with a hierarchical index to print every
+        multiindex key at each row, default True
+    index_names : bool, optional
+        Prints the names of the indexes, default True """
 
 class DataFrameFormatter(object):
     """
     Render a DataFrame
 
     self.to_string() : console-friendly tabular output
-    self.to_html() : html table
+    self.to_html()   : html table
+
     """
-    def __init__(self, frame, buf=None, columns=None, col_space=None,
-                 na_rep='NaN', formatters=None, float_format=None,
-                 sparsify=True, index_names=True):
 
+    __doc__ += docstring_to_string
+
+    def __init__(self, frame, buf=None, columns=None, col_space=None,
+                 header=True, index=True, na_rep='NaN', formatters=None,
+                 float_format=None, sparsify=True, index_names=True):
         self.frame = frame
         self.buf = buf if buf is not None else StringIO()
         self.show_index_names = index_names
@@ -22,6 +55,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
         self.formatters = formatters
         self.na_rep = na_rep
         self.col_space = col_space
+        self.header = header
+        self.index = index
 
         if columns is not None:
             self.columns = _ensure_index(columns)
@@ -47,10 +82,17 @@ def to_string(self):
             str_index = self._get_formatted_index()
             str_columns = self._get_formatted_column_labels()
 
-            stringified = [str_columns[i] + format_col(c)
-                           for i, c in enumerate(self.columns)]
+            if self.header:
+                stringified = [str_columns[i] + format_col(c)
+                               for i, c in enumerate(self.columns)]
+            else:
+                stringified = [format_col(c) for c in self.columns]
 
-            to_write.append(adjoin(1, str_index, *stringified))
+
+            if self.index:
+                to_write.append(adjoin(1, str_index, *stringified))
+            else:
+                to_write.append(adjoin(1, *stringified))
 
         for s in to_write:
             if isinstance(s, unicode):
@@ -114,17 +156,21 @@ def _column_header():
             write(buf, '</tbody>', indent  + indent_delta)
         else:
             indent += indent_delta
-            write(buf, '<thead>', indent)
-            row = []
 
             # header row
-            col_row = _column_header()
-            indent += indent_delta
-            write_tr(buf, col_row, indent, indent_delta, header=True)
-            if self.has_index_names:
-                row = frame.index.names + [''] * len(frame.columns)
-                write_tr(buf, row, indent, indent_delta, header=True)
-            write(buf, '</thead>', indent)
+            if self.header:
+                write(buf, '<thead>', indent)
+                row = []
+
+                col_row = _column_header()
+                indent += indent_delta
+                write_tr(buf, col_row, indent, indent_delta, header=True)
+                if self.has_index_names:
+                    row = frame.index.names + [''] * len(frame.columns)
+                    write_tr(buf, row, indent, indent_delta, header=True)
+
+                write(buf, '</thead>', indent)
+
             write(buf, '<tbody>', indent)
 
             # write values
@@ -148,19 +194,24 @@ def _get_column_formatter(self):
 
         col_space = self.col_space
 
-        if col_space is None:
-            def _myformat(v):
-                return _format(v, na_rep=self.na_rep,
-                               float_format=self.float_format)
-        else:
-            def _myformat(v):
-                return _pfixed(v, col_space, na_rep=self.na_rep,
-                               float_format=self.float_format)
+        def _myformat(col):
+            formatter = lambda v: _format(v, space=col_space,
+                                          na_rep=self.na_rep,
+                                          float_format=self.float_format)
+            # float column: stringify every value once to find the widest,
+            # then zero-pad all values in the column to that width
+            if issubclass(col.dtype.type, np.floating):
+                col_width = max(map(len, map(formatter, col)))
+                formatter = lambda v: _format(v, space=col_space,
+                                            na_rep=self.na_rep,
+                                            float_format=self.float_format,
+                                            col_width=col_width)
+            return formatter
 
         formatters = {} if self.formatters is None else self.formatters
 
         def _format_col(col, i=None):
-            formatter = formatters.get(col, _myformat)
+            formatter = formatters.get(col, _myformat(self.frame[col]))
             if i == None:
                 return [formatter(x) for x in self.frame[col]]
             else:
@@ -171,16 +222,34 @@ def _format_col(col, i=None):
     def _get_formatted_column_labels(self):
         from pandas.core.index import _sparsify
 
+        formatters = self.formatters
+        if formatters is None:
+            formatters = {}
+
+        def is_numeric_dtype(dtype):
+            return issubclass(dtype.type, np.number)
+
         if isinstance(self.columns, MultiIndex):
             fmt_columns = self.columns.format(sparsify=False, adjoin=False)
-            str_columns = zip(*[[' %s' % y for y in x]
-                                for x in zip(*fmt_columns)])
+            fmt_columns = zip(*fmt_columns)
+            dtypes = self.frame.dtypes.values
+            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
+            str_columns = zip(*[[' %s' % y
+                                if y not in formatters and need_leadsp[x]
+                                else str(y) for y in x]
+                               for x in fmt_columns])
             if self.sparsify:
                 str_columns = _sparsify(str_columns)
 
             str_columns = [list(x) for x in zip(*str_columns)]
         else:
-            str_columns = [[' %s' % x] for x in self.columns.format()]
+            fmt_columns = self.columns.format()
+            dtypes = self.frame.dtypes
+            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
+            str_columns = [[' %s' % x
+                            if x not in formatters and need_leadsp[x]
+                            else str(x)]
+                           for x in fmt_columns]
 
         if self.show_index_names and self.has_index_names:
             for x in str_columns:
@@ -201,7 +270,7 @@ def _get_formatted_index(self):
         columns = self.frame.columns
 
         show_index_names = self.show_index_names and self.has_index_names
-        show_col_names = self.show_index_names and self.has_column_names
+        show_col_names = (self.show_index_names and self.has_column_names)
 
         if isinstance(index, MultiIndex):
             fmt_index = index.format(sparsify=self.sparsify, adjoin=False,
@@ -213,11 +282,14 @@ def _get_formatted_index(self):
 
         # empty space for columns
         if show_col_names:
-            col_header = ['  %s' % x for x in self._get_column_name_list()]
+            col_header = ['%s' % x for x in self._get_column_name_list()]
         else:
             col_header = [''] * columns.nlevels
 
-        return col_header + adjoined
+        if self.header:
+            return col_header + adjoined
+        else:
+            return adjoined
 
     def _get_column_name_list(self):
         names = []
@@ -229,7 +301,6 @@ def _get_column_name_list(self):
             names.append('' if columns.name is None else columns.name)
         return names
 
-
 def single_column_table(column):
     table = '<table><tbody>'
     for i in column: