Skip to content

ENH: closing issues #395, #449, #570, #571, #576

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pandas 0.7.0
- Add attribute-based item access to ``Panel`` and add IPython completion (PR
#554)
- Add ``logy`` option to ``Series.plot`` for log-scaling on the Y axis
- Add ``index`` and ``header`` options to ``DataFrame.to_string`` (GH #570)

**API Changes**

Expand All @@ -59,6 +60,8 @@ pandas 0.7.0
prevent accidentally modifying the data source (GH #316)
- Refactor to remove deprecated ``LongPanel`` class (PR #552)
- Deprecated ``Panel.to_long``, renamed to ``to_frame``
- Deprecated ``colSpace`` argument in ``DataFrame.to_string``, renamed to
``col_space``

**Improvements to existing features**

Expand Down Expand Up @@ -137,6 +140,9 @@ pandas 0.7.0
- Handle non-string index name passed to DataFrame.from_records
- DataFrame.insert caused the columns name(s) field to be discarded (GH #527)
- Fix erroneous results in monotonic many-to-one left joins
- Fix DataFrame.to_string to remove extra column white space (GH #571)
- Format floats to default to same number of digits (GH #395)
- Added decorator to copy docstring from one function to another (GH #449)

Thanks
------
Expand Down
4 changes: 3 additions & 1 deletion doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional:

- ``buf`` default None, for example a StringIO object
- ``columns`` default None, which columns to write
- ``colSpace`` default None, number of spaces to write between columns
- ``col_space`` default None, number of spaces to write between columns
- ``na_rep`` default ``NaN``, representation of NA value
- ``formatters`` default None, a dictionary (by column) of functions each of
which takes a single argument and returns a formatted string
Expand All @@ -288,6 +288,8 @@ over the string representation of the object. All arguments are optional:
- ``sparsify`` default True, set to False for a DataFrame with a hierarchical
index to print every multiindex key at each row.
- ``index_names`` default True, will print the names of the indices
- ``index`` default True, will print the index (i.e., row labels)
- ``header`` default True, will print the column labels

The Series object also has a ``to_string`` method, but with only the ``buf``,
``na_rep``, ``float_format`` arguments. There is also a ``length`` argument
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.7.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ New features
- :ref:`Add <visualization.basic>` ``logy`` option to ``Series.plot`` for
log-scaling on the Y axis

- :ref:`Add <io.formatting>` ``index`` and ``header`` options to
``DataFrame.to_string``

Performance improvements
~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
58 changes: 21 additions & 37 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,8 @@ def _try_sort(iterable):
except Exception:
return listed

def set_printoptions(precision=None, column_space=None, max_rows=None,
max_columns=None):
def set_printoptions(precision=None, column_space=None, max_rows=None,
max_columns=None):
"""
Alter default behavior of DataFrame.toString

Expand Down Expand Up @@ -491,64 +491,48 @@ def set_eng_float_format(precision=3, use_eng_prefix=False):
_float_format = EngFormatter(precision, use_eng_prefix)
_column_space = max(12, precision + 9)

_float_format = lambda x: '%.4g' % x
_float_format = lambda x: '% .4g' % x
_column_space = 12
_max_rows = 500
_max_columns = 0

def _pfixed(s, space, na_rep=None, float_format=None):
    """Format a scalar as text left-justified in a field of ``space`` chars.

    Parameters
    ----------
    s : object
        value to render
    space : int
        field width handed to ``str.ljust``
    na_rep : string, optional
        replacement text used when ``s`` is NaN
    float_format : one-parameter function, optional
        formatter applied to float values in place of the module default

    Returns
    -------
    string
        Non-float values are prefixed with one space, cut to ``space``
        characters and padded.  Float values are padded but never cut.
    """
    if not isinstance(s, float):
        return (' %s' % _stringify(s))[:space].ljust(space)

    if na_rep is not None and isnull(s):
        # Only NaN is swapped for na_rep; any other value that isnull()
        # flags is printed as it is.
        if np.isnan(s):
            s = na_rep
        return (' %s' % s).ljust(space)

    if float_format:
        # A caller-supplied formatter is used verbatim (no sign column).
        text = float_format(s)
    else:
        # The default formatter sees the magnitude only; the sign goes
        # into a leading one-character column so values line up.
        sign = '-' if s < 0 else ' '
        text = sign + _float_format(np.abs(s))

    return text.ljust(space)

def _stringify(col):
# unicode workaround
if isinstance(col, tuple):
return str(col)
else:
return '%s' % col

def _format(s, na_rep=None, float_format=None):
def _format(s, space=None, na_rep=None, float_format=None, col_width=None):
def _just_help(x):
if space is None:
return x
return x[:space].ljust(space)

if isinstance(s, float):
if na_rep is not None and isnull(s):
if np.isnan(s):
s = na_rep
return ' %s' % s
return _just_help('%s' % s)

if float_format:
formatted = float_format(s)
else:
is_neg = s < 0
formatted = _float_format(np.abs(s))
formatted = _float_format(s)

if is_neg:
formatted = '-' + formatted
else:
formatted = ' ' + formatted
# if we pass col_width, pad-zero the floats so all are same in column
if col_width is not None and formatted != ' 0':
padzeros = col_width - len(formatted)
if padzeros > 0:
formatted = formatted + ('0' * padzeros)

return formatted
return _just_help(formatted)
elif isinstance(s, int):
return _just_help('% d' % s)
else:
return ' %s' % _stringify(s)
return _just_help('%s' % _stringify(s))

#-------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# miscellaneous python tools

def rands(n):
Expand Down
137 changes: 104 additions & 33 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,52 @@
from StringIO import StringIO
from pandas.core.common import adjoin, _pfixed
from pandas.core.common import adjoin
from pandas.core.index import MultiIndex, _ensure_index

import numpy as np

# Shared numpydoc "Parameters" section describing the rendering options.
# DataFrameFormatter appends it to its class docstring with
# ``__doc__ += docstring_to_string``.
docstring_to_string = """
Parameters
----------
frame : DataFrame
    object to render
buf : StringIO-like, optional
    buffer to write to
columns : sequence, optional
    the subset of columns to write; default None writes all columns
col_space : int, optional
    the width of each column
header : bool, optional
    whether to print column labels, default True
index : bool, optional
    whether to print index (row) labels, default True
na_rep : string, optional
    string representation of NaN to use, default 'NaN'
formatters : list or dict of one-parameter functions, optional
    formatter functions to apply to columns' elements by position or name,
    default None
float_format : one-parameter function, optional
    formatter function to apply to columns' elements if they are floats,
    default None
sparsify : bool, optional
    Set to False for a DataFrame with a hierarchical index to print every
    multiindex key at each row, default True
index_names : bool, optional
    Prints the names of the indexes, default True """

class DataFrameFormatter(object):
"""
Render a DataFrame

self.to_string() : console-friendly tabular output
self.to_html() : html table
self.to_html() : html table

"""
def __init__(self, frame, buf=None, columns=None, col_space=None,
na_rep='NaN', formatters=None, float_format=None,
sparsify=True, index_names=True):

__doc__ += docstring_to_string

def __init__(self, frame, buf=None, columns=None, col_space=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=True, index_names=True):
self.frame = frame
self.buf = buf if buf is not None else StringIO()
self.show_index_names = index_names
Expand All @@ -22,6 +55,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
self.formatters = formatters
self.na_rep = na_rep
self.col_space = col_space
self.header = header
self.index = index

if columns is not None:
self.columns = _ensure_index(columns)
Expand All @@ -47,10 +82,17 @@ def to_string(self):
str_index = self._get_formatted_index()
str_columns = self._get_formatted_column_labels()

stringified = [str_columns[i] + format_col(c)
for i, c in enumerate(self.columns)]
if self.header:
stringified = [str_columns[i] + format_col(c)
for i, c in enumerate(self.columns)]
else:
stringified = [format_col(c) for c in self.columns]

to_write.append(adjoin(1, str_index, *stringified))

if self.index:
to_write.append(adjoin(1, str_index, *stringified))
else:
to_write.append(adjoin(1, *stringified))

for s in to_write:
if isinstance(s, unicode):
Expand Down Expand Up @@ -114,17 +156,21 @@ def _column_header():
write(buf, '</tbody>', indent + indent_delta)
else:
indent += indent_delta
write(buf, '<thead>', indent)
row = []

# header row
col_row = _column_header()
indent += indent_delta
write_tr(buf, col_row, indent, indent_delta, header=True)
if self.has_index_names:
row = frame.index.names + [''] * len(frame.columns)
write_tr(buf, row, indent, indent_delta, header=True)
write(buf, '</thead>', indent)
if self.header:
write(buf, '<thead>', indent)
row = []

col_row = _column_header()
indent += indent_delta
write_tr(buf, col_row, indent, indent_delta, header=True)
if self.has_index_names:
row = frame.index.names + [''] * len(frame.columns)
write_tr(buf, row, indent, indent_delta, header=True)

write(buf, '</thead>', indent)

write(buf, '<tbody>', indent)

# write values
Expand All @@ -148,19 +194,24 @@ def _get_column_formatter(self):

col_space = self.col_space

if col_space is None:
def _myformat(v):
return _format(v, na_rep=self.na_rep,
float_format=self.float_format)
else:
def _myformat(v):
return _pfixed(v, col_space, na_rep=self.na_rep,
float_format=self.float_format)
def _myformat(col):
formatter = lambda v: _format(v, space=col_space,
na_rep=self.na_rep,
float_format=self.float_format)
# one pass through when float to stringify column, to pad with
# zeros
if issubclass(col.dtype.type, np.floating):
col_width = max(map(len, map(formatter, col)))
formatter = lambda v: _format(v, space=col_space,
na_rep=self.na_rep,
float_format=self.float_format,
col_width=col_width)
return formatter

formatters = {} if self.formatters is None else self.formatters

def _format_col(col, i=None):
formatter = formatters.get(col, _myformat)
formatter = formatters.get(col, _myformat(self.frame[col]))
if i == None:
return [formatter(x) for x in self.frame[col]]
else:
Expand All @@ -171,16 +222,34 @@ def _format_col(col, i=None):
def _get_formatted_column_labels(self):
from pandas.core.index import _sparsify

formatters = self.formatters
if formatters is None:
formatters = {}

def is_numeric_dtype(dtype):
return issubclass(dtype.type, np.number)

if isinstance(self.columns, MultiIndex):
fmt_columns = self.columns.format(sparsify=False, adjoin=False)
str_columns = zip(*[[' %s' % y for y in x]
for x in zip(*fmt_columns)])
fmt_columns = zip(*fmt_columns)
dtypes = self.frame.dtypes.values
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = zip(*[[' %s' % y
if y not in formatters and need_leadsp[x]
else str(y) for y in x]
for x in fmt_columns])
if self.sparsify:
str_columns = _sparsify(str_columns)

str_columns = [list(x) for x in zip(*str_columns)]
else:
str_columns = [[' %s' % x] for x in self.columns.format()]
fmt_columns = self.columns.format()
dtypes = self.frame.dtypes
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = [[' %s' % x
if x not in formatters and need_leadsp[x]
else str(x)]
for x in fmt_columns]

if self.show_index_names and self.has_index_names:
for x in str_columns:
Expand All @@ -201,7 +270,7 @@ def _get_formatted_index(self):
columns = self.frame.columns

show_index_names = self.show_index_names and self.has_index_names
show_col_names = self.show_index_names and self.has_column_names
show_col_names = (self.show_index_names and self.has_column_names)

if isinstance(index, MultiIndex):
fmt_index = index.format(sparsify=self.sparsify, adjoin=False,
Expand All @@ -213,11 +282,14 @@ def _get_formatted_index(self):

# empty space for columns
if show_col_names:
col_header = [' %s' % x for x in self._get_column_name_list()]
col_header = ['%s' % x for x in self._get_column_name_list()]
else:
col_header = [''] * columns.nlevels

return col_header + adjoined
if self.header:
return col_header + adjoined
else:
return adjoined

def _get_column_name_list(self):
names = []
Expand All @@ -229,7 +301,6 @@ def _get_column_name_list(self):
names.append('' if columns.name is None else columns.name)
return names


def single_column_table(column):
table = '<table><tbody>'
for i in column:
Expand Down
Loading