diff --git a/RELEASE.rst b/RELEASE.rst index 184d2bb7c0d8b..f169a94279b54 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -49,6 +49,7 @@ pandas 0.7.0 - Add attribute-based item access to ``Panel`` and add IPython completion (PR #554) - Add ``logy`` option to ``Series.plot`` for log-scaling on the Y axis + - Add ``index`` and ``header`` options to ``DataFrame.to_string`` (GH #570) **API Changes** @@ -59,6 +60,8 @@ pandas 0.7.0 prevent accidentally modifying the data source (GH #316) - Refactor to remove deprecated ``LongPanel`` class (PR #552) - Deprecated ``Panel.to_long``, renamed to ``to_frame`` + - Deprecated ``colSpace`` argument in ``DataFrame.to_string``, renamed to + ``col_space`` **Improvements to existing features** @@ -137,6 +140,9 @@ pandas 0.7.0 - Handle non-string index name passed to DataFrame.from_records - DataFrame.insert caused the columns name(s) field to be discarded (GH #527) - Fix erroneous in monotonic many-to-one left joins + - Fix DataFrame.to_string to remove extra column white space (GH #571) + - Format floats to default to same number of digits (GH #395) + - Added decorator to copy docstring from one function to another (GH #449) Thanks ------ diff --git a/doc/source/io.rst b/doc/source/io.rst index 0c4e097a1ba0a..496facbe91f3e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -278,7 +278,7 @@ over the string representation of the object. All arguments are optional: - ``buf`` default None, for example a StringIO object - ``columns`` default None, which columns to write - - ``colSpace`` default None, number of spaces to write between columns + - ``col_space`` default None, number of spaces to write between columns - ``na_rep`` default ``NaN``, representation of NA value - ``formatters`` default None, a dictionary (by column) of functions each of which takes a single argument and returns a formatted string @@ -288,6 +288,8 @@ over the string representation of the object. 
All arguments are optional: - ``sparsify`` default True, set to False for a DataFrame with a hierarchical index to print every multiindex key at each row. - ``index_names`` default True, will print the names of the indices + - ``index`` default True, will print the index (i.e., row labels) + - ``header`` default True, will print the column labels The Series object also has a ``to_string`` method, but with only the ``buf``, ``na_rep``, ``float_format`` arguments. There is also a ``length`` argument diff --git a/doc/source/whatsnew/v0.7.0.txt b/doc/source/whatsnew/v0.7.0.txt index e267740ea84b5..881ae7395ec1b 100644 --- a/doc/source/whatsnew/v0.7.0.txt +++ b/doc/source/whatsnew/v0.7.0.txt @@ -50,6 +50,9 @@ New features - :ref:`Add ` ``logy`` option to ``Series.plot`` for log-scaling on the Y axis +- :ref:`Add ` ``index`` and ``header`` options to + ``DataFrame.to_string`` + Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/pandas/core/common.py b/pandas/core/common.py index 06c794bdbd550..442174222a37f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -360,8 +360,8 @@ def _try_sort(iterable): except Exception: return listed -def set_printoptions(precision=None, column_space=None, max_rows=None, - max_columns=None): +def set_printoptions(precision=None, column_space=None, max_rows=None, + max_columns=None): """ Alter default behavior of DataFrame.toString @@ -491,34 +491,11 @@ def set_eng_float_format(precision=3, use_eng_prefix=False): _float_format = EngFormatter(precision, use_eng_prefix) _column_space = max(12, precision + 9) -_float_format = lambda x: '%.4g' % x +_float_format = lambda x: '% .4g' % x _column_space = 12 _max_rows = 500 _max_columns = 0 -def _pfixed(s, space, na_rep=None, float_format=None): - if isinstance(s, float): - if na_rep is not None and isnull(s): - if np.isnan(s): - s = na_rep - return (' %s' % s).ljust(space) - - if float_format: - formatted = float_format(s) - else: - is_neg = s < 0 - formatted = 
_float_format(np.abs(s)) - - if is_neg: - formatted = '-' + formatted - else: - formatted = ' ' + formatted - - return formatted.ljust(space) - else: - stringified = _stringify(s) - return (' %s' % stringified)[:space].ljust(space) - def _stringify(col): # unicode workaround if isinstance(col, tuple): @@ -526,29 +503,36 @@ def _stringify(col): else: return '%s' % col -def _format(s, na_rep=None, float_format=None): +def _format(s, space=None, na_rep=None, float_format=None, col_width=None): + def _just_help(x): + if space is None: + return x + return x[:space].ljust(space) + if isinstance(s, float): if na_rep is not None and isnull(s): if np.isnan(s): s = na_rep - return ' %s' % s + return _just_help('%s' % s) if float_format: formatted = float_format(s) else: - is_neg = s < 0 - formatted = _float_format(np.abs(s)) + formatted = _float_format(s) - if is_neg: - formatted = '-' + formatted - else: - formatted = ' ' + formatted + # if we pass col_width, pad-zero the floats so all are same in column + if col_width is not None and formatted != ' 0': + padzeros = col_width - len(formatted) + if padzeros > 0: + formatted = formatted + ('0' * padzeros) - return formatted + return _just_help(formatted) + elif isinstance(s, int): + return _just_help('% d' % s) else: - return ' %s' % _stringify(s) + return _just_help('%s' % _stringify(s)) -#------------------------------------------------------------------------------- +#------------------------------------------------------------------------------ # miscellaneous python tools def rands(n): diff --git a/pandas/core/format.py b/pandas/core/format.py index 0b1905a91e3dc..50919cc4fc724 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,19 +1,52 @@ from StringIO import StringIO -from pandas.core.common import adjoin, _pfixed +from pandas.core.common import adjoin from pandas.core.index import MultiIndex, _ensure_index +import numpy as np + +docstring_to_string = """ + Parameters + ---------- + frame : DataFrame 
+ object to render + buf : StringIO-like, optional + buffer to write to + columns : sequence, optional + the subset of columns to write; default None writes all columns + col_space : int, optional + the width of each column + header : bool, optional + whether to print column labels, default True + index : bool, optional + whether to print index (row) labels, default True + na_rep : string, optional + string representation of NaN to use, default 'NaN' + formatters : list or dict of one-parameter functions, optional + formatter functions to apply to columns' elements by position or name, + default None + float_format : one-parameter function, optional + formatter function to apply to columns' elements if they are floats + default None + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print every + multiindex key at each row, default True + index_names : bool, optional + Prints the names of the indexes, default True """ class DataFrameFormatter(object): """ Render a DataFrame self.to_string() : console-friendly tabular output - self.to_html() : html table + self.to_html() : html table + """ - def __init__(self, frame, buf=None, columns=None, col_space=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, index_names=True): + __doc__ += docstring_to_string + + def __init__(self, frame, buf=None, columns=None, col_space=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=True, index_names=True): self.frame = frame self.buf = buf if buf is not None else StringIO() self.show_index_names = index_names @@ -22,6 +55,8 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self.formatters = formatters self.na_rep = na_rep self.col_space = col_space + self.header = header + self.index = index if columns is not None: self.columns = _ensure_index(columns) @@ -47,10 +82,17 @@ def to_string(self): str_index = self._get_formatted_index() str_columns = 
self._get_formatted_column_labels() - stringified = [str_columns[i] + format_col(c) - for i, c in enumerate(self.columns)] + if self.header: + stringified = [str_columns[i] + format_col(c) + for i, c in enumerate(self.columns)] + else: + stringified = [format_col(c) for c in self.columns] - to_write.append(adjoin(1, str_index, *stringified)) + + if self.index: + to_write.append(adjoin(1, str_index, *stringified)) + else: + to_write.append(adjoin(1, *stringified)) for s in to_write: if isinstance(s, unicode): @@ -114,17 +156,21 @@ def _column_header(): write(buf, '', indent + indent_delta) else: indent += indent_delta - write(buf, '', indent) - row = [] # header row - col_row = _column_header() - indent += indent_delta - write_tr(buf, col_row, indent, indent_delta, header=True) - if self.has_index_names: - row = frame.index.names + [''] * len(frame.columns) - write_tr(buf, row, indent, indent_delta, header=True) - write(buf, '', indent) + if self.header: + write(buf, '', indent) + row = [] + + col_row = _column_header() + indent += indent_delta + write_tr(buf, col_row, indent, indent_delta, header=True) + if self.has_index_names: + row = frame.index.names + [''] * len(frame.columns) + write_tr(buf, row, indent, indent_delta, header=True) + + write(buf, '', indent) + write(buf, '', indent) # write values @@ -148,19 +194,24 @@ def _get_column_formatter(self): col_space = self.col_space - if col_space is None: - def _myformat(v): - return _format(v, na_rep=self.na_rep, - float_format=self.float_format) - else: - def _myformat(v): - return _pfixed(v, col_space, na_rep=self.na_rep, - float_format=self.float_format) + def _myformat(col): + formatter = lambda v: _format(v, space=col_space, + na_rep=self.na_rep, + float_format=self.float_format) + # one pass through when float to stringify column, to pad with + # zeros + if issubclass(col.dtype.type, np.floating): + col_width = max(map(len, map(formatter, col))) + formatter = lambda v: _format(v, space=col_space, + 
na_rep=self.na_rep, + float_format=self.float_format, + col_width=col_width) + return formatter formatters = {} if self.formatters is None else self.formatters def _format_col(col, i=None): - formatter = formatters.get(col, _myformat) + formatter = formatters.get(col, _myformat(self.frame[col])) if i == None: return [formatter(x) for x in self.frame[col]] else: @@ -171,16 +222,34 @@ def _format_col(col, i=None): def _get_formatted_column_labels(self): from pandas.core.index import _sparsify + formatters = self.formatters + if formatters is None: + formatters = {} + + def is_numeric_dtype(dtype): + return issubclass(dtype.type, np.number) + if isinstance(self.columns, MultiIndex): fmt_columns = self.columns.format(sparsify=False, adjoin=False) - str_columns = zip(*[[' %s' % y for y in x] - for x in zip(*fmt_columns)]) + fmt_columns = zip(*fmt_columns) + dtypes = self.frame.dtypes.values + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + str_columns = zip(*[[' %s' % y + if y not in formatters and need_leadsp[x] + else str(y) for y in x] + for x in fmt_columns]) if self.sparsify: str_columns = _sparsify(str_columns) str_columns = [list(x) for x in zip(*str_columns)] else: - str_columns = [[' %s' % x] for x in self.columns.format()] + fmt_columns = self.columns.format() + dtypes = self.frame.dtypes + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + str_columns = [[' %s' % x + if x not in formatters and need_leadsp[x] + else str(x)] + for x in fmt_columns] if self.show_index_names and self.has_index_names: for x in str_columns: @@ -201,7 +270,7 @@ def _get_formatted_index(self): columns = self.frame.columns show_index_names = self.show_index_names and self.has_index_names - show_col_names = self.show_index_names and self.has_column_names + show_col_names = (self.show_index_names and self.has_column_names) if isinstance(index, MultiIndex): fmt_index = index.format(sparsify=self.sparsify, adjoin=False, @@ -213,11 +282,14 @@ def 
_get_formatted_index(self): # empty space for columns if show_col_names: - col_header = [' %s' % x for x in self._get_column_name_list()] + col_header = ['%s' % x for x in self._get_column_name_list()] else: col_header = [''] * columns.nlevels - return col_header + adjoined + if self.header: + return col_header + adjoined + else: + return adjoined def _get_column_name_list(self): names = [] @@ -229,7 +301,6 @@ def _get_column_name_list(self): names.append('' if columns.name is None else columns.name) return names - def single_column_table(column): table = '' for i in column: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e00d5f1c5edb6..412ee7bac2d03 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -32,7 +32,9 @@ from pandas.core.series import Series from pandas.util import py3compat from pandas.util.terminal import get_terminal_size -from pandas.util.decorators import deprecate +from pandas.util.decorators import deprecate, Appender, Substitution + +from pandas.core.format import DataFrameFormatter, docstring_to_string import pandas.core.nanops as nanops import pandas.core.common as com @@ -160,19 +162,12 @@ merged : DataFrame """ -def _add_stat_doc(f, name, shortname, na_action=_doc_exclude_na, - extras=''): - doc = _stat_doc % {'name' : name, - 'shortname' : shortname, - 'na_action' : na_action, - 'extras' : extras} - f.__doc__ = doc - #---------------------------------------------------------------------- # Factory helper methods def _arith_method(func, name, default_axis='columns'): + @Appender(_arith_doc % name) def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, DataFrame): # Another DataFrame return self._combine_frame(other, func, fill_value, level) @@ -182,12 +177,12 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return self._combine_const(other, func) f.__name__ = name - f.__doc__ = _arith_doc % name return f def comp_method(func, name): + @Appender('Wrapper for 
comparison method %s' % name) def f(self, other): if isinstance(other, DataFrame): # Another DataFrame return self._compare_frame(other, func) @@ -197,7 +192,6 @@ def f(self, other): return self._combine_const(other, func) f.__name__ = name - f.__doc__ = 'Wrapper for comparison method %s' % name return f @@ -915,13 +909,14 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, f.close() - def to_string(self, buf=None, columns=None, colSpace=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, nanRep=None, index_names=True): + @Appender(docstring_to_string, indents=1) + def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=True, nanRep=None, + index_names=True): """ Render a DataFrame to a console-friendly tabular output. """ - from pandas.core.format import DataFrameFormatter if nanRep is not None: # pragma: no cover import warnings @@ -929,28 +924,41 @@ def to_string(self, buf=None, columns=None, colSpace=None, FutureWarning) na_rep = nanRep + if colSpace is not None: # pragma: no cover + import warnings + warnings.warn("colSpace is deprecated, use col_space", + FutureWarning) + col_space = colSpace formatter = DataFrameFormatter(self, buf=buf, columns=columns, - col_space=colSpace, na_rep=na_rep, + col_space=col_space, na_rep=na_rep, formatters=formatters, float_format=float_format, sparsify=sparsify, - index_names=index_names) + index_names=index_names, + header=header, index=index) formatter.to_string() if buf is None: return formatter.buf.getvalue() - def to_html(self, buf=None, columns=None, colSpace=None, - na_rep='NaN', formatters=None, float_format=None, - sparsify=True, index_names=True): + @Appender(docstring_to_string, indents=1) + def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, + header=True, index=True, na_rep='NaN', formatters=None, + float_format=None, sparsify=True, 
index_names=True): """ - Render a DataFrame to a html table. + Render a DataFrame to an html table. """ - from pandas.core.format import DataFrameFormatter + + if colSpace is not None: # pragma: no cover + import warnings + warnings.warn("colSpace is deprecated, use col_space", + FutureWarning) + col_space = colSpace formatter = DataFrameFormatter(self, buf=buf, columns=columns, - col_space=colSpace, na_rep=na_rep, + col_space=col_space, na_rep=na_rep, + header=header, index=index, formatters=formatters, float_format=float_format, sparsify=sparsify, @@ -2843,6 +2851,8 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix=''): left_index=on is None, right_index=True, suffixes=(lsuffix, rsuffix), sort=False) + @Substitution('') + @Appender(_merge_doc, indents=2) def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=True, suffixes=('.x', '.y'), copy=True): @@ -2851,7 +2861,6 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_on=left_on, right_on=right_on, left_index=left_index, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy) - if __debug__: merge.__doc__ = _merge_doc % '' #---------------------------------------------------------------------- # Statistical methods, etc. 
@@ -3060,56 +3069,71 @@ def _count_level(self, level, axis=0, numeric_only=False): else: return result + @Substitution(name='sum', shortname='sum', na_action=_doc_exclude_na, + extras=_numeric_only_doc) + @Appender(_stat_doc) def sum(self, axis=0, numeric_only=None, skipna=True, level=None): if level is not None: return self._agg_by_level('sum', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nansum, axis=axis, skipna=skipna, numeric_only=numeric_only) - _add_stat_doc(sum, 'sum', 'sum', extras=_numeric_only_doc) + @Substitution(name='mean', shortname='mean', na_action=_doc_exclude_na, + extras='') + @Appender(_stat_doc) def mean(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('mean', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanmean, axis=axis, skipna=skipna, numeric_only=None) - _add_stat_doc(mean, 'mean', 'mean') + @Substitution(name='minimum', shortname='min', na_action=_doc_exclude_na, + extras='') + @Appender(_stat_doc) def min(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('min', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanmin, axis=axis, skipna=skipna, numeric_only=None) - _add_stat_doc(min, 'minimum', 'min') + @Substitution(name='maximum', shortname='max', na_action=_doc_exclude_na, + extras='') + @Appender(_stat_doc) def max(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('max', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanmax, axis=axis, skipna=skipna, numeric_only=None) - _add_stat_doc(max, 'maximum', 'max') + @Substitution(name='product', shortname='product', + na_action='NA/null values are treated as 1', extras='') + @Appender(_stat_doc) def prod(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('prod', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanprod, axis=axis, skipna=skipna, 
numeric_only=None) - _add_stat_doc(prod, 'product', 'product', - na_action='NA/null values are treated as 1') + product = prod + @Substitution(name='median', shortname='median', na_action=_doc_exclude_na, + extras='') + @Appender(_stat_doc) def median(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('median', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna, numeric_only=None) - _add_stat_doc(median, 'median', 'median') + @Substitution(name='mean absolute deviation', shortname='mad', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def mad(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('mad', axis=axis, level=level, @@ -3122,30 +3146,35 @@ def mad(self, axis=0, skipna=True, level=None): else: demeaned = frame.sub(frame.mean(axis=1), axis=0) return np.abs(demeaned).mean(axis=axis, skipna=skipna) - _add_stat_doc(mad, 'mean absolute deviation', 'mad') + @Substitution(name='unbiased variance', shortname='var', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def var(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('var', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanvar, axis=axis, skipna=skipna, numeric_only=None) - _add_stat_doc(var, 'unbiased variance', 'var') + @Substitution(name='unbiased standard deviation', shortname='std', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def std(self, axis=0, skipna=True, level=None): if level is not None: return self._agg_by_level('std', axis=axis, level=level, skipna=skipna) return np.sqrt(self.var(axis=axis, skipna=skipna)) - _add_stat_doc(std, 'unbiased standard deviation', 'std') + @Substitution(name='unbiased skewness', shortname='skew', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def skew(self, axis=0, skipna=True, level=None): if level is not None: return 
self._agg_by_level('skew', axis=axis, level=level, skipna=skipna) return self._reduce(nanops.nanskew, axis=axis, skipna=skipna, numeric_only=None) - _add_stat_doc(skew, 'unbiased skewness', 'skew') def _agg_by_level(self, name, axis=0, level=0, skipna=True): grouped = self.groupby(level=level, axis=axis) @@ -3708,20 +3737,6 @@ def _sanitize_and_check(indexes): else: return indexes, 'array' - -def _check_data_types(data): - have_raw_arrays = False - have_series = False - for v in data.values(): - if not isinstance(v, (dict, Series)): - have_raw_arrays = True - else: - have_series = True - - is_mixed = have_series and have_raw_arrays - return have_raw_arrays, is_mixed - - def _prep_ndarray(values, copy=True): if not isinstance(values, np.ndarray): arr = np.asarray(values) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index e3f7fcb072ca8..95621c4989e8f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -9,7 +9,7 @@ from pandas.core.internals import BlockManager from pandas.core.series import Series from pandas.core.panel import Panel -from pandas.util.decorators import cache_readonly +from pandas.util.decorators import cache_readonly, Appender import pandas._tseries as lib @@ -471,6 +471,7 @@ def _generator_factory(self): axis=self.axis, factory=factory) +@Appender(GroupBy.__doc__) def groupby(obj, by, **kwds): if isinstance(obj, Series): klass = SeriesGroupBy @@ -480,7 +481,6 @@ def groupby(obj, by, **kwds): raise TypeError('invalid type: %s' % type(obj)) return klass(obj, by, **kwds) -groupby.__doc__ = GroupBy.__doc__ def _get_axes(group): if isinstance(group, Series): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 7b306d83ae33f..fcff16b6ee4be 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -15,7 +15,7 @@ from pandas.core.frame import DataFrame, _union_indexes from pandas.core.generic import NDFrame from pandas.util import py3compat -from pandas.util.decorators import deprecate +from 
pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com import pandas.core.nanops as nanops import pandas._tseries as lib @@ -96,6 +96,7 @@ def f(self, other): return f def _panel_arith_method(op, name): + @Substitution(op) def f(self, other, axis='items'): """ Wrapper method for %s @@ -113,9 +114,6 @@ def f(self, other, axis='items'): return self._combine(other, op, axis=axis) f.__name__ = name - if __debug__: - f.__doc__ = f.__doc__ % str(op) - return f @@ -140,11 +138,6 @@ def f(self, other, axis='items'): If all values are NA, result will be NA""" -def _add_docs(method, desc, outname): - doc = _agg_doc % {'desc' : desc, - 'outname' : outname} - method.__doc__ = doc - class Panel(NDFrame): _AXIS_NUMBERS = { 'items' : 0, @@ -971,45 +964,55 @@ def count(self, axis='major'): return self._wrap_result(result, axis) + @Substitution(desc='sum', outname='sum') + @Appender(_agg_doc) def sum(self, axis='major', skipna=True): return self._reduce(nanops.nansum, axis=axis, skipna=skipna) - _add_docs(sum, 'sum', 'sum') + @Substitution(desc='mean', outname='mean') + @Appender(_agg_doc) def mean(self, axis='major', skipna=True): return self._reduce(nanops.nanmean, axis=axis, skipna=skipna) - _add_docs(mean, 'mean', 'mean') + @Substitution(desc='unbiased variance', outname='variance') + @Appender(_agg_doc) def var(self, axis='major', skipna=True): return self._reduce(nanops.nanvar, axis=axis, skipna=skipna) - _add_docs(var, 'unbiased variance', 'variance') + @Substitution(desc='unbiased standard deviation', outname='stdev') + @Appender(_agg_doc) def std(self, axis='major', skipna=True): return self.var(axis=axis, skipna=skipna).apply(np.sqrt) - _add_docs(std, 'unbiased standard deviation', 'stdev') + @Substitution(desc='unbiased skewness', outname='skew') + @Appender(_agg_doc) def skew(self, axis='major', skipna=True): return self._reduce(nanops.nanskew, axis=axis, skipna=skipna) - _add_docs(std, 'unbiased skewness', 'skew') + 
@Substitution(desc='product', outname='prod') + @Appender(_agg_doc) def prod(self, axis='major', skipna=True): return self._reduce(nanops.nanprod, axis=axis, skipna=skipna) - _add_docs(prod, 'product', 'prod') + @Substitution(desc='compounded percentage', outname='compounded') + @Appender(_agg_doc) def compound(self, axis='major', skipna=True): return (1 + self).prod(axis=axis, skipna=skipna) - 1 - _add_docs(compound, 'compounded percentage', 'compounded') + @Substitution(desc='median', outname='median') + @Appender(_agg_doc) def median(self, axis='major', skipna=True): return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna) - _add_docs(median, 'median', 'median') + @Substitution(desc='maximum', outname='maximum') + @Appender(_agg_doc) def max(self, axis='major', skipna=True): return self._reduce(nanops.nanmax, axis=axis, skipna=skipna) - _add_docs(max, 'maximum', 'maximum') + @Substitution(desc='minimum', outname='minimum') + @Appender(_agg_doc) def min(self, axis='major', skipna=True): return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) - _add_docs(min, 'minimum', 'minimum') def shift(self, lags, axis='major'): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 80bc21e5d59d0..f45b17124f42a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,6 +28,8 @@ import pandas._tseries as lib import pandas._engines as _gin +from pandas.util.decorators import Appender, Substitution + __all__ = ['Series', 'TimeSeries'] #------------------------------------------------------------------------------- @@ -68,10 +70,7 @@ def _maybe_match_name(a, b): return name def _flex_method(op, name): - def f(self, other, level=None, fill_value=None): - return self._binop(other, op, level=level, fill_value=fill_value) - - f.__doc__ = """ + doc = """ Binary operator %s with support to substitute a fill_value for missing data in one of the inputs @@ -89,17 +88,22 @@ def f(self, other, level=None, fill_value=None): ------- result : Series """ % 
name + + @Appender(doc) + def f(self, other, level=None, fill_value=None): + return self._binop(other, op, level=level, fill_value=fill_value) + f.__name__ = name return f def _unbox(func): + @Appender(func.__doc__) def f(self, *args, **kwargs): result = func(self, *args, **kwargs) if isinstance(result, np.ndarray) and result.ndim == 0: return result.item() else: # pragma: no cover return result - f.__doc__ = func.__doc__ f.__name__ = func.__name__ return f @@ -123,14 +127,6 @@ def f(self, *args, **kwargs): _doc_ndarray_interface = ("Extra parameters are to preserve ndarray" "interface.\n") -def _add_stat_doc(f, name, shortname, na_action=_doc_exclude_na, - extras=''): - doc = _stat_doc % {'name' : name, - 'shortname' : shortname, - 'na_action' : na_action, - 'extras' : extras} - f.__doc__ = doc - #------------------------------------------------------------------------------- # Series class @@ -141,7 +137,11 @@ class Series(np.ndarray, generic.PandasObject): _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) - def __new__(cls, data, index=None, dtype=None, name=None, copy=False): + def __new__(cls, data=None, index=None, dtype=None, name=None, + copy=False): + if data is None: + data = {} + if isinstance(data, Series): if index is None: index = data.index @@ -169,7 +169,8 @@ def __new__(cls, data, index=None, dtype=None, name=None, copy=False): return subarr - def __init__(self, data, index=None, dtype=None, name=None, copy=False): + def __init__(self, data=None, index=None, dtype=None, name=None, + copy=False): """One-dimensional ndarray with axis labels (including time series). Labels must be unique and can any hashable type. 
The object supports both integer- and label-based indexing and provides a host of methods for @@ -460,8 +461,8 @@ def _tidy_repr(self, max_vals=20): result = '%s\n%sLength: %d' % (result, namestr, len(self)) return result - def to_string(self, buf=None, na_rep='NaN', float_format=None, nanRep=None, - length=False, name=False): + def to_string(self, buf=None, na_rep='NaN', float_format=None, + nanRep=None, length=False, name=False): if nanRep is not None: # pragma: no cover import warnings warnings.warn("nanRep is deprecated, use na_rep", @@ -705,71 +706,91 @@ def nunique(self): """ return len(self.value_counts()) + @Substitution(name='sum', shortname='sum', na_action=_doc_exclude_na, + extras=_doc_ndarray_interface) + @Appender(_stat_doc) def sum(self, axis=0, dtype=None, out=None, skipna=True, level=None): if level is not None: return self._agg_by_level('sum', level=level, skipna=skipna) return nanops.nansum(self.values, skipna=skipna, copy=True) - _add_stat_doc(sum, 'sum', 'sum', extras=_doc_ndarray_interface) + @Substitution(name='mean', shortname='mean', na_action=_doc_exclude_na, + extras=_doc_ndarray_interface) + @Appender(_stat_doc) def mean(self, axis=0, dtype=None, out=None, skipna=True, level=None): if level is not None: return self._agg_by_level('mean', level=level, skipna=skipna) return nanops.nanmean(self.values, skipna=skipna) - _add_stat_doc(mean, 'mean', 'mean', extras=_doc_ndarray_interface) + @Substitution(name='mean absolute deviation', shortname='mad', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def mad(self, skipna=True, level=None): if level is not None: return self._agg_by_level('mad', level=level, skipna=skipna) demeaned = self - self.mean(skipna=skipna) return np.abs(demeaned).mean(skipna=skipna) - _add_stat_doc(mad, 'mean absolute deviation', 'mad') + @Substitution(name='median', shortname='median', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def median(self, skipna=True, level=None): if level is not 
None: return self._agg_by_level('median', level=level, skipna=skipna) return nanops.nanmedian(self.values, skipna=skipna) - _add_stat_doc(median, 'median', 'median') + @Substitution(name='product', shortname='product', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def prod(self, axis=None, dtype=None, out=None, skipna=True, level=None): if level is not None: return self._agg_by_level('prod', level=level, skipna=skipna) return nanops.nanprod(self.values, skipna=skipna) - _add_stat_doc(prod, 'product', 'product') + @Substitution(name='minimum', shortname='min', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def min(self, axis=None, out=None, skipna=True, level=None): if level is not None: return self._agg_by_level('min', level=level, skipna=skipna) return nanops.nanmin(self.values, skipna=skipna, copy=True) - _add_stat_doc(min, 'minimum', 'min') + @Substitution(name='maximum', shortname='max', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def max(self, axis=None, out=None, skipna=True, level=None): if level is not None: return self._agg_by_level('max', level=level, skipna=skipna) return nanops.nanmax(self.values, skipna=skipna, copy=True) - _add_stat_doc(max, 'maximum', 'max') + @Substitution(name='unbiased standard deviation', shortname='stdev', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def std(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, level=None): if level is not None: return self._agg_by_level('std', level=level, skipna=skipna) return np.sqrt(nanops.nanvar(self.values, skipna=skipna, copy=True, ddof=ddof)) - _add_stat_doc(std, 'unbiased standard deviation', 'stdev') + @Substitution(name='unbiased variance', shortname='var', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def var(self, axis=None, dtype=None, out=None, ddof=1, skipna=True, level=None): if level is not None: return self._agg_by_level('var', level=level, skipna=skipna) return 
nanops.nanvar(self.values, skipna=skipna, copy=True, ddof=ddof) - _add_stat_doc(var, 'unbiased variance', 'var') + @Substitution(name='unbiased skewness', shortname='skew', + na_action=_doc_exclude_na, extras='') + @Appender(_stat_doc) def skew(self, skipna=True, level=None): if level is not None: return self._agg_by_level('skew', level=level, skipna=skipna) return nanops.nanskew(self.values, skipna=skipna, copy=True) - _add_stat_doc(skew, 'unbiased skewness', 'skew') def _agg_by_level(self, name, level=0, skipna=True): grouped = self.groupby(level=level) @@ -873,6 +894,7 @@ def cumprod(self, axis=0, dtype=None, out=None, skipna=True): return Series(result, index=self.index) + @Appender(np.ndarray.round.__doc__) def round(self, decimals=0, out=None): """ @@ -882,7 +904,6 @@ def round(self, decimals=0, out=None): result = Series(result, index=self.index, name=self.name) return result - round.__doc__ = np.ndarray.round.__doc__ def quantile(self, q=0.5): """ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 09222138d4b5a..c77f76fee25d7 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3,7 +3,6 @@ """ from StringIO import StringIO import re -import zipfile import numpy as np @@ -11,63 +10,7 @@ from pandas.core.frame import DataFrame import pandas._tseries as lib -def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None, - skiprows=None, na_values=None, parse_dates=False, - date_parser=None, nrows=None, iterator=False, chunksize=None, - skip_footer=0, converters=None): - if hasattr(filepath_or_buffer, 'read'): - f = filepath_or_buffer - else: - try: - # universal newline mode - f = open(filepath_or_buffer, 'U') - except Exception: # pragma: no cover - f = open(filepath_or_buffer, 'r') - - if date_parser is not None: - parse_dates = True - - parser = TextParser(f, header=header, index_col=index_col, - names=names, na_values=na_values, - parse_dates=parse_dates, - date_parser=date_parser, - skiprows=skiprows, - 
delimiter=sep, - chunksize=chunksize, - skip_footer=skip_footer, - converters=converters) - - if nrows is not None: - return parser.get_chunk(nrows) - elif chunksize or iterator: - return parser - - return parser.get_chunk() - - -def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None, - names=None, skiprows=None, na_values=None, parse_dates=False, - date_parser=None, nrows=None, iterator=False, chunksize=None, - skip_footer=0, converters=None): - return read_csv(filepath_or_buffer, sep=sep, header=header, - skiprows=skiprows, index_col=index_col, - na_values=na_values, date_parser=date_parser, - names=names, parse_dates=parse_dates, - nrows=nrows, iterator=iterator, chunksize=chunksize, - skip_footer=skip_footer, converters=converters) - -def read_clipboard(**kwargs): # pragma: no cover - """ - Read text from clipboard and pass to read_table. See read_table for the full - argument list - - Returns - ------- - parsed : DataFrame - """ - from pandas.util.clipboard import clipboard_get - text = clipboard_get() - return read_table(StringIO(text), **kwargs) +from pandas.util.decorators import Appender _parser_params = """Also supports optionally iterating or breaking of the file into chunks. 
@@ -116,7 +59,7 @@ def read_clipboard(**kwargs): # pragma: no cover _table_sep = """sep : string, default \\t (tab-stop) Delimiter to use""" -read_csv.__doc__ = """ +_read_csv_doc = """ Read CSV (comma-separated) file into DataFrame %s @@ -126,7 +69,17 @@ def read_clipboard(**kwargs): # pragma: no cover parsed : DataFrame """ % (_parser_params % _csv_sep) -read_table.__doc__ = """ +_read_csv_doc = """ +Read CSV (comma-separated) file into DataFrame + +%s + +Returns +------- +parsed : DataFrame +""" % (_parser_params % _csv_sep) + +_read_table_doc = """ Read delimited file into DataFrame %s @@ -136,6 +89,65 @@ def read_clipboard(**kwargs): # pragma: no cover parsed : DataFrame """ % (_parser_params % _table_sep) +@Appender(_read_csv_doc) +def read_csv(filepath_or_buffer, sep=None, header=0, index_col=None, names=None, + skiprows=None, na_values=None, parse_dates=False, + date_parser=None, nrows=None, iterator=False, chunksize=None, + skip_footer=0, converters=None): + if hasattr(filepath_or_buffer, 'read'): + f = filepath_or_buffer + else: + try: + # universal newline mode + f = open(filepath_or_buffer, 'U') + except Exception: # pragma: no cover + f = open(filepath_or_buffer, 'r') + + if date_parser is not None: + parse_dates = True + + parser = TextParser(f, header=header, index_col=index_col, + names=names, na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + skiprows=skiprows, + delimiter=sep, + chunksize=chunksize, + skip_footer=skip_footer, + converters=converters) + + if nrows is not None: + return parser.get_chunk(nrows) + elif chunksize or iterator: + return parser + + return parser.get_chunk() + +@Appender(_read_table_doc) +def read_table(filepath_or_buffer, sep='\t', header=0, index_col=None, + names=None, skiprows=None, na_values=None, parse_dates=False, + date_parser=None, nrows=None, iterator=False, chunksize=None, + skip_footer=0, converters=None): + return read_csv(filepath_or_buffer, sep=sep, header=header, + 
skiprows=skiprows, index_col=index_col, + na_values=na_values, date_parser=date_parser, + names=names, parse_dates=parse_dates, + nrows=nrows, iterator=iterator, chunksize=chunksize, + skip_footer=skip_footer, converters=converters) + +def read_clipboard(**kwargs): # pragma: no cover + """ + Read text from clipboard and pass to read_table. See read_table for the full + argument list + + Returns + ------- + parsed : DataFrame + """ + from pandas.util.clipboard import clipboard_get + text = clipboard_get() + return read_table(StringIO(text), **kwargs) + class BufferedReader(object): """ diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 7d598752a299d..e0dcf4a88fb10 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -16,6 +16,7 @@ import pandas.core.datetools as datetools from pandas.sparse.series import SparseSeries +from pandas.util.decorators import Appender class SparseDataFrame(DataFrame): """ @@ -294,11 +295,10 @@ def __getitem__(self, item): else: # pragma: no cover raise + @Appender(DataFrame.get_value.__doc__, indents=0) def get_value(self, index, col): s = self._series[col] return s.get_value(index) - if __debug__: - get_value.__doc__ = DataFrame.get_value.__doc__ def set_value(self, index, col, value): """ @@ -582,9 +582,9 @@ def transpose(self): default_kind=self.default_kind) T = property(transpose) + @Appender(DataFrame.count.__doc__) def count(self, axis=0, **kwds): return self.apply(lambda x: x.count(), axis=axis) - count.__doc__ = DataFrame.count.__doc__ def cumsum(self, axis=0): """ diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index 7d626d4471478..86267caa1fa49 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -12,6 +12,8 @@ from pandas.core.api import DataFrame, Series, notnull import pandas._tseries as _tseries +from pandas.util.decorators import Substitution, Appender + __all__ = ['rolling_count', 'rolling_max', 'rolling_min', 'rolling_sum', 'rolling_mean', 'rolling_std', 
'rolling_cov', 'rolling_corr', 'rolling_var', 'rolling_skew', 'rolling_kurt', @@ -19,6 +21,78 @@ 'rolling_corr_pairwise', 'ewma', 'ewmvar', 'ewmstd', 'ewmvol', 'ewmcorr', 'ewmcov'] +#------------------------------------------------------------------------------- +# Docs + +_doc_template = """ +%s + +Parameters +---------- +%s +window : Number of observations used for calculating statistic +min_periods : int + Minimum number of observations in window required to have a value +time_rule : {None, 'WEEKDAY', 'EOM', 'W@MON', ...}, default=None + Name of time rule to conform to before computing statistic + +Returns +------- +%s +""" + + +_ewm_doc = r"""%s + +Parameters +---------- +%s +com : float. optional + Center of mass: \alpha = com / (1 + com), +span : float, optional + Specify decay in terms of span, \alpha = 2 / (span + 1) +min_periods : int, default 0 + Number of observations in sample to require (only affects + beginning) +time_rule : {None, 'WEEKDAY', 'EOM', 'W@MON', ...}, default None + Name of time rule to conform to before computing statistic +%s +Notes +----- +Either center of mass or span must be specified + +EWMA is sometimes specified using a "span" parameter s, we have have that the +decay parameter \alpha is related to the span as :math:`\alpha = 1 - 2 / (s + 1) += c / (1 + c)` + +where c is the center of mass. Given a span, the associated center of mass is +:math:`c = (s - 1) / 2` + +So a "20-day EWMA" would have center 9.5. 
+ +Returns +------- +y : type of input argument +""" + +_type_of_input = "y : type of input argument" + +_flex_retval = """y : type depends on inputs + DataFrame / DataFrame -> DataFrame (matches on columns) + DataFrame / Series -> Computes result for each column + Series / Series -> Series""" + +_unary_arg = "arg : Series, DataFrame" + +_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray +arg2 : Series, DataFrame, or ndarray""" + +_binary_arg = """arg1 : Series, DataFrame, or ndarray +arg2 : Series, DataFrame, or ndarray""" + +_bias_doc = r"""bias : boolean, default False + Use a standard estimation bias correction +""" def rolling_count(arg, window, time_rule=None): """ Rolling count of number of non-NaN observations inside provided window. @@ -46,6 +120,8 @@ def rolling_count(arg, window, time_rule=None): return return_hook(result) +@Substitution("Unbiased moving covariance", _binary_arg_flex, _flex_retval) +@Appender(_doc_template) def rolling_cov(arg1, arg2, window, min_periods=None, time_rule=None): def _get_cov(X, Y): mean = lambda x: rolling_mean(x, window, min_periods, time_rule) @@ -54,6 +130,8 @@ def _get_cov(X, Y): return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj return _flex_binary_moment(arg1, arg2, _get_cov) +@Substitution("Moving sample correlation", _binary_arg_flex, _flex_retval) +@Appender(_doc_template) def rolling_corr(arg1, arg2, window, min_periods=None, time_rule=None): def _get_corr(a, b): num = rolling_cov(a, b, window, min_periods, time_rule) @@ -182,7 +260,8 @@ def _get_center_of_mass(com, span): return float(com) - +@Substitution("Exponentially-weighted moving average", _unary_arg, "") +@Appender(_ewm_doc) def ewma(arg, com=None, span=None, min_periods=0, time_rule=None): com = _get_center_of_mass(com, span) arg = _conv_timerule(arg, time_rule) @@ -201,6 +280,8 @@ def _first_valid_index(arr): # argmax scans from left return notnull(arr).argmax() +@Substitution("Exponentially-weighted moving variance", _unary_arg, _bias_doc) 
+@Appender(_ewm_doc) def ewmvar(arg, com=None, span=None, min_periods=0, bias=False, time_rule=None): com = _get_center_of_mass(com, span) @@ -214,6 +295,8 @@ def ewmvar(arg, com=None, span=None, min_periods=0, bias=False, return result +@Substitution("Exponentially-weighted moving std", _unary_arg, _bias_doc) +@Appender(_ewm_doc) def ewmstd(arg, com=None, span=None, min_periods=0, bias=False, time_rule=None): result = ewmvar(arg, com=com, span=span, time_rule=time_rule, @@ -222,6 +305,8 @@ def ewmstd(arg, com=None, span=None, min_periods=0, bias=False, ewmvol = ewmstd +@Substitution("Exponentially-weighted moving covariance", _binary_arg, "") +@Appender(_ewm_doc) def ewmcov(arg1, arg2, com=None, span=None, min_periods=0, bias=False, time_rule=None): X, Y = _prep_binary(arg1, arg2) @@ -238,6 +323,8 @@ def ewmcov(arg1, arg2, com=None, span=None, min_periods=0, bias=False, return result +@Substitution("Exponentially-weighted moving " "correlation", _binary_arg, "") +@Appender(_ewm_doc) def ewmcorr(arg1, arg2, com=None, span=None, min_periods=0, time_rule=None): X, Y = _prep_binary(arg1, arg2) @@ -260,95 +347,6 @@ def _prep_binary(arg1, arg2): return X, Y -#------------------------------------------------------------------------------- -# Docs - -_doc_template = """ -%s - -Parameters ----------- -%s -window : Number of observations used for calculating statistic -min_periods : int - Minimum number of observations in window required to have a value -time_rule : {None, 'WEEKDAY', 'EOM', 'W@MON', ...}, default=None - Name of time rule to conform to before computing statistic - -Returns -------- -%s -""" - - -_ewm_doc = r"""%s - -Parameters ----------- -%s -com : float. 
optional - Center of mass: \alpha = com / (1 + com), -span : float, optional - Specify decay in terms of span, \alpha = 2 / (span + 1) -min_periods : int, default 0 - Number of observations in sample to require (only affects - beginning) -time_rule : {None, 'WEEKDAY', 'EOM', 'W@MON', ...}, default None - Name of time rule to conform to before computing statistic -%s -Notes ------ -Either center of mass or span must be specified - -EWMA is sometimes specified using a "span" parameter s, we have have that the -decay parameter \alpha is related to the span as :math:`\alpha = 1 - 2 / (s + 1) -= c / (1 + c)` - -where c is the center of mass. Given a span, the associated center of mass is -:math:`c = (s - 1) / 2` - -So a "20-day EWMA" would have center 9.5. - -Returns -------- -y : type of input argument -""" - -_type_of_input = "y : type of input argument" - -_flex_retval = """y : type depends on inputs - DataFrame / DataFrame -> DataFrame (matches on columns) - DataFrame / Series -> Computes result for each column - Series / Series -> Series""" - -_unary_arg = "arg : Series, DataFrame" - -_binary_arg_flex = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray""" - -_binary_arg = """arg1 : Series, DataFrame, or ndarray -arg2 : Series, DataFrame, or ndarray""" - -_bias_doc = r"""bias : boolean, default False - Use a standard estimation bias correction -""" - -rolling_cov.__doc__ = _doc_template % ("Unbiased moving covariance", - _binary_arg_flex, _flex_retval) -rolling_corr.__doc__ = _doc_template % ("Moving sample correlation", - _binary_arg_flex, _flex_retval) - -ewma.__doc__ = _ewm_doc % ("Exponentially-weighted moving average", - _unary_arg, "") -ewmstd.__doc__ = _ewm_doc % ("Exponentially-weighted moving std", - _unary_arg, _bias_doc) -ewmvar.__doc__ = _ewm_doc % ("Exponentially-weighted moving variance", - _unary_arg, _bias_doc) -ewmcorr.__doc__ = _ewm_doc % ("Exponentially-weighted moving " - "correlation", _binary_arg, "") -ewmcov.__doc__ 
= _ewm_doc % ("Exponentially-weighted moving covariance", - _binary_arg, "") - #------------------------------------------------------------------------------- # Python interface to Cython functions @@ -375,6 +373,8 @@ def _use_window(minp, window): return minp def _rolling_func(func, desc, check_minp=_use_window): + @Substitution(desc, _unary_arg, _type_of_input) + @Appender(_doc_template) @wraps(func) def f(arg, window, min_periods=None, time_rule=None): def call_cython(arg, window, minp): @@ -383,8 +383,6 @@ def call_cython(arg, window, minp): return _rolling_moment(arg, window, call_cython, min_periods, time_rule=time_rule) - f.__doc__ = _doc_template % (desc, _unary_arg, _type_of_input) - return f rolling_max = _rolling_func(_tseries.roll_max, 'Moving maximum') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ea8004c797e7a..48eba1886ad60 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1636,7 +1636,7 @@ def test_repr_tuples(self): df = DataFrame({'tups' : zip(range(10), range(10))}) repr(df) - df.to_string(colSpace=10, buf=buf) + df.to_string(col_space=10, buf=buf) def test_to_string_unicode(self): buf = StringIO() @@ -1644,7 +1644,7 @@ def test_to_string_unicode(self): unicode_values = [u'\u03c3'] * 10 unicode_values = np.array(unicode_values, dtype=object) df = DataFrame({'unicode' : unicode_values}) - df.to_string(colSpace=10, buf=buf) + df.to_string(col_space=10, buf=buf) def test_to_string_unicode_columns(self): df = DataFrame({u'\u03c3' : np.arange(10.)}) @@ -1672,7 +1672,7 @@ def test_to_string(self): # big mixed biggie = DataFrame({'A' : randn(1000), - 'B' : tm.makeStringIndex(1000)}, + 'B' : tm.makeStringIndex(1000)}, index=range(1000)) biggie['A'][:20] = nan @@ -1687,7 +1687,7 @@ def test_to_string(self): self.assert_(isinstance(s, basestring)) # print in right order - result = biggie.to_string(columns=['B', 'A'], colSpace=17, + result = biggie.to_string(columns=['B', 'A'], col_space=17, 
float_format='%.6f'.__mod__) lines = result.split('\n') header = lines[0].strip().split() @@ -1701,7 +1701,7 @@ def test_to_string(self): # expected = ['B', 'A'] # self.assertEqual(header, expected) - result = biggie.to_string(columns=['A'], colSpace=17) + result = biggie.to_string(columns=['A'], col_space=17) header = result.split('\n')[0].strip().split() expected = ['A'] self.assertEqual(header, expected) @@ -1710,12 +1710,31 @@ def test_to_string(self): formatters={'A' : lambda x: '%.1f' % x}) biggie.to_string(columns=['B', 'A'], float_format=str) - biggie.to_string(columns=['B', 'A'], colSpace=12, + biggie.to_string(columns=['B', 'A'], col_space=12, float_format=str) frame = DataFrame(index=np.arange(1000)) frame.to_string() + def test_to_string_no_header(self): + df = DataFrame({'x' : [1, 2, 3], + 'y' : [4, 5, 6]}) + + df_s = df.to_string(header=False) + expected = "0 1 4\n1 2 5\n2 3 6" + + assert(df_s == expected) + + def test_to_string_no_index(self): + df = DataFrame({'x' : [1, 2, 3], + 'y' : [4, 5, 6]}) + + df_s = df.to_string(index=False) + expected = " x y\n 1 4\n 2 5\n 3 6" + + assert(df_s == expected) + + def test_to_html(self): # big mixed biggie = DataFrame({'A' : randn(1000), @@ -1733,12 +1752,12 @@ def test_to_html(self): self.assert_(isinstance(s, basestring)) - biggie.to_html(columns=['B', 'A'], colSpace=17) + biggie.to_html(columns=['B', 'A'], col_space=17) biggie.to_html(columns=['B', 'A'], formatters={'A' : lambda x: '%.1f' % x}) biggie.to_html(columns=['B', 'A'], float_format=str) - biggie.to_html(columns=['B', 'A'], colSpace=12, + biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) frame = DataFrame(index=np.arange(1000)) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 06db05c31ae06..a94c524f3b1e4 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -9,11 +9,14 @@ from pandas.core.index import Index, MultiIndex from pandas.core.internals import (IntBlock, BoolBlock, BlockManager, make_block, 
_consolidate) -from pandas.util.decorators import cache_readonly +from pandas.util.decorators import cache_readonly, Appender, Substitution + import pandas.core.common as com import pandas._tseries as lib +@Substitution('\nleft : DataFrame') +@Appender(_merge_doc, indents=0) def merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=True, suffixes=('.x', '.y'), copy=True): @@ -22,7 +25,6 @@ def merge(left, right, how='inner', on=None, left_on=None, right_on=None, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy) return op.get_result() -if __debug__: merge.__doc__ = _merge_doc % '\nleft : DataFrame' # TODO: NA group handling # TODO: transformations?? diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index cbca3763de0b0..4c00427f0e75a 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -9,3 +9,94 @@ def wrapper(*args, **kwargs): return alternative(*args, **kwargs) return wrapper +# Substitution and Appender are derived from matplotlib.docstring (1.1.0) +# module http://matplotlib.sourceforge.net/users/license.html + +class Substitution(object): + """ + A decorator to take a function's docstring and perform string + substitution on it. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter) + + Usage: construct a docstring.Substitution with a sequence or + dictionary suitable for performing substitution; then + decorate a suitable function with the constructed object. e.g. + + sub_author_name = Substitution(author='Jason') + + @sub_author_name + def some_function(x): + "%(author)s wrote this function" + + # note that some_function.__doc__ is now "Jason wrote this function" + + One can also use positional arguments. 
+ + sub_first_last_names = Substitution('Edgar Allen', 'Poe') + + @sub_first_last_names + def some_function(x): + "%s %s wrote the Raven" + """ + def __init__(self, *args, **kwargs): + assert not (args and kwargs), "Only positional or keyword args are allowed" + self.params = args or kwargs + + def __call__(self, func): + func.__doc__ = func.__doc__ and func.__doc__ % self.params + return func + + def update(self, *args, **kwargs): + "Assume self.params is a dict and update it with supplied args" + self.params.update(*args, **kwargs) + + @classmethod + def from_params(cls, params): + """ + In the case where the params is a mutable sequence (list or dictionary) + and it may change before this class is called, one may explicitly use a + reference to the params rather than using *args or **kwargs which will + copy the values and not reference them. + """ + result = cls() + result.params = params + return result + +class Appender(object): + """ + A function decorator that will append an addendum to the docstring + of the target function. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter). + + Usage: construct a docstring.Appender with a string to be joined to + the original docstring. An optional 'join' parameter may be supplied + which will be used to join the docstring and addendum. e.g. 
+ + add_copyright = Appender("Copyright (c) 2009", join='\n') + + @add_copyright + def my_dog(has='fleas'): + "This docstring will have a copyright below" + pass + """ + def __init__(self, addendum, join='', indents=0): + if indents > 0: + self.addendum = indent(addendum, indents=indents) + else: + self.addendum = addendum + self.join = join + + def __call__(self, func): + docitems = [func.__doc__ if func.__doc__ else '', self.addendum] + func.__doc__ = ''.join(docitems) + return func + +def indent(text, indents=1): + if not text or type(text) != str: + return '' + jointext = ''.join(['\n'] + [' '] * indents) + return jointext.join(text.split('\n'))