diff --git a/doc/source/release.rst b/doc/source/release.rst index 691c7312dde72..facf753ced9a0 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -175,8 +175,18 @@ pandas 0.12 ``bs4`` + ``html5lib`` when lxml fails to parse. a list of parsers to try until success is also valid - more consistency in the to_datetime return types (give string/array of string inputs) (:issue:`3888`) + - The internal ``pandas`` class hierarchy has changed (slightly). The + previous ``PandasObject`` now is called ``PandasContainer`` and a new + ``PandasObject`` has become the baseclass for ``PandasContainer`` as well + as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and + ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` + provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) + - New ``StringMixin`` that, given a ``__unicode__`` method, gets python 2 and + python 3 compatible string methods (``__str__``, ``__bytes__``, and + ``__repr__``). Plus string safety throughout. Now employed in many places + throughout the pandas library. (:issue:`4090`, :issue:`4092`) -**Experimental Feautres** +**Experimental Features** - Added experimental ``CustomBusinessDay`` class to support ``DateOffsets`` with custom holiday calendars and custom weekmasks. (:issue:`2301`) diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt index 60086e1c49ae7..f8836b4532493 100644 --- a/doc/source/v0.12.0.txt +++ b/doc/source/v0.12.0.txt @@ -8,13 +8,13 @@ enhancements along with a large number of bug fixes. Highlites include a consistent I/O API naming scheme, routines to read html, write multi-indexes to csv files, read & write STATA data files, read & write JSON format -files, Python 3 support for ``HDFStore``, filtering of groupby expressions via ``filter``, and a +files, Python 3 support for ``HDFStore``, filtering of groupby expressions via ``filter``, and a revamped ``replace`` routine that accepts regular expressions. API changes ~~~~~~~~~~~ - - The I/O API is now much more consistent with a set of top level ``reader`` functions + - The I/O API is now much more consistent with a set of top level ``reader`` functions accessed like ``pd.read_csv()`` that generally return a ``pandas`` object. * ``read_csv`` @@ -38,7 +38,7 @@ API changes * ``to_clipboard`` - - Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return + - Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return ``np.nan`` or ``np.inf`` as appropriate (:issue:`3590`). This correct a numpy bug that treats ``integer`` and ``float`` dtypes differently. @@ -50,15 +50,15 @@ API changes p / p p / 0 - - Add ``squeeze`` keyword to ``groupby`` to allow reduction from + - Add ``squeeze`` keyword to ``groupby`` to allow reduction from DataFrame -> Series if groups are unique. This is a Regression from 0.10.1. - We are reverting back to the prior behavior. This means groupby will return the - same shaped objects whether the groups are unique or not. Revert this issue (:issue:`2893`) + We are reverting back to the prior behavior. This means groupby will return the + same shaped objects whether the groups are unique or not. Revert this issue (:issue:`2893`) with (:issue:`3596`). .. ipython:: python - df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, + df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":1, "val2": 27}, {"val1":1, "val2": 12}]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -96,9 +96,9 @@ API changes and thus you should cast to an appropriate numeric dtype if you need to plot something. - - Add ``colormap`` keyword to DataFrame plotting methods. Accepts either a - matplotlib colormap object (ie, matplotlib.cm.jet) or a string name of such - an object (ie, 'jet'). The colormap is sampled to select the color for each + - Add ``colormap`` keyword to DataFrame plotting methods. Accepts either a + matplotlib colormap object (ie, matplotlib.cm.jet) or a string name of such + an object (ie, 'jet'). The colormap is sampled to select the color for each column. Please see :ref:`visualization.colormaps` for more information. (:issue:`3860`) @@ -159,6 +159,18 @@ API changes ``bs4`` + ``html5lib`` when lxml fails to parse. a list of parsers to try until success is also valid + - The internal ``pandas`` class hierarchy has changed (slightly). The + previous ``PandasObject`` now is called ``PandasContainer`` and a new + ``PandasObject`` has become the baseclass for ``PandasContainer`` as well + as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and + ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` + provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) + + - New ``StringMixin`` that, given a ``__unicode__`` method, gets python 2 and + python 3 compatible string methods (``__str__``, ``__bytes__``, and + ``__repr__``). Plus string safety throughout. Now employed in many places + throughout the pandas library. (:issue:`4090`, :issue:`4092`) + I/O Enhancements ~~~~~~~~~~~~~~~~ @@ -184,7 +196,7 @@ I/O Enhancements .. warning:: - You may have to install an older version of BeautifulSoup4, + You may have to install an older version of BeautifulSoup4, :ref:`See the installation docs` - Added module for reading and writing Stata files: ``pandas.io.stata`` (:issue:`1512`) @@ -203,15 +215,15 @@ I/O Enhancements - The option, ``tupleize_cols`` can now be specified in both ``to_csv`` and ``read_csv``, to provide compatiblity for the pre 0.12 behavior of writing and reading multi-index columns via a list of tuples. The default in - 0.12 is to write lists of tuples and *not* interpret list of tuples as a - multi-index column. + 0.12 is to write lists of tuples and *not* interpret list of tuples as a + multi-index column. Note: The default behavior in 0.12 remains unchanged, but starting with 0.13, - the default *to* write and read multi-index columns will be in the new + the default *to* write and read multi-index columns will be in the new format. (:issue:`3571`, :issue:`1651`, :issue:`3141`) - If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it - with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will + with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will be *lost*. .. ipython:: python @@ -296,8 +308,8 @@ Other Enhancements pd.get_option('a.b') pd.get_option('b.c') - - The ``filter`` method for group objects returns a subset of the original - object. Suppose we want to take only elements that belong to groups with a + - The ``filter`` method for group objects returns a subset of the original + object. Suppose we want to take only elements that belong to groups with a group sum greater than 2. .. ipython:: python @@ -317,7 +329,7 @@ Other Enhancements dff.groupby('B').filter(lambda x: len(x) > 2) Alternatively, instead of dropping the offending groups, we can return a - like-indexed objects where the groups that do not pass the filter are + like-indexed objects where the groups that do not pass the filter are filled with NaNs. .. ipython:: python @@ -333,9 +345,9 @@ Experimental Features - Added experimental ``CustomBusinessDay`` class to support ``DateOffsets`` with custom holiday calendars and custom weekmasks. (:issue:`2301`) - + .. note:: - + This uses the ``numpy.busdaycalendar`` API introduced in Numpy 1.7 and therefore requires Numpy 1.7.0 or newer. @@ -416,7 +428,7 @@ Bug Fixes - Extend ``reindex`` to correctly deal with non-unique indices (:issue:`3679`) - ``DataFrame.itertuples()`` now works with frames with duplicate column names (:issue:`3873`) - - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to + - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to ``reindex`` for location-based taking - ``DataFrame.from_records`` did not accept empty recarrays (:issue:`3682`) diff --git a/pandas/core/base.py b/pandas/core/base.py new file mode 100644 index 0000000000000..6122e78fa8bce --- /dev/null +++ b/pandas/core/base.py @@ -0,0 +1,58 @@ +""" +Base class(es) for all pandas objects. +""" +from pandas.util import py3compat + +class StringMixin(object): + """implements string methods so long as object defines a `__unicode__` method. + Handles Python2/3 compatibility transparently.""" + # side note - this could be made into a metaclass if more than one object nees + def __str__(self): + """ + Return a string representation for a particular object. + + Invoked by str(obj) in both py2/py3. + Yields Bytestring in Py2, Unicode String in py3. + """ + + if py3compat.PY3: + return self.__unicode__() + return self.__bytes__() + + def __bytes__(self): + """ + Return a string representation for a particular object. + + Invoked by bytes(obj) in py3 only. + Yields a bytestring in both py2/py3. + """ + from pandas.core.config import get_option + + encoding = get_option("display.encoding") + return self.__unicode__().encode(encoding, 'replace') + + def __repr__(self): + """ + Return a string representation for a particular object. + + Yields Bytestring in Py2, Unicode String in py3. + """ + return str(self) + +class PandasObject(StringMixin): + """baseclass for various pandas objects""" + + @property + def _constructor(self): + """class constructor (for this class it's just `__class__`""" + return self.__class__ + + def __unicode__(self): + """ + Return a string representation for a particular object. + + Invoked by unicode(obj) in py2 only. Yields a Unicode String in both + py2/py3. + """ + # Should be overwritten by base classes + return object.__repr__(self) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 916bb2deb417e..b25a027adedd9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -3,6 +3,7 @@ import numpy as np from pandas.core.algorithms import factorize +from pandas.core.base import PandasObject from pandas.core.index import Index import pandas.core.common as com from pandas.core.frame import DataFrame @@ -25,8 +26,7 @@ def f(self, other): return f - -class Categorical(object): +class Categorical(PandasObject): """ Represents a categorical variable in classic R / S-plus fashion @@ -134,9 +134,9 @@ def __array__(self, dtype=None): def __len__(self): return len(self.labels) - def __repr__(self): + def __unicode__(self): temp = 'Categorical: %s\n%s\n%s' - values = np.asarray(self) + values = com.pprint_thing(np.asarray(self)) levheader = 'Levels (%d): ' % len(self.levels) levstring = np.array_repr(self.levels, max_line_width=60) @@ -145,9 +145,9 @@ def __repr__(self): lines = levstring.split('\n') levstring = '\n'.join([lines[0]] + [indent + x.lstrip() for x in lines[1:]]) + name = '' if self.name is None else self.name + return temp % (name, values, levheader + levstring) - return temp % ('' if self.name is None else self.name, - repr(values), levheader + levstring) def __getitem__(self, key): if isinstance(key, (int, np.integer)): diff --git a/pandas/core/common.py b/pandas/core/common.py index 96c567cbb6348..ddacb98a2ddf3 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -64,10 +64,10 @@ def _isnull_new(obj): if lib.isscalar(obj): return lib.checknull(obj) - from pandas.core.generic import PandasObject + from pandas.core.generic import PandasContainer if isinstance(obj, np.ndarray): return _isnull_ndarraylike(obj) - elif isinstance(obj, PandasObject): + elif isinstance(obj, PandasContainer): # TODO: optimize for DataFrame, etc. return obj.apply(isnull) elif isinstance(obj, list) or hasattr(obj, '__array__'): @@ -91,10 +91,10 @@ def _isnull_old(obj): if lib.isscalar(obj): return lib.checknull_old(obj) - from pandas.core.generic import PandasObject + from pandas.core.generic import PandasContainer if isinstance(obj, np.ndarray): return _isnull_ndarraylike_old(obj) - elif isinstance(obj, PandasObject): + elif isinstance(obj, PandasContainer): # TODO: optimize for DataFrame, etc. return obj.apply(_isnull_old) elif isinstance(obj, list) or hasattr(obj, '__array__'): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c5108f747e44..5fe2d60993f2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -584,10 +584,6 @@ def _verbose_info(self, value): def axes(self): return [self.index, self.columns] - @property - def _constructor(self): - return self.__class__ - @property def shape(self): return (len(self.index), len(self.columns)) @@ -653,28 +649,6 @@ def _repr_fits_horizontal_(self,ignore_width=False): return repr_width < width - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -714,14 +688,6 @@ def __unicode__(self): return value - def __repr__(self): - """ - Return a string representation for a particular DataFrame - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def _repr_html_(self): """ Return a html representation for a particular DataFrame. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c41b02fdd7b22..6be5f456b50e6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,20 +1,21 @@ # pylint: disable=W0231,E1101 import numpy as np +import pandas.lib as lib +from pandas.core.base import PandasObject from pandas.core.index import MultiIndex import pandas.core.indexing as indexing from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com -import pandas.lib as lib class PandasError(Exception): pass -class PandasObject(object): +class PandasContainer(PandasObject): _AXIS_NUMBERS = { 'index': 0, @@ -52,6 +53,12 @@ def __hash__(self): raise TypeError('{0!r} objects are mutable, thus they cannot be' ' hashed'.format(self.__class__.__name__)) + def __unicode__(self): + # unicode representation based upon iterating over self + # (since, by definition, `PandasContainers` are iterable) + prepr = '[%s]' % ','.join(map(com.pprint_thing, self)) + return '%s(%s)' % (self.__class__.__name__, prepr) + #---------------------------------------------------------------------- # Axis name business @@ -578,9 +585,10 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', # install the indexerse for _name, _indexer in indexing.get_indexers_list(): - PandasObject._create_indexer(_name,_indexer) + PandasContainer._create_indexer(_name,_indexer) + -class NDFrame(PandasObject): +class NDFrame(PandasContainer): """ N-dimensional analogue of DataFrame. Store multi-dimensional in a size-mutable, labeled data structure @@ -625,17 +633,10 @@ def astype(self, dtype, copy = True, raise_on_error = True): mgr = self._data.astype(dtype, copy = copy, raise_on_error = raise_on_error) return self._constructor(mgr) - @property - def _constructor(self): - return NDFrame - @property def axes(self): return self._data.axes - def __repr__(self): - return 'NDFrame' - @property def values(self): return self._data.as_matrix() diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9bd7923f6ec14..cc0a2b7589bb6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2,6 +2,7 @@ import types import numpy as np +from pandas.core.base import PandasObject from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame @@ -100,7 +101,7 @@ def _last(x): return _last(x) -class GroupBy(object): +class GroupBy(PandasObject): """ Class for grouping and aggregating relational data. See aggregate, transform, and apply functions on this object. @@ -201,6 +202,10 @@ def __init__(self, obj, keys=None, axis=0, level=None, def __len__(self): return len(self.indices) + def __unicode__(self): + # TODO: Better unicode/repr for GroupBy object + return object.__repr__(self) + @property def groups(self): return self.grouper.groups diff --git a/pandas/core/index.py b/pandas/core/index.py index 43b172c6ecde9..a3aa0804bcfe2 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -9,6 +9,7 @@ import pandas.algos as _algos import pandas.index as _index from pandas.lib import Timestamp +from pandas.core.base import PandasObject from pandas.util.decorators import cache_readonly from pandas.core.common import isnull @@ -47,7 +48,7 @@ def _shouldbe_timestamp(obj): or tslib.is_timestamp_array(obj)) -class Index(np.ndarray): +class Index(PandasObject, np.ndarray): """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -142,28 +143,6 @@ def __array_finalize__(self, obj): def _shallow_copy(self): return self.view() - def __str__(self): - """ - Return a string representation for a particular Index - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Index - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Index @@ -173,14 +152,6 @@ def __unicode__(self): prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'),quote_strings=True) return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype) - def __repr__(self): - """ - Return a string representation for a particular Index - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def to_series(self): """ return a series with both index and values equal to the index keys @@ -237,10 +208,6 @@ def _set_names(self, values): names = property(fset=_set_names, fget=_get_names) - @property - def _constructor(self): - return Index - @property def _has_complex_internals(self): # to disable groupby tricks in MultiIndex @@ -1408,10 +1375,6 @@ def __new__(cls, data, dtype=None, copy=False, name=None): def inferred_type(self): return 'integer' - @property - def _constructor(self): - return Int64Index - @property def asi8(self): # do not cache or you'll create a memory leak @@ -1531,28 +1494,6 @@ def _array_values(self): def dtype(self): return np.dtype('O') - def __str__(self): - """ - Return a string representation for a particular Index - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Index - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Index @@ -1566,14 +1507,6 @@ def __unicode__(self): return output % summary - def __repr__(self): - """ - Return a string representation for a particular Index - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def __len__(self): return len(self.labels[0]) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7a6a13da302d1..57be20a50f7bc 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4,6 +4,7 @@ from numpy import nan import numpy as np +from pandas.core.base import PandasObject from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE) @@ -19,7 +20,7 @@ from pandas.util import py3compat -class Block(object): +class Block(PandasObject): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas data structure @@ -91,14 +92,12 @@ def set_ref_items(self, ref_items, maybe_rename=True): self.items = ref_items.take(self.ref_locs) self.ref_items = ref_items - def __repr__(self): + def __unicode__(self): shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) name = type(self).__name__ result = '%s: %s, %s, dtype %s' % ( name, com.pprint_thing(self.items), shape, self.dtype) - if py3compat.PY3: - return unicode(result) - return com.console_encode(result) + return result def __contains__(self, item): return item in self.items @@ -969,7 +968,7 @@ def make_block(values, items, ref_items, klass=None, fastpath=False, placement=N # TODO: flexible with index=None and/or items=None -class BlockManager(object): +class BlockManager(PandasObject): """ Core internal data structure to implement DataFrame @@ -1213,7 +1212,7 @@ def __setstate__(self, state): def __len__(self): return len(self.items) - def __repr__(self): + def __unicode__(self): output = 'BlockManager' for i, ax in enumerate(self.axes): if i == 0: @@ -1222,7 +1221,7 @@ def __repr__(self): output += '\nAxis %d: %s' % (i, ax) for block in self.blocks: - output += '\n%s' % repr(block) + output += '\n%s' % com.pprint_thing(block) return output @property diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 92f69a7444aab..d33f7144c27b0 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -186,10 +186,6 @@ class Panel(NDFrame): major_axis = lib.AxisProperty(1) minor_axis = lib.AxisProperty(2) - @property - def _constructor(self): - return type(self) - # return the type of the slice constructor _constructor_sliced = DataFrame @@ -466,28 +462,6 @@ def __invert__(self): #---------------------------------------------------------------------- # Magic methods - def __str__(self): - """ - Return a string representation for a particular Panel - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Panel - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Panel @@ -515,14 +489,6 @@ def axis_pretty(a): [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) return output - def __repr__(self): - """ - Return a string representation for a particular Panel - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def __iter__(self): return iter(getattr(self, self._info_axis)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7684acfe85470..5ea029b414fef 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -394,8 +394,7 @@ def f(self, axis=0, dtype=None, out=None, skipna=True, level=None): #---------------------------------------------------------------------- # Series class - -class Series(pa.Array, generic.PandasObject): +class Series(generic.PandasContainer, pa.Array): """ One-dimensional ndarray with axis labels (including time series). Labels need not be unique but must be any hashable type. The object @@ -520,10 +519,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None, copy=False): pass - @property - def _constructor(self): - return Series - @property def _can_hold_na(self): return not is_integer_dtype(self.dtype) @@ -1096,28 +1091,6 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): return df.reset_index(level=level, drop=drop) - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -1142,14 +1115,6 @@ def __unicode__(self): raise AssertionError() return result - def __repr__(self): - """ - Return a string representation for a particular Series - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def _tidy_repr(self, max_vals=20): """ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 95702847d9c7f..f61db447f2cfc 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -73,9 +73,6 @@ def __init__(self, path_or_buf, kind=None, **kwds): data = path_or_buf.read() self.book = xlrd.open_workbook(file_contents=data) - def __repr__(self): - return object.__repr__(self) - def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5bf309edffa74..fdb86c43b7160 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -17,7 +17,8 @@ from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.core.common import adjoin, is_list_like +from pandas.core.base import StringMixin +from pandas.core.common import adjoin, is_list_like, pprint_thing from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical from pandas.core.common import _asarray_tuplesafe @@ -218,7 +219,7 @@ def read_hdf(path_or_buf, key, **kwargs): # a passed store; user controls open/close f(path_or_buf, False) -class HDFStore(object): +class HDFStore(StringMixin): """ dict-like IO interface for storing pandas objects in PyTables format. @@ -315,8 +316,8 @@ def __contains__(self, key): def __len__(self): return len(self.groups()) - def __repr__(self): - output = '%s\nFile path: %s\n' % (type(self), self._path) + def __unicode__(self): + output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) if len(self.keys()): keys = [] @@ -326,11 +327,11 @@ def __repr__(self): try: s = self.get_storer(k) if s is not None: - keys.append(str(s.pathname or k)) - values.append(str(s or 'invalid_HDFStore node')) - except (Exception), detail: + keys.append(pprint_thing(s.pathname or k)) + values.append(pprint_thing(s or 'invalid_HDFStore node')) + except Exception as detail: keys.append(k) - values.append("[invalid_HDFStore node: %s]" % str(detail)) + values.append("[invalid_HDFStore node: %s]" % pprint_thing(detail)) output += adjoin(12, keys, values) else: @@ -984,7 +985,7 @@ def get_values(self): self.close() return results -class IndexCol(object): +class IndexCol(StringMixin): """ an index column description class Parameters @@ -1050,10 +1051,9 @@ def set_table(self, table): self.table = table return self - def __repr__(self): - return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % (self.name, self.cname, self.axis, self.pos, self.kind) - - __str__ = __repr__ + def __unicode__(self): + temp = tuple(map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind))) + return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % temp def __eq__(self, other): """ compare 2 col items """ @@ -1570,7 +1570,7 @@ class GenericDataIndexableCol(DataIndexableCol): def get_attr(self): pass -class Storer(object): +class Storer(StringMixin): """ represent an object in my store facilitate read/write of various types of objects this is an abstract base class @@ -1610,19 +1610,16 @@ def set_version(self): def pandas_type(self): return _ensure_decoded(getattr(self.group._v_attrs, 'pandas_type', None)) - def __repr__(self): - """ return a pretty representatgion of myself """ + def __unicode__(self): + """ return a pretty representation of myself """ self.infer_axes() s = self.shape if s is not None: if isinstance(s, (list,tuple)): - s = "[%s]" % ','.join([ str(x) for x in s ]) + s = "[%s]" % ','.join([pprint_thing(x) for x in s]) return "%-12.12s (shape->%s)" % (self.pandas_type,s) return self.pandas_type - def __str__(self): - return self.__repr__() - def set_object_info(self): """ set my pandas type & version """ self.attrs.pandas_type = self.pandas_kind @@ -3435,7 +3432,7 @@ def _need_convert(kind): return True return False -class Term(object): +class Term(StringMixin): """create a term object that holds a field, op, and value Parameters @@ -3540,10 +3537,9 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): if len(self.q): self.eval() - def __str__(self): - return "field->%s,op->%s,value->%s" % (self.field, self.op, self.value) - - __repr__ = __str__ + def __unicode__(self): + attrs = map(pprint_thing, (self.field, self.op, self.value)) + return "field->%s,op->%s,value->%s" % tuple(attrs) @property def is_valid(self): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 632e97c24721f..603924ac6a292 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -15,6 +15,7 @@ import sys import struct +from pandas.core.base import StringMixin from pandas.core.frame import DataFrame from pandas.core.series import Series from pandas.core.categorical import Categorical @@ -163,7 +164,7 @@ def _datetime_to_stata_elapsed(date, fmt): raise ValueError("fmt %s not understood" % fmt) -class StataMissingValue(object): +class StataMissingValue(StringMixin): """ An observation's missing value. @@ -192,10 +193,12 @@ def __init__(self, offset, value): string = property(lambda self: self._str, doc="The Stata representation of the missing value: '.', '.a'..'.z'") value = property(lambda self: self._value, doc='The binary representation of the missing value.') - def __str__(self): - return self._str + def __unicode__(self): + return self.string - __str__.__doc__ = string.__doc__ + def __repr__(self): + # not perfect :-/ + return "%s(%s)" % (self.__class__, self) class StataParser(object): diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 997229487e1b9..e57eacc80647f 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -33,7 +33,7 @@ _mixed_frame = _frame.copy() -class TestPandasObjects(unittest.TestCase): +class TestPandasContainer(unittest.TestCase): def setUp(self): self.ts = tm.makeTimeSeries() @@ -68,7 +68,7 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_ if type(detail) == raise_ok: return raise - + unser = unser.sort() if dtype is False: @@ -104,7 +104,7 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None): _check_orient(df, "split", dtype=dtype) _check_orient(df, "index", dtype=dtype) _check_orient(df, "values", dtype=dtype) - + _check_orient(df, "columns", dtype=dtype, convert_axes=False) _check_orient(df, "records", dtype=dtype, convert_axes=False) _check_orient(df, "split", dtype=dtype, convert_axes=False) @@ -347,7 +347,7 @@ def test_convert_dates(self): assert_series_equal(result,ts) def test_date_format(self): - + df = self.tsframe.copy() df['date'] = Timestamp('20130101') df_orig = df.copy() @@ -412,7 +412,7 @@ def test_misc_example(self): @network @slow def test_round_trip_exception_(self): - # GH 3867 + # GH 3867 df = pd.read_csv('https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv') s = df.to_json() @@ -429,9 +429,9 @@ def test_url(self): result = read_json(url,convert_dates=True) for c in ['created_at','closed_at','updated_at']: self.assert_(result[c].dtype == 'datetime64[ns]') - + url = 'http://search.twitter.com/search.json?q=pandas%20python' result = read_json(url) - + except urllib2.URLError: raise nose.SkipTest diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index de510aa155412..48fa9caa0a05c 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -8,6 +8,7 @@ import numpy as np import operator +from pandas.core.base import PandasObject import pandas.core.common as com from pandas.util import py3compat @@ -86,8 +87,7 @@ def _sparse_fillop(this, other, name): return result, result_index - -class SparseArray(np.ndarray): +class SparseArray(PandasObject, np.ndarray): """Data structure for labeled, sparse floating point data Parameters @@ -184,9 +184,9 @@ def __setstate__(self, state): def __len__(self): return self.sp_index.length - def __repr__(self): - return '%s\n%s' % (np.ndarray.__repr__(self), - repr(self.sp_index)) + def __unicode__(self): + return '%s\n%s' % (com.pprint_thing(self), + com.pprint_thing(self.sp_index)) # Arithmetic operators diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py index 9f59b9108a6b0..ceb03eae5d282 100644 --- a/pandas/sparse/list.py +++ b/pandas/sparse/list.py @@ -1,10 +1,12 @@ import numpy as np +from pandas.core.base import PandasObject +from pandas.core.common import pprint_thing from pandas.sparse.array import SparseArray import pandas._sparse as splib -class SparseList(object): +class SparseList(PandasObject): """ Data structure for accumulating data to be converted into a SparseArray. Has similar API to the standard Python list @@ -21,9 +23,9 @@ def __init__(self, data=None, fill_value=np.nan): if data is not None: self.append(data) - def __repr__(self): + def __unicode__(self): contents = '\n'.join(repr(c) for c in self._chunks) - return '%s\n%s' % (object.__repr__(self), contents) + return '%s\n%s' % (object.__repr__(self), pprint_thing(contents)) def __len__(self): return sum(len(c) for c in self._chunks) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 1b8d3541da289..802808954c8f4 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -241,8 +241,9 @@ def __setstate__(self, state): def __len__(self): return self.sp_index.length - def __repr__(self): - series_rep = Series.__repr__(self) + def __unicode__(self): + # currently, unicode is same as repr...fixes infinite loop + series_rep = Series.__unicode__(self) rep = '%s\n%s' % (series_rep, repr(self.sp_index)) return rep diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index b75029c615735..967199c0bcf69 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -1,3 +1,4 @@ +from pandas.core.base import StringMixin from pandas.util.py3compat import StringIO import numpy as np @@ -26,7 +27,7 @@ def fama_macbeth(**kwargs): return klass(**kwargs) -class FamaMacBeth(object): +class FamaMacBeth(StringMixin): def __init__(self, y, x, intercept=True, nw_lags=None, nw_lags_beta=None, entity_effects=False, time_effects=False, x_effects=None, @@ -114,7 +115,7 @@ def _coef_table(self): return buffer.getvalue() - def __repr__(self): + def __unicode__(self): return self.summary @cache_readonly diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index cdcf1ab2ab036..742d832a923d8 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -10,6 +10,7 @@ import numpy as np from pandas.core.api import DataFrame, Series, isnull +from pandas.core.base import StringMixin from pandas.core.common import _ensure_float64 from pandas.core.index import MultiIndex from pandas.core.panel import Panel @@ -22,7 +23,7 @@ _FP_ERR = 1e-8 -class OLS(object): +class OLS(StringMixin): """ Runs a full sample ordinary least squares regression. @@ -581,7 +582,7 @@ def summary(self): return template % params - def __repr__(self): + def __unicode__(self): return self.summary @cache_readonly diff --git a/pandas/stats/var.py b/pandas/stats/var.py index e993b60e18a39..8953f7badfefb 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,7 +1,7 @@ from __future__ import division import numpy as np - +from pandas.core.base import StringMixin from pandas.util.decorators import cache_readonly from pandas.core.frame import DataFrame from pandas.core.panel import Panel @@ -11,7 +11,7 @@ from pandas.stats.ols import _combine_rhs -class VAR(object): +class VAR(StringMixin): """ Estimates VAR(p) regression on multivariate time series data presented in pandas data structures. @@ -477,7 +477,7 @@ def _sigma(self): return np.dot(resid, resid.T) / (n - k) - def __repr__(self): + def __unicode__(self): return self.summary diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 56df301b5b027..7fdb6d9d2603d 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -488,7 +488,7 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return tslib.ints_to_pydatetime(self.asi8, self.tz) - def __repr__(self): + def __unicode__(self): from pandas.core.format import _format_datetime64 values = self.values @@ -514,8 +514,6 @@ def __repr__(self): return summary - __str__ = __repr__ - def __reduce__(self): """Necessary for making this object picklable""" object_state = list(np.ndarray.__reduce__(self)) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 34c640392bda9..ac79fbd6bfb37 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -3,6 +3,7 @@ from datetime import datetime, date import numpy as np +from pandas.core.base import PandasObject import pandas.tseries.offsets as offsets from pandas.tseries.frequencies import (get_freq_code as _gfc, @@ -40,7 +41,7 @@ def f(self): return property(f) -class Period(object): +class Period(PandasObject): """ Represents an period of time @@ -272,28 +273,6 @@ def __repr__(self): return "Period('%s', '%s')" % (formatted, freqstr) - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -303,9 +282,7 @@ def __unicode__(self): """ base, mult = _gfc(self.freq) formatted = tslib.period_format(self.ordinal, base) - value = (u"%s" % formatted) - assert type(value) == unicode - + value = ("%s" % formatted) return value