From 411d13f5f1dca9d3e2cdfeaaf53024e86dc1bb3a Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 30 Jun 2013 21:44:40 -0400 Subject: [PATCH 1/5] CLN: Refactor string methods and add PandasObject Previous PandasObject becomes PandasContainer. New PandasObject becomes baseclass for more elements (like Index, Categorical, etc.), moves string methods to baseclass and subclassing objects need only define `__unicode__` methods to get all string methods for free (and Py2/3 compatible). CLN: Cleanup extraneous str methods from Panel CLN: Remove unnecessary string methods from frame CLN: Change name of TestPandasObjects --> TestPandasContainer --- pandas/core/base.py | 53 ++++++++++++++++++++++++ pandas/core/common.py | 8 ++-- pandas/core/frame.py | 30 -------------- pandas/core/generic.py | 19 +++++---- pandas/core/panel.py | 30 -------------- pandas/core/series.py | 33 +-------------- pandas/io/tests/test_json/test_pandas.py | 14 +++---- 7 files changed, 77 insertions(+), 110 deletions(-) create mode 100644 pandas/core/base.py diff --git a/pandas/core/base.py b/pandas/core/base.py new file mode 100644 index 0000000000000..f59c8c0ae2721 --- /dev/null +++ b/pandas/core/base.py @@ -0,0 +1,53 @@ +""" +Base class(es) for all pandas objects. +""" +from pandas.util import py3compat + +class StringMixin(object): + """implements string methods so long as object defines a `__unicode__` method. + Handles Python2/3 compatibility transparently.""" + # side note - this could be made into a metaclass if more than one object nees + def __str__(self): + """ + Return a string representation for a particular object. + + Invoked by str(obj) in both py2/py3. + Yields Bytestring in Py2, Unicode String in py3. + """ + + if py3compat.PY3: + return self.__unicode__() + return self.__bytes__() + + def __bytes__(self): + """ + Return a string representation for a particular object. + + Invoked by bytes(obj) in py3 only. + Yields a bytestring in both py2/py3. 
+ """ + from pandas.core.config import get_option + + encoding = get_option("display.encoding") + return self.__unicode__().encode(encoding, 'replace') + + def __repr__(self): + """ + Return a string representation for a particular object. + + Yields Bytestring in Py2, Unicode String in py3. + """ + return str(self) + +class PandasObject(StringMixin): + """baseclass for various pandas objects""" + + def __unicode__(self): + """ + Return a string representation for a particular object. + + Invoked by unicode(obj) in py2 only. Yields a Unicode String in both + py2/py3. + """ + # Should be overwritten by base classes + return object.__repr__(self) diff --git a/pandas/core/common.py b/pandas/core/common.py index 96c567cbb6348..ddacb98a2ddf3 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -64,10 +64,10 @@ def _isnull_new(obj): if lib.isscalar(obj): return lib.checknull(obj) - from pandas.core.generic import PandasObject + from pandas.core.generic import PandasContainer if isinstance(obj, np.ndarray): return _isnull_ndarraylike(obj) - elif isinstance(obj, PandasObject): + elif isinstance(obj, PandasContainer): # TODO: optimize for DataFrame, etc. return obj.apply(isnull) elif isinstance(obj, list) or hasattr(obj, '__array__'): @@ -91,10 +91,10 @@ def _isnull_old(obj): if lib.isscalar(obj): return lib.checknull_old(obj) - from pandas.core.generic import PandasObject + from pandas.core.generic import PandasContainer if isinstance(obj, np.ndarray): return _isnull_ndarraylike_old(obj) - elif isinstance(obj, PandasObject): + elif isinstance(obj, PandasContainer): # TODO: optimize for DataFrame, etc. 
return obj.apply(_isnull_old) elif isinstance(obj, list) or hasattr(obj, '__array__'): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c5108f747e44..da52e5c5e1395 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -653,28 +653,6 @@ def _repr_fits_horizontal_(self,ignore_width=False): return repr_width < width - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -714,14 +692,6 @@ def __unicode__(self): return value - def __repr__(self): - """ - Return a string representation for a particular DataFrame - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def _repr_html_(self): """ Return a html representation for a particular DataFrame. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c41b02fdd7b22..0c392defc4fde 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,20 +1,21 @@ # pylint: disable=W0231,E1101 import numpy as np +import pandas.lib as lib +from pandas.core.base import PandasObject from pandas.core.index import MultiIndex import pandas.core.indexing as indexing from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com -import pandas.lib as lib class PandasError(Exception): pass -class PandasObject(object): +class PandasContainer(PandasObject): _AXIS_NUMBERS = { 'index': 0, @@ -52,6 +53,12 @@ def __hash__(self): raise TypeError('{0!r} objects are mutable, thus they cannot be' ' hashed'.format(self.__class__.__name__)) + def __unicode__(self): + # unicode representation based upon iterating over self + # (since, by definition, `PandasContainers` are iterable) + prepr = '[%s]' % ','.join(map(com.pprint_thing, self)) + return '%s(%s)' % (self.__class__.__name__, prepr) + #---------------------------------------------------------------------- # Axis name business @@ -578,9 +585,10 @@ def to_json(self, path_or_buf=None, orient=None, date_format='epoch', # install the indexerse for _name, _indexer in indexing.get_indexers_list(): - PandasObject._create_indexer(_name,_indexer) + PandasContainer._create_indexer(_name,_indexer) -class NDFrame(PandasObject): + +class NDFrame(PandasContainer): """ N-dimensional analogue of DataFrame. 
Store multi-dimensional in a size-mutable, labeled data structure @@ -633,9 +641,6 @@ def _constructor(self): def axes(self): return self._data.axes - def __repr__(self): - return 'NDFrame' - @property def values(self): return self._data.as_matrix() diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 92f69a7444aab..89623bbf230cd 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -466,28 +466,6 @@ def __invert__(self): #---------------------------------------------------------------------- # Magic methods - def __str__(self): - """ - Return a string representation for a particular Panel - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Panel - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Panel @@ -515,14 +493,6 @@ def axis_pretty(a): [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) return output - def __repr__(self): - """ - Return a string representation for a particular Panel - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def __iter__(self): return iter(getattr(self, self._info_axis)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7684acfe85470..0870446c75bb2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -394,8 +394,7 @@ def f(self, axis=0, dtype=None, out=None, skipna=True, level=None): #---------------------------------------------------------------------- # Series class - -class Series(pa.Array, generic.PandasObject): +class Series(generic.PandasContainer, pa.Array): """ One-dimensional ndarray with axis labels (including time series). 
Labels need not be unique but must be any hashable type. The object @@ -1096,28 +1095,6 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): return df.reset_index(level=level, drop=drop) - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -1142,14 +1119,6 @@ def __unicode__(self): raise AssertionError() return result - def __repr__(self): - """ - Return a string representation for a particular Series - - Yields Bytestring in Py2, Unicode String in py3. 
- """ - return str(self) - def _tidy_repr(self, max_vals=20): """ diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 997229487e1b9..e57eacc80647f 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -33,7 +33,7 @@ _mixed_frame = _frame.copy() -class TestPandasObjects(unittest.TestCase): +class TestPandasContainer(unittest.TestCase): def setUp(self): self.ts = tm.makeTimeSeries() @@ -68,7 +68,7 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_ if type(detail) == raise_ok: return raise - + unser = unser.sort() if dtype is False: @@ -104,7 +104,7 @@ def _check_all_orients(df, dtype=None, convert_axes=True, raise_ok=None): _check_orient(df, "split", dtype=dtype) _check_orient(df, "index", dtype=dtype) _check_orient(df, "values", dtype=dtype) - + _check_orient(df, "columns", dtype=dtype, convert_axes=False) _check_orient(df, "records", dtype=dtype, convert_axes=False) _check_orient(df, "split", dtype=dtype, convert_axes=False) @@ -347,7 +347,7 @@ def test_convert_dates(self): assert_series_equal(result,ts) def test_date_format(self): - + df = self.tsframe.copy() df['date'] = Timestamp('20130101') df_orig = df.copy() @@ -412,7 +412,7 @@ def test_misc_example(self): @network @slow def test_round_trip_exception_(self): - # GH 3867 + # GH 3867 df = pd.read_csv('https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv') s = df.to_json() @@ -429,9 +429,9 @@ def test_url(self): result = read_json(url,convert_dates=True) for c in ['created_at','closed_at','updated_at']: self.assert_(result[c].dtype == 'datetime64[ns]') - + url = 'http://search.twitter.com/search.json?q=pandas%20python' result = read_json(url) - + except urllib2.URLError: raise nose.SkipTest From 0cf93aa8b6ff1af83a460749b8092026d76b14e3 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 30 Jun 2013 21:50:11 -0400 Subject: [PATCH 2/5] CLN: Make more core objects 
inherit PandasObject CLN: Make Categorical inherit from PandasObject CLN: Make GroupBy inherit from PandasObject CLN/ENH: Make Sparse* into PandasObjects Plus get all the string methods working... CLN: Index now a PandasObject + str method cleanup CLN: Make tseries/index fit with PandasObject. CLN: Use PandasObject in internals + cleanup CLN: Make Period into a PandasObject + cleanup CLN: Remove extraneous __repr__ from io/excel --- pandas/core/categorical.py | 12 ++++---- pandas/core/groupby.py | 7 ++++- pandas/core/index.py | 63 ++------------------------------------ pandas/core/internals.py | 15 +++++---- pandas/io/excel.py | 3 -- pandas/sparse/array.py | 10 +++--- pandas/sparse/list.py | 8 +++-- pandas/sparse/series.py | 5 +-- pandas/tseries/index.py | 4 +-- pandas/tseries/period.py | 29 ++---------------- 10 files changed, 38 insertions(+), 118 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 916bb2deb417e..b25a027adedd9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -3,6 +3,7 @@ import numpy as np from pandas.core.algorithms import factorize +from pandas.core.base import PandasObject from pandas.core.index import Index import pandas.core.common as com from pandas.core.frame import DataFrame @@ -25,8 +26,7 @@ def f(self, other): return f - -class Categorical(object): +class Categorical(PandasObject): """ Represents a categorical variable in classic R / S-plus fashion @@ -134,9 +134,9 @@ def __array__(self, dtype=None): def __len__(self): return len(self.labels) - def __repr__(self): + def __unicode__(self): temp = 'Categorical: %s\n%s\n%s' - values = np.asarray(self) + values = com.pprint_thing(np.asarray(self)) levheader = 'Levels (%d): ' % len(self.levels) levstring = np.array_repr(self.levels, max_line_width=60) @@ -145,9 +145,9 @@ def __repr__(self): lines = levstring.split('\n') levstring = '\n'.join([lines[0]] + [indent + x.lstrip() for x in lines[1:]]) + name = '' if self.name is None 
else self.name + return temp % (name, values, levheader + levstring) - return temp % ('' if self.name is None else self.name, - repr(values), levheader + levstring) def __getitem__(self, key): if isinstance(key, (int, np.integer)): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9bd7923f6ec14..cc0a2b7589bb6 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2,6 +2,7 @@ import types import numpy as np +from pandas.core.base import PandasObject from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame @@ -100,7 +101,7 @@ def _last(x): return _last(x) -class GroupBy(object): +class GroupBy(PandasObject): """ Class for grouping and aggregating relational data. See aggregate, transform, and apply functions on this object. @@ -201,6 +202,10 @@ def __init__(self, obj, keys=None, axis=0, level=None, def __len__(self): return len(self.indices) + def __unicode__(self): + # TODO: Better unicode/repr for GroupBy object + return object.__repr__(self) + @property def groups(self): return self.grouper.groups diff --git a/pandas/core/index.py b/pandas/core/index.py index 43b172c6ecde9..0e15f9ee3c134 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -9,6 +9,7 @@ import pandas.algos as _algos import pandas.index as _index from pandas.lib import Timestamp +from pandas.core.base import PandasObject from pandas.util.decorators import cache_readonly from pandas.core.common import isnull @@ -47,7 +48,7 @@ def _shouldbe_timestamp(obj): or tslib.is_timestamp_array(obj)) -class Index(np.ndarray): +class Index(PandasObject, np.ndarray): """ Immutable ndarray implementing an ordered, sliceable set. 
The basic object storing axis labels for all pandas objects @@ -142,28 +143,6 @@ def __array_finalize__(self, obj): def _shallow_copy(self): return self.view() - def __str__(self): - """ - Return a string representation for a particular Index - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Index - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Index @@ -173,14 +152,6 @@ def __unicode__(self): prepr = com.pprint_thing(self, escape_chars=('\t', '\r', '\n'),quote_strings=True) return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype) - def __repr__(self): - """ - Return a string representation for a particular Index - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def to_series(self): """ return a series with both index and values equal to the index keys @@ -1531,28 +1502,6 @@ def _array_values(self): def dtype(self): return np.dtype('O') - def __str__(self): - """ - Return a string representation for a particular Index - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Index - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. 
- """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Index @@ -1566,14 +1515,6 @@ def __unicode__(self): return output % summary - def __repr__(self): - """ - Return a string representation for a particular Index - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def __len__(self): return len(self.labels[0]) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7a6a13da302d1..57be20a50f7bc 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4,6 +4,7 @@ from numpy import nan import numpy as np +from pandas.core.base import PandasObject from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE) @@ -19,7 +20,7 @@ from pandas.util import py3compat -class Block(object): +class Block(PandasObject): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas data structure @@ -91,14 +92,12 @@ def set_ref_items(self, ref_items, maybe_rename=True): self.items = ref_items.take(self.ref_locs) self.ref_items = ref_items - def __repr__(self): + def __unicode__(self): shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) name = type(self).__name__ result = '%s: %s, %s, dtype %s' % ( name, com.pprint_thing(self.items), shape, self.dtype) - if py3compat.PY3: - return unicode(result) - return com.console_encode(result) + return result def __contains__(self, item): return item in self.items @@ -969,7 +968,7 @@ def make_block(values, items, ref_items, klass=None, fastpath=False, placement=N # TODO: flexible with index=None and/or items=None -class BlockManager(object): +class BlockManager(PandasObject): """ Core internal data structure to implement DataFrame @@ -1213,7 +1212,7 @@ def __setstate__(self, state): def __len__(self): return len(self.items) - def __repr__(self): + def __unicode__(self): output = 'BlockManager' for i, ax 
in enumerate(self.axes): if i == 0: @@ -1222,7 +1221,7 @@ def __repr__(self): output += '\nAxis %d: %s' % (i, ax) for block in self.blocks: - output += '\n%s' % repr(block) + output += '\n%s' % com.pprint_thing(block) return output @property diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 95702847d9c7f..f61db447f2cfc 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -73,9 +73,6 @@ def __init__(self, path_or_buf, kind=None, **kwds): data = path_or_buf.read() self.book = xlrd.open_workbook(file_contents=data) - def __repr__(self): - return object.__repr__(self) - def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index de510aa155412..48fa9caa0a05c 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -8,6 +8,7 @@ import numpy as np import operator +from pandas.core.base import PandasObject import pandas.core.common as com from pandas.util import py3compat @@ -86,8 +87,7 @@ def _sparse_fillop(this, other, name): return result, result_index - -class SparseArray(np.ndarray): +class SparseArray(PandasObject, np.ndarray): """Data structure for labeled, sparse floating point data Parameters @@ -184,9 +184,9 @@ def __setstate__(self, state): def __len__(self): return self.sp_index.length - def __repr__(self): - return '%s\n%s' % (np.ndarray.__repr__(self), - repr(self.sp_index)) + def __unicode__(self): + return '%s\n%s' % (com.pprint_thing(self), + com.pprint_thing(self.sp_index)) # Arithmetic operators diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py index 9f59b9108a6b0..ceb03eae5d282 100644 --- a/pandas/sparse/list.py +++ b/pandas/sparse/list.py @@ -1,10 +1,12 @@ import numpy as np +from pandas.core.base import PandasObject +from pandas.core.common import pprint_thing from pandas.sparse.array import SparseArray import pandas._sparse as 
splib -class SparseList(object): +class SparseList(PandasObject): """ Data structure for accumulating data to be converted into a SparseArray. Has similar API to the standard Python list @@ -21,9 +23,9 @@ def __init__(self, data=None, fill_value=np.nan): if data is not None: self.append(data) - def __repr__(self): + def __unicode__(self): contents = '\n'.join(repr(c) for c in self._chunks) - return '%s\n%s' % (object.__repr__(self), contents) + return '%s\n%s' % (object.__repr__(self), pprint_thing(contents)) def __len__(self): return sum(len(c) for c in self._chunks) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 1b8d3541da289..802808954c8f4 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -241,8 +241,9 @@ def __setstate__(self, state): def __len__(self): return self.sp_index.length - def __repr__(self): - series_rep = Series.__repr__(self) + def __unicode__(self): + # currently, unicode is same as repr...fixes infinite loop + series_rep = Series.__unicode__(self) rep = '%s\n%s' % (series_rep, repr(self.sp_index)) return rep diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 56df301b5b027..7fdb6d9d2603d 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -488,7 +488,7 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return tslib.ints_to_pydatetime(self.asi8, self.tz) - def __repr__(self): + def __unicode__(self): from pandas.core.format import _format_datetime64 values = self.values @@ -514,8 +514,6 @@ def __repr__(self): return summary - __str__ = __repr__ - def __reduce__(self): """Necessary for making this object picklable""" object_state = list(np.ndarray.__reduce__(self)) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 34c640392bda9..ac79fbd6bfb37 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -3,6 +3,7 @@ from datetime import datetime, date import numpy as np +from pandas.core.base import PandasObject import 
pandas.tseries.offsets as offsets from pandas.tseries.frequencies import (get_freq_code as _gfc, @@ -40,7 +41,7 @@ def f(self): return property(f) -class Period(object): +class Period(PandasObject): """ Represents an period of time @@ -272,28 +273,6 @@ def __repr__(self): return "Period('%s', '%s')" % (formatted, freqstr) - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -303,9 +282,7 @@ def __unicode__(self): """ base, mult = _gfc(self.freq) formatted = tslib.period_format(self.ordinal, base) - value = (u"%s" % formatted) - assert type(value) == unicode - + value = ("%s" % formatted) return value From 7222e5ab78569cea4f50d57886979faa8ba9ef61 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 30 Jun 2013 21:52:41 -0400 Subject: [PATCH 3/5] CLN: Have PyTables, stats, & Stata use StringMixin CLN: Make PyTables unicode safe + add StringMixin CLN: Make StataMissingValue use StringMixin ENH: Use StringMixin for addl string methods in stats --- pandas/io/pytables.py | 46 ++++++++++++++++-------------------- pandas/io/stata.py | 11 +++++---- pandas/stats/fama_macbeth.py | 5 ++-- pandas/stats/ols.py | 5 ++-- pandas/stats/var.py | 6 ++--- 5 files changed, 37 insertions(+), 36 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5bf309edffa74..fdb86c43b7160 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -17,7 +17,8 @@ from pandas.sparse.api import 
SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex from pandas.tseries.api import PeriodIndex, DatetimeIndex -from pandas.core.common import adjoin, is_list_like +from pandas.core.base import StringMixin +from pandas.core.common import adjoin, is_list_like, pprint_thing from pandas.core.algorithms import match, unique from pandas.core.categorical import Categorical from pandas.core.common import _asarray_tuplesafe @@ -218,7 +219,7 @@ def read_hdf(path_or_buf, key, **kwargs): # a passed store; user controls open/close f(path_or_buf, False) -class HDFStore(object): +class HDFStore(StringMixin): """ dict-like IO interface for storing pandas objects in PyTables format. @@ -315,8 +316,8 @@ def __contains__(self, key): def __len__(self): return len(self.groups()) - def __repr__(self): - output = '%s\nFile path: %s\n' % (type(self), self._path) + def __unicode__(self): + output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) if len(self.keys()): keys = [] @@ -326,11 +327,11 @@ def __repr__(self): try: s = self.get_storer(k) if s is not None: - keys.append(str(s.pathname or k)) - values.append(str(s or 'invalid_HDFStore node')) - except (Exception), detail: + keys.append(pprint_thing(s.pathname or k)) + values.append(pprint_thing(s or 'invalid_HDFStore node')) + except Exception as detail: keys.append(k) - values.append("[invalid_HDFStore node: %s]" % str(detail)) + values.append("[invalid_HDFStore node: %s]" % pprint_thing(detail)) output += adjoin(12, keys, values) else: @@ -984,7 +985,7 @@ def get_values(self): self.close() return results -class IndexCol(object): +class IndexCol(StringMixin): """ an index column description class Parameters @@ -1050,10 +1051,9 @@ def set_table(self, table): self.table = table return self - def __repr__(self): - return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % (self.name, self.cname, self.axis, self.pos, self.kind) - - __str__ = __repr__ + def __unicode__(self): + temp 
= tuple(map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind))) + return "name->%s,cname->%s,axis->%s,pos->%s,kind->%s" % temp def __eq__(self, other): """ compare 2 col items """ @@ -1570,7 +1570,7 @@ class GenericDataIndexableCol(DataIndexableCol): def get_attr(self): pass -class Storer(object): +class Storer(StringMixin): """ represent an object in my store facilitate read/write of various types of objects this is an abstract base class @@ -1610,19 +1610,16 @@ def set_version(self): def pandas_type(self): return _ensure_decoded(getattr(self.group._v_attrs, 'pandas_type', None)) - def __repr__(self): - """ return a pretty representatgion of myself """ + def __unicode__(self): + """ return a pretty representation of myself """ self.infer_axes() s = self.shape if s is not None: if isinstance(s, (list,tuple)): - s = "[%s]" % ','.join([ str(x) for x in s ]) + s = "[%s]" % ','.join([pprint_thing(x) for x in s]) return "%-12.12s (shape->%s)" % (self.pandas_type,s) return self.pandas_type - def __str__(self): - return self.__repr__() - def set_object_info(self): """ set my pandas type & version """ self.attrs.pandas_type = self.pandas_kind @@ -3435,7 +3432,7 @@ def _need_convert(kind): return True return False -class Term(object): +class Term(StringMixin): """create a term object that holds a field, op, and value Parameters @@ -3540,10 +3537,9 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): if len(self.q): self.eval() - def __str__(self): - return "field->%s,op->%s,value->%s" % (self.field, self.op, self.value) - - __repr__ = __str__ + def __unicode__(self): + attrs = map(pprint_thing, (self.field, self.op, self.value)) + return "field->%s,op->%s,value->%s" % tuple(attrs) @property def is_valid(self): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 632e97c24721f..603924ac6a292 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -15,6 +15,7 @@ import sys import struct +from pandas.core.base import 
StringMixin from pandas.core.frame import DataFrame from pandas.core.series import Series from pandas.core.categorical import Categorical @@ -163,7 +164,7 @@ def _datetime_to_stata_elapsed(date, fmt): raise ValueError("fmt %s not understood" % fmt) -class StataMissingValue(object): +class StataMissingValue(StringMixin): """ An observation's missing value. @@ -192,10 +193,12 @@ def __init__(self, offset, value): string = property(lambda self: self._str, doc="The Stata representation of the missing value: '.', '.a'..'.z'") value = property(lambda self: self._value, doc='The binary representation of the missing value.') - def __str__(self): - return self._str + def __unicode__(self): + return self.string - __str__.__doc__ = string.__doc__ + def __repr__(self): + # not perfect :-/ + return "%s(%s)" % (self.__class__, self) class StataParser(object): diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index b75029c615735..967199c0bcf69 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -1,3 +1,4 @@ +from pandas.core.base import StringMixin from pandas.util.py3compat import StringIO import numpy as np @@ -26,7 +27,7 @@ def fama_macbeth(**kwargs): return klass(**kwargs) -class FamaMacBeth(object): +class FamaMacBeth(StringMixin): def __init__(self, y, x, intercept=True, nw_lags=None, nw_lags_beta=None, entity_effects=False, time_effects=False, x_effects=None, @@ -114,7 +115,7 @@ def _coef_table(self): return buffer.getvalue() - def __repr__(self): + def __unicode__(self): return self.summary @cache_readonly diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index cdcf1ab2ab036..742d832a923d8 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -10,6 +10,7 @@ import numpy as np from pandas.core.api import DataFrame, Series, isnull +from pandas.core.base import StringMixin from pandas.core.common import _ensure_float64 from pandas.core.index import MultiIndex from pandas.core.panel import Panel @@ -22,7 +23,7 @@ 
_FP_ERR = 1e-8 -class OLS(object): +class OLS(StringMixin): """ Runs a full sample ordinary least squares regression. @@ -581,7 +582,7 @@ def summary(self): return template % params - def __repr__(self): + def __unicode__(self): return self.summary @cache_readonly diff --git a/pandas/stats/var.py b/pandas/stats/var.py index e993b60e18a39..8953f7badfefb 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,7 +1,7 @@ from __future__ import division import numpy as np - +from pandas.core.base import StringMixin from pandas.util.decorators import cache_readonly from pandas.core.frame import DataFrame from pandas.core.panel import Panel @@ -11,7 +11,7 @@ from pandas.stats.ols import _combine_rhs -class VAR(object): +class VAR(StringMixin): """ Estimates VAR(p) regression on multivariate time series data presented in pandas data structures. @@ -477,7 +477,7 @@ def _sigma(self): return np.dot(resid, resid.T) / (n - k) - def __repr__(self): + def __unicode__(self): return self.summary From 8468b13b934d46d09c40175646f58bc2609d6ac6 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 30 Jun 2013 22:15:25 -0400 Subject: [PATCH 4/5] DOC: New class hierarchy + StringMixin DOC/CLN: Remove extra whitespace from v0.12.0.txt DOC: Add PR issue number too DOC: Fix spelling error --- doc/source/release.rst | 12 +++++++++- doc/source/v0.12.0.txt | 54 ++++++++++++++++++++++++++---------------- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 691c7312dde72..facf753ced9a0 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -175,8 +175,18 @@ pandas 0.12 ``bs4`` + ``html5lib`` when lxml fails to parse. a list of parsers to try until success is also valid - more consistency in the to_datetime return types (give string/array of string inputs) (:issue:`3888`) + - The internal ``pandas`` class hierarchy has changed (slightly). 
The + previous ``PandasObject`` is now called ``PandasContainer`` and a new + ``PandasObject`` has become the baseclass for ``PandasContainer`` as well + as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and + ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` + provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) + - New ``StringMixin`` that, given a ``__unicode__`` method, gets Python 2 and + Python 3 compatible string methods (``__str__``, ``__bytes__``, and + ``__repr__``). Plus string safety throughout. Now employed in many places + throughout the pandas library. (:issue:`4090`, :issue:`4092`) -**Experimental Feautres** +**Experimental Features** - Added experimental ``CustomBusinessDay`` class to support ``DateOffsets`` with custom holiday calendars and custom weekmasks. (:issue:`2301`) diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt index 60086e1c49ae7..f8836b4532493 100644 --- a/doc/source/v0.12.0.txt +++ b/doc/source/v0.12.0.txt @@ -8,13 +8,13 @@ enhancements along with a large number of bug fixes. Highlites include a consistent I/O API naming scheme, routines to read html, write multi-indexes to csv files, read & write STATA data files, read & write JSON format -files, Python 3 support for ``HDFStore``, filtering of groupby expressions via ``filter``, and a +files, Python 3 support for ``HDFStore``, filtering of groupby expressions via ``filter``, and a revamped ``replace`` routine that accepts regular expressions. API changes ~~~~~~~~~~~ - - The I/O API is now much more consistent with a set of top level ``reader`` functions + - The I/O API is now much more consistent with a set of top level ``reader`` functions accessed like ``pd.read_csv()`` that generally return a ``pandas`` object. 
* ``read_csv`` @@ -38,7 +38,7 @@ API changes * ``to_clipboard`` - - Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return + - Fix modulo and integer division on Series,DataFrames to act similary to ``float`` dtypes to return ``np.nan`` or ``np.inf`` as appropriate (:issue:`3590`). This correct a numpy bug that treats ``integer`` and ``float`` dtypes differently. @@ -50,15 +50,15 @@ API changes p / p p / 0 - - Add ``squeeze`` keyword to ``groupby`` to allow reduction from + - Add ``squeeze`` keyword to ``groupby`` to allow reduction from DataFrame -> Series if groups are unique. This is a Regression from 0.10.1. - We are reverting back to the prior behavior. This means groupby will return the - same shaped objects whether the groups are unique or not. Revert this issue (:issue:`2893`) + We are reverting back to the prior behavior. This means groupby will return the + same shaped objects whether the groups are unique or not. Revert this issue (:issue:`2893`) with (:issue:`3596`). .. ipython:: python - df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, + df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":1, "val2": 27}, {"val1":1, "val2": 12}]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -96,9 +96,9 @@ API changes and thus you should cast to an appropriate numeric dtype if you need to plot something. - - Add ``colormap`` keyword to DataFrame plotting methods. Accepts either a - matplotlib colormap object (ie, matplotlib.cm.jet) or a string name of such - an object (ie, 'jet'). The colormap is sampled to select the color for each + - Add ``colormap`` keyword to DataFrame plotting methods. Accepts either a + matplotlib colormap object (ie, matplotlib.cm.jet) or a string name of such + an object (ie, 'jet'). The colormap is sampled to select the color for each column. Please see :ref:`visualization.colormaps` for more information. 
(:issue:`3860`) @@ -159,6 +159,18 @@ API changes ``bs4`` + ``html5lib`` when lxml fails to parse. a list of parsers to try until success is also valid + - The internal ``pandas`` class hierarchy has changed (slightly). The + previous ``PandasObject`` is now called ``PandasContainer`` and a new + ``PandasObject`` has become the baseclass for ``PandasContainer`` as well + as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and + ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` + provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) + + - New ``StringMixin`` that, given a ``__unicode__`` method, gets Python 2 and + Python 3 compatible string methods (``__str__``, ``__bytes__``, and + ``__repr__``). Plus string safety throughout. Now employed in many places + throughout the pandas library. (:issue:`4090`, :issue:`4092`) + I/O Enhancements ~~~~~~~~~~~~~~~~ @@ -184,7 +196,7 @@ I/O Enhancements .. warning:: - You may have to install an older version of BeautifulSoup4, + You may have to install an older version of BeautifulSoup4, :ref:`See the installation docs` - Added module for reading and writing Stata files: ``pandas.io.stata`` (:issue:`1512`) @@ -203,15 +215,15 @@ I/O Enhancements - The option, ``tupleize_cols`` can now be specified in both ``to_csv`` and ``read_csv``, to provide compatiblity for the pre 0.12 behavior of writing and reading multi-index columns via a list of tuples. The default in - 0.12 is to write lists of tuples and *not* interpret list of tuples as a - multi-index column. + 0.12 is to write lists of tuples and *not* interpret list of tuples as a + multi-index column. Note: The default behavior in 0.12 remains unchanged, but starting with 0.13, - the default *to* write and read multi-index columns will be in the new + the default *to* write and read multi-index columns will be in the new format. (:issue:`3571`, :issue:`1651`, :issue:`3141`) - If an ``index_col`` is not specified (e.g. 
you don't have an index, or wrote it - with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will + with ``df.to_csv(..., index=False``), then any ``names`` on the columns index will be *lost*. .. ipython:: python @@ -296,8 +308,8 @@ Other Enhancements pd.get_option('a.b') pd.get_option('b.c') - - The ``filter`` method for group objects returns a subset of the original - object. Suppose we want to take only elements that belong to groups with a + - The ``filter`` method for group objects returns a subset of the original + object. Suppose we want to take only elements that belong to groups with a group sum greater than 2. .. ipython:: python @@ -317,7 +329,7 @@ Other Enhancements dff.groupby('B').filter(lambda x: len(x) > 2) Alternatively, instead of dropping the offending groups, we can return a - like-indexed objects where the groups that do not pass the filter are + like-indexed objects where the groups that do not pass the filter are filled with NaNs. .. ipython:: python @@ -333,9 +345,9 @@ Experimental Features - Added experimental ``CustomBusinessDay`` class to support ``DateOffsets`` with custom holiday calendars and custom weekmasks. (:issue:`2301`) - + .. note:: - + This uses the ``numpy.busdaycalendar`` API introduced in Numpy 1.7 and therefore requires Numpy 1.7.0 or newer. 
@@ -416,7 +428,7 @@ Bug Fixes - Extend ``reindex`` to correctly deal with non-unique indices (:issue:`3679`) - ``DataFrame.itertuples()`` now works with frames with duplicate column names (:issue:`3873`) - - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to + - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to ``reindex`` for location-based taking - ``DataFrame.from_records`` did not accept empty recarrays (:issue:`3682`) From a5583141404ddf91516e70dade7942c315cb3646 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Mon, 1 Jul 2013 08:06:47 -0400 Subject: [PATCH 5/5] CLN: Move _constructor checks to PandasObject base --- pandas/core/base.py | 5 +++++ pandas/core/frame.py | 4 ---- pandas/core/generic.py | 4 ---- pandas/core/index.py | 8 -------- pandas/core/panel.py | 4 ---- pandas/core/series.py | 4 ---- 6 files changed, 5 insertions(+), 24 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f59c8c0ae2721..6122e78fa8bce 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -42,6 +42,11 @@ def __repr__(self): class PandasObject(StringMixin): """baseclass for various pandas objects""" + @property + def _constructor(self): + """class constructor (for this class it's just `__class__`)""" + return self.__class__ + def __unicode__(self): """ Return a string representation for a particular object. 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da52e5c5e1395..5fe2d60993f2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -584,10 +584,6 @@ def _verbose_info(self, value): def axes(self): return [self.index, self.columns] - @property - def _constructor(self): - return self.__class__ - @property def shape(self): return (len(self.index), len(self.columns)) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c392defc4fde..6be5f456b50e6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -633,10 +633,6 @@ def astype(self, dtype, copy = True, raise_on_error = True): mgr = self._data.astype(dtype, copy = copy, raise_on_error = raise_on_error) return self._constructor(mgr) - @property - def _constructor(self): - return NDFrame - @property def axes(self): return self._data.axes diff --git a/pandas/core/index.py b/pandas/core/index.py index 0e15f9ee3c134..a3aa0804bcfe2 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -208,10 +208,6 @@ def _set_names(self, values): names = property(fset=_set_names, fget=_get_names) - @property - def _constructor(self): - return Index - @property def _has_complex_internals(self): # to disable groupby tricks in MultiIndex @@ -1379,10 +1375,6 @@ def __new__(cls, data, dtype=None, copy=False, name=None): def inferred_type(self): return 'integer' - @property - def _constructor(self): - return Int64Index - @property def asi8(self): # do not cache or you'll create a memory leak diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 89623bbf230cd..d33f7144c27b0 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -186,10 +186,6 @@ class Panel(NDFrame): major_axis = lib.AxisProperty(1) minor_axis = lib.AxisProperty(2) - @property - def _constructor(self): - return type(self) - # return the type of the slice constructor _constructor_sliced = DataFrame diff --git a/pandas/core/series.py b/pandas/core/series.py index 0870446c75bb2..5ea029b414fef 100644 
--- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -519,10 +519,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None, copy=False): pass - @property - def _constructor(self): - return Series - @property def _can_hold_na(self): return not is_integer_dtype(self.dtype)