From c7dcef894c61521fca2015caa5dd71081c1a5807 Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 20 Feb 2013 12:24:41 -0500 Subject: [PATCH 1/6] ENH/CLN: refactor of common code from frame/panel to generic.py axis creation routines now commonized under _setup_axes --- RELEASE.rst | 16 + pandas/core/frame.py | 179 +---------- pandas/core/generic.py | 591 +++++++++++++++++++++++++++-------- pandas/core/groupby.py | 2 +- pandas/core/indexing.py | 16 +- pandas/core/internals.py | 10 +- pandas/core/panel.py | 284 +++-------------- pandas/core/panel4d.py | 80 ++--- pandas/core/series.py | 6 +- pandas/sparse/frame.py | 8 +- pandas/sparse/panel.py | 3 +- pandas/sparse/series.py | 1 - pandas/tests/test_ndframe.py | 15 - pandas/tests/test_panel.py | 6 +- pandas/tests/test_panel4d.py | 6 +- 15 files changed, 592 insertions(+), 631 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 3dbfa080021e3..83bdca94151a1 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -130,6 +130,22 @@ pandas 0.11.0 - arguments to DataFrame.clip were inconsistent to numpy and Series clipping (GH2747_) + - Refactor of series.py/frame.py/panel.py to move common code to generic.py + all axis creation and manipulation code is now common (except for Series) + + - added _setup_axes to created generic NDFrame structures + - moved methods + + - from_axes,_wrap_array,axes,ix,shape,empty,swapaxes,transpose,pop + - __str__,__bytes__,__repr__ + - __iter__,keys,__contains__,__len__,__neg__,__invert__ + - convert_objects,as_blocks + - _indexed_same + + - swapaxes on a Panel with the same axes specified now return a copy + (consistent with DataFrame) + + **Bug Fixes** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6ef2ad642612c..0b3a902ec017b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -332,16 +332,13 @@ def f(self, other): class DataFrame(NDFrame): _auto_consolidate = True - _het_axis = 1 - _info_axis = 'columns' - _col_klass = Series + _verbose_info = True - _AXIS_NUMBERS = { - 'index': 0, - 'columns': 1 - } + @property + def _constructor(self): + return DataFrame - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _constructor_sliced = Series def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): @@ -449,15 +446,6 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, NDFrame.__init__(self, mgr) - @classmethod - def _from_axes(cls, data, axes): - # for construction from BlockManager - if isinstance(data, BlockManager): - return cls(data) - else: - columns, index = axes - return cls(data, index=index, columns=columns, copy=False) - def _init_mgr(self, mgr, index, columns, dtype=None, copy=False): if columns is not None: mgr = mgr.reindex_axis(columns, axis=0, copy=False) @@ -556,17 +544,6 @@ def _init_ndarray(self, values, index, columns, dtype=None, block = make_block(values.T, columns, columns) return BlockManager([block], [columns, index]) - def _wrap_array(self, arr, axes, copy=False): - index, columns = axes - return self._constructor(arr, index=index, columns=columns, copy=copy) - - @property - def _verbose_info(self): - import warnings - warnings.warn('The _verbose_info property will be removed in version ' - '0.12', FutureWarning) - return get_option('display.max_info_rows') is None - @_verbose_info.setter def _verbose_info(self, value): import warnings @@ -588,13 +565,8 @@ def _constructor(self): def shape(self): return (len(self.index), len(self.columns)) - #---------------------------------------------------------------------- # 
Class behavior - @property - def empty(self): - return not (len(self.columns) > 0 and len(self.index) > 0) - def __nonzero__(self): raise ValueError("Cannot call bool() on DataFrame.") @@ -637,28 +609,6 @@ def _need_info_repr_(self): else: return False - def __str__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular DataFrame - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular DataFrame @@ -687,14 +637,6 @@ def _need_wide_repr(self): return (get_option("display.expand_frame_repr") and com.in_interactive_session()) - def __repr__(self): - """ - Return a string representation for a particular DataFrame - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - def _repr_html_(self): """ Return a html representation for a particular DataFrame. @@ -713,15 +655,6 @@ def _repr_html_(self): else: return None - def __iter__(self): - """ - Iterate over columns of the frame. - """ - return iter(self.columns) - - def keys(self): - return self.columns - def iteritems(self): """Iterator over (column, series) pairs""" if self.columns.is_unique and hasattr(self, '_item_cache'): @@ -758,13 +691,9 @@ def itertuples(self, index=True): items = iteritems def __len__(self): - """Returns length of index""" + """ we are reversed, so shortcut this here """ return len(self.index) - def __contains__(self, key): - """True if DataFrame has this column""" - return key in self.columns - #---------------------------------------------------------------------- # Arithmetic methods @@ -810,14 +739,6 @@ def __contains__(self, key): __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', default_axis=None) - def __neg__(self): - arr = operator.neg(self.values) - return self._wrap_array(arr, self.axes, copy=False) - - def __invert__(self): - arr = operator.inv(self.values) - return self._wrap_array(arr, self.axes, copy=False) - # Comparison methods __eq__ = _comp_method(operator.eq, '__eq__', '==') __ne__ = _comp_method(operator.ne, '__ne__', '!=') @@ -1596,12 +1517,6 @@ def convert_objects(self, convert_dates=True, convert_numeric=False): """ return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric)) - #---------------------------------------------------------------------- - # properties for index and columns - - columns = lib.AxisProperty(0) - index = lib.AxisProperty(1) - def as_matrix(self, columns=None): """ Convert the frame to its Numpy-array matrix representation. Columns @@ -1630,62 +1545,11 @@ def as_matrix(self, columns=None): self._consolidate_inplace() return self._data.as_matrix(columns).T - values = property(fget=as_matrix) - - def as_blocks(self, columns=None): - """ - Convert the frame to a dict of dtype -> DataFrames that each has a homogeneous dtype. - are presented in sorted order unless a specific list of columns is - provided. 
- - NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in as_matrix) - - Parameters - ---------- - columns : array-like - Specific column order - - Returns - ------- - values : a list of DataFrames - """ - self._consolidate_inplace() - - bd = dict() - for b in self._data.blocks: - b = b.reindex_items_from(columns or b.items) - bd[str(b.dtype)] = DataFrame(BlockManager([ b ], [ b.items, self.index ])) - return bd - - blocks = property(fget=as_blocks) - def transpose(self): - """ - Returns a DataFrame with the rows/columns switched. If the DataFrame is - homogeneously-typed, the data is not copied - """ - return self._constructor(data=self.values.T, index=self.columns, - columns=self.index, copy=False) + return super(DataFrame, self).transpose(1,0) T = property(transpose) - def swapaxes(self, i, j): - """ - Like ndarray.swapaxes, equivalent to transpose - - Returns - ------- - swapped : DataFrame - View on original data (no copy) - """ - if i in (0, 1) and j in (0, 1): - if i == j: - return self - return self._constructor(data=self.values.T, index=self.columns, - columns=self.index, copy=False) - else: - raise ValueError('Axis numbers must be in (0, 1)') - #---------------------------------------------------------------------- # Picklability @@ -1840,12 +1704,6 @@ def _ixs(self, i, axis=0, copy=False): # icol else: - """ - Notes - ----- - If slice passed, the resulting data will be a view - """ - label = self.columns[i] if isinstance(i, slice): # need to return view @@ -1857,8 +1715,8 @@ def _ixs(self, i, axis=0, copy=False): return self.take(i, axis=1, convert=True) values = self._data.iget(i) - return self._col_klass.from_array(values, index=self.index, - name=label) + return self._constructor_sliced.from_array(values, index=self.index, + name=label) def iget_value(self, i, j): return self.iat[i,j] @@ -2119,17 +1977,6 @@ def _sanitize_column(self, key, value): value = com._possibly_cast_to_datetime(value, dtype) return np.atleast_2d(np.asarray(value)) - def pop(self, item): - """ - Return column and drop from frame. Raise KeyError if not found. 
- - Returns - ------- - column : Series - """ - return NDFrame.pop(self, item) - - # to support old APIs @property def _series(self): return self._data.get_series_dict() @@ -3560,11 +3407,6 @@ def _arith_op(left, right): return self._constructor(result, index=new_index, columns=new_columns, copy=False) - def _indexed_same(self, other): - same_index = self.index.equals(other.index) - same_columns = self.columns.equals(other.columns) - return same_index and same_columns - def _combine_series(self, other, func, fill_value=None, axis=None, level=None): if axis is not None: @@ -4820,6 +4662,7 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds): def _reduce(self, op, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): + f = lambda x: op(x, axis=axis, skipna=skipna, **kwds) labels = self._get_agg_axis(axis) if numeric_only is None: @@ -4906,6 +4749,7 @@ def idxmax(self, axis=0, skipna=True): return Series(result, index=self._get_agg_axis(axis)) def _get_agg_axis(self, axis_num): + """ let's be explict about this """ if axis_num == 0: return self.columns elif axis_num == 1: @@ -5222,6 +5066,7 @@ def mask(self, cond): """ return self.where(~cond, NA) +DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True) _EMPTY_SERIES = Series([]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d1c2db67713d4..058e639b64ada 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,14 +1,17 @@ # pylint: disable=W0231,E1101 +import operator import numpy as np from pandas.core.index import MultiIndex import pandas.core.indexing as indexing from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex +from pandas.core.internals import BlockManager +from pandas.core.indexing import _NDFrameIndexer import pandas.core.common as com import pandas.lib as lib - +from pandas.util import py3compat class PandasError(Exception): pass @@ -16,23 +19,93 @@ class PandasError(Exception): class PandasObject(object): - _AXIS_NUMBERS = { - 'index': 0, - 'columns': 1 - } + #---------------------------------------------------------------------- + # Construction - _AXIS_ALIASES = {} - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + @property + def _constructor(self): + raise NotImplementedError - def save(self, path): - com.save(self, path) + @property + def _constructor_sliced(self): + raise NotImplementedError + + #---------------------------------------------------------------------- + # Axis @classmethod - def load(cls, path): - return com.load(path) + def _setup_axes(cls, axes, info_axis = None, stat_axis = None, aliases = None, slicers = None, + axes_are_reversed = False, build_axes = True): + """ provide axes setup for the major PandasObjects + + axes : the names of the axes in order (lowest to highest) + info_axis_num : the axis of the selector dimension (int) + stat_axis_num : the number of axis for the default stats (int) + aliases : other names for a single axis (dict) + slicers : how axes slice to others (dict) + axes_are_reversed : boolean whether to treat passed axes as reversed (DataFrame) + build_axes : setup the axis properties (default True) + """ + + cls._AXIS_ORDERS = axes + cls._AXIS_NUMBERS = dict([(a, i) for i, a in enumerate(axes) ]) + cls._AXIS_LEN = len(axes) + cls._AXIS_ALIASES = aliases or dict() + cls._AXIS_NAMES = dict([(i, a) for i, a in enumerate(axes) ]) + cls._AXIS_SLICEMAP = slicers or None + cls._AXIS_REVERSED = axes_are_reversed + + # 
indexing support + cls._ix = None + + if info_axis is not None: + cls._info_axis_number = info_axis + cls._info_axis_name = axes[info_axis] + + if stat_axis is not None: + cls._stat_axis_number = stat_axis + cls._stat_axis_name = axes[stat_axis] + + # setup the actual axis + if build_axes: + if axes_are_reversed: + m = cls._AXIS_LEN-1 + for i, a in cls._AXIS_NAMES.items(): + setattr(cls,a,lib.AxisProperty(m-i)) + else: + for i, a in cls._AXIS_NAMES.items(): + setattr(cls,a,lib.AxisProperty(i)) + + def _construct_axes_dict(self, axes=None, **kwargs): + """ return an axes dictionary for myself """ + d = dict([(a, getattr(self, a)) for a in (axes or self._AXIS_ORDERS)]) + d.update(kwargs) + return d + + @staticmethod + def _construct_axes_dict_from(self, axes, **kwargs): + """ return an axes dictionary for the passed axes """ + d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)]) + d.update(kwargs) + return d + + def _construct_axes_dict_for_slice(self, axes=None, **kwargs): + """ return an axes dictionary for myself """ + d = dict([(self._AXIS_SLICEMAP[a], getattr(self, a)) + for a in (axes or self._AXIS_ORDERS)]) + d.update(kwargs) + return d - #---------------------------------------------------------------------- - # Axis name business + @classmethod + def _from_axes(cls, data, axes): + # for construction from BlockManager + if isinstance(data, BlockManager): + return cls(data) + else: + if cls._AXIS_REVERSED: + axes = axes[::-1] + d = cls._construct_axes_dict_from(cls, axes, copy=False) + return cls(data, **d) @classmethod def _get_axis_number(cls, axis): @@ -42,7 +115,7 @@ def _get_axis_number(cls, axis): if axis in cls._AXIS_NAMES: return axis else: - raise Exception('No %d axis' % axis) + raise ValueError('No %d axis' % axis) else: return cls._AXIS_NUMBERS[axis] @@ -53,7 +126,7 @@ def _get_axis_name(cls, axis): if axis in cls._AXIS_NUMBERS: return axis else: - raise Exception('No axis named %s' % axis) + raise ValueError('No axis named %s' % axis) else: return cls._AXIS_NAMES[axis] @@ -61,6 +134,14 @@ def _get_axis(self, axis): name = self._get_axis_name(axis) return getattr(self, name) + @property + def _info_axis(self): + return getattr(self, self._info_axis_name) + + @property + def _stat_axis(self): + return getattr(self, self._stat_axis_name) + #---------------------------------------------------------------------- # Indexers @classmethod @@ -76,6 +157,105 @@ def _indexer(self): setattr(cls,name,property(_indexer)) + #---------------------------------------------------------------------- + # Reconstruction + + def save(self, path): + com.save(self, path) + + @classmethod + def load(cls, path): + return com.load(path) + + #---------------------------------------------------------------------- + # Comparisons + + def _indexed_same(self, other): + return all([getattr(self, a).equals(getattr(other, a)) for a in self._AXIS_ORDERS]) + + def reindex(self, *args, **kwds): + raise NotImplementedError + + def __neg__(self): + arr = operator.neg(self.values) + return self._wrap_array(arr, self.axes, copy=False) + + def __invert__(self): + arr = operator.inv(self.values) + return self._wrap_array(arr, self.axes, copy=False) + + #---------------------------------------------------------------------- + # Iteration + + def __iter__(self): + """ + Iterate over infor axis + """ + return iter(self._info_axis) + + def keys(self): + """ return the info axis names """ + return self._info_axis + + def iteritems(self): + for h in self._info_axis: + yield h, self[h] + + # Name that won't get 
automatically converted to items by 2to3. items is + # already in use for the first axis. + iterkv = iteritems + + def __len__(self): + """Returns length of info axis """ + return len(self._info_axis) + + def __contains__(self, key): + """True if the key is in the info axis """ + return key in self._info_axis + + @property + def empty(self): + return not all(len(getattr(self, a)) > 0 for a in self._AXIS_ORDERS) + + #---------------------------------------------------------------------- + # Formatting + + def __unicode__(self): + raise NotImplementedError + + def __str__(self): + """ + Return a string representation for a particular Object + + Invoked by str(df) in both py2/py3. + Yields Bytestring in Py2, Unicode String in py3. + """ + + if py3compat.PY3: + return self.__unicode__() + return self.__bytes__() + + def __bytes__(self): + """ + Return a string representation for a particular Object + + Invoked by bytes(df) in py3 only. + Yields a bytestring in both py2/py3. + """ + encoding = com.get_option("display.encoding") + return self.__unicode__().encode(encoding, 'replace') + + def __repr__(self): + """ + Return a string representation for a particular Object + + Yields Bytestring in Py2, Unicode String in py3. + """ + return str(self) + + #---------------------------------------------------------------------- + # Methods + def abs(self): """ Return an object with absolute value taken. Only applicable to objects @@ -413,9 +593,6 @@ def sort_index(self, axis=0, ascending=True): new_axis = labels.take(sort_index) return self.reindex(**{axis_name: new_axis}) - def reindex(self, *args, **kwds): - raise NotImplementedError - def tshift(self, periods=1, freq=None, **kwds): """ Shift the time index, using the index's frequency if available @@ -494,8 +671,6 @@ class NDFrame(PandasObject): axes : list copy : boolean, default False """ - # kludge - _default_stat_axis = 0 def __init__(self, data, axes=None, copy=False, dtype=None): if dtype is not None: @@ -510,46 +685,194 @@ def __init__(self, data, axes=None, copy=False, dtype=None): object.__setattr__(self, '_data', data) object.__setattr__(self, '_item_cache', {}) - def astype(self, dtype, copy = True, raise_on_error = True): + #---------------------------------------------------------------------- + # Axes + + @property + def shape(self): + return tuple(len(getattr(self, a)) for a in self._AXIS_ORDERS) + + @property + def axes(self): + """ we do it this way because if we have reversed axes, then + the block manager shows then reversed """ + return [getattr(self, a) for a in self._AXIS_ORDERS] + + @property + def ndim(self): + return self._data.ndim + + def _expand_axes(self, key): + new_axes = [] + for k, ax in zip(key, self.axes): + if k not in ax: + if type(k) != ax.dtype.type: + ax = ax.astype('O') + new_axes.append(ax.insert(len(ax), k)) + else: + new_axes.append(ax) + + return new_axes + + def _set_axis(self, axis, labels): + self._data.set_axis(axis, labels) + self._clear_item_cache() + + def transpose(self, *args, **kwargs): """ - Cast object to input numpy.dtype - Return a copy when copy = True (be really careful with this!) + Permute the dimensions of the Object Parameters ---------- - dtype : numpy.dtype or Python type - raise_on_error : raise on invalid input + axes : int or name (or alias) + copy : boolean, default False + Make a copy of the underlying data. 
Mixed-dtype data will + always result in a copy + + Examples + -------- + >>> p.transpose(2, 0, 1) + >>> p.transpose(2, 0, 1, copy=True) Returns ------- - casted : type of caller + y : same as input """ - mgr = self._data.astype(dtype, copy = copy, raise_on_error = raise_on_error) - return self._constructor(mgr) + # construct the args + args = list(args) + for a in self._AXIS_ORDERS: + if not a in kwargs: + try: + kwargs[a] = args.pop(0) + except (IndexError): + raise ValueError( + "not enough arguments specified to transpose!") + + axes = [self._get_axis_number(kwargs[a]) for a in self._AXIS_ORDERS] + + # we must have unique axes + if len(axes) != len(set(axes)): + raise ValueError('Must specify %s unique axes' % self._AXIS_LEN) + + new_axes = self._construct_axes_dict_from( + self, [self._get_axis(x) for x in axes]) + new_values = self.values.transpose(tuple(axes)) + if kwargs.get('copy') or (len(args) and args[-1]): + new_values = new_values.copy() + return self._constructor(new_values, **new_axes) + + def swapaxes(self, axis1, axis2, copy=True): + """ + Interchange axes and swap values axes appropriately - @property - def _constructor(self): - return NDFrame + Returns + ------- + y : same as input + """ + i = self._get_axis_number(axis1) + j = self._get_axis_number(axis2) - @property - def axes(self): - return self._data.axes + if i == j: + if copy: + return self.copy() + return self - def __repr__(self): - return 'NDFrame' + mapping = {i: j, j: i} - @property - def values(self): - return self._data.as_matrix() + new_axes = (self._get_axis(mapping.get(k, k)) + for k in range(self._AXIS_LEN)) + new_values = self.values.swapaxes(i, j) + if copy: + new_values = new_values.copy() - @property - def ndim(self): - return self._data.ndim + return self._constructor(new_values, *new_axes) - def _set_axis(self, axis, labels): - self._data.set_axis(axis, labels) - self._clear_item_cache() + def pop(self, item): + """ + Return item and drop from frame. Raise KeyError if not found. + """ + result = self[item] + del self[item] + return result + + def squeeze(self): + """ squeeze length 1 dimensions """ + try: + return self.ix[tuple([ slice(None) if len(a) > 1 else a[0] for a in self.axes ])] + except: + return self + + def swaplevel(self, i, j, axis=0): + """ + Swap levels i and j in a MultiIndex on a particular axis + + Parameters + ---------- + i, j : int, string (can be mixed) + Level of index to be swapped. Can pass level name as string. + + Returns + ------- + swapped : type of caller (new object) + """ + axis = self._get_axis_number(axis) + result = self.copy() + labels = result._data.axes[axis] + result._data.set_axis(axis, labels.swaplevel(i, j)) + return result + + def rename_axis(self, mapper, axis=0, copy=True): + """ + Alter index and / or columns using input function or functions. + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. 
+ + Parameters + ---------- + mapper : dict-like or function, optional + axis : int, default 0 + copy : boolean, default True + Also copy underlying data + + See also + -------- + DataFrame.rename + + Returns + ------- + renamed : type of caller + """ + # should move this at some point + from pandas.core.series import _get_rename_function + + mapper_f = _get_rename_function(mapper) + + if axis == 0: + new_data = self._data.rename_items(mapper_f, copydata=copy) + else: + new_data = self._data.rename_axis(mapper_f, axis=axis) + if copy: + new_data = new_data.copy() + + return self._constructor(new_data) + + #---------------------------------------------------------------------- + # Array Interface + + def _wrap_array(self, arr, axes, copy=False): + d = self._construct_axes_dict_from(self, axes, copy=copy) + return self._constructor(arr, **d) + + def __array__(self, dtype=None): + return self.values + + def __array_wrap__(self, result): + d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) + return self._constructor(result, **d) + + #---------------------------------------------------------------------- + # Fancy Indexing def __getitem__(self, item): return self._get_item_cache(item) @@ -612,32 +935,14 @@ def get_dtype_counts(self): from pandas import Series return Series(self._data.get_dtype_counts()) - def pop(self, item): - """ - Return item and drop from frame. Raise KeyError if not found. - """ - result = self[item] - del self[item] - return result + def _reindex_axis(self, new_index, fill_method, axis, copy): + new_data = self._data.reindex_axis(new_index, axis=axis, + method=fill_method, copy=copy) - def squeeze(self): - """ squeeze length 1 dimensions """ - try: - return self.ix[tuple([ slice(None) if len(a) > 1 else a[0] for a in self.axes ])] - except: + if new_data is self._data and not copy: return self - - def _expand_axes(self, key): - new_axes = [] - for k, ax in zip(key, self.axes): - if k not in ax: - if type(k) != ax.dtype.type: - ax = ax.astype('O') - new_axes.append(ax.insert(len(ax), k)) - else: - new_axes.append(ax) - - return new_axes + else: + return self._constructor(new_data) #---------------------------------------------------------------------- # Consolidation of internals @@ -677,14 +982,83 @@ def _is_mixed_type(self): def _is_numeric_mixed_type(self): return self._data.is_numeric_mixed_type - def _reindex_axis(self, new_index, fill_method, axis, copy): - new_data = self._data.reindex_axis(new_index, axis=axis, - method=fill_method, copy=copy) + #---------------------------------------------------------------------- + # Methods - if new_data is self._data and not copy: - return self - else: - return self._constructor(new_data) + def as_matrix(self): + raise NotImplementedError + + @property + def values(self): + return self.as_matrix() + + @property + def _get_values(self): + # compat + return self.values + + def as_blocks(self, columns=None): + """ + Convert the frame to a dict of dtype -> Constructor Types that each has a homogeneous dtype. + are presented in sorted order unless a specific list of columns is + provided. 
+ + NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in as_matrix) + + Parameters + ---------- + columns : array-like + Specific column order + + Returns + ------- + values : a list of Object + """ + self._consolidate_inplace() + + bd = dict() + for b in self._data.blocks: + b = b.reindex_items_from(columns or b.items) + bd[str(b.dtype)] = self._constructor(BlockManager([ b ], [ b.items, self.index ])) + return bd + + @property + def blocks(self): + return self.as_blocks() + + def astype(self, dtype, copy = True, raise_on_error = True): + """ + Cast object to input numpy.dtype + Return a copy when copy = True (be really careful with this!) + + Parameters + ---------- + dtype : numpy.dtype or Python type + raise_on_error : raise on invalid input + + Returns + ------- + casted : type of caller + """ + + mgr = self._data.astype(dtype, copy = copy, raise_on_error = raise_on_error) + return self._constructor(mgr) + + def convert_objects(self, convert_dates=True, convert_numeric=True): + """ + Attempt to infer better dtype for object columns + Always returns a copy (even if no object columns) + + Parameters + ---------- + convert_dates : if True, attempt to soft convert_dates, if 'coerce', force conversion (and non-convertibles get NaT) + convert_numeric : if True attempt to coerce to numerbers (including strings), non-convertibles get NaN + + Returns + ------- + converted : DataFrame + """ + return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric)) def cumsum(self, axis=None, skipna=True): """ @@ -703,7 +1077,7 @@ def cumsum(self, axis=None, skipna=True): y : DataFrame """ if axis is None: - axis = self._default_stat_axis + axis = self._stat_axis_number else: axis = self._get_axis_number(axis) @@ -722,9 +1096,6 @@ def cumsum(self, axis=None, skipna=True): result = y.cumsum(axis) return self._wrap_array(result, self.axes, copy=False) - def _wrap_array(self, array, axes, copy=False): - raise NotImplementedError - def cumprod(self, axis=None, skipna=True): """ Return cumulative product over requested axis as DataFrame @@ -742,7 +1113,7 @@ def cumprod(self, axis=None, skipna=True): y : DataFrame """ if axis is None: - axis = self._default_stat_axis + axis = self._stat_axis_number else: axis = self._get_axis_number(axis) @@ -777,7 +1148,7 @@ def cummax(self, axis=None, skipna=True): y : DataFrame """ if axis is None: - axis = self._default_stat_axis + axis = self._stat_axis_number else: axis = self._get_axis_number(axis) @@ -813,7 +1184,7 @@ def cummin(self, axis=None, skipna=True): y : DataFrame """ if axis is None: - axis = self._default_stat_axis + axis = self._stat_axis_number else: axis = self._get_axis_number(axis) @@ -850,25 +1221,6 @@ def copy(self, deep=True): data = data.copy() return self._constructor(data) - def swaplevel(self, i, j, axis=0): - """ - Swap levels i and j in a MultiIndex on a particular axis - - Parameters - ---------- - i, j : int, string (can be mixed) - Level of index to be swapped. Can pass level name as string. - - Returns - ------- - swapped : type of caller (new object) - """ - axis = self._get_axis_number(axis) - result = self.copy() - labels = result._data.axes[axis] - result._data.set_axis(axis, labels.swaplevel(i, j)) - return result - def add_prefix(self, prefix): """ Concatenate prefix string with panel items names. 
@@ -899,41 +1251,6 @@ def add_suffix(self, suffix): new_data = self._data.add_suffix(suffix) return self._constructor(new_data) - def rename_axis(self, mapper, axis=0, copy=True): - """ - Alter index and / or columns using input function or functions. - Function / dict values must be unique (1-to-1). Labels not contained in - a dict / Series will be left as-is. - - Parameters - ---------- - mapper : dict-like or function, optional - axis : int, default 0 - copy : boolean, default True - Also copy underlying data - - See also - -------- - DataFrame.rename - - Returns - ------- - renamed : type of caller - """ - # should move this at some point - from pandas.core.series import _get_rename_function - - mapper_f = _get_rename_function(mapper) - - if axis == 0: - new_data = self._data.rename_items(mapper_f, copydata=copy) - else: - new_data = self._data.rename_axis(mapper_f, axis=axis) - if copy: - new_data = new_data.copy() - - return self._constructor(new_data) - def take(self, indices, axis=0, convert=True): """ Analogous to ndarray.take diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 5516f4f704120..69862b7b8cd04 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1751,7 +1751,7 @@ def _aggregate_generic(self, func, *args, **kwargs): obj = self._obj_with_exclusions result = {} - if axis != obj._het_axis: + if axis != obj._info_axis_number: try: for name, data in self: # for name in self.indices: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 16259fd39c0a9..1e7f8130a5971 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -110,21 +110,21 @@ def _setitem_with_indexer(self, indexer, value): if isinstance(value, Series): value = self._align_series(indexer, value) - het_axis = self.obj._het_axis - het_idx = indexer[het_axis] + info_axis = self.obj._info_axis_number + info_idx = indexer[info_axis] - if isinstance(het_idx, (int, long)): - het_idx = [het_idx] + if isinstance(info_idx, (int, long)): + info_idx = [info_idx] - plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:] - item_labels = self.obj._get_axis(het_axis) + plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:] + item_labels = self.obj._get_axis(info_axis) if isinstance(value, (np.ndarray, DataFrame)) and value.ndim > 1: raise ValueError('Setting mixed-type DataFrames with ' 'array/DataFrame pieces not yet supported') try: - for item in item_labels[het_idx]: + for item in item_labels[info_idx]: data = self.obj[item] values = data.values if np.prod(values.shape): @@ -132,7 +132,7 @@ def _setitem_with_indexer(self, indexer, value): value, getattr(data, 'dtype', None)) values[plane_indexer] = value except ValueError: - for item, v in zip(item_labels[het_idx], value): + for item, v in zip(item_labels[info_idx], value): data = self.obj[item] values = data.values if np.prod(values.shape): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fd998e5060b5f..4ea5065a9a549 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -294,12 +294,12 @@ def putmask(self, mask, new, inplace=False): # may need to align the new if hasattr(new, 'reindex_axis'): - axis = getattr(new, '_het_axis', 0) + axis = getattr(new, '_info_axis_number', 0) new = new.reindex_axis(self.items, axis=axis, copy=False).values.T # may need to align the mask if hasattr(mask, 'reindex_axis'): - axis = getattr(mask, '_het_axis', 0) + axis = getattr(mask, '_info_axis_number', 0) mask = mask.reindex_axis(self.items, axis=axis, copy=False).values.T if 
self._can_hold_element(new): @@ -420,7 +420,7 @@ def eval(self, func, other, raise_on_error = True, try_cast = False): # see if we can align other if hasattr(other, 'reindex_axis'): - axis = getattr(other, '_het_axis', 0) + axis = getattr(other, '_info_axis_number', 0) other = other.reindex_axis(self.items, axis=axis, copy=True).values # make sure that we can broadcast @@ -475,7 +475,7 @@ def where(self, other, cond, raise_on_error = True, try_cast = False): # see if we can align other if hasattr(other,'reindex_axis'): - axis = getattr(other,'_het_axis',0) + axis = getattr(other,'_info_axis_number',0) other = other.reindex_axis(self.items, axis=axis, copy=True).values # make sure that we can broadcast @@ -489,7 +489,7 @@ def where(self, other, cond, raise_on_error = True, try_cast = False): if not hasattr(cond,'shape'): raise ValueError("where must have a condition that is ndarray like") if hasattr(cond,'reindex_axis'): - axis = getattr(cond,'_het_axis',0) + axis = getattr(cond,'_info_axis_number',0) cond = cond.reindex_axis(self.items, axis=axis, copy=True).values else: cond = cond.values diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 9f91d8add1eac..5037659c1432b 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -145,75 +145,13 @@ def f(self, other): class Panel(NDFrame): - _AXIS_ORDERS = ['items', 'major_axis', 'minor_axis'] - _AXIS_NUMBERS = dict([(a, i) for i, a in enumerate(_AXIS_ORDERS)]) - _AXIS_ALIASES = { - 'major': 'major_axis', - 'minor': 'minor_axis' - } - _AXIS_NAMES = dict([(i, a) for i, a in enumerate(_AXIS_ORDERS)]) - _AXIS_SLICEMAP = { - 'major_axis': 'index', - 'minor_axis': 'columns' - } - _AXIS_LEN = len(_AXIS_ORDERS) - - # major - _default_stat_axis = 1 - - # info axis - _het_axis = 0 - _info_axis = _AXIS_ORDERS[_het_axis] - - items = lib.AxisProperty(0) - major_axis = lib.AxisProperty(1) - minor_axis = lib.AxisProperty(2) @property def _constructor(self): return type(self) - # return the type of the slice constructor _constructor_sliced = DataFrame - def _construct_axes_dict(self, axes=None, **kwargs): - """ return an axes dictionary for myself """ - d = dict([(a, getattr(self, a)) for a in (axes or self._AXIS_ORDERS)]) - d.update(kwargs) - return d - - @staticmethod - def _construct_axes_dict_from(self, axes, **kwargs): - """ return an axes dictionary for the passed axes """ - d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, axes)]) - d.update(kwargs) - return d - - def _construct_axes_dict_for_slice(self, axes=None, **kwargs): - """ return an axes dictionary for myself """ - d = dict([(self._AXIS_SLICEMAP[a], getattr(self, a)) - for a in (axes or self._AXIS_ORDERS)]) - d.update(kwargs) - return d - - __add__ = _arith_method(operator.add, '__add__') - __sub__ = _arith_method(operator.sub, '__sub__') - __truediv__ = _arith_method(operator.truediv, '__truediv__') - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__') - __mul__ = _arith_method(operator.mul, '__mul__') - __pow__ = _arith_method(operator.pow, '__pow__') - - __radd__ = _arith_method(operator.add, '__radd__') - __rmul__ = _arith_method(operator.mul, '__rmul__') - __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__') - __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__') - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__') - __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__') - - if not py3compat.PY3: - __div__ = _arith_method(operator.div, '__div__') - __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__') - def 
__init__(self, data=None, items=None, major_axis=None, minor_axis=None, copy=False, dtype=None): """ @@ -262,17 +200,8 @@ def _init_data(self, data, copy, dtype, **kwargs): NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype) - @classmethod - def _from_axes(cls, data, axes): - # for construction from BlockManager - if isinstance(data, BlockManager): - return cls(data) - else: - d = cls._construct_axes_dict_from(cls, axes, copy=False) - return cls(data, **d) - def _init_dict(self, data, axes, dtype=None): - haxis = axes.pop(self._het_axis) + haxis = axes.pop(self._info_axis_number) # prefilter if haxis passed if haxis is not None: @@ -315,10 +244,6 @@ def _init_arrays(self, arrays, arr_names, axes): mgr = BlockManager(blocks, axes).consolidate() return mgr - @property - def shape(self): - return [len(getattr(self, a)) for a in self._AXIS_ORDERS] - @classmethod def from_dict(cls, data, intersect=False, orient='items', dtype=None): """ @@ -355,16 +280,35 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): raise ValueError('only recognize items or minor for orientation') d = cls._homogenize_dict(cls, data, intersect=intersect, dtype=dtype) - d[cls._info_axis] = Index(sorted(d['data'].keys())) + d[cls._info_axis_name] = Index(sorted(d['data'].keys())) return cls(**d) + # Comparison methods + __add__ = _arith_method(operator.add, '__add__') + __sub__ = _arith_method(operator.sub, '__sub__') + __truediv__ = _arith_method(operator.truediv, '__truediv__') + __floordiv__ = _arith_method(operator.floordiv, '__floordiv__') + __mul__ = _arith_method(operator.mul, '__mul__') + __pow__ = _arith_method(operator.pow, '__pow__') + + __radd__ = _arith_method(operator.add, '__radd__') + __rmul__ = _arith_method(operator.mul, '__rmul__') + __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__') + __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__') + __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__') + __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__') + + if not py3compat.PY3: + __div__ = _arith_method(operator.div, '__div__') + __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__') + def __getitem__(self, key): - if isinstance(getattr(self, self._info_axis), MultiIndex): + if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) return super(Panel, self).__getitem__(key) def _getitem_multilevel(self, key): - info = getattr(self, self._info_axis) + info = self._info_axis loc = info.get_loc(key) if isinstance(loc, (slice, np.ndarray)): new_index = info[loc] @@ -374,7 +318,7 @@ def _getitem_multilevel(self, key): new_values = self.values[slices] d = self._construct_axes_dict(self._AXIS_ORDERS[1:]) - d[self._info_axis] = result_index + d[self._info_axis_name] = result_index result = self._constructor(new_values, **d) return result else: @@ -402,29 +346,16 @@ def _init_matrix(self, data, axes, dtype=None, copy=False): block = make_block(values, items, items) return BlockManager([block], fixed_axes) - #---------------------------------------------------------------------- - # Array interface - - def __array__(self, dtype=None): - return self.values - - def __array_wrap__(self, result): - d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) - return self._constructor(result, **d) - #---------------------------------------------------------------------- # Comparison methods - def _indexed_same(self, other): - return all([getattr(self, a).equals(getattr(other, a)) for a in self._AXIS_ORDERS]) - def 
_compare_constructor(self, other, func): if not self._indexed_same(other): raise Exception('Can only compare identically-labeled ' 'same type objects') new_data = {} - for col in getattr(self, self._info_axis): + for col in self._info_axis: new_data[col] = func(self[col], other[col]) d = self._construct_axes_dict(copy=False) @@ -435,12 +366,6 @@ def _compare_constructor(self, other, func): __or__ = _arith_method(operator.or_, '__or__') __xor__ = _arith_method(operator.xor, '__xor__') - def __neg__(self): - return -1 * self - - def __invert__(self): - return -1 * self - # Comparison methods __eq__ = _comp_method(operator.eq, '__eq__') __ne__ = _comp_method(operator.ne, '__ne__') @@ -459,28 +384,6 @@ def __invert__(self): #---------------------------------------------------------------------- # Magic methods - def __str__(self): - """ - Return a string representation for a particular Panel - - Invoked by str(df) in both py2/py3. - Yields Bytestring in Py2, Unicode String in py3. - """ - - if py3compat.PY3: - return self.__unicode__() - return self.__bytes__() - - def __bytes__(self): - """ - Return a string representation for a particular Panel - - Invoked by bytes(df) in py3 only. - Yields a bytestring in both py2/py3. - """ - encoding = com.get_option("display.encoding") - return self.__unicode__().encode(encoding, 'replace') - def __unicode__(self): """ Return a string representation for a particular Panel @@ -505,25 +408,6 @@ def axis_pretty(a): [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) return output - def __repr__(self): - """ - Return a string representation for a particular Panel - - Yields Bytestring in Py2, Unicode String in py3. - """ - return str(self) - - def __iter__(self): - return iter(getattr(self, self._info_axis)) - - def iteritems(self): - for h in getattr(self, self._info_axis): - yield h, self[h] - - # Name that won't get automatically converted to items by 2to3. items is - # already in use for the first axis. - iterkv = iteritems - def _get_plane_axes(self, axis): """ get my plane axes: these are already (as compared with higher level planes), as we are returning a DataFrame axes """ axis = self._get_axis_name(axis) @@ -540,10 +424,6 @@ def _get_plane_axes(self, axis): return index, columns - def _wrap_array(self, arr, axes, copy=False): - d = self._construct_axes_dict_from(self, axes, copy=copy) - return self._constructor(arr, **d) - fromDict = from_dict def to_sparse(self, fill_value=None, kind='block'): @@ -585,16 +465,10 @@ def to_excel(self, path, na_rep=''): df.to_excel(writer, name, na_rep=na_rep) writer.save() - # TODO: needed? - def keys(self): - return list(getattr(self, self._info_axis)) - - def _get_values(self): + def as_matrix(self): self._consolidate_inplace() return self._data.as_matrix() - values = property(fget=_get_values) - #---------------------------------------------------------------------- # Getting and setting elements @@ -650,7 +524,7 @@ def set_value(self, *args): args = list(args) likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1]) made_bigger = not np.array_equal( - axes[0], getattr(self, self._info_axis)) + axes[0], self._info_axis) # how to make this logic simpler? if made_bigger: com._possibly_cast_item(result, args[0], likely_dtype) @@ -664,7 +538,7 @@ def _box_item_values(self, key, values): def __getattr__(self, name): """After regular attribute access, try looking up the name of an item. 
This allows simpler access to items for interactive use.""" - if name in getattr(self, self._info_axis): + if name in self._info_axis: return self[name] raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name)) @@ -692,21 +566,6 @@ def __setitem__(self, key, value): mat = mat.reshape(tuple([1]) + shape[1:]) NDFrame._set_item(self, key, mat) - def pop(self, item): - """ - Return item slice from panel and delete from panel - - Parameters - ---------- - key : object - Must be contained in panel's items - - Returns - ------- - y : DataFrame - """ - return NDFrame.pop(self, item) - def __getstate__(self): "Returned pickled representation of the panel" return self._data @@ -1096,76 +955,6 @@ def groupby(self, function, axis='major'): axis = self._get_axis_number(axis) return PanelGroupBy(self, function, axis=axis) - def swapaxes(self, axis1='major', axis2='minor', copy=True): - """ - Interchange axes and swap values axes appropriately - - Returns - ------- - y : Panel (new object) - """ - i = self._get_axis_number(axis1) - j = self._get_axis_number(axis2) - - if i == j: - raise ValueError('Cannot specify the same axis') - - mapping = {i: j, j: i} - - new_axes = (self._get_axis(mapping.get(k, k)) - for k in range(self._AXIS_LEN)) - new_values = self.values.swapaxes(i, j) - if copy: - new_values = new_values.copy() - - return self._constructor(new_values, *new_axes) - - def transpose(self, *args, **kwargs): - """ - Permute the dimensions of the Panel - - Parameters - ---------- - items : int or one of {'items', 'major', 'minor'} - major : int or one of {'items', 'major', 'minor'} - minor : int or one of {'items', 'major', 'minor'} - copy : boolean, default False - Make a copy of the underlying data. Mixed-dtype data will - always result in a copy - - Examples - -------- - >>> p.transpose(2, 0, 1) - >>> p.transpose(2, 0, 1, copy=True) - - Returns - ------- - y : Panel (new object) - """ - - # construct the args - args = list(args) - for a in self._AXIS_ORDERS: - if not a in kwargs: - try: - kwargs[a] = args.pop(0) - except (IndexError): - raise ValueError( - "not enough arguments specified to transpose!") - - axes = [self._get_axis_number(kwargs[a]) for a in self._AXIS_ORDERS] - - # we must have unique axes - if len(axes) != len(set(axes)): - raise ValueError('Must specify %s unique axes' % self._AXIS_LEN) - - new_axes = self._construct_axes_dict_from( - self, [self._get_axis(x) for x in axes]) - new_values = self.values.transpose(tuple(axes)) - if kwargs.get('copy') or (len(args) and args[-1]): - new_values = new_values.copy() - return self._constructor(new_values, **new_axes) - def to_frame(self, filter_observations=True): """ Transform wide format into long (stacked) format as DataFrame @@ -1256,7 +1045,7 @@ def _reduce(self, op, axis=0, skipna=True): result = f(self.values) axes = self._get_plane_axes(axis_name) - if result.ndim == 2 and axis_name != self._info_axis: + if result.ndim == 2 and axis_name != self._info_axis_name: result = result.T return self._constructor_sliced(result, **self._extract_axes_for_slice(self, axes)) @@ -1264,7 +1053,7 @@ def _reduce(self, op, axis=0, skipna=True): def _wrap_result(self, result, axis): axis = self._get_axis_name(axis) axes = self._get_plane_axes(axis) - if result.ndim == 2 and axis != self._info_axis: + if result.ndim == 2 and axis != self._info_axis_name: result = result.T # do we have reduced dimensionalility? 
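With swapaxes and transpose now implemented once on the generic NDFrame, Panel simply inherits them, and (per the RELEASE note above) swapping an axis with itself returns a copy rather than raising. The following is a minimal illustrative sketch, not part of the patch, assuming a pandas build from this era in which Panel still exists; the item and axis labels are invented for illustration:

    import numpy as np
    import pandas as pd

    # hypothetical labels, purely for illustration
    p = pd.Panel(np.random.randn(2, 3, 4),
                 items=['i0', 'i1'],
                 major_axis=pd.date_range('2013-01-01', periods=3),
                 minor_axis=['a', 'b', 'c', 'd'])

    # axis names, aliases, or numbers are all accepted
    swapped = p.swapaxes('major_axis', 'minor_axis')      # shape (2, 4, 3)
    transposed = p.transpose('minor', 'major', 'items')   # shape (4, 3, 2)

    # swapping an axis with itself now returns a copy (previously an error)
    same = p.swapaxes('items', 'items')
    assert same is not p
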
@@ -1432,9 +1221,9 @@ def update(self, other, join='left', overwrite=True, filter_func=None, if not isinstance(other, self._constructor): other = self._constructor(other) - axis = self._info_axis - axis_values = getattr(self, axis) - other = other.reindex(**{axis: axis_values}) + axis_name = self._info_axis_name + axis_values = self._info_axis + other = other.reindex(**{axis_name: axis_values}) for frame in axis_values: self[frame].update(other[frame], join, overwrite, filter_func, @@ -1673,6 +1462,13 @@ def min(self, axis='major', skipna=True): return self._reduce(nanops.nanmin, axis=axis, skipna=skipna) cls.min = min +Panel._setup_axes(axes = ['items', 'major_axis', 'minor_axis'], + info_axis = 0, + stat_axis = 1, + aliases = { 'major': 'major_axis', + 'minor': 'minor_axis' }, + slicers = { 'major_axis': 'index', + 'minor_axis': 'columns' }) Panel._add_aggregate_operations() WidePanel = Panel diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index b2fb2d25e2355..dfe9294e06c08 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -1,40 +1,40 @@ -""" Panel4D: a 4-d dict like collection of panels """ - -from pandas.core.panelnd import create_nd_panel_factory -from pandas.core.panel import Panel - -Panel4D = create_nd_panel_factory( - klass_name='Panel4D', - axis_orders=['labels', 'items', 'major_axis', 'minor_axis'], - axis_slices={'labels': 'labels', 'items': 'items', - 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, - slicer=Panel, - axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, - stat_axis=2) - - -def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, - minor_axis=None, copy=False, dtype=None): - """ - Represents a 4 dimensonal structured - - Parameters - ---------- - data : ndarray (labels x items x major x minor), or dict of Panels - - labels : Index or array-like : axis=0 - items : Index or array-like : axis=1 - major_axis : Index or array-like: axis=2 - minor_axis : Index or array-like: axis=3 - - dtype : dtype, default None - Data type to force, otherwise infer - copy : boolean, default False - Copy data from inputs. Only affects DataFrame / 2d ndarray input - """ - self._init_data(data=data, labels=labels, items=items, - major_axis=major_axis, minor_axis=minor_axis, - copy=copy, dtype=dtype) - -Panel4D.__init__ = panel4d_init +""" Panel4D: a 4-d dict like collection of panels """ + +from pandas.core.panelnd import create_nd_panel_factory +from pandas.core.panel import Panel + +Panel4D = create_nd_panel_factory( + klass_name = 'Panel4D', + axis_orders = ['labels', 'items', 'major_axis', 'minor_axis'], + axis_slices = {'labels': 'labels', 'items': 'items', + 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, + slicer = Panel, + axis_aliases = {'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis = 2) + + +def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, + minor_axis=None, copy=False, dtype=None): + """ + Represents a 4 dimensonal structured + + Parameters + ---------- + data : ndarray (labels x items x major x minor), or dict of Panels + + labels : Index or array-like : axis=0 + items : Index or array-like : axis=1 + major_axis : Index or array-like: axis=2 + minor_axis : Index or array-like: axis=3 + + dtype : dtype, default None + Data type to force, otherwise infer + copy : boolean, default False + Copy data from inputs. 
Only affects DataFrame / 2d ndarray input + """ + self._init_data(data=data, labels=labels, items=items, + major_axis=major_axis, minor_axis=minor_axis, + copy=copy, dtype=dtype) + +Panel4D.__init__ = panel4d_init diff --git a/pandas/core/series.py b/pandas/core/series.py index 0c006d4c60904..8171e24e1b709 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -381,11 +381,6 @@ def f(self, axis=0, dtype=None, out=None, skipna=True, level=None): class Series(pa.Array, generic.PandasObject): - _AXIS_NUMBERS = { - 'index': 0 - } - - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) def __new__(cls, data=None, index=None, dtype=None, name=None, copy=False): @@ -3163,6 +3158,7 @@ def str(self): from pandas.core.strings import StringMethods return StringMethods(self) +Series._setup_axes(['index'], build_axes = False) _INDEX_TYPES = ndarray, Index, list, tuple #------------------------------------------------------------------------------ diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index b2dbca70f3b77..a21ec40aa7eee 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -69,11 +69,13 @@ class SparseDataFrame(DataFrame): _columns = None _series = None _is_mixed_type = False - _col_klass = SparseSeries ndim = 2 + _constructor_sliced = SparseSeries + def __init__(self, data=None, index=None, columns=None, - default_kind='block', default_fill_value=None): + default_kind='block', default_fill_value=None, + copy=False): if default_fill_value is None: default_fill_value = np.nan @@ -383,7 +385,7 @@ def icol(self, i): return self[label] # values = self._data.iget(i) - # return self._col_klass.from_array( + # return self._constructor_sliced.from_array( # values, index=self.index, name=label, # fill_value= self.default_fill_value) diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index bd5a2785aba2b..870539118e9f0 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -60,7 +60,8 @@ class SparsePanel(Panel): ndim = 3 def __init__(self, frames, items=None, major_axis=None, minor_axis=None, - default_fill_value=np.nan, default_kind='block'): + default_fill_value=np.nan, default_kind='block', + copy=False): if isinstance(frames, np.ndarray): new_frames = {} for item, vals in zip(items, frames): diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index b799188170e6f..5b553e049c027 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -570,6 +570,5 @@ def combine_first(self, other): dense_combined = self.to_dense().combine_first(other) return dense_combined.to_sparse(fill_value=self.fill_value) - class SparseTimeSeries(SparseSeries, TimeSeries): pass diff --git a/pandas/tests/test_ndframe.py b/pandas/tests/test_ndframe.py index d5d50359b67e8..edafeb64af98e 100644 --- a/pandas/tests/test_ndframe.py +++ b/pandas/tests/test_ndframe.py @@ -14,21 +14,6 @@ def setUp(self): tdf = t.makeTimeDataFrame() self.ndf = NDFrame(tdf._data) - def test_constructor(self): - # with cast - ndf = NDFrame(self.ndf._data, dtype=np.int64) - self.assert_(ndf.values.dtype == np.int64) - - def test_ndim(self): - self.assertEquals(self.ndf.ndim, 2) - - def test_astype(self): - casted = self.ndf.astype(int) - self.assert_(casted.values.dtype == np.int_) - - casted = self.ndf.astype(np.int32) - self.assert_(casted.values.dtype == np.int32) - def test_squeeze(self): # noop for s in [ t.makeFloatSeries(), t.makeStringSeries(), t.makeObjectSeries() ]: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 
d857e999bdd33..5ea9144a7c7c2 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1103,8 +1103,10 @@ def test_swapaxes(self): result = self.panel.swapaxes(0, 1) self.assert_(result.items is self.panel.major_axis) - # this should not work - self.assertRaises(Exception, self.panel.swapaxes, 'items', 'items') + # this works, but return a copy + result = self.panel.swapaxes('items', 'items') + assert_panel_equal(self.panel,result) + self.assert_(id(self.panel) != id(result)) def test_transpose(self): result = self.panel.transpose('minor', 'major', 'items') diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 5bb452deb1d4d..f2356273c019b 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -880,8 +880,10 @@ def test_swapaxes(self): result = self.panel4d.swapaxes(0, 1) self.assert_(result.labels is self.panel4d.items) - # this should also work - self.assertRaises(Exception, self.panel4d.swapaxes, 'items', 'items') + # this works, but return a copy + result = self.panel4d.swapaxes('items', 'items') + assert_panel4d_equal(self.panel4d,result) + self.assert_(id(self.panel4d) != id(result)) def test_to_frame(self): raise nose.SkipTest From 73c6344b3c5ee85f6bc8ac9ccb0617f1d1158882 Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 20 Feb 2013 12:54:07 -0500 Subject: [PATCH 2/6] ENH: more methods added PERF: was missing multi-take opportunity in reindex was incorrectly passing to com._count_not_none doing an extra copy in certain cases --- RELEASE.rst | 14 +- pandas/core/frame.py | 272 ++++-------------------- pandas/core/generic.py | 378 +++++++++++++++++++++++++++++---- pandas/core/internals.py | 26 +-- pandas/core/panel.py | 133 +----------- pandas/core/series.py | 24 --- pandas/io/pytables.py | 14 +- pandas/sparse/frame.py | 7 +- pandas/tests/test_frame.py | 1 + pandas/tests/test_internals.py | 10 +- pandas/tests/test_panel.py | 13 +- pandas/tests/test_panel4d.py | 7 +- 12 files changed, 446 insertions(+), 453 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 83bdca94151a1..66f644e81fe18 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -130,20 +130,24 @@ pandas 0.11.0 - arguments to DataFrame.clip were inconsistent to numpy and Series clipping (GH2747_) - - Refactor of series.py/frame.py/panel.py to move common code to generic.py + - Refactor of frame.py/panel.py to move common code to generic.py all axis creation and manipulation code is now common (except for Series) - added _setup_axes to created generic NDFrame structures - - moved methods + - moved methods (some methods moved from series as well) - from_axes,_wrap_array,axes,ix,shape,empty,swapaxes,transpose,pop - __str__,__bytes__,__repr__ - __iter__,keys,__contains__,__len__,__neg__,__invert__ - - convert_objects,as_blocks - - _indexed_same + - convert_objects,as_blocks,as_matrix,values + - __getstate__,__setstate__ (though compat remains in frame/panel) + - __getattr__,__setattr__ + - _indexed_same,reindex_like,reindex + (sparse.py required some changes) + These are API changes which make Panel more consistent with DataFrame - swapaxes on a Panel with the same axes specified now return a copy - (consistent with DataFrame) + - support attribute access for setting diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0b3a902ec017b..6a9bf5d52992f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1553,22 +1553,6 @@ def transpose(self): #---------------------------------------------------------------------- # Picklability - def 
__getstate__(self): - return self._data - - def __setstate__(self, state): - # old DataFrame pickle - if isinstance(state, BlockManager): - self._data = state - elif isinstance(state[0], dict): # pragma: no cover - self._unpickle_frame_compat(state) - else: # pragma: no cover - # old pickling format, for compatibility - self._unpickle_matrix_compat(state) - - # ordinarily created in NDFrame - self._item_cache = {} - # legacy pickle formats def _unpickle_frame_compat(self, state): # pragma: no cover from pandas.core.common import _unpickle_array @@ -1602,15 +1586,6 @@ def _unpickle_matrix_compat(self, state): # pragma: no cover self._data = dm._data #---------------------------------------------------------------------- - # Array interface - - def __array__(self, dtype=None): - return self.values - - def __array_wrap__(self, result): - return self._constructor(result, index=self.index, - columns=self.columns, copy=False) - #---------------------------------------------------------------------- # Getting and setting elements @@ -1799,12 +1774,9 @@ def _getitem_frame(self, key): return self.where(key) def _slice(self, slobj, axis=0, raise_on_error=False): - if axis == 0: - mgr_axis = 1 - else: - mgr_axis = 0 - - new_data = self._data.get_slice(slobj, axis=mgr_axis, raise_on_error=raise_on_error) + axis = self._get_block_manager_axis(axis) + new_data = self._data.get_slice(slobj, axis=axis) + new_data = self._data.get_slice(slobj, axis=axis, raise_on_error=raise_on_error) return self._constructor(new_data) def _box_item_values(self, key, values): @@ -1814,31 +1786,6 @@ def _box_item_values(self, key, values): else: return Series.from_array(values, index=self.index, name=items) - def __getattr__(self, name): - """After regular attribute access, try looking up the name of a column. - This allows simpler access to columns for interactive use.""" - if name in self.columns: - return self[name] - raise AttributeError("'%s' object has no attribute '%s'" % - (type(self).__name__, name)) - - def __setattr__(self, name, value): - """After regular attribute access, try looking up the name of a column. 
- This allows simpler access to columns for interactive use.""" - if name == '_data': - super(DataFrame, self).__setattr__(name, value) - else: - try: - existing = getattr(self, name) - if isinstance(existing, Index): - super(DataFrame, self).__setattr__(name, value) - elif name in self.columns: - self[name] = value - else: - object.__setattr__(self, name, value) - except (AttributeError, TypeError): - object.__setattr__(self, name, value) - def __setitem__(self, key, value): if isinstance(key, slice): # slice rows @@ -2224,12 +2171,13 @@ def _align_frame(self, other, join='outer', axis=None, level=None, self.columns.join(other.columns, how=join, level=level, return_indexers=True) - left = self._reindex_with_indexers(join_index, ilidx, - join_columns, clidx, copy, - fill_value=fill_value) - right = other._reindex_with_indexers(join_index, iridx, - join_columns, cridx, copy, - fill_value=fill_value) + left = self._reindex_with_indexers({ 0 : [ join_index, ilidx ], + 1 : [ join_columns, clidx ] }, + copy=copy, fill_value=fill_value) + right = other._reindex_with_indexers({ 0 : [ join_index, iridx ], + 1 : [ join_columns, cridx ] }, + copy=copy, fill_value=fill_value) + if method is not None: left = left.fillna(axis=fill_axis, method=method, limit=limit) @@ -2278,115 +2226,40 @@ def _align_series(self, other, join='outer', axis=None, level=None, else: return left_result, right_result - def reindex(self, index=None, columns=None, method=None, level=None, - fill_value=NA, limit=None, copy=True): - """Conform DataFrame to new index with optional filling logic, placing - NA/NaN in locations having no value in the previous index. A new object - is produced unless the new index is equivalent to the current one and - copy=False + def _reindex_axes(self, axes, level, limit, method, fill_value, copy): + frame = self - Parameters - ---------- - index : array-like, optional - New labels / index to conform to. Preferably an Index object to - avoid duplicating data - columns : array-like, optional - Same usage as index argument - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed DataFrame - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - copy : boolean, default True - Return a new object, even if the passed indexes are the same - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - fill_value : scalar, default np.NaN - Value to use for missing values. 
Defaults to NaN, but can be any - "compatible" value - limit : int, default None - Maximum size gap to forward or backward fill - - Examples - -------- - >>> df.reindex(index=[date1, date2, date3], columns=['A', 'B', 'C']) - - Returns - ------- - reindexed : same type as calling instance - """ - self._consolidate_inplace() - frame = self - - if (index is not None and columns is not None - and method is None and level is None - and not self._is_mixed_type): - return self._reindex_multi(index, columns, copy, fill_value) - - if columns is not None: - frame = frame._reindex_columns(columns, copy, level, - fill_value, limit) - - if index is not None: - frame = frame._reindex_index(index, method, copy, level, + columns = axes['columns'] + if columns is not None: + frame = frame._reindex_columns(columns, copy, level, fill_value, limit) - return frame + index = axes['index'] + if index is not None: + frame = frame._reindex_index(index, method, copy, level, + fill_value, limit) - def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, - limit=None, fill_value=NA): - """Conform DataFrame to new index with optional filling logic, placing - NA/NaN in locations having no value in the previous index. A new object - is produced unless the new index is equivalent to the current one and - copy=False - - Parameters - ---------- - index : array-like, optional - New labels / index to conform to. Preferably an Index object to - avoid duplicating data - axis : {0, 1} - 0 -> index (rows) - 1 -> columns - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed DataFrame - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - copy : boolean, default True - Return a new object, even if the passed indexes are the same - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - limit : int, default None - Maximum size gap to forward or backward fill + return frame - Examples - -------- - >>> df.reindex_axis(['A', 'B', 'C'], axis=1) + def _reindex_index(self, new_index, method, copy, level, fill_value=NA, + limit=None): + new_index, indexer = self.index.reindex(new_index, method, level, + limit=limit) + return self._reindex_with_indexers({ 0 : [ new_index, indexer ] }, + copy=copy, fill_value=fill_value) - See also - -------- - DataFrame.reindex, DataFrame.reindex_like + def _reindex_columns(self, new_columns, copy, level, fill_value=NA, + limit=None): + new_columns, indexer = self.columns.reindex(new_columns, level=level, + limit=limit) + return self._reindex_with_indexers({ 1 : [ new_columns, indexer ] }, + copy=copy, fill_value=fill_value) - Returns - ------- - reindexed : same type as calling instance - """ - self._consolidate_inplace() - if axis == 0: - return self._reindex_index(labels, method, copy, level, - fill_value=fill_value, - limit=limit) - elif axis == 1: - return self._reindex_columns(labels, copy, level, - fill_value=fill_value, - limit=limit) - else: # pragma: no cover - raise ValueError('Must specify axis=0 or 1') + def _reindex_multi(self, axes, copy, fill_value): + """ we are guaranteed non-Nones in the axes! 
""" - def _reindex_multi(self, new_index, new_columns, copy, fill_value): - new_index, row_indexer = self.index.reindex(new_index) - new_columns, col_indexer = self.columns.reindex(new_columns) + new_index, row_indexer = self.index.reindex(axes['index']) + new_columns, col_indexer = self.columns.reindex(axes['columns']) if row_indexer is not None and col_indexer is not None: indexer = row_indexer, col_indexer @@ -2394,81 +2267,12 @@ def _reindex_multi(self, new_index, new_columns, copy, fill_value): fill_value=fill_value) return DataFrame(new_values, index=new_index, columns=new_columns) elif row_indexer is not None: - return self._reindex_with_indexers(new_index, row_indexer, - None, None, copy, fill_value) + return self._reindex_with_indexers({ 0 : [ new_index, row_indexer ] }, copy=copy, fill_value=fill_value) elif col_indexer is not None: - return self._reindex_with_indexers(None, None, - new_columns, col_indexer, - copy, fill_value) + return self._reindex_with_indexers({ 1 : [ new_columns, col_indexer ] }, copy=copy, fill_value=fill_value) else: return self.copy() if copy else self - def _reindex_index(self, new_index, method, copy, level, fill_value=NA, - limit=None): - new_index, indexer = self.index.reindex(new_index, method, level, - limit=limit) - return self._reindex_with_indexers(new_index, indexer, None, None, - copy, fill_value) - - def _reindex_columns(self, new_columns, copy, level, fill_value=NA, - limit=None): - new_columns, indexer = self.columns.reindex(new_columns, level=level, - limit=limit) - return self._reindex_with_indexers(None, None, new_columns, indexer, - copy, fill_value) - - def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer, - copy, fill_value): - new_data = self._data - if row_indexer is not None: - row_indexer = com._ensure_int64(row_indexer) - new_data = new_data.reindex_indexer(index, row_indexer, axis=1, - fill_value=fill_value) - elif index is not None and index is not new_data.axes[1]: - new_data = new_data.copy(deep=copy) - new_data.axes[1] = index - - if col_indexer is not None: - # TODO: speed up on homogeneous DataFrame objects - col_indexer = com._ensure_int64(col_indexer) - new_data = new_data.reindex_indexer(columns, col_indexer, axis=0, - fill_value=fill_value) - elif columns is not None and columns is not new_data.axes[0]: - new_data = new_data.reindex_items(columns, copy=copy, - fill_value=fill_value) - - if copy and new_data is self._data: - new_data = new_data.copy() - - return DataFrame(new_data) - - def reindex_like(self, other, method=None, copy=True, limit=None, - fill_value=NA): - """ - Reindex DataFrame to match indices of another DataFrame, optionally - with filling logic - - Parameters - ---------- - other : DataFrame - method : string or None - copy : boolean, default True - limit : int, default None - Maximum size gap to forward or backward fill - - Notes - ----- - Like calling s.reindex(index=other.index, columns=other.columns, - method=...) 
-
-        Returns
-        -------
-        reindexed : DataFrame
-        """
-        return self.reindex(index=other.index, columns=other.columns,
-                            method=method, copy=copy, limit=limit,
-                            fill_value=fill_value)
-
     truncate = generic.truncate
 
     def set_index(self, keys, drop=True, append=False, inplace=False,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 058e639b64ada..a1d6eb61b34ce 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3,15 +3,16 @@
 import operator
 import numpy as np
 
-from pandas.core.index import MultiIndex
+from pandas.core.index import Index, MultiIndex, _ensure_index
 import pandas.core.indexing as indexing
 from pandas.core.indexing import _maybe_convert_indices
 from pandas.tseries.index import DatetimeIndex
 from pandas.core.internals import BlockManager
 from pandas.core.indexing import _NDFrameIndexer
-import pandas.core.common as com
 import pandas.lib as lib
 from pandas.util import py3compat
+import pandas.core.common as com
+from pandas.core.common import (isnull, notnull, _infer_dtype_from_scalar)
 
 class PandasError(Exception):
     pass
@@ -51,7 +52,8 @@ def _setup_axes(cls, axes, info_axis = None, stat_axis = None, aliases = None, s
         cls._AXIS_NUMBERS = dict([(a, i) for i, a in enumerate(axes) ])
         cls._AXIS_LEN = len(axes)
         cls._AXIS_ALIASES = aliases or dict()
-        cls._AXIS_NAMES = dict([(i, a) for i, a in enumerate(axes) ])
+        cls._AXIS_IALIASES = dict([ (v,k) for k, v in cls._AXIS_ALIASES.items() ])
+        cls._AXIS_NAMES = dict([(i, a) for i, a in enumerate(axes) ])
         cls._AXIS_SLICEMAP = slicers or None
         cls._AXIS_REVERSED = axes_are_reversed
 
@@ -68,17 +70,21 @@ def _setup_axes(cls, axes, info_axis = None, stat_axis = None, aliases = None, s
 
         # setup the actual axis
         if build_axes:
+
+            def set_axis(a, i):
+                setattr(cls,a,lib.AxisProperty(i))
+
             if axes_are_reversed:
                 m = cls._AXIS_LEN-1
                 for i, a in cls._AXIS_NAMES.items():
-                    setattr(cls,a,lib.AxisProperty(m-i))
+                    set_axis(a,m-i)
             else:
                 for i, a in cls._AXIS_NAMES.items():
-                    setattr(cls,a,lib.AxisProperty(i))
+                    set_axis(a,i)
 
     def _construct_axes_dict(self, axes=None, **kwargs):
         """ return an axes dictionary for myself """
-        d = dict([(a, getattr(self, a)) for a in (axes or self._AXIS_ORDERS)])
+        d = dict([(a, self._get_axis(a)) for a in (axes or self._AXIS_ORDERS)])
         d.update(kwargs)
         return d
 
@@ -91,11 +97,43 @@ def _construct_axes_dict_from(self, axes, **kwargs):
 
     def _construct_axes_dict_for_slice(self, axes=None, **kwargs):
         """ return an axes dictionary for myself """
-        d = dict([(self._AXIS_SLICEMAP[a], getattr(self, a))
+        d = dict([(self._AXIS_SLICEMAP[a], self._get_axis(a))
                   for a in (axes or self._AXIS_ORDERS)])
         d.update(kwargs)
         return d
 
+    def _construct_axes_from_arguments(self, args, kwargs, require_all=False):
+        """ construct and return axes if supplied in args/kwargs
+            if require_all, raise if all axis arguments are not supplied
+            return a tuple of (axes, kwargs) """
+
+        # construct the args
+        args = list(args)
+        for a in self._AXIS_ORDERS:
+
+            # if we have an alias for this axis
+            alias = self._AXIS_IALIASES.get(a)
+            if alias is not None:
+                if a in kwargs:
+                    if alias in kwargs:
+                        raise Exception("arguments are mutually exclusive for [%s,%s]" % (a,alias))
+                    continue
+                if alias in kwargs:
+                    kwargs[a] = kwargs.pop(alias)
+                    continue
+
+            # look for an argument by position
+            if a not in kwargs:
+                try:
+                    kwargs[a] = args.pop(0)
+                except (IndexError):
+                    if require_all:
+                        raise ValueError(
+                            "not enough arguments specified!")
+
+        axes = dict([ (a,kwargs.get(a)) for a in self._AXIS_ORDERS])
+        return axes, kwargs
+
     @classmethod
     def
_from_axes(cls, data, axes): # for construction from BlockManager @@ -110,30 +148,40 @@ def _from_axes(cls, data, axes): @classmethod def _get_axis_number(cls, axis): axis = cls._AXIS_ALIASES.get(axis, axis) - - if com.is_integer(axis): - if axis in cls._AXIS_NAMES: - return axis - else: - raise ValueError('No %d axis' % axis) - else: + try: + if com.is_integer(axis): + if axis in cls._AXIS_NAMES: + return axis return cls._AXIS_NUMBERS[axis] + except: + pass + + raise ValueError('No %d axis' % axis) @classmethod def _get_axis_name(cls, axis): axis = cls._AXIS_ALIASES.get(axis, axis) - if isinstance(axis, basestring): - if axis in cls._AXIS_NUMBERS: - return axis - else: - raise ValueError('No axis named %s' % axis) - else: + try: + if isinstance(axis, basestring): + if axis in cls._AXIS_NUMBERS: + return axis return cls._AXIS_NAMES[axis] + except: + pass + + raise ValueError('No axis named %s' % axis) def _get_axis(self, axis): name = self._get_axis_name(axis) return getattr(self, name) + def _get_block_manager_axis(self, axis): + """ map the axis to the block_manager axis """ + if self._AXIS_REVERSED: + m = self._AXIS_LEN-1 + return m-axis + return axis + @property def _info_axis(self): return getattr(self, self._info_axis_name) @@ -171,7 +219,7 @@ def load(cls, path): # Comparisons def _indexed_same(self, other): - return all([getattr(self, a).equals(getattr(other, a)) for a in self._AXIS_ORDERS]) + return all([ self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS]) def reindex(self, *args, **kwds): raise NotImplementedError @@ -215,7 +263,7 @@ def __contains__(self, key): @property def empty(self): - return not all(len(getattr(self, a)) > 0 for a in self._AXIS_ORDERS) + return not all(len(self._get_axis(a)) > 0 for a in self._AXIS_ORDERS) #---------------------------------------------------------------------- # Formatting @@ -521,6 +569,29 @@ def select(self, crit, axis=0): return self.reindex(**{axis_name: new_axis}) + def reindex_like(self, other, method=None, copy=True, limit=None): + """ return an object with matching indicies to myself + + Parameters + ---------- + other : Object + method : string or None + copy : boolean, default True + limit : int, default None + Maximum size gap to forward or backward fill + + Notes + ----- + Like calling s.reindex(index=other.index, columns=other.columns, + method=...) 
+ + Returns + ------- + reindexed : same as input + """ + d = other._construct_axes_dict(method=method) + return self.reindex(**d) + def drop(self, labels, axis=0, level=None): """ Return new object with labels in requested axis removed @@ -690,13 +761,13 @@ def __init__(self, data, axes=None, copy=False, dtype=None): @property def shape(self): - return tuple(len(getattr(self, a)) for a in self._AXIS_ORDERS) + return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) @property def axes(self): """ we do it this way because if we have reversed axes, then the block manager shows then reversed """ - return [getattr(self, a) for a in self._AXIS_ORDERS] + return [self._get_axis(a) for a in self._AXIS_ORDERS] @property def ndim(self): @@ -740,24 +811,17 @@ def transpose(self, *args, **kwargs): """ # construct the args - args = list(args) - for a in self._AXIS_ORDERS: - if not a in kwargs: - try: - kwargs[a] = args.pop(0) - except (IndexError): - raise ValueError( - "not enough arguments specified to transpose!") - - axes = [self._get_axis_number(kwargs[a]) for a in self._AXIS_ORDERS] + axes, kwargs = self._construct_axes_from_arguments(args, kwargs, require_all=True) + axes_names = tuple([ self._get_axis_name( axes[a]) for a in self._AXIS_ORDERS ]) + axes_numbers = tuple([ self._get_axis_number(axes[a]) for a in self._AXIS_ORDERS ]) # we must have unique axes if len(axes) != len(set(axes)): raise ValueError('Must specify %s unique axes' % self._AXIS_LEN) new_axes = self._construct_axes_dict_from( - self, [self._get_axis(x) for x in axes]) - new_values = self.values.transpose(tuple(axes)) + self, [self._get_axis(x) for x in axes_names]) + new_values = self.values.transpose(axes_numbers) if kwargs.get('copy') or (len(args) and args[-1]): new_values = new_values.copy() return self._constructor(new_values, **new_axes) @@ -871,6 +935,27 @@ def __array_wrap__(self, result): d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) return self._constructor(result, **d) + #---------------------------------------------------------------------- + # Picklability + + def __getstate__(self): + return self._data + + def __setstate__(self, state): + + if isinstance(state, BlockManager): + self._data = state + elif isinstance(state[0], dict): + self._unpickle_frame_compat(state) + elif len(state) == 4: + self._unpickle_panel_compat(state) + else: # pragma: no cover + # old pickling format, for compatibility + self._unpickle_matrix_compat(state) + + # ordinarily created in NDFrame + self._item_cache = {} + #---------------------------------------------------------------------- # Fancy Indexing @@ -935,6 +1020,165 @@ def get_dtype_counts(self): from pandas import Series return Series(self._data.get_dtype_counts()) + def reindex(self, *args, **kwargs): + """Conform DataFrame to new index with optional filling logic, placing + NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + copy=False + + Parameters + ---------- + axes : array-like, optional (can be specified in order, or as keywords) + New labels / index to conform to. 
Preferably an Index object to + avoid duplicating data + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed DataFrame + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + copy : boolean, default True + Return a new object, even if the passed indexes are the same + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value + limit : int, default None + Maximum size gap to forward or backward fill + + Examples + -------- + >>> df.reindex(index=[date1, date2, date3], columns=['A', 'B', 'C']) + + Returns + ------- + reindexed : same type as calling instance + """ + + # construct the args + axes, kwargs = self._construct_axes_from_arguments(args, kwargs) + method = kwargs.get('method') + level = kwargs.get('level') + copy = kwargs.get('copy',True) + limit = kwargs.get('limit') + fill_value = kwargs.get('fill_value',np.nan) + + self._consolidate_inplace() + + # check if we are a multi reindex + if self._needs_reindex_multi(axes, method, level): + try: + return self._reindex_multi(axes, copy, fill_value) + except: + pass + + # perform the reindex on the axes + if copy and not com._count_not_none(*axes.values()): + return self.copy() + + return self._reindex_axes(axes, level, limit, method, fill_value, copy) + + def _reindex_axes(self, axes, level, limit, method, fill_value, copy): + """ perform the reinxed for all the axes """ + obj = self + for a in self._AXIS_ORDERS: + labels = axes[a] + if labels is None: continue + + # convert to an index if we are not a multi-selection + if level is None: + labels = _ensure_index(labels) + + axis = self._get_axis_number(a) + new_index, indexer = self._get_axis(a).reindex(labels, level=level, limit=limit) + obj = obj._reindex_with_indexers({ axis : [ labels, indexer ] }, method, fill_value, copy) + + return obj + + def _needs_reindex_multi(self, axes, method, level): + """ check if we do need a multi reindex """ + return (com._count_not_none(*axes.values()) == self._AXIS_LEN) and method is None and level is None and not self._is_mixed_type + + def _reindex_multi(self, axes, copy, fill_value): + return NotImplemented + + def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, + limit=None, fill_value=np.nan): + """Conform input object to new index with optional filling logic, placing + NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + copy=False + + Parameters + ---------- + index : array-like, optional + New labels / index to conform to. 
Preferably an Index object to
+            avoid duplicating data
+        axis : allowed axis for the input
+        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
+            Method to use for filling holes in reindexed DataFrame
+            pad / ffill: propagate last valid observation forward to next valid
+            backfill / bfill: use NEXT valid observation to fill gap
+        copy : boolean, default True
+            Return a new object, even if the passed indexes are the same
+        level : int or name
+            Broadcast across a level, matching Index values on the
+            passed MultiIndex level
+        limit : int, default None
+            Maximum size gap to forward or backward fill
+
+        Examples
+        --------
+        >>> df.reindex_axis(['A', 'B', 'C'], axis=1)
+
+        See also
+        --------
+        DataFrame.reindex, DataFrame.reindex_like
+
+        Returns
+        -------
+        reindexed : same type as calling instance
+        """
+        self._consolidate_inplace()
+
+        axis_name = self._get_axis_name(axis)
+        axis_values = self._get_axis(axis_name)
+        new_index, indexer = axis_values.reindex(labels, method, level,
+                                                 limit=limit)
+        return self._reindex_with_indexers({ axis : [ new_index, indexer ] }, method, fill_value, copy)
+
+    def _reindex_with_indexers(self, reindexers, method=None, fill_value=np.nan, copy=False):
+
+        # reindex doing multiple operations on different axes if indicated
+        new_data = self._data
+        for axis in sorted(reindexers.keys()):
+            index, indexer = reindexers[axis]
+            baxis = self._get_block_manager_axis(axis)
+
+            # reindex the axis
+            if method is not None:
+                new_data = new_data.reindex_axis(index, method=method, axis=baxis,
+                                                 fill_value=fill_value, copy=copy)
+
+            elif indexer is not None:
+                # TODO: speed up on homogeneous DataFrame objects
+                indexer = com._ensure_int64(indexer)
+                new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
+                                                    fill_value=fill_value)
+
+            elif baxis == 0 and index is not None and index is not new_data.axes[baxis]:
+                new_data = new_data.reindex_items(index, copy=copy,
+                                                  fill_value=fill_value)
+
+            elif baxis > 0 and index is not None and index is not new_data.axes[baxis]:
+                new_data = new_data.copy(deep=copy)
+                new_data.set_axis(baxis,index)
+
+        if copy and new_data is self._data:
+            new_data = new_data.copy()
+
+        return self._constructor(new_data)
+
     def _reindex_axis(self, new_index, fill_method, axis, copy):
         new_data = self._data.reindex_axis(new_index, axis=axis,
                                            method=fill_method, copy=copy)
@@ -944,6 +1188,37 @@ def _reindex_axis(self, new_index, fill_method, axis, copy):
         else:
             return self._constructor(new_data)
 
+    #----------------------------------------------------------------------
+    # Attribute access
+
+    def __getattr__(self, name):
+        """After regular attribute access, try looking up the name of the info axis.
+        This allows simpler access to columns for interactive use."""
+        if name in self._info_axis:
+            return self[name]
+        raise AttributeError("'%s' object has no attribute '%s'" %
+                             (type(self).__name__, name))
+
+    def __setattr__(self, name, value):
+        """After regular attribute access, try looking up the name of the info axis.
+        This allows simpler access to columns for interactive use."""
+        if name == '_data':
+            object.__setattr__(self, name, value)
+        else:
+            try:
+                existing = getattr(self, name)
+                if isinstance(existing, Index):
+                    object.__setattr__(self, name, value)
+                elif name in self._info_axis:
+                    self[name] = value
+                else:
+                    object.__setattr__(self, name, value)
+            except (AttributeError, TypeError):
+                object.__setattr__(self, name, value)
+
+    #----------------------------------------------------------------------
+    # Getting and setting elements
+
#---------------------------------------------------------------------- # Consolidation of internals @@ -985,8 +1260,35 @@ def _is_numeric_mixed_type(self): #---------------------------------------------------------------------- # Methods - def as_matrix(self): - raise NotImplementedError + def as_matrix(self, columns=None): + """ + Convert the frame to its Numpy-array matrix representation. Columns + are presented in sorted order unless a specific list of columns is + provided. + + NOTE: the dtype will be a lower-common-denominator dtype (implicit upcasting) + that is to say if the dtypes (even of numeric types) are mixed, the one that accomodates all will be chosen + use this with care if you are not dealing with the blocks + + e.g. if the dtypes are float16,float32 -> float32 + float16,float32,float64 -> float64 + int32,uint8 -> int32 + + Parameters + ---------- + columns : array-like + Specific column order + + Returns + ------- + values : ndarray + If the DataFrame is heterogeneous and contains booleans or objects, + the result will be of dtype=object + """ + self._consolidate_inplace() + if self._AXIS_REVERSED: + return self._data.as_matrix(columns).T + return self._data.as_matrix(columns) @property def values(self): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4ea5065a9a549..c545b84e4a7ea 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -108,11 +108,13 @@ def itemsize(self): def dtype(self): return self.values.dtype - def copy(self, deep=True): + def copy(self, deep=True, ref_items=None): values = self.values if deep: values = values.copy() - return make_block(values, self.items, self.ref_items) + if ref_items is None: + ref_items = self.ref_items + return make_block(values, self.items, ref_items) def merge(self, other): if not self.ref_items.equals(other.ref_items): @@ -933,6 +935,8 @@ def apply(self, f, *args, **kwargs): axes = kwargs.pop('axes',None) filter = kwargs.get('filter') + do_integrity_check = kwargs.pop('do_integrity_check',False) + result_blocks = [] for blk in self.blocks: if filter is not None: @@ -949,7 +953,7 @@ def apply(self, f, *args, **kwargs): result_blocks.extend(applied) else: result_blocks.append(applied) - bm = self.__class__(result_blocks, axes or self.axes) + bm = self.__class__(result_blocks, axes or self.axes, do_integrity_check=do_integrity_check) bm._consolidate_inplace() return bm @@ -1155,10 +1159,8 @@ def copy(self, deep=True): ------- copy : BlockManager """ - copy_blocks = [block.copy(deep=deep) for block in self.blocks] - # copy_axes = [ax.copy() for ax in self.axes] - copy_axes = list(self.axes) - return BlockManager(copy_blocks, copy_axes, do_integrity_check=False) + new_axes = list(self.axes) + return self.apply('copy', axes=new_axes, deep=deep, do_integrity_check=False) def as_matrix(self, items=None): if len(self.blocks) == 0: @@ -1444,7 +1446,7 @@ def _check_have(self, item): if item not in self.items: raise KeyError('no item named %s' % com.pprint_thing(item)) - def reindex_axis(self, new_axis, method=None, axis=0, copy=True): + def reindex_axis(self, new_axis, method=None, axis=0, fill_value=np.nan, copy=True): new_axis = _ensure_index(new_axis) cur_axis = self.axes[axis] @@ -1466,10 +1468,10 @@ def reindex_axis(self, new_axis, method=None, axis=0, copy=True): if method is not None: raise AssertionError('method argument not supported for ' 'axis == 0') - return self.reindex_items(new_axis) + return self.reindex_items(new_axis, copy=copy, fill_value=fill_value) new_axis, indexer = 
cur_axis.reindex(new_axis, method) - return self.reindex_indexer(new_axis, indexer, axis=axis) + return self.reindex_indexer(new_axis, indexer, axis=axis, fill_value=fill_value) def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=np.nan): """ @@ -1518,7 +1520,7 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value): new_blocks.append(na_block) new_blocks = _consolidate(new_blocks, new_items) - return BlockManager(new_blocks, [new_items] + self.axes[1:]) + return BlockManager(new_blocks, [new_items] + list(self.axes[1:])) def reindex_items(self, new_items, copy=True, fill_value=np.nan): """ @@ -1528,7 +1530,7 @@ def reindex_items(self, new_items, copy=True, fill_value=np.nan): data = self if not data.is_consolidated(): data = data.consolidate() - return data.reindex_items(new_items) + return data.reindex_items(new_items, copy=copy, fill_value=fill_value) # TODO: this part could be faster (!) new_items, indexer = self.items.reindex(new_items) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 5037659c1432b..762885fea4bed 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -6,7 +6,7 @@ import operator import sys import numpy as np -from pandas.core.common import (PandasError, _mut_exclusive, +from pandas.core.common import (PandasError, _try_sort, _default_index, _infer_dtype_from_scalar, notnull) from pandas.core.categorical import Factor @@ -535,14 +535,6 @@ def _box_item_values(self, key, values): d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]) return self._constructor_sliced(values, **d) - def __getattr__(self, name): - """After regular attribute access, try looking up the name of an item. - This allows simpler access to items for interactive use.""" - if name in self._info_axis: - return self[name] - raise AttributeError("'%s' object has no attribute '%s'" % - (type(self).__name__, name)) - def _slice(self, slobj, axis=0, raise_on_error=False): new_data = self._data.get_slice(slobj, axis=axis, raise_on_error=raise_on_error) return self._constructor(new_data) @@ -566,20 +558,6 @@ def __setitem__(self, key, value): mat = mat.reshape(tuple([1]) + shape[1:]) NDFrame._set_item(self, key, mat) - def __getstate__(self): - "Returned pickled representation of the panel" - return self._data - - def __setstate__(self, state): - # old Panel pickle - if isinstance(state, BlockManager): - self._data = state - elif len(state) == 4: # pragma: no cover - self._unpickle_panel_compat(state) - else: # pragma: no cover - raise ValueError('unrecognized pickle') - self._item_cache = {} - def _unpickle_panel_compat(self, state): # pragma: no cover "Unpickle the panel" _unpickle = com._unpickle_array @@ -612,62 +590,15 @@ def conform(self, frame, axis='items'): axes = self._get_plane_axes(axis) return frame.reindex(**self._extract_axes_for_slice(self, axes)) - def reindex(self, major=None, minor=None, method=None, - major_axis=None, minor_axis=None, copy=True, **kwargs): - """ - Conform panel to new axis or axes - - Parameters - ---------- - major : Index or sequence, default None - Can also use 'major_axis' keyword - items : Index or sequence, default None - minor : Index or sequence, default None - Can also use 'minor_axis' keyword - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - copy : boolean, default True - Return a new object, even if the 
passed indexes are the same - - Returns - ------- - Panel (new object) - """ - result = self - - major = _mut_exclusive(major, major_axis) - minor = _mut_exclusive(minor, minor_axis) - al = self._AXIS_LEN - + def _needs_reindex_multi(self, axes, method, level): # only allowing multi-index on Panel (and not > dims) - if (method is None and not self._is_mixed_type and al <= 3): - items = kwargs.get('items') - if com._count_not_none(items, major, minor) == 3: - try: - return self._reindex_multi(items, major, minor) - except: - pass - - if major is not None: - result = result._reindex_axis(major, method, al - 2, copy) - - if minor is not None: - result = result._reindex_axis(minor, method, al - 1, copy) - - for i, a in enumerate(self._AXIS_ORDERS[0:al - 2]): - a = kwargs.get(a) - if a is not None: - result = result._reindex_axis(a, method, i, copy) - - if result is self and copy: - raise ValueError('Must specify at least one axis') + return method is None and not self._is_mixed_type and self._AXIS_LEN <= 3 and com._count_not_none(*axes.values()) == 3 - return result - - def _reindex_multi(self, items, major, minor): + def _reindex_multi(self, axes, copy, fill_value): + """ we are guaranteed non-Nones in the axes! """ + items = axes['items'] + major = axes['major_axis'] + minor = axes['minor_axis'] a0, a1, a2 = len(items), len(major), len(minor) values = self.values @@ -693,52 +624,6 @@ def _reindex_multi(self, items, major, minor): return Panel(new_values, items=new_items, major_axis=new_major, minor_axis=new_minor) - def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True): - """Conform Panel to new index with optional filling logic, placing - NA/NaN in locations having no value in the previous index. A new object - is produced unless the new index is equivalent to the current one and - copy=False - - Parameters - ---------- - index : array-like, optional - New labels / index to conform to. 
Preferably an Index object to - avoid duplicating data - axis : {0, 1} - 0 -> index (rows) - 1 -> columns - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed DataFrame - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - copy : boolean, default True - Return a new object, even if the passed indexes are the same - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - - Returns - ------- - reindexed : Panel - """ - self._consolidate_inplace() - return self._reindex_axis(labels, method, axis, copy) - - def reindex_like(self, other, method=None): - """ return an object with matching indicies to myself - - Parameters - ---------- - other : Panel - method : string or None - - Returns - ------- - reindexed : Panel - """ - d = other._construct_axes_dict(method=method) - return self.reindex(**d) - def dropna(self, axis=0, how='any'): """ Drop 2D from panel, holding passed axis constant @@ -761,7 +646,7 @@ def dropna(self, axis=0, how='any'): values = self.values mask = com.notnull(values) - for ax in reversed(sorted(set(range(3)) - set([axis]))): + for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))): mask = mask.sum(ax) per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:]) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8171e24e1b709..490f95802f945 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2521,30 +2521,6 @@ def reindex_axis(self, labels, axis=0, **kwargs): raise ValueError("cannot reindex series on non-zero axis!") return self.reindex(index=labels,**kwargs) - def reindex_like(self, other, method=None, limit=None, fill_value=pa.NA): - """ - Reindex Series to match index of another Series, optionally with - filling logic - - Parameters - ---------- - other : Series - method : string or None - See Series.reindex docstring - limit : int, default None - Maximum size gap to forward or backward fill - - Notes - ----- - Like calling s.reindex(other.index, method=...) 
- - Returns - ------- - reindexed : Series - """ - return self.reindex(other.index, method=method, limit=limit, - fill_value=fill_value) - def take(self, indices, axis=0, convert=True): """ Analogous to ndarray.take, return Series corresponding to requested diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 929d9182f35a9..b3c2e046c51a3 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -630,7 +630,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None, axes=None, * dc = data_columns if k == selector else None # compute the val - val = value.reindex_axis(v, axis=axis, copy=False) + val = value.reindex_axis(v, axis=axis) self.append(k, val, data_columns=dc, **kwargs) @@ -2323,7 +2323,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, # reindex by our non_index_axes & compute data_columns for a in self.non_index_axes: - obj = obj.reindex_axis(a[1], axis=a[0], copy=False) + labels = _ensure_index(a[1]) + if not labels.equals(obj._get_axis(a[0])): + obj = obj.reindex_axis(labels, axis=a[0]) # get out blocks block_obj = self.get_object(obj) @@ -2338,10 +2340,10 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, data_columns = [c for c in data_columns if c in axis_labels] if len(data_columns): blocks = block_obj.reindex_axis(Index(axis_labels) - Index( - data_columns), axis=axis, copy=False)._data.blocks + data_columns), axis=axis)._data.blocks for c in data_columns: blocks.extend(block_obj.reindex_axis( - [c], axis=axis, copy=False)._data.blocks) + [c], axis=axis)._data.blocks) if blocks is None: blocks = block_obj._data.blocks @@ -2399,7 +2401,9 @@ def process_axes(self, obj, columns=None): for axis, labels in self.non_index_axes: if columns is not None: labels = Index(labels) & Index(columns) - obj = obj.reindex_axis(labels, axis=axis, copy=False) + labels = _ensure_index(labels) + if not labels.equals(obj._get_axis(axis)): + obj = obj.reindex_axis(labels, axis=axis) # apply the selection filters (but keep in the same order) if self.selection.filter: diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index a21ec40aa7eee..5bd5735035053 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -602,8 +602,11 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None): return SparseDataFrame(sdict, index=self.index, columns=columns, default_fill_value=self.default_fill_value) - def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer, - copy, fill_value): + def _reindex_with_indexers(self, reindexers, method=None, copy=False, fill_value=np.nan): + + index, row_indexer = reindexers.get(0,(None,None)) + columns, col_indexer = reindexers.get(1,(None, None)) + if columns is None: columns = self.columns diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1247325e8c400..9726c415a3e85 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -6329,6 +6329,7 @@ def test_reindex_fill_value(self): assert_frame_equal(result, expected) def test_align(self): + af, bf = self.frame.align(self.frame) self.assert_(af._data is not self.frame._data) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 93e9b07558319..253dfee5e1f96 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -326,8 +326,14 @@ def test_set_change_dtype(self): def test_copy(self): shallow = self.mgr.copy(deep=False) - for cp_blk, blk in zip(shallow.blocks, self.mgr.blocks): - 
self.assert_(cp_blk.values is blk.values)
+        # we don't guarantee block ordering
+        for blk in self.mgr.blocks:
+            found = False
+            for cp_blk in shallow.blocks:
+                if cp_blk.values is blk.values:
+                    found = True
+                    break
+            self.assert_(found == True)
 
     def test_as_matrix_float(self):
 
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 5ea9144a7c7c2..f27a0adcee225 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -974,11 +974,13 @@ def test_reindex(self):
                                     major=self.panel.major_axis,
                                     minor=self.panel.minor_axis)
 
-        assert(result.items is self.panel.items)
-        assert(result.major_axis is self.panel.major_axis)
-        assert(result.minor_axis is self.panel.minor_axis)
+        self.assert_(result.items is self.panel.items)
+        self.assert_(result.major_axis is self.panel.major_axis)
+        self.assert_(result.minor_axis is self.panel.minor_axis)
 
-        self.assertRaises(Exception, self.panel.reindex)
+        # this ok
+        result = self.panel.reindex()
+        self.assert_(result == self.panel)
 
         # with filling
         smaller_major = self.panel.major_axis[::5]
@@ -992,7 +994,8 @@ def test_reindex(self):
 
         # don't necessarily copy
         result = self.panel.reindex(major=self.panel.major_axis, copy=False)
-        self.assert_(result is self.panel)
+        assert_panel_equal(result,self.panel)
+        self.assert_((result is self.panel) == False)
 
     def test_reindex_like(self):
         # reindex_like
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index f2356273c019b..3cf0688b085bb 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -769,7 +769,9 @@ def test_reindex(self):
         assert(result.major_axis is self.panel4d.major_axis)
         assert(result.minor_axis is self.panel4d.minor_axis)
 
-        self.assertRaises(Exception, self.panel4d.reindex)
+        # don't necessarily copy
+        result = self.panel4d.reindex()
+        self.assert_(result is self.panel4d)
 
         # with filling
         smaller_major = self.panel4d.major_axis[::5]
@@ -784,7 +786,8 @@ def test_reindex(self):
         # don't necessarily copy
         result = self.panel4d.reindex(
             major=self.panel4d.major_axis, copy=False)
-        self.assert_(result is self.panel4d)
+        assert_panel4d_equal(result,self.panel4d)
+        self.assert_((result is self.panel4d) == False)
 
     def test_reindex_like(self):
         # reindex_like

From 604d2a550325118f6277fffc3cf4d1b4a38b8807 Mon Sep 17 00:00:00 2001
From: jreback
Date: Thu, 21 Feb 2013 14:31:23 -0500
Subject: [PATCH 3/6] BUG: reindex called with no args will by default return a copy (fixed bug)

---
 RELEASE.rst                  | 1 +
 pandas/tests/test_frame.py   | 5 +++++
 pandas/tests/test_panel.py   | 3 ++-
 pandas/tests/test_panel4d.py | 3 ++-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 66f644e81fe18..7a4454704acd5 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -177,6 +177,7 @@ pandas 0.11.0
 
     - Bug showing up in applymap where some object type columns are converted (GH2909_)
       had an incorrect default in convert_objects
+    - Reindex called with no arguments will now return a copy of the input object
 
   - TimeDeltas
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 9726c415a3e85..f4842b4b27a7b 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -6242,6 +6242,11 @@ def test_reindex(self):
         newFrame = self.frame.reindex(list(self.ts1.index))
         self.assert_(newFrame.index.equals(self.ts1.index))
 
+        # copy with no axes
+        result = self.frame.reindex()
+        assert_frame_equal(result,self.frame)
+        self.assert_((result is self.frame) == False)
+
     def test_reindex_name_remains(self):
         s = Series(random.rand(10))
df = DataFrame(s, index=np.arange(len(s))) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index f27a0adcee225..4aabf478fc9c8 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -980,7 +980,8 @@ def test_reindex(self): # this ok result = self.panel.reindex() - self.assert_(result == self.panel) + assert_panel_equal(result,self.panel) + self.assert_((result is self.panel) == False) # with filling smaller_major = self.panel.major_axis[::5] diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 3cf0688b085bb..887943ae48a8e 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -771,7 +771,8 @@ def test_reindex(self): # don't necessarily copy result = self.panel4d.reindex() - self.assert_(result is self.panel4d) + assert_panel4d_equal(result,self.panel4d) + self.assert_((result is self.panel4d) == False) # with filling smaller_major = self.panel4d.major_axis[::5] From c3335f2fb7f2b2d63ac9b1ee98b28ad9f8cbaf03 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 21 Feb 2013 15:22:45 -0500 Subject: [PATCH 4/6] ENH: moved filter and added axis arg moved where,mask,align TST: make reindex benchmarks longer CLN: fixed up names for creation in panelnd.py DOC: minor release notes changes --- RELEASE.rst | 15 +- pandas/core/frame.py | 271 +---------------------------- pandas/core/generic.py | 329 ++++++++++++++++++++++++++++++----- pandas/core/panel.py | 19 -- pandas/core/panel4d.py | 12 +- pandas/core/panelnd.py | 36 ++-- pandas/core/series.py | 2 +- pandas/tests/test_frame.py | 11 +- pandas/tests/test_panel.py | 9 +- pandas/tests/test_panelnd.py | 50 +++--- vb_suite/frame_methods.py | 4 +- 11 files changed, 360 insertions(+), 398 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 7a4454704acd5..15638ce09599f 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -131,10 +131,11 @@ pandas 0.11.0 - arguments to DataFrame.clip were inconsistent to numpy and Series clipping (GH2747_) - Refactor of frame.py/panel.py to move common code to generic.py - all axis creation and manipulation code is now common (except for Series) + all axis creation code is common (including Series), most common + code is moved to generic.py - added _setup_axes to created generic NDFrame structures - - moved methods (some methods moved from series as well) + - moved methods - from_axes,_wrap_array,axes,ix,shape,empty,swapaxes,transpose,pop - __str__,__bytes__,__repr__ @@ -142,14 +143,17 @@ pandas 0.11.0 - convert_objects,as_blocks,as_matrix,values - __getstate__,__setstate__ (though compat remains in frame/panel) - __getattr__,__setattr__ - - _indexed_same,reindex_like,reindex - (sparse.py required some changes) + - _indexed_same,reindex_like,reindex,align,where,mask + - filter (also added axis argument to selectively filter on a different axis) + - reindex,reindex_axis (which was the biggest change to make generic) + - truncate (moved to become part of PandasObject) These are API changes which make Panel more consistent with DataFrame - swapaxes on a Panel with the same axes specified now return a copy - support attribute access for setting + - filter supports same api as original DataFrame filter - + - Reindex called with no arguments will now return a copy of the input object **Bug Fixes** @@ -177,7 +181,6 @@ pandas 0.11.0 - Bug showing up in applymap where some object type columns are converted (GH2909_) had an incorrect default in convert_objects - - Reindex called with no arguments will now return a copy of the input object - 
TimeDeltas diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6a9bf5d52992f..94e2181de3773 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -691,7 +691,7 @@ def itertuples(self, index=True): items = iteritems def __len__(self): - """ we are reversed, so shortcut this here """ + """Returns length of info axis, but here we use the index """ return len(self.index) #---------------------------------------------------------------------- @@ -1501,50 +1501,6 @@ def info(self, verbose=True, buf=None, max_cols=None): def dtypes(self): return self.apply(lambda x: x.dtype) - def convert_objects(self, convert_dates=True, convert_numeric=False): - """ - Attempt to infer better dtype for object columns - Always returns a copy (even if no object columns) - - Parameters - ---------- - convert_dates : if True, attempt to soft convert_dates, if 'coerce', force conversion (and non-convertibles get NaT) - convert_numeric : if True attempt to coerce to numerbers (including strings), non-convertibles get NaN - - Returns - ------- - converted : DataFrame - """ - return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric)) - - def as_matrix(self, columns=None): - """ - Convert the frame to its Numpy-array matrix representation. Columns - are presented in sorted order unless a specific list of columns is - provided. - - NOTE: the dtype will be a lower-common-denominator dtype (implicit upcasting) - that is to say if the dtypes (even of numeric types) are mixed, the one that accomodates all will be chosen - use this with care if you are not dealing with the blocks - - e.g. if the dtypes are float16,float32 -> float32 - float16,float32,float64 -> float64 - int32,uint8 -> int32 - - Parameters - ---------- - columns : array-like - Specific column order - - Returns - ------- - values : ndarray - If the DataFrame is heterogeneous and contains booleans or objects, - the result will be of dtype=object - """ - self._consolidate_inplace() - return self._data.as_matrix(columns).T - def transpose(self): return super(DataFrame, self).transpose(1,0) @@ -2107,125 +2063,6 @@ def lookup(self, row_labels, col_labels): #---------------------------------------------------------------------- # Reindexing and alignment - def align(self, other, join='outer', axis=None, level=None, copy=True, - fill_value=NA, method=None, limit=None, fill_axis=0): - """ - Align two DataFrame object on their index and columns with the - specified join method for each axis Index - - Parameters - ---------- - other : DataFrame or Series - join : {'outer', 'inner', 'left', 'right'}, default 'outer' - axis : {0, 1, None}, default None - Align on index (0), columns (1), or both (None) - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - copy : boolean, default True - Always returns new objects. If copy=False and no reindexing is - required then original objects are returned. - fill_value : scalar, default np.NaN - Value to use for missing values. 
Defaults to NaN, but can be any - "compatible" value - method : str, default None - limit : int, default None - fill_axis : {0, 1}, default 0 - Filling axis, method and limit - - Returns - ------- - (left, right) : (DataFrame, type of other) - Aligned objects - """ - if isinstance(other, DataFrame): - return self._align_frame(other, join=join, axis=axis, level=level, - copy=copy, fill_value=fill_value, - method=method, limit=limit, - fill_axis=fill_axis) - elif isinstance(other, Series): - return self._align_series(other, join=join, axis=axis, level=level, - copy=copy, fill_value=fill_value, - method=method, limit=limit, - fill_axis=fill_axis) - else: # pragma: no cover - raise TypeError('unsupported type: %s' % type(other)) - - def _align_frame(self, other, join='outer', axis=None, level=None, - copy=True, fill_value=NA, method=None, limit=None, - fill_axis=0): - # defaults - join_index, join_columns = None, None - ilidx, iridx = None, None - clidx, cridx = None, None - - if axis is None or axis == 0: - if not self.index.equals(other.index): - join_index, ilidx, iridx = \ - self.index.join(other.index, how=join, level=level, - return_indexers=True) - - if axis is None or axis == 1: - if not self.columns.equals(other.columns): - join_columns, clidx, cridx = \ - self.columns.join(other.columns, how=join, level=level, - return_indexers=True) - - left = self._reindex_with_indexers({ 0 : [ join_index, ilidx ], - 1 : [ join_columns, clidx ] }, - copy=copy, fill_value=fill_value) - right = other._reindex_with_indexers({ 0 : [ join_index, iridx ], - 1 : [ join_columns, cridx ] }, - copy=copy, fill_value=fill_value) - - - if method is not None: - left = left.fillna(axis=fill_axis, method=method, limit=limit) - right = right.fillna(axis=fill_axis, method=method, limit=limit) - - return left, right - - def _align_series(self, other, join='outer', axis=None, level=None, - copy=True, fill_value=None, method=None, limit=None, - fill_axis=0): - fdata = self._data - if axis == 0: - join_index = self.index - lidx, ridx = None, None - if not self.index.equals(other.index): - join_index, lidx, ridx = self.index.join(other.index, how=join, - return_indexers=True) - - if lidx is not None: - fdata = fdata.reindex_indexer(join_index, lidx, axis=1) - elif axis == 1: - join_index = self.columns - lidx, ridx = None, None - if not self.columns.equals(other.index): - join_index, lidx, ridx = \ - self.columns.join(other.index, how=join, - return_indexers=True) - - if lidx is not None: - fdata = fdata.reindex_indexer(join_index, lidx, axis=0) - else: - raise ValueError('Must specify axis=0 or 1') - - if copy and fdata is self._data: - fdata = fdata.copy() - - left_result = DataFrame(fdata) - right_result = other if ridx is None else other.reindex(join_index) - - fill_na = notnull(fill_value) or (method is not None) - if fill_na: - return (left_result.fillna(fill_value, method=method, limit=limit, - axis=fill_axis), - right_result.fillna(fill_value, method=method, - limit=limit)) - else: - return left_result, right_result - def _reindex_axes(self, axes, level, limit, method, fill_value, copy): frame = self @@ -2273,8 +2110,6 @@ def _reindex_multi(self, axes, copy, fill_value): else: return self.copy() if copy else self - truncate = generic.truncate - def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): """ @@ -2514,40 +2349,6 @@ def take(self, indices, axis=0, convert=True): #---------------------------------------------------------------------- # Reindex-based selection methods - 
def filter(self, items=None, like=None, regex=None): - """ - Restrict frame's columns to set of items or wildcard - - Parameters - ---------- - items : list-like - List of columns to restrict to (must not all be present) - like : string - Keep columns where "arg in col == True" - regex : string (regular expression) - Keep columns with re.search(regex, col) == True - - Notes - ----- - Arguments are mutually exclusive, but this is not checked for - - Returns - ------- - DataFrame with filtered columns - """ - import re - if items is not None: - return self.reindex(columns=[r for r in items if r in self]) - elif like: - matchf = lambda x: (like in x if isinstance(x, basestring) - else like in str(x)) - return self.select(matchf, axis=1) - elif regex: - matcher = re.compile(regex) - return self.select(lambda x: matcher.search(x) is not None, axis=1) - else: - raise ValueError('items was None!') - def dropna(self, axis=0, how='any', thresh=None, subset=None): """ Return object with labels on given axis omitted where alternately any @@ -4800,76 +4601,6 @@ def combineMult(self, other): """ return self.mul(other, fill_value=1.) - def where(self, cond, other=NA, inplace=False, try_cast=False, raise_on_error=True): - """ - Return a DataFrame with the same shape as self and whose corresponding - entries are from self where cond is True and otherwise are from other. - - Parameters - ---------- - cond : boolean DataFrame or array - other : scalar or DataFrame - inplace : boolean, default False - Whether to perform the operation in place on the data - try_cast : boolean, default False - try to cast the result back to the input type (if possible), - raise_on_error : boolean, default True - Whether to raise on invalid data types (e.g. trying to where on - strings) - - Returns - ------- - wh : DataFrame - """ - if isinstance(cond, DataFrame): - # this already checks for index/column equality - cond = cond.reindex(self.index, columns=self.columns) - else: - if not hasattr(cond, 'shape'): - raise ValueError('where requires an ndarray like object for its ' - 'condition') - if cond.shape != self.shape: - raise ValueError('Array conditional must be same shape as self') - cond = self._constructor(cond, index=self.index, - columns=self.columns) - - if inplace: - cond = -(cond.fillna(True).astype(bool)) - else: - cond = cond.fillna(False).astype(bool) - - if isinstance(other, DataFrame): - _, other = self.align(other, join='left', fill_value=NA) - elif isinstance(other,np.ndarray): - if other.shape != self.shape: - raise ValueError('other must be the same shape as self ' - 'when an ndarray') - other = DataFrame(other, self.index, self.columns) - - if inplace: - # we may have different type blocks come out of putmask, so reconstruct the block manager - self._data = self._data.putmask(cond,other,inplace=True) - - else: - new_data = self._data.where(other, cond, raise_on_error=raise_on_error, try_cast=try_cast) - - return self._constructor(new_data) - - def mask(self, cond): - """ - Returns copy of self whose values are replaced with nan if the - inverted condition is True - - Parameters - ---------- - cond: boolean DataFrame or array - - Returns - ------- - wh: DataFrame - """ - return self.where(~cond, NA) - DataFrame._setup_axes(['index', 'columns'], info_axis=1, stat_axis=0, axes_are_reversed=True) _EMPTY_SERIES = Series([]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a1d6eb61b34ce..fd292b617dfea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -177,6 +177,8 @@ def 
_get_axis(self, axis): def _get_block_manager_axis(self, axis): """ map the axis to the block_manager axis """ + if axis not in self._AXIS_NAMES: + raise Exception("this object does not support an axis of [%s]" % axis) if self._AXIS_REVERSED: m = self._AXIS_LEN-1 return m-axis @@ -559,13 +561,14 @@ def select(self, crit, axis=0): ------- selection : type of caller """ - axis_name = self._get_axis_name(axis) - axis = self._get_axis(axis) + axis = self._get_axis_number(axis) + axis_name = self._get_axis_name(axis) + axis_values = self._get_axis(axis) - if len(axis) > 0: - new_axis = axis[np.asarray([bool(crit(label)) for label in axis])] + if len(axis_values) > 0: + new_axis = axis_values[np.asarray([bool(crit(label)) for label in axis_values])] else: - new_axis = axis + new_axis = axis_values return self.reindex(**{axis_name: new_axis}) @@ -697,6 +700,40 @@ def tshift(self, periods=1, freq=None, **kwds): return self.shift(periods, freq, **kwds) + def truncate(self, before=None, after=None, copy=True): + """Function truncate a sorted DataFrame / Series before and/or after + some particular dates. + + Parameters + ---------- + before : date + Truncate before date + after : date + Truncate after date + + Returns + ------- + truncated : type of caller + """ + from pandas.tseries.tools import to_datetime + before = to_datetime(before) + after = to_datetime(after) + + if before is not None and after is not None: + if before > after: + raise AssertionError('Truncate: %s must be after %s' % + (before, after)) + + result = self.ix[before:after] + + if isinstance(self.index, MultiIndex): + result.index = self.index.truncate(before, after) + + if copy: + result = result.copy() + + return result + def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, **kwds): """ @@ -1188,6 +1225,46 @@ def _reindex_axis(self, new_index, fill_method, axis, copy): else: return self._constructor(new_data) + def filter(self, items=None, like=None, regex=None, axis=None): + """ + Restrict the info axis to set of items or wildcard + + Parameters + ---------- + items : list-like + List of info axis to restrict to (must not all be present) + like : string + Keep info axis where "arg in col == True" + regex : string (regular expression) + Keep info axis with re.search(regex, col) == True + + Notes + ----- + Arguments are mutually exclusive, but this is not checked for + + Returns + ------- + same type as input object with filtered info axis + """ + import re + + if axis is None: + axis = self._info_axis_name + axis_name = self._get_axis_name(axis) + axis_values = self._get_axis(axis_name) + + if items is not None: + return self.reindex(**{ axis_name : [r for r in items if r in axis_values ] }) + elif like: + matchf = lambda x: (like in x if isinstance(x, basestring) + else like in str(x)) + return self.select(matchf, axis=axis_name) + elif regex: + matcher = re.compile(regex) + return self.select(lambda x: matcher.search(x) is not None, axis=axis_name) + else: + raise ValueError('items was None!') + #---------------------------------------------------------------------- # Attribute access @@ -1346,7 +1423,7 @@ def astype(self, dtype, copy = True, raise_on_error = True): mgr = self._data.astype(dtype, copy = copy, raise_on_error = raise_on_error) return self._constructor(mgr) - def convert_objects(self, convert_dates=True, convert_numeric=True): + def convert_objects(self, convert_dates=True, convert_numeric=False): """ Attempt to infer better dtype for object columns Always returns a copy (even if no object 
columns) @@ -1358,10 +1435,212 @@ def convert_objects(self, convert_dates=True, convert_numeric=True): Returns ------- - converted : DataFrame + converted : asm as input object """ return self._constructor(self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric)) + def align(self, other, join='outer', axis=None, level=None, copy=True, + fill_value=np.nan, method=None, limit=None, fill_axis=0): + """ + Align two object on their axes with the + specified join method for each axis Index + + Parameters + ---------- + other : DataFrame or Series + join : {'outer', 'inner', 'left', 'right'}, default 'outer' + axis : allowed axis of the other object, default None + Align on index (0), columns (1), or both (None) + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + copy : boolean, default True + Always returns new objects. If copy=False and no reindexing is + required then original objects are returned. + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value + method : str, default None + limit : int, default None + fill_axis : {0, 1}, default 0 + Filling axis, method and limit + + Returns + ------- + (left, right) : (type of input, type of other) + Aligned objects + """ + from pandas import DataFrame,Series + + if isinstance(other, DataFrame): + return self._align_frame(other, join=join, axis=axis, level=level, + copy=copy, fill_value=fill_value, + method=method, limit=limit, + fill_axis=fill_axis) + elif isinstance(other, Series): + return self._align_series(other, join=join, axis=axis, level=level, + copy=copy, fill_value=fill_value, + method=method, limit=limit, + fill_axis=fill_axis) + else: # pragma: no cover + raise TypeError('unsupported type: %s' % type(other)) + + def _align_frame(self, other, join='outer', axis=None, level=None, + copy=True, fill_value=np.nan, method=None, limit=None, + fill_axis=0): + # defaults + join_index, join_columns = None, None + ilidx, iridx = None, None + clidx, cridx = None, None + + if axis is None or axis == 0: + if not self.index.equals(other.index): + join_index, ilidx, iridx = \ + self.index.join(other.index, how=join, level=level, + return_indexers=True) + + if axis is None or axis == 1: + if not self.columns.equals(other.columns): + join_columns, clidx, cridx = \ + self.columns.join(other.columns, how=join, level=level, + return_indexers=True) + + left = self._reindex_with_indexers({ 0 : [ join_index, ilidx ], + 1 : [ join_columns, clidx ] }, + copy=copy, fill_value=fill_value) + right = other._reindex_with_indexers({ 0 : [ join_index, iridx ], + 1 : [ join_columns, cridx ] }, + copy=copy, fill_value=fill_value) + + + if method is not None: + left = left.fillna(axis=fill_axis, method=method, limit=limit) + right = right.fillna(axis=fill_axis, method=method, limit=limit) + + return left, right + + def _align_series(self, other, join='outer', axis=None, level=None, + copy=True, fill_value=None, method=None, limit=None, + fill_axis=0): + from pandas import DataFrame + + fdata = self._data + if axis == 0: + join_index = self.index + lidx, ridx = None, None + if not self.index.equals(other.index): + join_index, lidx, ridx = self.index.join(other.index, how=join, + return_indexers=True) + + if lidx is not None: + fdata = fdata.reindex_indexer(join_index, lidx, axis=1) + elif axis == 1: + join_index = self.columns + lidx, ridx = None, None + if not self.columns.equals(other.index): + join_index, lidx, ridx = \ + 
self.columns.join(other.index, how=join, + return_indexers=True) + + if lidx is not None: + fdata = fdata.reindex_indexer(join_index, lidx, axis=0) + else: + raise ValueError('Must specify axis=0 or 1') + + if copy and fdata is self._data: + fdata = fdata.copy() + + left_result = DataFrame(fdata) + right_result = other if ridx is None else other.reindex(join_index) + + fill_na = notnull(fill_value) or (method is not None) + if fill_na: + return (left_result.fillna(fill_value, method=method, limit=limit, + axis=fill_axis), + right_result.fillna(fill_value, method=method, + limit=limit)) + else: + return left_result, right_result + + def where(self, cond, other=np.nan, inplace=False, try_cast=False, raise_on_error=True): + """ + Return an object of same shape as self and whose corresponding + entries are from self where cond is True and otherwise are from other. + + Parameters + ---------- + cond : boolean DataFrame or array + other : scalar or DataFrame + inplace : boolean, default False + Whether to perform the operation in place on the data + try_cast : boolean, default False + try to cast the result back to the input type (if possible), + raise_on_error : boolean, default True + Whether to raise on invalid data types (e.g. trying to where on + strings) + + Returns + ------- + wh : DataFrame + """ + if isinstance(cond, NDFrame): + cond = cond.reindex(**self._construct_axes_dict()) + else: + if not hasattr(cond, 'shape'): + raise ValueError('where requires an ndarray like object for its ' + 'condition') + if cond.shape != self.shape: + raise ValueError('Array conditional must be same shape as self') + cond = self._constructor(cond, index=self.index, + columns=self.columns) + + if inplace: + cond = -(cond.fillna(True).astype(bool)) + else: + cond = cond.fillna(False).astype(bool) + + # try to align + if hasattr(other, 'align'): + + # align with me + if other.ndim <= self.ndim: + + _, other = self.align(other, join='left', fill_value=np.nan) + + # slice me out of the other + else: + raise NotImplemented + + elif isinstance(other,np.ndarray): + if other.shape != self.shape: + raise ValueError('other must be the same shape as self ' + 'when an ndarray') + other = self._constructor(other, **self._construct_axes_dict()) + + if inplace: + # we may have different type blocks come out of putmask, so reconstruct the block manager + self._data = self._data.putmask(cond,other,inplace=True) + + else: + new_data = self._data.where(other, cond, raise_on_error=raise_on_error, try_cast=try_cast) + + return self._constructor(new_data) + + def mask(self, cond): + """ + Returns copy of self whose values are replaced with nan if the + inverted condition is True + + Parameters + ---------- + cond: boolean object or array + + Returns + ------- + wh: same as input + """ + return self.where(~cond, np.nan) + def cumsum(self, axis=None, skipna=True): """ Return DataFrame of cumulative sums over requested axis. @@ -1653,39 +1932,3 @@ def tz_localize(self, tz, axis=0, copy=True): return new_obj -# Good for either Series or DataFrame - - -def truncate(self, before=None, after=None, copy=True): - """Function truncate a sorted DataFrame / Series before and/or after - some particular dates. 
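For reference, a minimal sketch of the where/mask semantics documented in the hunk above: values are kept where the condition is True and replaced by `other` (NaN by default) elsewhere, with `mask` acting as the inverse. The frame below is throwaway illustration data, not taken from the pandas test suite, and assumes the 0.12-era API introduced by this patch:

    import numpy as np
    import pandas as pd

    # illustrative frame; df.where keeps entries where the condition holds
    df = pd.DataFrame(np.arange(6).reshape(3, 2), columns=['a', 'b'])

    kept   = df.where(df > 2)        # entries <= 2 become NaN
    filled = df.where(df > 2, -1)    # entries <= 2 become -1 instead
    hidden = df.mask(df > 2)         # inverse: entries > 2 become NaN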
- - Parameters - ---------- - before : date - Truncate before date - after : date - Truncate after date - - Returns - ------- - truncated : type of caller - """ - from pandas.tseries.tools import to_datetime - before = to_datetime(before) - after = to_datetime(after) - - if before is not None and after is not None: - if before > after: - raise AssertionError('Truncate: %s must be after %s' % - (before, after)) - - result = self.ix[before:after] - - if isinstance(self.index, MultiIndex): - result.index = self.index.truncate(before, after) - - if copy: - result = result.copy() - - return result diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 762885fea4bed..f01c8e4a63a75 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -887,21 +887,6 @@ def to_frame(self, filter_observations=True): to_long = deprecate('to_long', to_frame) toLong = deprecate('toLong', to_frame) - def filter(self, items): - """ - Restrict items in panel to input list - - Parameters - ---------- - items : sequence - - Returns - ------- - y : Panel - """ - intersection = self.items.intersection(items) - return self.reindex(items=intersection) - def apply(self, func, axis='major'): """ Apply @@ -1360,10 +1345,6 @@ def min(self, axis='major', skipna=True): LongPanel = DataFrame -def _monotonic(arr): - return not (arr[1:] < arr[:-1]).any() - - def install_ipython_completers(): # pragma: no cover """Register the Panel type with IPython's tab completion machinery, so that it knows about accessing column names as attributes.""" diff --git a/pandas/core/panel4d.py b/pandas/core/panel4d.py index dfe9294e06c08..552b0f7fe1bd1 100644 --- a/pandas/core/panel4d.py +++ b/pandas/core/panel4d.py @@ -4,14 +4,14 @@ from pandas.core.panel import Panel Panel4D = create_nd_panel_factory( - klass_name = 'Panel4D', - axis_orders = ['labels', 'items', 'major_axis', 'minor_axis'], - axis_slices = {'labels': 'labels', 'items': 'items', + klass_name = 'Panel4D', + orders = ['labels', 'items', 'major_axis', 'minor_axis'], + slices = {'labels': 'labels', 'items': 'items', 'major_axis': 'major_axis', 'minor_axis': 'minor_axis'}, - slicer = Panel, - axis_aliases = {'major': 'major_axis', 'minor': 'minor_axis'}, - stat_axis = 2) + slicer = Panel, + aliases = {'major': 'major_axis', 'minor': 'minor_axis'}, + stat_axis = 2) def panel4d_init(self, data=None, labels=None, items=None, major_axis=None, diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index ce9b43aabaa5b..2d3048d7f4033 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -3,20 +3,20 @@ import pandas.lib as lib -def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_aliases=None, stat_axis=2): +def create_nd_panel_factory(klass_name, orders, slices, slicer, aliases=None, stat_axis=2, info_axis=0): """ manufacture a n-d class: parameters ---------- - klass_name : the klass name - axis_orders : the names of the axes in order (highest to lowest) - axis_slices : a dictionary that defines how the axes map to the sliced axis - slicer : the class representing a slice of this panel - axis_aliases: a dictionary defining aliases for various axes + klass_name : the klass name + orders : the names of the axes in order (highest to lowest) + slices : a dictionary that defines how the axes map to the sliced axis + slicer : the class representing a slice of this panel + aliases : a dictionary defining aliases for various axes default = { major : major_axis, minor : minor_axis } - stat_axis : the default statistic axis + stat_axis : the 
default statistic axis default = 2 - het_axis : the info axis + info_axis : the info axis returns @@ -37,23 +37,15 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a # build the klass klass = type(klass_name, (slicer,), {}) - # add the class variables - klass._AXIS_ORDERS = axis_orders - klass._AXIS_NUMBERS = dict([(a, i) for i, a in enumerate(axis_orders)]) - klass._AXIS_ALIASES = axis_aliases or dict() - klass._AXIS_NAMES = dict([(i, a) for i, a in enumerate(axis_orders)]) - klass._AXIS_SLICEMAP = axis_slices - klass._AXIS_LEN = len(axis_orders) - klass._default_stat_axis = stat_axis - klass._het_axis = 0 - klass._info_axis = axis_orders[klass._het_axis] + # setup the axes + klass._setup_axes(axes = orders, + info_axis = info_axis, + stat_axis = stat_axis, + aliases = aliases, + slicers = slices) klass._constructor_sliced = slicer - # add the axes - for i, a in enumerate(axis_orders): - setattr(klass, a, lib.AxisProperty(i)) - #### define the methods #### def __init__(self, *args, **kwargs): if not (kwargs.get('data') or len(args)): diff --git a/pandas/core/series.py b/pandas/core/series.py index 490f95802f945..c7a99a13dcc81 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2540,7 +2540,7 @@ def take(self, indices, axis=0, convert=True): new_values = self.values.take(indices) return Series(new_values, index=new_index, name=self.name) - truncate = generic.truncate + truncate = generic.PandasObject.truncate def fillna(self, value=None, method=None, inplace=False, limit=None): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index f4842b4b27a7b..61b0ca3583db6 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7172,11 +7172,20 @@ def test_applymap(self): def test_filter(self): # items - filtered = self.frame.filter(['A', 'B', 'E']) self.assertEqual(len(filtered.columns), 2) self.assert_('E' not in filtered) + filtered = self.frame.filter(['A', 'B', 'E'], axis='columns') + self.assertEqual(len(filtered.columns), 2) + self.assert_('E' not in filtered) + + # other axis + idx = self.frame.index[0:4] + filtered = self.frame.filter(idx, axis='index') + expected = self.frame.reindex(index=idx) + assert_frame_equal(filtered,expected) + # like fcopy = self.frame.copy() fcopy['AA'] = 1 diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 4aabf478fc9c8..29cce8c79a0bf 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1703,15 +1703,18 @@ def test_pivot(self): def test_monotonic(): pos = np.array([1, 2, 3, 5]) - assert panelm._monotonic(pos) + def _monotonic(arr): + return not (arr[1:] < arr[:-1]).any() + + assert _monotonic(pos) neg = np.array([1, 2, 3, 4, 3]) - assert not panelm._monotonic(neg) + assert not _monotonic(neg) neg2 = np.array([5, 1, 2, 3, 4, 5]) - assert not panelm._monotonic(neg2) + assert not _monotonic(neg2) def test_panel_index(): diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 5675cfec58678..d055706e2525c 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -29,11 +29,11 @@ def test_4d_construction(self): # create a 4D Panel4D = panelnd.create_nd_panel_factory( klass_name='Panel4D', - axis_orders=['labels', 'items', 'major_axis', 'minor_axis'], - axis_slices={'items': 'items', 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, + orders=['labels', 'items', 'major_axis', 'minor_axis'], + slices={'items': 'items', 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, 
slicer=Panel, - axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + aliases={'major': 'major_axis', 'minor': 'minor_axis'}, stat_axis=2) p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) @@ -43,11 +43,11 @@ def test_4d_construction_alt(self): # create a 4D Panel4D = panelnd.create_nd_panel_factory( klass_name='Panel4D', - axis_orders=['labels', 'items', 'major_axis', 'minor_axis'], - axis_slices={'items': 'items', 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, + orders=['labels', 'items', 'major_axis', 'minor_axis'], + slices={'items': 'items', 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, slicer='Panel', - axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + aliases={'major': 'major_axis', 'minor': 'minor_axis'}, stat_axis=2) p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) @@ -58,14 +58,14 @@ def test_4d_construction_error(self): self.assertRaises(Exception, panelnd.create_nd_panel_factory, klass_name='Panel4D', - axis_orders=['labels', 'items', 'major_axis', - 'minor_axis'], - axis_slices={'items': 'items', - 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, + orders=['labels', 'items', 'major_axis', + 'minor_axis'], + slices={'items': 'items', + 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, slicer='foo', - axis_aliases={'major': 'major_axis', - 'minor': 'minor_axis'}, + aliases={'major': 'major_axis', + 'minor': 'minor_axis'}, stat_axis=2) def test_5d_construction(self): @@ -73,11 +73,11 @@ def test_5d_construction(self): # create a 4D Panel4D = panelnd.create_nd_panel_factory( klass_name='Panel4D', - axis_orders=['labels1', 'items', 'major_axis', 'minor_axis'], - axis_slices={'items': 'items', 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, + orders=['labels1', 'items', 'major_axis', 'minor_axis'], + slices={'items': 'items', 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, slicer=Panel, - axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + aliases={'major': 'major_axis', 'minor': 'minor_axis'}, stat_axis=2) p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) @@ -85,13 +85,13 @@ def test_5d_construction(self): # create a 5D Panel5D = panelnd.create_nd_panel_factory( klass_name='Panel5D', - axis_orders=['cool1', 'labels1', 'items', 'major_axis', - 'minor_axis'], - axis_slices={'labels1': 'labels1', 'items': 'items', - 'major_axis': 'major_axis', - 'minor_axis': 'minor_axis'}, + orders=['cool1', 'labels1', 'items', 'major_axis', + 'minor_axis'], + slices={'labels1': 'labels1', 'items': 'items', + 'major_axis': 'major_axis', + 'minor_axis': 'minor_axis'}, slicer=Panel4D, - axis_aliases={'major': 'major_axis', 'minor': 'minor_axis'}, + aliases={'major': 'major_axis', 'minor': 'minor_axis'}, stat_axis=2) p5d = Panel5D(dict(C1=p4d)) diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index e8e8813af9890..59bf04ff6c580 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -40,8 +40,8 @@ # reindex both axes setup = common_setup + """ -df = DataFrame(randn(1000, 1000)) -idx = np.arange(400, 700) +df = DataFrame(randn(10000, 10000)) +idx = np.arange(4000, 7000) """ frame_reindex_axis0 = Benchmark('df.reindex(idx)', setup) From febd8dcf48635e8be6463f29279584b37774a804 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 12 Mar 2013 12:16:42 -0400 Subject: [PATCH 5/6] DOC: release notes for this to 0.12.0 --- RELEASE.rst | 60 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git 
a/RELEASE.rst b/RELEASE.rst index 15638ce09599f..4fdcaad8a3eb4 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -22,6 +22,42 @@ Where to get it * Binary installers on PyPI: http://pypi.python.org/pypi/pandas * Documentation: http://pandas.pydata.org +pandas 0.12.0 +============= + +**Release date:** 2013-??-?? + +**New features** + +**Improvements to existing features** + +**API Changes** + + - Refactor of frame.py/panel.py to move common code to generic.py + all axis creation code is common (including Series), most common + code is moved to generic.py + + - added _setup_axes to created generic NDFrame structures + - moved methods + + - from_axes,_wrap_array,axes,ix,shape,empty,swapaxes,transpose,pop + - __str__,__bytes__,__repr__ + - __iter__,keys,__contains__,__len__,__neg__,__invert__ + - convert_objects,as_blocks,as_matrix,values + - __getstate__,__setstate__ (though compat remains in frame/panel) + - __getattr__,__setattr__ + - _indexed_same,reindex_like,reindex,align,where,mask + - filter (also added axis argument to selectively filter on a different axis) + - reindex,reindex_axis (which was the biggest change to make generic) + - truncate (moved to become part of PandasObject) + + These are API changes which make Panel more consistent with DataFrame + - swapaxes on a Panel with the same axes specified now return a copy + - support attribute access for setting + - filter supports same api as original DataFrame filter + + - Reindex called with no arguments will now return a copy of the input object + pandas 0.11.0 ============= @@ -130,30 +166,6 @@ pandas 0.11.0 - arguments to DataFrame.clip were inconsistent to numpy and Series clipping (GH2747_) - - Refactor of frame.py/panel.py to move common code to generic.py - all axis creation code is common (including Series), most common - code is moved to generic.py - - - added _setup_axes to created generic NDFrame structures - - moved methods - - - from_axes,_wrap_array,axes,ix,shape,empty,swapaxes,transpose,pop - - __str__,__bytes__,__repr__ - - __iter__,keys,__contains__,__len__,__neg__,__invert__ - - convert_objects,as_blocks,as_matrix,values - - __getstate__,__setstate__ (though compat remains in frame/panel) - - __getattr__,__setattr__ - - _indexed_same,reindex_like,reindex,align,where,mask - - filter (also added axis argument to selectively filter on a different axis) - - reindex,reindex_axis (which was the biggest change to make generic) - - truncate (moved to become part of PandasObject) - - These are API changes which make Panel more consistent with DataFrame - - swapaxes on a Panel with the same axes specified now return a copy - - support attribute access for setting - - filter supports same api as original DataFrame filter - - - Reindex called with no arguments will now return a copy of the input object **Bug Fixes** From 16a27f3428b2f2b2cbe9fc515bb7d0b64861f1f3 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 15 Mar 2013 16:08:15 -0400 Subject: [PATCH 6/6] MERGE --- pandas/core/frame.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94e2181de3773..eb0293916ce87 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -544,6 +544,13 @@ def _init_ndarray(self, values, index, columns, dtype=None, block = make_block(values.T, columns, columns) return BlockManager([block], [columns, index]) + @property + def _verbose_info(self): + import warnings + warnings.warn('The _verbose_info property will be removed in version ' + '0.12', FutureWarning) + return 
get_option('display.max_info_rows') is None + @_verbose_info.setter def _verbose_info(self, value): import warnings
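As a usage note on the API changes listed in the release notes above, here is a minimal sketch of the generic `filter` (which now accepts an `axis` argument) and the relocated `truncate`, assuming the 0.12-era API introduced by this patch; the frame, index, and column labels are illustrative only:

    import numpy as np
    import pandas as pd

    # throwaway frame with a date index so truncate has something to slice on
    df = pd.DataFrame(np.random.randn(6, 3),
                      index=pd.date_range('2013-01-01', periods=6),
                      columns=['one', 'two', 'three'])

    by_cols = df.filter(['one', 'two', 'missing'])    # unknown labels are simply dropped
    by_rows = df.filter(df.index[:3], axis='index')   # new: filter along a non-info axis
    by_like = df.filter(like='o')                     # keeps 'one' and 'two'
    clipped = df.truncate(before='2013-01-02', after='2013-01-04')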