From acca73517ec4f3baa1a75b619cd460a8add81033 Mon Sep 17 00:00:00 2001 From: jreback Date: Wed, 20 Mar 2013 19:19:55 -0400 Subject: [PATCH] BUG: fixed issues where passing an axis of 'index' would fail (as it is expecting an axis number) --- RELEASE.rst | 8 ++------ pandas/core/frame.py | 23 ++++++++++++++++++++++ pandas/core/generic.py | 35 +++++++++++++++++++-------------- pandas/core/panel.py | 6 ++++-- pandas/tests/test_frame.py | 40 +++++++++++++++++++++++++------------- 5 files changed, 76 insertions(+), 36 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 7a72b9d00cec6..da1d3edd5f820 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -35,7 +35,6 @@ pandas 0.11.0 DataFrames and propogate in operations - Add function to pandas.io.data for retrieving stock index components from Yahoo! finance (GH2795_) - - Add ``squeeze`` function to reduce dimensionality of 1-len objects - Support slicing with time objects (GH2681_) - Added ``.iloc`` attribute, to support strict integer based indexing, analagous to ``.ix`` (GH2922_) - Added ``.loc`` attribute, to support strict label based indexing, analagous to ``.ix`` (GH3053_) @@ -73,10 +72,7 @@ pandas 0.11.0 - New keywords ``iterator=boolean``, and ``chunksize=number_in_a_chunk`` are provided to support iteration on ``select`` and ``select_as_multiple`` (GH3076_) - - In ``HDFStore``, provide dotted attribute access to ``get`` from stores - (e.g. ``store.df == store['df']``) - - - ``Squeeze`` to possibly remove length 1 dimensions from an object. + - Add ``squeeze`` method to possibly remove length 1 dimensions from an object. .. 
ipython:: python @@ -209,7 +205,6 @@ pandas 0.11.0 other values), (GH2850_) - Unstack of a frame with no nans would always cause dtype upcasting (GH2929_) - Fix scalar datetime.datetime parsing bug in read_csv (GH3071_) - - Timedeltas are now fully operational (closes GH2898_) - Fixed slow printing of large Dataframes, due to inefficient dtype reporting (GH2807_) - Fixed a segfault when using a function as grouper in groupby (GH3035_) @@ -232,6 +227,7 @@ pandas 0.11.0 - Fix issue with indexing a series with a boolean key and specifiying a 1-len list on the rhs (GH2745_) - Fixed bug in groupby apply when kernel generate list of arrays having unequal len (GH1738_) - fixed handling of rolling_corr with center=True which could produce corr>1 (GH3155_) + - Fixed issues where passing the axis parameter by name ('index'/'columns') instead of by number (0/1) would fail .. _GH622: https://github.com/pydata/pandas/issues/622 .. _GH797: https://github.com/pydata/pandas/issues/797 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index daf31bf75f7a2..b689c7e473364 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2194,6 +2194,7 @@ def xs(self, key, axis=0, level=None, copy=True): ------- xs : Series or DataFrame """ + axis = self._get_axis_number(axis) labels = self._get_axis(axis) if level is not None: loc, new_ax = labels.get_loc_level(key, level=level) @@ -2340,6 +2341,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, (left, right) : (DataFrame, type of other) Aligned objects """ + if axis is not None: + axis = self._get_axis_number(axis) if isinstance(other, DataFrame): return self._align_frame(other, join=join, axis=axis, level=level, copy=copy, fill_value=fill_value, @@ -2522,6 +2525,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, reindexed : same type as calling instance """ self._consolidate_inplace() + axis = self._get_axis_number(axis) if axis == 0: return self._reindex_index(labels, method, copy, 
level, fill_value=fill_value, @@ -2834,6 +2838,7 @@ def take(self, indices, axis=0, convert=True): # check/convert indicies here if convert: + axis = self._get_axis_number(axis) indices = _maybe_convert_indices(indices, len(self._get_axis(axis))) if self._is_mixed_type: @@ -2922,6 +2927,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): subset=subset, axis=ax) return result + axis = self._get_axis_number(axis) if axis == 0: agg_axis = 1 elif axis == 1: @@ -3089,6 +3095,7 @@ def sort_index(self, axis=0, by=None, ascending=True, inplace=False): """ from pandas.core.groupby import _lexsort_indexer + axis = self._get_axis_number(axis) if axis not in [0, 1]: raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) @@ -3159,6 +3166,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False): ------- sorted : DataFrame """ + axis = self._get_axis_number(axis) the_axis = self._get_axis(axis) if not isinstance(the_axis, MultiIndex): raise Exception('can only sort by level with a hierarchical index') @@ -3202,6 +3210,7 @@ def swaplevel(self, i, j, axis=0): """ result = self.copy() + axis = self._get_axis_number(axis) if axis == 0: result.index = result.index.swaplevel(i, j) else: @@ -3223,6 +3232,7 @@ def reorder_levels(self, order, axis=0): ------- type of caller (new object) """ + axis = self._get_axis_number(axis) if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover raise Exception('Can only reorder levels on a hierarchical axis.') @@ -3274,6 +3284,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, """ self._consolidate_inplace() + axis = self._get_axis_number(axis) if value is None: if method is None: raise ValueError('must specify a fill method or value') @@ -3362,6 +3373,7 @@ def replace(self, to_replace, value=None, method='pad', axis=0, """ self._consolidate_inplace() + axis = self._get_axis_number(axis) if inplace: import warnings warnings.warn("replace with inplace=True will return None" @@ 
-4057,6 +4069,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False, if len(self.columns) == 0 and len(self.index) == 0: return self + axis = self._get_axis_number(axis) if kwds or args and not isinstance(func, np.ufunc): f = lambda x: func(x, *args, **kwds) else: @@ -4478,6 +4491,7 @@ def corrwith(self, other, axis=0, drop=False): ------- correls : Series """ + axis = self._get_axis_number(axis) if isinstance(other, Series): return self.apply(other.corr, axis=axis) @@ -4580,6 +4594,7 @@ def count(self, axis=0, level=None, numeric_only=False): ------- count : Series (or DataFrame if level specified) """ + axis = self._get_axis_number(axis) if level is not None: return self._count_level(level, axis=axis, numeric_only=numeric_only) @@ -4756,6 +4771,7 @@ def mad(self, axis=0, skipna=True, level=None): frame = self._get_numeric_data() + axis = self._get_axis_number(axis) if axis == 0: demeaned = frame - frame.mean(axis=0) else: @@ -4811,12 +4827,14 @@ def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwds): grouped = self.groupby(level=level, axis=axis) if hasattr(grouped, name) and skipna: return getattr(grouped, name)(**kwds) + axis = self._get_axis_number(axis) method = getattr(type(self), name) applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwds) return grouped.aggregate(applyf) def _reduce(self, op, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): + axis = self._get_axis_number(axis) f = lambda x: op(x, axis=axis, skipna=skipna, **kwds) labels = self._get_agg_axis(axis) if numeric_only is None: @@ -4875,6 +4893,7 @@ def idxmin(self, axis=0, skipna=True): ------- idxmin : Series """ + axis = self._get_axis_number(axis) indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) result = [index[i] if i >= 0 else NA for i in indices] @@ -4897,6 +4916,7 @@ def idxmax(self, axis=0, skipna=True): ------- idxmax : Series """ + axis = self._get_axis_number(axis) indices = 
nanops.nanargmax(self.values, axis=axis, skipna=skipna) index = self._get_axis(axis) result = [index[i] if i >= 0 else NA for i in indices] @@ -5030,6 +5050,7 @@ def rank(self, axis=0, numeric_only=None, method='average', ------- ranks : DataFrame """ + axis = self._get_axis_number(axis) if numeric_only is None: try: ranks = algos.rank(self.values, axis=axis, method=method, @@ -5070,6 +5091,7 @@ def to_timestamp(self, freq=None, how='start', axis=0, copy=True): if copy: new_data = new_data.copy() + axis = self._get_axis_number(axis) if axis == 0: new_data.set_axis(1, self.index.to_timestamp(freq=freq, how=how)) elif axis == 1: @@ -5100,6 +5122,7 @@ def to_period(self, freq=None, axis=0, copy=True): if copy: new_data = new_data.copy() + axis = self._get_axis_number(axis) if axis == 0: if freq is None: freq = self.index.freqstr or self.index.inferred_freq diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bc07a509e71af..580148e11cc7c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -34,28 +34,29 @@ def load(cls, path): #---------------------------------------------------------------------- # Axis name business - @classmethod - def _get_axis_number(cls, axis): - axis = cls._AXIS_ALIASES.get(axis, axis) - + def _get_axis_number(self, axis): + axis = self._AXIS_ALIASES.get(axis, axis) if com.is_integer(axis): - if axis in cls._AXIS_NAMES: + if axis in self._AXIS_NAMES: return axis - else: - raise Exception('No %d axis' % axis) else: - return cls._AXIS_NUMBERS[axis] + try: + return self._AXIS_NUMBERS[axis] + except: + pass + raise ValueError('No axis named %s' % axis) - @classmethod - def _get_axis_name(cls, axis): - axis = cls._AXIS_ALIASES.get(axis, axis) + def _get_axis_name(self, axis): + axis = self._AXIS_ALIASES.get(axis, axis) if isinstance(axis, basestring): - if axis in cls._AXIS_NUMBERS: + if axis in self._AXIS_NUMBERS: return axis - else: - raise Exception('No axis named %s' % axis) else: - return cls._AXIS_NAMES[axis] + try: 
+ return self._AXIS_NAMES[axis] + except: + pass + raise ValueError('No axis named %s' % axis) def _get_axis(self, axis): name = self._get_axis_name(axis) @@ -147,6 +148,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, GroupBy object """ from pandas.core.groupby import groupby + axis = self._get_axis_number(axis) return groupby(self, by, axis=axis, level=level, as_index=as_index, sort=sort, group_keys=group_keys) @@ -247,6 +249,7 @@ def resample(self, rule, how=None, axis=0, fill_method=None, range from 0 through 4. Defaults to 0 """ from pandas.tseries.resample import TimeGrouper + axis = self._get_axis_number(axis) sampler = TimeGrouper(rule, label=label, closed=closed, how=how, axis=axis, kind=kind, loffset=loffset, fill_method=fill_method, convention=convention, @@ -925,6 +928,7 @@ def rename_axis(self, mapper, axis=0, copy=True): mapper_f = _get_rename_function(mapper) + axis = self._get_axis_number(axis) if axis == 0: new_data = self._data.rename_items(mapper_f, copydata=copy) else: @@ -951,6 +955,7 @@ def take(self, indices, axis=0, convert=True): # check/convert indicies here if convert: + axis = self._get_axis_number(axis) indices = _maybe_convert_indices(indices, len(self._get_axis(axis))) if axis == 0: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d33ce4c90244b..d1f87e4e7c932 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1051,6 +1051,7 @@ def xs(self, key, axis=1, copy=True): ------- y : ndim(self)-1 """ + axis = self._get_axis_number(axis) if axis == 0: data = self[key] if copy: @@ -1320,10 +1321,11 @@ def shift(self, lags, axis='major'): vslicer = slice(-lags, None) islicer = slice(None, lags) - if axis == 'major': + axis = self._get_axis_name(axis) + if axis == 'major_axis': values = values[:, vslicer, :] major_axis = major_axis[islicer] - elif axis == 'minor': + elif axis == 'minor_axis': values = values[:, :, vslicer] minor_axis = minor_axis[islicer] else: diff --git 
a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 670b8d2dcfb8d..fc475122a4ec8 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1766,19 +1766,20 @@ def setUp(self): index=['a', 'b', 'c']) def test_get_axis(self): - self.assert_(DataFrame._get_axis_name(0) == 'index') - self.assert_(DataFrame._get_axis_name(1) == 'columns') - self.assert_(DataFrame._get_axis_name('index') == 'index') - self.assert_(DataFrame._get_axis_name('columns') == 'columns') - self.assertRaises(Exception, DataFrame._get_axis_name, 'foo') - self.assertRaises(Exception, DataFrame._get_axis_name, None) - - self.assert_(DataFrame._get_axis_number(0) == 0) - self.assert_(DataFrame._get_axis_number(1) == 1) - self.assert_(DataFrame._get_axis_number('index') == 0) - self.assert_(DataFrame._get_axis_number('columns') == 1) - self.assertRaises(Exception, DataFrame._get_axis_number, 2) - self.assertRaises(Exception, DataFrame._get_axis_number, None) + f = self.frame + self.assert_(f._get_axis_name(0) == 'index') + self.assert_(f._get_axis_name(1) == 'columns') + self.assert_(f._get_axis_name('index') == 'index') + self.assert_(f._get_axis_name('columns') == 'columns') + self.assertRaises(Exception, f._get_axis_name, 'foo') + self.assertRaises(Exception, f._get_axis_name, None) + + self.assert_(f._get_axis_number(0) == 0) + self.assert_(f._get_axis_number(1) == 1) + self.assert_(f._get_axis_number('index') == 0) + self.assert_(f._get_axis_number('columns') == 1) + self.assertRaises(Exception, f._get_axis_number, 2) + self.assertRaises(Exception, f._get_axis_number, None) self.assert_(self.frame._get_axis(0) is self.frame.index) self.assert_(self.frame._get_axis(1) is self.frame.columns) @@ -8426,6 +8427,19 @@ def test_get_axis_etc(self): self.assert_(f._get_axis(1) is f.columns) self.assertRaises(Exception, f._get_axis_number, 2) + def test_axis_aliases(self): + + f = self.frame + + # reg name + expected = f.sum(axis=0) + result = f.sum(axis='index') + 
assert_series_equal(result, expected) + + expected = f.sum(axis=1) + result = f.sum(axis='columns') + assert_series_equal(result, expected) + def test_combine_first_mixed(self): a = Series(['a', 'b'], index=range(2)) b = Series(range(2), index=range(2))