diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 224925f144147..0cfc954e38f98 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -868,66 +868,6 @@ convert to an integer index: df_new[(df_new['index'] >= 1.0) & (df_new['index'] < 2)] -.. _indexing.class: - -Index objects -------------- - -The pandas Index class and its subclasses can be viewed as implementing an -*ordered set* in addition to providing the support infrastructure necessary for -lookups, data alignment, and reindexing. The easiest way to create one directly -is to pass a list or other sequence to ``Index``: - -.. ipython:: python - - index = Index(['e', 'd', 'a', 'b']) - index - 'd' in index - -You can also pass a ``name`` to be stored in the index: - - -.. ipython:: python - - index = Index(['e', 'd', 'a', 'b'], name='something') - index.name - -Starting with pandas 0.5, the name, if set, will be shown in the console -display: - -.. ipython:: python - - index = Index(list(range(5)), name='rows') - columns = Index(['A', 'B', 'C'], name='cols') - df = DataFrame(np.random.randn(5, 3), index=index, columns=columns) - df - df['A'] - - -Set operations on Index objects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. _indexing.set_ops: - -The three main operations are ``union (|)``, ``intersection (&)``, and ``diff -(-)``. These can be directly called as instance methods or used via overloaded -operators: - -.. ipython:: python - - a = Index(['c', 'b', 'a']) - b = Index(['c', 'e', 'd']) - a.union(b) - a | b - a & b - a - b - -``isin`` method of Index objects -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -One additional operation is the ``isin`` method that works analogously to the -``Series.isin`` method found :ref:`here `. - .. _indexing.hierarchical: Hierarchical indexing (MultiIndex) @@ -1189,7 +1129,7 @@ are named. .. ipython:: python - s.index.names = ['L1', 'L2'] + s.index.set_names(['L1', 'L2'], inplace=True) s.sortlevel(level='L1') s.sortlevel(level='L2') @@ -1229,7 +1169,9 @@ However: :: >>> s.ix[('a', 'b'):('b', 'a')] - Exception: MultiIndex lexsort depth 1, key was length 2 + Traceback (most recent call last) + ... + KeyError: Key length (3) was greater than MultiIndex lexsort depth (2) Swapping levels with ``swaplevel`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1274,6 +1216,88 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. +.. _indexing.class: + +Index objects +------------- + +The pandas Index class and its subclasses can be viewed as implementing an +*ordered set* in addition to providing the support infrastructure necessary for +lookups, data alignment, and reindexing. The easiest way to create one directly +is to pass a list or other sequence to ``Index``: + +.. ipython:: python + + index = Index(['e', 'd', 'a', 'b']) + index + 'd' in index + +You can also pass a ``name`` to be stored in the index: + + +.. ipython:: python + + index = Index(['e', 'd', 'a', 'b'], name='something') + index.name + +Starting with pandas 0.5, the name, if set, will be shown in the console +display: + +.. ipython:: python + + index = Index(list(range(5)), name='rows') + columns = Index(['A', 'B', 'C'], name='cols') + df = DataFrame(np.random.randn(5, 3), index=index, columns=columns) + df + df['A'] + + +Set operations on Index objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
_indexing.set_ops:
+
+The three main operations are ``union (|)``, ``intersection (&)``, and
+``diff (-)``. These can be directly called as instance methods or used via
+overloaded operators:
+
+.. ipython:: python
+
+   a = Index(['c', 'b', 'a'])
+   b = Index(['c', 'e', 'd'])
+   a.union(b)
+   a | b
+   a & b
+   a - b
+
+``isin`` method of Index objects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One additional operation is the ``isin`` method that works analogously to the
+``Series.isin`` method found :ref:`here `.
+
+Setting index metadata (``name(s)``, ``levels``, ``labels``)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _indexing.set_metadata:
+
+Indexes are "mostly immutable", but it is possible to set and change their
+metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and
+``labels``).
+
+You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_labels``
+methods to set these attributes directly. They default to returning a copy;
+however, you can specify ``inplace=True`` to change the data in place.
+
+.. ipython:: python
+
+   ind = Index([1, 2, 3])
+   ind.rename("apple")
+   ind
+   ind.set_names(["apple"], inplace=True)
+   ind.name = "bob"
+   ind
+
 Adding an index to an existing DataFrame
 ----------------------------------------
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 769b47b18db08..ab7a347ef0c58 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -47,6 +47,12 @@ pandas 0.13
   - Added a more informative error message when plot arguments contain
     overlapping color and style arguments (:issue:`4402`)
   - Significant table writing performance improvements in ``HDFStore``
+  - ``Index.copy()`` and ``MultiIndex.copy()`` now accept keyword arguments to
+    change attributes (i.e., ``names``, ``levels``, ``labels``)
+    (:issue:`4039`)
+  - Added ``rename`` and ``set_names`` methods to ``Index`` as well as
+    ``set_names``, ``set_levels``, ``set_labels`` to ``MultiIndex``.
+    (:issue:`4039`)
 
 **API Changes**
 
@@ -66,6 +72,7 @@ pandas 0.13
     an alias of iteritems used to get around ``2to3``'s changes).
     (:issue:`4384`, :issue:`4375`, :issue:`4372`)
   - ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`)
+
   - ``HDFStore``
 
     - added an ``is_open`` property to indicate if the underlying file handle is_open;
@@ -83,6 +90,21 @@ pandas 0.13
       be raised if you try to use ``mode='w'`` with an OPEN file handle
       (:issue:`4367`)
     - allow a passed locations array or mask as a ``where`` condition (:issue:`4467`)
+  - ``Index`` and ``MultiIndex`` changes (:issue:`4039`):
+
+    - Setting ``levels`` and ``labels`` directly on ``MultiIndex`` is now
+      deprecated. Instead, you can use the ``set_levels()`` and
+      ``set_labels()`` methods.
+    - ``levels``, ``labels`` and ``names`` properties no longer return lists,
+      but instead return containers that do not allow setting of items
+      ('mostly immutable').
+    - ``levels``, ``labels`` and ``names`` are validated upon setting and are
+      either copied or shallow-copied.
+    - ``__deepcopy__`` now returns a shallow copy (currently: a view) of the
+      data, allowing metadata changes.
+    - ``MultiIndex.astype()`` now only allows ``np.object_``-like dtypes and
+      now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`)
+
 
 **Experimental Features**
 
 **Bug Fixes**
@@ -136,6 +158,10 @@ pandas 0.13
   - frozenset objects now raise in the ``Series`` constructor (:issue:`4482`,
     :issue:`4480`)
   - Fixed issue with sorting a duplicate multi-index that has multiple dtypes
     (:issue:`4516`)
+  - Fixed bug in ``DataFrame.set_value`` which was causing name attributes to
+    be lost when expanding the index. (:issue:`3742`, :issue:`4039`)
+  - Fixed issue where individual ``names``, ``levels`` and ``labels`` could be
+    set on ``MultiIndex`` without validation (:issue:`3714`, :issue:`4039`)
 
 pandas 0.12
 ===========
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
index 7da2f03ad4c74..05bae7a952612 100644
--- a/doc/source/v0.13.0.txt
+++ b/doc/source/v0.13.0.txt
@@ -72,6 +72,24 @@ API changes
     import os
     os.remove(path)
 
+  - Changes to how ``Index`` and ``MultiIndex`` handle metadata (``levels``,
+    ``labels``, and ``names``) (:issue:`4039`):
+
+    .. code-block:: python
+
+        # previously, you would have set levels or labels directly
+        index.levels = [[1, 2, 3, 4], [1, 2, 4, 4]]
+
+        # now, you use the set_levels or set_labels methods
+        index = index.set_levels([[1, 2, 3, 4], [1, 2, 4, 4]])
+
+        # similarly, for names, you can rename the object;
+        # setting names directly is not deprecated
+        index = index.set_names(["bob", "cranberry"])
+
+        # and all methods take an inplace kwarg
+        index.set_names(["bob", "cranberry"], inplace=True)
+
 Enhancements
 ~~~~~~~~~~~~
 
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 16fe28a804b6b..e635844248371 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -1,7 +1,8 @@
 """
-Base class(es) for all pandas objects.
+Base and utility classes for pandas objects.
 """
 from pandas import compat
+import numpy as np
 
 class StringMixin(object):
     """implements string methods so long as object defines a `__unicode__` method.
@@ -56,3 +57,88 @@ def __unicode__(self):
         """
         # Should be overwritten by base classes
         return object.__repr__(self)
+
+class FrozenList(PandasObject, list):
+    """
+    Container that doesn't allow setting items, but which is hashable (unlike
+    a regular list), so it can still be used for lookups, etc.
+    """
+    # Sidenote: This has to be of type list, otherwise it messes up PyTables typechecks
+
+    def __add__(self, other):
+        if isinstance(other, tuple):
+            other = list(other)
+        return self.__class__(super(FrozenList, self).__add__(other))
+
+    __iadd__ = __add__
+
+    # Python 2 compat
+    def __getslice__(self, i, j):
+        return self.__class__(super(FrozenList, self).__getslice__(i, j))
+
+    def __getitem__(self, n):
+        # Python 3 compat
+        if isinstance(n, slice):
+            return self.__class__(super(FrozenList, self).__getitem__(n))
+        return super(FrozenList, self).__getitem__(n)
+
+    def __radd__(self, other):
+        if isinstance(other, tuple):
+            other = list(other)
+        return self.__class__(other + list(self))
+
+    def __eq__(self, other):
+        if isinstance(other, (tuple, FrozenList)):
+            other = list(other)
+        return super(FrozenList, self).__eq__(other)
+
+    __req__ = __eq__
+
+    def __mul__(self, other):
+        return self.__class__(super(FrozenList, self).__mul__(other))
+
+    __imul__ = __mul__
+
+    def __hash__(self):
+        return hash(tuple(self))
+
+    def _disabled(self, *args, **kwargs):
+        """This method will not function because object is immutable."""
+        raise TypeError("'%s' does not support mutable operations." 
% + self.__class__) + + def __unicode__(self): + from pandas.core.common import pprint_thing + return "%s(%s)" % (self.__class__.__name__, + pprint_thing(self, quote_strings=True, + escape_chars=('\t', '\r', '\n'))) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + pop = append = extend = remove = sort = insert = _disabled + + +class FrozenNDArray(PandasObject, np.ndarray): + + # no __array_finalize__ for now because no metadata + def __new__(cls, data, dtype=None, copy=False): + if copy is None: + copy = not isinstance(data, FrozenNDArray) + res = np.array(data, dtype=dtype, copy=copy).view(cls) + return res + + def _disabled(self, *args, **kwargs): + """This method will not function because object is immutable.""" + raise TypeError("'%s' does not support mutable operations." % + self.__class__) + + __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled + put = itemset = fill = _disabled + + def _shallow_copy(self): + return self.view() + + def values(self): + """returns *copy* of underlying array""" + arr = self.view(np.ndarray).copy() + return arr diff --git a/pandas/core/common.py b/pandas/core/common.py index 06ca3be455f2a..9a90c66902376 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -8,18 +8,16 @@ from numpy.lib.format import read_array, write_array import numpy as np - import pandas.algos as algos import pandas.lib as lib import pandas.tslib as tslib from pandas import compat from pandas.compat import StringIO, BytesIO, range, long, u, zip, map - - from pandas.core.config import get_option from pandas.core import array as pa + # XXX: HACK for NumPy 1.5.1 to suppress warnings try: np.seterr(all='ignore') diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0f3bcb32f7287..20a2dab06368b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1150,7 +1150,7 @@ def to_records(self, index=True, convert_datetime64=True): arrays = ix_vals+ [self[c].values for c in self.columns] count = 0 - index_names = self.index.names + index_names = list(self.index.names) if isinstance(self.index, MultiIndex): for i, n in enumerate(index_names): if n is None: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0eaae228da627..2ee7f791c671f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -404,7 +404,7 @@ def drop(self, labels, axis=0, level=None): new_axis = axis.drop(labels) dropped = self.reindex(**{axis_name: new_axis}) try: - dropped.axes[axis_].names = axis.names + dropped.axes[axis_].set_names(axis.names, inplace=True) except AttributeError: pass return dropped diff --git a/pandas/core/index.py b/pandas/core/index.py index 33ea4d25bc7dc..7be19302d88d5 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,5 +1,5 @@ # pylint: disable=E1101,E1103,W0232 - +from functools import partial from pandas.compat import range, zip, lrange, lzip from pandas import compat import numpy as np @@ -9,12 +9,13 @@ import pandas.algos as _algos import pandas.index as _index from pandas.lib import Timestamp -from pandas.core.base import PandasObject +from pandas.core.base import FrozenList, FrozenNDArray -from pandas.util.decorators import cache_readonly +from pandas.util.decorators import cache_readonly, deprecate from pandas.core.common import isnull import pandas.core.common as com from pandas.core.config import get_option +import warnings __all__ = ['Index'] @@ -38,6 +39,7 @@ def wrapper(self, other): class InvalidIndexError(Exception): pass + _o_dtype = np.dtype(object) @@ -47,7 +49,7 @@ def 
_shouldbe_timestamp(obj): or tslib.is_timestamp_array(obj)) -class Index(PandasObject, np.ndarray): +class Index(FrozenNDArray): """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -108,8 +110,14 @@ def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs): return Int64Index(data, copy=copy, dtype=dtype, name=name) subarr = com._asarray_tuplesafe(data, dtype=object) + + # _asarray_tuplesafe does not always copy underlying data, + # so need to make sure that this happens + if copy: + subarr = subarr.copy() + elif np.isscalar(data): - raise ValueError('Index(...) must be called with a collection ' + raise TypeError('Index(...) must be called with a collection ' 'of some kind, %s was passed' % repr(data)) else: # other iterable of some kind @@ -118,7 +126,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs): if dtype is None: inferred = lib.infer_dtype(subarr) if inferred == 'integer': - return Int64Index(subarr.astype('i8'), name=name) + return Int64Index(subarr.astype('i8'), copy=copy, name=name) elif inferred != 'string': if (inferred.startswith('datetime') or tslib.is_timestamp_array(subarr)): @@ -129,7 +137,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs): return PeriodIndex(subarr, name=name, **kwargs) subarr = subarr.view(cls) - subarr.name = name + # could also have a _set_name, but I don't think it's really necessary + subarr._set_names([name]) return subarr def __array_finalize__(self, obj): @@ -142,6 +151,41 @@ def __array_finalize__(self, obj): def _shallow_copy(self): return self.view() + def copy(self, names=None, name=None, dtype=None, deep=False): + """ + Make a copy of this object. Name and dtype sets those attributes on + the new object. + + Parameters + ---------- + name : string, optional + dtype : numpy dtype or pandas type + + Returns + ------- + copy : Index + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + """ + if names is not None and name is not None: + raise TypeError("Can only provide one of `names` and `name`") + if deep: + from copy import deepcopy + new_index = np.ndarray.__deepcopy__(self, {}).view(self.__class__) + name = name or deepcopy(self.name) + else: + new_index = super(Index, self).copy() + if name is not None: + names = [name] + if names: + new_index = new_index.set_names(names) + if dtype: + new_index = new_index.astype(dtype) + return new_index + def __unicode__(self): """ Return a string representation for a particular Index @@ -197,16 +241,41 @@ def nlevels(self): # for compat with multindex code def _get_names(self): - return [self.name] + return FrozenList((self.name,)) def _set_names(self, values): if len(values) != 1: - raise AssertionError('Length of new names must be 1, got %d' + raise ValueError('Length of new names must be 1, got %d' % len(values)) self.name = values[0] names = property(fset=_set_names, fget=_get_names) + def set_names(self, names, inplace=False): + """ + Set new names on index. Defaults to returning new index. 
+ + Parameters + ---------- + names : sequence + names to set + inplace : bool + if True, mutates in place + + Returns + ------- + new index (of same type and class...etc) + """ + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._set_names(names) + return idx + + def rename(self, name, inplace=False): + return self.set_names([name], inplace=inplace) + @property def _has_complex_internals(self): # to disable groupby tricks in MultiIndex @@ -310,10 +379,7 @@ def __setstate__(self, state): np.ndarray.__setstate__(self, state) def __deepcopy__(self, memo={}): - """ - Index is not mutable, so disabling deepcopy - """ - return self + return self.copy(deep=True) def __contains__(self, key): hash(key) @@ -326,9 +392,6 @@ def __contains__(self, key): def __hash__(self): return hash(self.view(np.ndarray)) - def __setitem__(self, key, value): - raise Exception(str(self.__class__) + ' object is immutable') - def __getitem__(self, key): """Override numpy.ndarray's __getitem__ method to work as desired""" arr_idx = self.view(np.ndarray) @@ -513,7 +576,7 @@ def order(self, return_indexer=False, ascending=True): return sorted_index def sort(self, *args, **kwargs): - raise Exception('Cannot sort an Index object') + raise TypeError('Cannot sort an %r object' % self.__class__.__name__) def shift(self, periods=1, freq=None): """ @@ -572,7 +635,7 @@ def union(self, other): union : Index """ if not hasattr(other, '__iter__'): - raise Exception('Input must be iterable!') + raise TypeError('Input must be iterable.') if len(other) == 0 or self.equals(other): return self @@ -637,7 +700,7 @@ def intersection(self, other): intersection : Index """ if not hasattr(other, '__iter__'): - raise Exception('Input must be iterable!') + raise TypeError('Input must be iterable!') self._assert_can_do_setop(other) @@ -679,7 +742,7 @@ def diff(self, other): """ if not hasattr(other, '__iter__'): - raise Exception('Input must be iterable!') + raise TypeError('Input must be iterable!') if self.equals(other): return Index([], name=self.name) @@ -764,7 +827,8 @@ def get_level_values(self, level): ------- values : ndarray """ - num = self._get_level_number(level) + # checks that level number is actually just 1 + self._get_level_number(level) return self def get_indexer(self, target, method=None, limit=None): @@ -807,8 +871,8 @@ def get_indexer(self, target, method=None, limit=None): return this.get_indexer(target, method=method, limit=limit) if not self.is_unique: - raise Exception('Reindexing only valid with uniquely valued Index ' - 'objects') + raise InvalidIndexError('Reindexing only valid with uniquely' + ' valued Index objects') if method == 'pad': if not self.is_monotonic: @@ -900,7 +964,7 @@ def reindex(self, target, method=None, level=None, limit=None, target = _ensure_index(target) if level is not None: if method is not None: - raise ValueError('Fill method not supported if level passed') + raise TypeError('Fill method not supported if level passed') _, indexer, _ = self._join_level(target, level, how='right', return_indexers=True) else: @@ -1055,7 +1119,7 @@ def _join_level(self, other, level, how='left', return_indexers=False): the MultiIndex will not be changed (currently) """ if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): - raise Exception('Join on level between two MultiIndex objects ' + raise TypeError('Join on level between two MultiIndex objects ' 'is ambiguous') left, right = self, other @@ -1414,9 +1478,9 @@ class MultiIndex(Index): Parameters ---------- - levels : list or 
tuple of arrays + levels : sequence of arrays The unique labels for each level - labels : list or tuple of arrays + labels : sequence of arrays Integers for each level designating which label at each location sortorder : optional int Level of sortedness (must be lexicographically sorted by that @@ -1424,44 +1488,34 @@ class MultiIndex(Index): names : optional sequence of objects Names for each of the index levels. """ - # shadow property - names = None + # initialize to zero-length tuples to make everything work + _names = FrozenList() + _levels = FrozenList() + _labels = FrozenList() - def __new__(cls, levels=None, labels=None, sortorder=None, names=None): + def __new__(cls, levels=None, labels=None, sortorder=None, names=None, + copy=False): if len(levels) != len(labels): - raise AssertionError( + raise ValueError( 'Length of levels and labels must be the same') if len(levels) == 0: - raise Exception('Must pass non-zero number of levels/labels') - + raise TypeError('Must pass non-zero number of levels/labels') if len(levels) == 1: if names: name = names[0] else: name = None - return Index(levels[0], name=name).take(labels[0]) - - levels = [_ensure_index(lev) for lev in levels] - labels = [np.asarray(labs, dtype=np.int_) for labs in labels] + return Index(levels[0], name=name, copy=True).take(labels[0]) # v3, 0.8.0 subarr = np.empty(0, dtype=object).view(cls) - subarr.levels = levels - subarr.labels = labels - - if names is None: - subarr.names = [None] * subarr.nlevels - else: - if len(names) != subarr.nlevels: - raise AssertionError(('Length of names (%d) must be same as level ' - '(%d)') % (len(names),subarr.nlevels)) + subarr._set_levels(levels, copy=copy) + subarr._set_labels(labels, copy=copy) - subarr.names = list(names) + if names is not None: + subarr._set_names(names) - # set the name - for i, name in enumerate(subarr.names): - subarr.levels[i].name = name if sortorder is not None: subarr.sortorder = int(sortorder) @@ -1470,6 +1524,129 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None): return subarr + def _get_levels(self): + return self._levels + + + def _set_levels(self, levels, copy=False): + # This is NOT part of the levels property because it should be + # externally not allowed to set levels. User beware if you change + # _levels directly + if len(levels) == 0: + raise ValueError("Must set non-zero number of levels.") + levels = FrozenList(_ensure_index(lev, copy=copy)._shallow_copy() + for lev in levels) + names = self.names + self._levels = levels + if len(names): + self._set_names(names) + + def set_levels(self, levels, inplace=False): + """ + Set new levels on MultiIndex. Defaults to returning + new index. 
+ + Parameters + ---------- + levels : sequence + new levels to apply + inplace : bool + if True, mutates in place + + Returns + ------- + new index (of same type and class...etc) + """ + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._set_levels(levels) + return idx + + # remove me in 0.14 and change to read only property + __set_levels = deprecate("setting `levels` directly", + partial(set_levels, inplace=True), + alt_name="set_levels") + levels = property(fget=_get_levels, fset=__set_levels) + + def _get_labels(self): + return self._labels + + def _set_labels(self, labels, copy=False): + if len(labels) != self.nlevels: + raise ValueError("Length of levels and labels must be the same.") + self._labels = FrozenList(_ensure_frozen(labs,copy=copy)._shallow_copy() + for labs in labels) + + def set_labels(self, labels, inplace=False): + """ + Set new labels on MultiIndex. Defaults to returning + new index. + + Parameters + ---------- + labels : sequence of arrays + new labels to apply + inplace : bool + if True, mutates in place + + Returns + ------- + new index (of same type and class...etc) + """ + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._set_labels(labels) + return idx + + # remove me in 0.14 and change to readonly property + __set_labels = deprecate("setting labels directly", + partial(set_labels, inplace=True), + alt_name="set_labels") + labels = property(fget=_get_labels, fset=__set_labels) + + def copy(self, names=None, dtype=None, levels=None, labels=None, + deep=False): + """ + Make a copy of this object. Names, dtype, levels and labels can be + passed and will be set on new copy. + + Parameters + ---------- + names : sequence, optional + dtype : numpy dtype or pandas type, optional + levels : sequence, optional + labels : sequence, optional + + Returns + ------- + copy : MultiIndex + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + This could be potentially expensive on large MultiIndex objects. + """ + new_index = np.ndarray.copy(self) + if deep: + from copy import deepcopy + levels = levels if levels is not None else deepcopy(self.levels) + labels = labels if labels is not None else deepcopy(self.labels) + names = names if names is not None else deepcopy(self.names) + if levels is not None: + new_index = new_index.set_levels(levels) + if labels is not None: + new_index = new_index.set_labels(labels) + if names is not None: + new_index = new_index.set_names(names) + if dtype: + new_index = new_index.astype(dtype) + return new_index + def __array_finalize__(self, obj): """ Update custom MultiIndex attributes when a new array is created by @@ -1480,9 +1657,9 @@ def __array_finalize__(self, obj): # instance. return - self.levels = list(getattr(obj, 'levels', [])) - self.labels = list(getattr(obj, 'labels', [])) - self.names = list(getattr(obj, 'names', [])) + self._set_levels(getattr(obj, 'levels', [])) + self._set_labels(getattr(obj, 'labels', [])) + self._set_names(getattr(obj, 'names', [])) self.sortorder = getattr(obj, 'sortorder', None) def _array_values(self): @@ -1509,6 +1686,26 @@ def __unicode__(self): def __len__(self): return len(self.labels[0]) + def _get_names(self): + return FrozenList(level.name for level in self.levels) + + def _set_names(self, values): + """ + sets names on levels. WARNING: mutates! 
+ + Note that you generally want to set this *after* changing levels, so that it only + acts on copies""" + values = list(values) + if len(values) != self.nlevels: + raise ValueError('Length of names (%d) must be same as level ' + '(%d)' % (len(values),self.nlevels)) + # set the name + for name, level in zip(values, self.levels): + level.rename(name, inplace=True) + + + names = property(fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex") + def _format_native_types(self, **kwargs): return self.tolist() @@ -1524,9 +1721,9 @@ def inferred_type(self): def _from_elements(values, labels=None, levels=None, names=None, sortorder=None): index = values.view(MultiIndex) - index.levels = levels - index.labels = labels - index.names = names + index._set_levels(levels) + index._set_labels(labels) + index._set_names(names) index.sortorder = sortorder return index @@ -1534,17 +1731,17 @@ def _get_level_number(self, level): try: count = self.names.count(level) if count > 1: - raise Exception('The name %s occurs multiple times, use a ' + raise ValueError('The name %s occurs multiple times, use a ' 'level number' % level) level = self.names.index(level) except ValueError: if not isinstance(level, int): - raise Exception('Level %s not found' % str(level)) + raise KeyError('Level %s not found' % str(level)) elif level < 0: level += self.nlevels # Note: levels are zero-based elif level >= self.nlevels: - raise ValueError('Index has only %d levels, not %d' + raise IndexError('Too many levels: Index has only %d levels, not %d' % (self.nlevels, level + 1)) return level @@ -1790,7 +1987,8 @@ def from_tuples(cls, tuples, sortorder=None, names=None): index : MultiIndex """ if len(tuples) == 0: - raise Exception('Cannot infer number of levels from empty list') + # I think this is right? Not quite sure... + raise TypeError('Cannot infer number of levels from empty list') if isinstance(tuples, np.ndarray): if isinstance(tuples, Index): @@ -1835,9 +2033,9 @@ def __setstate__(self, state): np.ndarray.__setstate__(self, nd_state) levels, labels, sortorder, names = own_state - self.levels = [Index(x) for x in levels] - self.labels = labels - self.names = names + self._set_levels([Index(x) for x in levels]) + self._set_labels(labels) + self._set_names(names) self.sortorder = sortorder def __getitem__(self, key): @@ -1862,10 +2060,10 @@ def __getitem__(self, key): new_labels = [lab[key] for lab in self.labels] # an optimization - result.levels = list(self.levels) - result.labels = new_labels + result._set_levels(self.levels) + result._set_labels(new_labels) result.sortorder = sortorder - result.names = self.names + result._set_names(self.names) return result @@ -2158,7 +2356,7 @@ def reindex(self, target, method=None, level=None, limit=None, """ if level is not None: if method is not None: - raise ValueError('Fill method not supported if level passed') + raise TypeError('Fill method not supported if level passed') target, indexer, _ = self._join_level(target, level, how='right', return_indexers=True) else: @@ -2202,7 +2400,7 @@ def _tuple_index(self): def slice_locs(self, start=None, end=None, strict=False): """ For an ordered MultiIndex, compute the slice locations for input - labels. They can tuples representing partial levels, e.g. for a + labels. They can be tuples representing partial levels, e.g. for a MultiIndex with 3 levels, you can pass a single value (corresponding to the first level), or a 1-, 2-, or 3-tuple. 
@@ -2240,8 +2438,9 @@ def slice_locs(self, start=None, end=None, strict=False): def _partial_tup_index(self, tup, side='left'): if len(tup) > self.lexsort_depth: - raise KeyError('MultiIndex lexsort depth %d, key was length %d' % - (self.lexsort_depth, len(tup))) + raise KeyError('Key length (%d) was greater than MultiIndex' + ' lexsort depth (%d)' % + (len(tup), self.lexsort_depth)) n = len(tup) start, end = 0, len(self) @@ -2251,7 +2450,7 @@ def _partial_tup_index(self, tup, side='left'): if lab not in lev: if not lev.is_type_compatible(lib.infer_dtype([lab])): - raise Exception('Level type mismatch: %s' % lab) + raise TypeError('Level type mismatch: %s' % lab) # short circuit loc = lev.searchsorted(lab, side=side) @@ -2546,7 +2745,8 @@ def diff(self, other): try: other = MultiIndex.from_tuples(other) except: - raise TypeError("other should be a MultiIndex or a list of tuples") + raise TypeError('other must be a MultiIndex or a list of' + ' tuples') result_names = self.names else: result_names = self.names if self.names == other.names else None @@ -2569,6 +2769,11 @@ def diff(self, other): def _assert_can_do_setop(self, other): pass + def astype(self, dtype): + if np.dtype(dtype) != np.object_: + raise TypeError("Setting %s dtype to anything other than object is not supported" % self.__class__) + return self._shallow_copy() + def insert(self, loc, item): """ Make new MultiIndex inserting new item at location @@ -2588,7 +2793,7 @@ def insert(self, loc, item): if not isinstance(item, tuple): item = (item,) + ('',) * (self.nlevels - 1) elif len(item) != self.nlevels: - raise ValueError('Passed item incompatible tuple length') + raise ValueError('Item must have length equal to number of levels.') new_levels = [] new_labels = [] @@ -2671,13 +2876,19 @@ def _sparsify(label_list, start=0,sentinal=''): return lzip(*result) -def _ensure_index(index_like): +def _ensure_index(index_like, copy=False): if isinstance(index_like, Index): + if copy: + index_like = index_like.copy() return index_like if hasattr(index_like, 'name'): - return Index(index_like, name=index_like.name) + return Index(index_like, name=index_like.name, copy=copy) + # must check for exactly list here because of strict type + # check in clean_index_list if isinstance(index_like, list): + if type(index_like) != list: + index_like = list(index_like) # #2200 ? converted, all_arrays = lib.clean_index_list(index_like) @@ -2685,13 +2896,32 @@ def _ensure_index(index_like): return MultiIndex.from_arrays(converted) else: index_like = converted + else: + # clean_index_list does the equivalent of copying + # so only need to do this if not list instance + if copy: + from copy import copy + index_like = copy(index_like) return Index(index_like) +def _ensure_frozen(nd_array_like, copy=False): + if not isinstance(nd_array_like, FrozenNDArray): + arr = np.asarray(nd_array_like, dtype=np.int_) + # have to do this separately so that non-index input gets copied + if copy: + arr = arr.copy() + nd_array_like = arr.view(FrozenNDArray) + else: + if copy: + nd_array_like = nd_array_like.copy() + return nd_array_like + + def _validate_join_method(method): if method not in ['left', 'right', 'inner', 'outer']: - raise Exception('do not recognize join method %s' % method) + raise ValueError('do not recognize join method %s' % method) # TODO: handle index names! 
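
The ``pandas/core/index.py`` changes above are easiest to read alongside a
small usage sketch. This is illustrative only (the index values and names are
made up); the methods and keyword arguments are the ones defined in the diff::

    from pandas import Index, MultiIndex

    # Index names: set_names/rename return a new object by default,
    # or mutate when inplace=True is passed
    ind = Index([1, 2, 3], name='original')
    renamed = ind.rename('apple')             # new Index with name 'apple'
    ind.set_names(['banana'], inplace=True)   # mutates ind

    # MultiIndex levels/labels: direct assignment is deprecated in favor of
    # set_levels/set_labels, which validate their input
    mi = MultiIndex.from_arrays([['a', 'a', 'b'], [1, 2, 1]],
                                names=['letter', 'number'])
    mi = mi.set_levels([['x', 'y'], [10, 20]])    # one level per existing level
    mi = mi.set_labels([[0, 0, 1], [0, 1, 0]])    # integer codes into the levels
    mi2 = mi.copy(names=['L1', 'L2'])             # copy() now takes metadata kwargs
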
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index b69e4a6a96acc..4596b93d79778 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -92,8 +92,8 @@ def _make_index(lev,lab): def _make_sorted_values_labels(self): v = self.level - labs = self.index.labels - levs = self.index.levels + labs = list(self.index.labels) + levs = list(self.index.levels) to_sort = labs[:v] + labs[v + 1:] + [labs[v]] sizes = [len(x) for x in levs[:v] + levs[v + 1:] + [levs[v]]] @@ -206,8 +206,8 @@ def get_new_columns(self): width = len(self.value_columns) propagator = np.repeat(np.arange(width), stride) if isinstance(self.value_columns, MultiIndex): - new_levels = self.value_columns.levels + [self.removed_level] - new_names = self.value_columns.names + [self.removed_name] + new_levels = self.value_columns.levels + (self.removed_level,) + new_names = self.value_columns.names + (self.removed_name,) new_labels = [lab.take(propagator) for lab in self.value_columns.labels] diff --git a/pandas/core/series.py b/pandas/core/series.py index 58fd0a0551ace..e283058209e79 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1309,16 +1309,31 @@ def values(self): """ return self.view(ndarray) - def copy(self, order='C'): + def copy(self, order='C', deep=False): """ Return new Series with copy of underlying values + Parameters + ---------- + deep : boolean, default False + deep copy index along with data + order : boolean, default 'C' + order for underlying numpy array + Returns ------- cp : Series """ - return Series(self.values.copy(order), index=self.index, - name=self.name) + if deep: + from copy import deepcopy + index = self.index.copy(deep=deep) + name = deepcopy(self.name) + else: + index = self.index + name = self.name + + return Series(self.values.copy(order), index=index, + name=name) def tolist(self): """ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a6c8584441daf..3b132be800cb1 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -774,7 +774,7 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): # add names for the index if indexnamerow: coffset = len(indexnamerow) - len(columns) - index.names = indexnamerow[:coffset] + index = index.set_names(indexnamerow[:coffset]) # maybe create a mi on the columns columns = self._maybe_make_multi_index_columns(columns, self.col_names) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 1ac4d4e31ed10..3f41be6ae64c6 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -675,7 +675,7 @@ def _check_excel_multiindex_dates(self, ext): recons = reader.parse('test1', index_col=[0, 1]) tm.assert_frame_equal(tsframe, recons, check_names=False) - self.assertEquals(recons.index.names, ['time', 'foo']) + self.assertEquals(recons.index.names, ('time', 'foo')) # infer index tsframe.to_excel(path, 'test1') diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index d83fbd97b6044..41345352b5ec5 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -817,7 +817,11 @@ def test_parse_dates_column_list(self): expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4)) lev = expected.index.levels[0] - expected.index.levels[0] = lev.to_datetime(dayfirst=True) + levels = list(expected.index.levels) + levels[0] = lev.to_datetime(dayfirst=True) + # hack to get this to work - remove for final test + levels[0].name = lev.name + expected.index.set_levels(levels, inplace=True) expected['aux_date'] = 
to_datetime(expected['aux_date'], dayfirst=True) expected['aux_date'] = lmap(Timestamp, expected['aux_date']) @@ -1335,7 +1339,7 @@ def test_read_table_buglet_4x_multiindex(self): # it works! df = self.read_table(StringIO(text), sep='\s+') - self.assertEquals(df.index.names, ['one', 'two', 'three', 'four']) + self.assertEquals(df.index.names, ('one', 'two', 'three', 'four')) def test_read_csv_parse_simple_list(self): text = """foo @@ -2144,14 +2148,14 @@ def test_usecols_dtypes(self): 4,5,6 7,8,9 10,11,12""" - result = self.read_csv(StringIO(data), usecols=(0, 1, 2), - names=('a', 'b', 'c'), + result = self.read_csv(StringIO(data), usecols=(0, 1, 2), + names=('a', 'b', 'c'), header=None, converters={'a': str}, dtype={'b': int, 'c': float}, - ) + ) result2 = self.read_csv(StringIO(data), usecols=(0, 2), - names=('a', 'b', 'c'), + names=('a', 'b', 'c'), header=None, converters={'a': str}, dtype={'b': int, 'c': float}, diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 9575d99229dc4..3bc32fb3f5a32 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1921,7 +1921,7 @@ def test_store_hierarchical(self): with ensure_clean(self.path) as store: store['frame'] = frame recons = store['frame'] - assert(recons.index.names == ['foo', 'bar']) + assert(recons.index.names == ('foo', 'bar')) def test_store_index_name(self): df = tm.makeDataFrame() diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py new file mode 100644 index 0000000000000..c6285bc95b855 --- /dev/null +++ b/pandas/tests/test_base.py @@ -0,0 +1,108 @@ +import re +import unittest +import numpy as np +from pandas.core.base import FrozenList, FrozenNDArray +from pandas.util.testing import assertRaisesRegexp, assert_isinstance + + +class CheckImmutable(object): + mutable_regex = re.compile('does not support mutable operations') + + def check_mutable_error(self, *args, **kwargs): + # pass whatever functions you normally would to assertRaises (after the Exception kind) + assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs) + + def test_no_mutable_funcs(self): + def setitem(): self.container[0] = 5 + + self.check_mutable_error(setitem) + + def setslice(): self.container[1:2] = 3 + + self.check_mutable_error(setslice) + + def delitem(): del self.container[0] + + self.check_mutable_error(delitem) + + def delslice(): del self.container[0:3] + + self.check_mutable_error(delslice) + mutable_methods = getattr(self, "mutable_methods", []) + for meth in mutable_methods: + self.check_mutable_error(getattr(self.container, meth)) + + def test_slicing_maintains_type(self): + result = self.container[1:2] + expected = self.lst[1:2] + self.check_result(result, expected) + + def check_result(self, result, expected, klass=None): + klass = klass or self.klass + assert_isinstance(result, klass) + self.assertEqual(result, expected) + + +class TestFrozenList(CheckImmutable, unittest.TestCase): + mutable_methods = ('extend', 'pop', 'remove', 'insert') + + def setUp(self): + self.lst = [1, 2, 3, 4, 5] + self.container = FrozenList(self.lst) + self.klass = FrozenList + + def test_add(self): + result = self.container + (1, 2, 3) + expected = FrozenList(self.lst + [1, 2, 3]) + self.check_result(result, expected) + + result = (1, 2, 3) + self.container + expected = FrozenList([1, 2, 3] + self.lst) + self.check_result(result, expected) + + def test_inplace(self): + q = r = self.container + q += [5] + self.check_result(q, self.lst + [5]) + # other shouldn't be mutated + 
self.check_result(r, self.lst) + + +class TestFrozenNDArray(CheckImmutable, unittest.TestCase): + mutable_methods = ('put', 'itemset', 'fill') + + def setUp(self): + self.lst = [3, 5, 7, -2] + self.container = FrozenNDArray(self.lst) + self.klass = FrozenNDArray + + def test_shallow_copying(self): + original = self.container.copy() + assert_isinstance(self.container.view(), FrozenNDArray) + self.assert_(not isinstance(self.container.view(np.ndarray), FrozenNDArray)) + self.assert_(self.container.view() is not self.container) + self.assert_(np.array_equal(self.container, original)) + # shallow copy should be the same too + assert_isinstance(self.container._shallow_copy(), FrozenNDArray) + # setting should not be allowed + def testit(container): container[0] = 16 + + self.check_mutable_error(testit, self.container) + + def test_values(self): + original = self.container.view(np.ndarray).copy() + n = original[0] + 15 + vals = self.container.values() + self.assert_(np.array_equal(original, vals)) + self.assert_(original is not vals) + vals[0] = n + self.assert_(np.array_equal(self.container, original)) + self.assertEqual(vals[0], n) + + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + # '--with-coverage', '--cover-package=pandas.core'], + exit=False) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index abed2818cb864..946e640d331cc 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,22 +1,22 @@ from datetime import datetime import re +import unittest import nose from nose.tools import assert_equal import unittest +import numpy as np +from pandas.tslib import iNaT from pandas import Series, DataFrame, date_range, DatetimeIndex, Timestamp +import pandas.compat as compat from pandas.compat import range, long, lrange, lmap, u from pandas.core.common import notnull, isnull +import pandas.compat as compat import pandas.core.common as com import pandas.util.testing as tm import pandas.core.config as cf -import numpy as np - -from pandas.tslib import iNaT -from pandas import compat - _multiprocess_can_split_ = True @@ -782,6 +782,7 @@ def test_2d_datetime64(self): expected[:, [2, 4]] = datetime(2007, 1, 1) tm.assert_almost_equal(result, expected) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1b405eae08797..7043698ea6476 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3526,7 +3526,7 @@ def create_dict(order_id): # MultiIndex result = DataFrame.from_records(documents, index=['order_id', 'quantity']) - self.assert_(result.index.names == ['order_id', 'quantity']) + self.assert_(result.index.names == ('order_id', 'quantity')) def test_from_records_misc_brokenness(self): # #2179 @@ -5920,14 +5920,15 @@ def test_corrwith_series(self): assert_series_equal(result, expected) def test_drop_names(self): - df = DataFrame([[1, 2, 3],[3, 4, 5],[5, 6, 7]], index=['a', 'b', 'c'], columns=['d', 'e', 'f']) + df = DataFrame([[1, 2, 3],[3, 4, 5],[5, 6, 7]], index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) df.index.name, df.columns.name = 'first', 'second' df_dropped_b = df.drop('b') df_dropped_e = df.drop('e', axis=1) - self.assert_(df_dropped_b.index.name == 'first') - self.assert_(df_dropped_e.index.name == 'first') - self.assert_(df_dropped_b.columns.name == 'second') - self.assert_(df_dropped_e.columns.name == 'second') + 
self.assertEqual(df_dropped_b.index.name, 'first') + self.assertEqual(df_dropped_e.index.name, 'first') + self.assertEqual(df_dropped_b.columns.name, 'second') + self.assertEqual(df_dropped_e.columns.name, 'second') def test_dropEmptyRows(self): N = len(self.frame.index) @@ -7238,7 +7239,7 @@ def test_pivot(self): # don't specify values pivoted = frame.pivot(index='index', columns='columns') self.assertEqual(pivoted.index.name, 'index') - self.assertEqual(pivoted.columns.names, [None, 'columns']) + self.assertEqual(pivoted.columns.names, (None, 'columns')) # pivot multiple columns wp = tm.makePanel() diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 19f15e44dc096..9e7cdf9df2c6b 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1283,13 +1283,13 @@ def desc3(group): return result result = grouped.apply(desc) - self.assertEquals(result.index.names, ['A', 'B', 'stat']) + self.assertEquals(result.index.names, ('A', 'B', 'stat')) result2 = grouped.apply(desc2) - self.assertEquals(result2.index.names, ['A', 'B', 'stat']) + self.assertEquals(result2.index.names, ('A', 'B', 'stat')) result3 = grouped.apply(desc3) - self.assertEquals(result3.index.names, ['A', 'B', None]) + self.assertEquals(result3.index.names, ('A', 'B', None)) def test_nonsense_func(self): df = DataFrame([0]) @@ -1519,7 +1519,7 @@ def f(piece): def test_apply_series_yield_constant(self): result = self.df.groupby(['A', 'B'])['C'].apply(len) - self.assertEquals(result.index.names[:2], ['A', 'B']) + self.assertEquals(result.index.names[:2], ('A', 'B')) def test_apply_frame_to_series(self): grouped = self.df.groupby(['A', 'B']) @@ -1836,7 +1836,7 @@ def test_groupby_series_with_name(self): result = self.df.groupby([self.df['A'], self.df['B']]).mean() result2 = self.df.groupby([self.df['A'], self.df['B']], as_index=False).mean() - self.assertEquals(result.index.names, ['A', 'B']) + self.assertEquals(result.index.names, ('A', 'B')) self.assert_('A' in result2) self.assert_('B' in result2) @@ -2332,7 +2332,7 @@ def test_no_dummy_key_names(self): result = self.df.groupby([self.df['A'].values, self.df['B'].values]).sum() - self.assert_(result.index.names == [None, None]) + self.assert_(result.index.names == (None, None)) def test_groupby_categorical(self): levels = ['foo', 'bar', 'baz', 'qux'] diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index cc069a4da31e3..a5f98107895a5 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -11,8 +11,11 @@ import numpy as np from numpy.testing import assert_array_equal -from pandas.core.index import Index, Int64Index, MultiIndex -from pandas.util.testing import assert_almost_equal +from pandas.core.index import Index, Int64Index, MultiIndex, InvalidIndexError +from pandas.core.frame import DataFrame +from pandas.core.series import Series +from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, + assert_copy) from pandas import compat import pandas.util.testing as tm @@ -37,6 +40,14 @@ def setUp(self): self.empty = Index([]) self.tuples = Index(lzip(['foo', 'bar', 'baz'], [1, 2, 3])) + def test_wrong_number_names(self): + def testit(ind): + ind.names = ["apple", "banana", "carrot"] + + indices = (self.dateIndex, self.unicodeIndex, self.strIndex, self.intIndex, self.floatIndex, self.empty, self.tuples) + for ind in indices: + assertRaisesRegexp(ValueError, "^Length", testit, ind) + def test_hash_error(self): self.assertRaises(TypeError, hash, self.strIndex) @@ -45,21 +56,28 @@ def 
test_new_axis(self): self.assert_(new_index.ndim == 2) tm.assert_isinstance(new_index, np.ndarray) - def test_deepcopy(self): - from copy import deepcopy + def test_copy_and_deepcopy(self): + from copy import copy, deepcopy + + for func in (copy, deepcopy): + idx_copy = func(self.strIndex) + self.assert_(idx_copy is not self.strIndex) + self.assert_(idx_copy.equals(self.strIndex)) - copy = deepcopy(self.strIndex) - self.assert_(copy is self.strIndex) + new_copy = self.strIndex.copy(deep=True, name="banana") + self.assertEqual(new_copy.name, "banana") + new_copy2 = self.intIndex.copy(dtype=int) + self.assertEqual(new_copy2.dtype.kind, 'i') def test_duplicates(self): idx = Index([0, 0, 0]) self.assert_(not idx.is_unique) def test_sort(self): - self.assertRaises(Exception, self.strIndex.sort) + self.assertRaises(TypeError, self.strIndex.sort) def test_mutability(self): - self.assertRaises(Exception, self.strIndex.__setitem__, 0, 'foo') + self.assertRaises(TypeError, self.strIndex.__setitem__, 0, 'foo') def test_constructor(self): # regular instance creation @@ -78,6 +96,8 @@ def test_constructor(self): tm.assert_isinstance(index, Index) self.assert_(index.name == 'name') assert_array_equal(arr, index) + arr[0] = "SOMEBIGLONGSTRING" + self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") # what to do here? # arr = np.array(5.) @@ -85,7 +105,7 @@ def test_constructor(self): def test_constructor_corner(self): # corner case - self.assertRaises(Exception, Index, 0) + self.assertRaises(TypeError, Index, 0) def test_index_ctor_infer_periodindex(self): from pandas import period_range, PeriodIndex @@ -219,7 +239,7 @@ def test_intersection(self): self.assert_(inter is first) # non-iterable input - self.assertRaises(Exception, first.intersection, 0.5) + assertRaisesRegexp(TypeError, "iterable", first.intersection, 0.5) def test_union(self): first = self.strIndex[5:20] @@ -239,7 +259,7 @@ def test_union(self): self.assert_(union is first) # non-iterable input - self.assertRaises(Exception, first.union, 0.5) + assertRaisesRegexp(TypeError, "iterable", first.union, 0.5) # preserve names first.name = 'A' @@ -325,7 +345,7 @@ def test_diff(self): self.assertEqual(result.name, first.name) # non-iterable input - self.assertRaises(Exception, first.diff, 0.5) + assertRaisesRegexp(TypeError, "iterable", first.diff, 0.5) def test_pickle(self): def testit(index): @@ -456,7 +476,7 @@ def test_slice_locs_dup(self): rs = idx.slice_locs('a', 'd') self.assert_(rs == (0, 6)) - rs2 = idx.slice_locs(end='d') + rs = idx.slice_locs(end='d') self.assert_(rs == (0, 6)) rs = idx.slice_locs('a', 'c') @@ -487,11 +507,10 @@ def test_tuple_union_bug(self): import pandas import numpy as np - aidx1 = np.array( - [(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], dtype=[('num', - int), ('let', 'a1')]) + aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], + dtype=[('num', int), ('let', 'a1')]) aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B'), (1, 'C'), (2, - 'C')], dtype=[('num', int), ('let', 'a1')]) + 'C')], dtype=[('num', int), ('let', 'a1')]) idx1 = pandas.Index(aidx1) idx2 = pandas.Index(aidx2) @@ -571,6 +590,11 @@ class TestInt64Index(unittest.TestCase): def setUp(self): self.index = Int64Index(np.arange(0, 20, 2)) + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) + def test_constructor(self): # pass list, coerce fine index = Int64Index([-5, 0, 1, 2]) @@ -584,6 +608,15 @@ def test_constructor(self): # scalar raise Exception 
self.assertRaises(ValueError, Int64Index, 5) + # copy + arr = self.index.values + new_index = Int64Index(arr, copy=True) + self.assert_(np.array_equal(new_index, self.index)) + val = arr[0] + 3000 + # this should not change index + arr[0] = val + self.assertNotEqual(new_index[0], val) + def test_constructor_corner(self): arr = np.array([1, 2, 3, 4], dtype=object) index = Int64Index(arr) @@ -917,7 +950,7 @@ def test_print_unicode_columns(self): repr(df.columns) # should not raise UnicodeDecodeError def test_repr_summary(self): - with cf.option_context('display.max_seq_items',10): + with cf.option_context('display.max_seq_items', 10): r = repr(pd.Index(np.arange(1000))) self.assertTrue(len(r) < 100) self.assertTrue("..." in r) @@ -951,10 +984,81 @@ def setUp(self): major_labels = np.array([0, 0, 1, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 1, 0, 1]) - + self.index_names = ['first', 'second'] self.index = MultiIndex(levels=[major_axis, minor_axis], labels=[major_labels, minor_labels], - names=['first', 'second']) + names=self.index_names) + + def test_copy_in_constructor(self): + levels = np.array(["a", "b", "c"]) + labels = np.array([1, 1, 2, 0, 0, 1, 1]) + val = labels[0] + mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], + copy=True) + self.assertEqual(mi.labels[0][0], val) + labels[0] = 15 + self.assertEqual(mi.labels[0][0], val) + val = levels[0] + levels[0] = "PANDA" + self.assertEqual(mi.levels[0][0], val) + + def test_set_value_keeps_names(self): + # motivating example from #3742 + lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] + lev2 = ['1', '2', '3'] * 2 + idx = pd.MultiIndex.from_arrays( + [lev1, lev2], + names=['Name', 'Number']) + df = pd.DataFrame( + np.random.randn(6, 4), + columns=['one', 'two', 'three', 'four'], + index=idx) + df = df.sortlevel() + self.assertEqual(df.index.names, ('Name', 'Number')) + df = df.set_value(('grethe', '4'), 'one', 99.34) + self.assertEqual(df.index.names, ('Name', 'Number')) + + def test_names(self): + + # names are assigned in __init__ + names = self.index_names + level_names = [level.name for level in self.index.levels] + self.assertEqual(names, level_names) + + # setting bad names on existing + index = self.index + assertRaisesRegexp(ValueError, "^Length of names", setattr, index, + "names", list(index.names) + ["third"]) + assertRaisesRegexp(ValueError, "^Length of names", setattr, index, + "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = self.index.levels + major_labels, minor_labels = self.index.labels + assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) + + # names are assigned + index.names = ["a", "b"] + ind_names = list(index.names) + level_names = [level.name for level in index.levels] + self.assertEqual(ind_names, level_names) + + def test_astype(self): + expected = self.index.copy() + actual = self.index.astype('O') + assert_copy(actual.levels, expected.levels) + assert_copy(actual.labels, expected.labels) + self.check_level_names(actual, expected.names) + + assertRaisesRegexp(TypeError, "^Setting.*dtype.*object", self.index.astype, np.dtype(int)) + def test_constructor_single_level(self): single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], @@ -969,63 
+1073,87 @@ def test_constructor_single_level(self): self.assert_(single_level.name is None) def test_constructor_no_levels(self): - self.assertRaises(Exception, MultiIndex, levels=[], labels=[]) + assertRaisesRegexp(TypeError, "non-zero number of levels/labels", + MultiIndex, levels=[], labels=[]) - def test_copy(self): - i_copy = self.index.copy() + def test_constructor_mismatched_label_levels(self): + levels = [np.array([1]), np.array([2]), np.array([3])] + labels = ["a"] + assertRaisesRegexp(ValueError, "Length of levels and labels must be" + " the same", MultiIndex, levels=levels, + labels=labels) - # Equal...but not the same object - self.assert_(i_copy.levels == self.index.levels) - self.assert_(i_copy.levels is not self.index.levels) + def assert_multiindex_copied(self, copy, original): + # levels shoudl be (at least, shallow copied) + assert_copy(copy.levels, original.levels) - self.assert_(i_copy.labels == self.index.labels) - self.assert_(i_copy.labels is not self.index.labels) + assert_almost_equal(copy.labels, original.labels) - self.assert_(i_copy.names == self.index.names) - self.assert_(i_copy.names is not self.index.names) + # labels doesn't matter which way copied + assert_almost_equal(copy.labels, original.labels) + self.assert_(copy.labels is not original.labels) - self.assert_(i_copy.sortorder == self.index.sortorder) + # names doesn't matter which way copied + self.assertEqual(copy.names, original.names) + self.assert_(copy.names is not original.names) - def test_shallow_copy(self): - i_copy = self.index._shallow_copy() + # sort order should be copied + self.assertEqual(copy.sortorder, original.sortorder) - # Equal...but not the same object - self.assert_(i_copy.levels == self.index.levels) - self.assert_(i_copy.levels is not self.index.levels) + def test_copy(self): + i_copy = self.index.copy() + + self.assert_multiindex_copied(i_copy, self.index) - self.assert_(i_copy.labels == self.index.labels) - self.assert_(i_copy.labels is not self.index.labels) - self.assert_(i_copy.names == self.index.names) - self.assert_(i_copy.names is not self.index.names) + def test_shallow_copy(self): + i_copy = self.index._shallow_copy() - self.assert_(i_copy.sortorder == self.index.sortorder) + self.assert_multiindex_copied(i_copy, self.index) def test_view(self): i_view = self.index.view() - # Equal...but not the same object - self.assert_(i_view.levels == self.index.levels) - self.assert_(i_view.levels is not self.index.levels) + self.assert_multiindex_copied(i_view, self.index) + + def check_level_names(self, index, names): + self.assertEqual([level.name for level in index.levels], list(names)) + + def test_changing_names(self): + # names should be applied to levels + level_names = [level.name for level in self.index.levels] + self.check_level_names(self.index, self.index.names) + + view = self.index.view() + copy = self.index.copy() + shallow_copy = self.index._shallow_copy() + + # changing names should change level names on object + new_names = [name + "a" for name in self.index.names] + self.index.names = new_names + self.check_level_names(self.index, new_names) - self.assert_(i_view.labels == self.index.labels) - self.assert_(i_view.labels is not self.index.labels) + # but not on copies + self.check_level_names(view, level_names) + self.check_level_names(copy, level_names) + self.check_level_names(shallow_copy, level_names) - self.assert_(i_view.names == self.index.names) - self.assert_(i_view.names is not self.index.names) - self.assert_(i_view.sortorder == 
+        # and copies shouldn't change original
+        shallow_copy.names = [name + "c" for name in shallow_copy.names]
+        self.check_level_names(self.index, new_names)
 
     def test_duplicate_names(self):
         self.index.names = ['foo', 'foo']
-        self.assertRaises(Exception, self.index._get_level_number, 'foo')
+        assertRaisesRegexp(KeyError, 'Level foo not found',
+                           self.index._get_level_number, 'foo')
 
     def test_get_level_number_integer(self):
         self.index.names = [1, 0]
         self.assertEqual(self.index._get_level_number(1), 0)
         self.assertEqual(self.index._get_level_number(0), 1)
-        self.assertRaises(Exception, self.index._get_level_number, 2)
-
-        self.assertRaises(Exception, self.index._get_level_number, 'fourth')
+        self.assertRaises(IndexError, self.index._get_level_number, 2)
+        assertRaisesRegexp(KeyError, 'Level fourth not found',
+                           self.index._get_level_number, 'fourth')
 
     def test_from_arrays(self):
         arrays = []
@@ -1060,8 +1188,8 @@ def test_get_level_values(self):
 
     def test_reorder_levels(self):
         # this blows up
-        self.assertRaises(Exception, self.index.reorder_levels,
-                          [2, 1, 0])
+        assertRaisesRegexp(IndexError, '^Too many levels',
+                           self.index.reorder_levels, [2, 1, 0])
 
     def test_nlevels(self):
         self.assertEquals(self.index.nlevels, 2)
@@ -1234,6 +1362,22 @@ def test_slice_locs(self):
         expected = df[6:15].stack()
         tm.assert_almost_equal(sliced.values, expected.values)
 
+    def test_slice_locs_with_type_mismatch(self):
+        df = tm.makeTimeDataFrame()
+        stacked = df.stack()
+        idx = stacked.index
+        assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs,
+                           (1, 3))
+        assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs,
+                           df.index[5] + timedelta(seconds=30), (5, 2))
+        df = tm.makeCustomDataframe(5, 5)
+        stacked = df.stack()
+        idx = stacked.index
+        assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, timedelta(seconds=30))
+        # TODO: Try creating a UnicodeDecodeError in exception message
+        assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs,
+                           df.index[1], (16, "a"))
+
     def test_slice_locs_not_sorted(self):
         index = MultiIndex(levels=[Index(lrange(4)),
                                    Index(lrange(4)),
@@ -1242,12 +1386,14 @@ def test_slice_locs_not_sorted(self):
                                    np.array([0, 1, 0, 0, 0, 1, 0, 1]),
                                    np.array([1, 0, 1, 1, 0, 0, 1, 0])])
 
-        self.assertRaises(Exception, index.slice_locs, (1, 0, 1),
-                          (2, 1, 0))
+        assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex"
+                           " lexsort depth", index.slice_locs, (1, 0, 1),
+                           (2, 1, 0))
 
         # works
         sorted_index, _ = index.sortlevel(0)
-        result = sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
+        # should there be a test case here???
+        sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
 
     def test_slice_locs_partial(self):
         sorted_idx, _ = self.index.sortlevel(0)
@@ -1369,6 +1515,12 @@ def test_get_indexer(self):
         r1 = idx1.get_indexer([1, 2, 3])
         self.assert_((r1 == [-1, -1, -1]).all())
 
+        # create index with duplicates
+        idx1 = Index(lrange(10) + lrange(10))
+        idx2 = Index(lrange(20))
+        assertRaisesRegexp(InvalidIndexError, "Reindexing only valid with"
+                           " uniquely valued Index objects",
+                           idx1.get_indexer, idx2)
 
     def test_format(self):
         self.index.format()
@@ -1543,7 +1695,7 @@ def test_diff(self):
         chunklet = self.index[-3:]
         chunklet.names = ['foo', 'baz']
         result = first - chunklet
-        self.assertEqual(result.names, [None, None])
+        self.assertEqual(result.names, (None, None))
 
         # empty, but non-equal
         result = self.index - self.index.sortlevel(1)[0]
@@ -1560,13 +1712,17 @@ def test_diff(self):
 
         # name from non-empty array
        result = first.diff([('foo', 'one')])
-        expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ('foo', 'two'),
-                                              ('qux', 'one'), ('qux', 'two')])
+        expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'),
+                                              ('foo', 'two'), ('qux', 'one'),
+                                              ('qux', 'two')])
         expected.names = first.names
         self.assertEqual(first.names, result.names)
+        assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list"
+                           " of tuples", first.diff, [1,2,3,4,5])
 
     def test_from_tuples(self):
-        self.assertRaises(Exception, MultiIndex.from_tuples, [])
+        assertRaisesRegexp(TypeError, 'Cannot infer number of levels from'
+                           ' empty list', MultiIndex.from_tuples, [])
         idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
         self.assertEquals(len(idx), 2)
@@ -1638,8 +1794,8 @@ def test_drop(self):
         self.assert_(dropped.equals(expected))
 
         index = MultiIndex.from_tuples([('bar', 'two')])
-        self.assertRaises(Exception, self.index.drop, [('bar', 'two')])
-        self.assertRaises(Exception, self.index.drop, index)
+        self.assertRaises(KeyError, self.index.drop, [('bar', 'two')])
+        self.assertRaises(KeyError, self.index.drop, index)
 
         # mixed partial / full drop
         dropped = self.index.drop(['foo', ('qux', 'one')])
@@ -1659,7 +1815,7 @@ def test_droplevel_with_names(self):
                                    np.array([1, 0, 1, 1, 0, 0, 1, 0])],
                            names=['one', 'two', 'three'])
         dropped = index.droplevel(0)
-        self.assertEqual(dropped.names, ['two', 'three'])
+        self.assertEqual(dropped.names, ('two', 'three'))
 
         dropped = index.droplevel('two')
         expected = index.droplevel(1)
@@ -1693,7 +1849,8 @@ def test_insert(self):
         self.assert_(new_index[0] == ('abc', 'three'))
 
         # key wrong length
-        self.assertRaises(Exception, self.index.insert, 0, ('foo2',))
+        assertRaisesRegexp(ValueError, "Item must have length equal to number"
+                           " of levels", self.index.insert, 0, ('foo2',))
 
     def test_take_preserve_name(self):
         taken = self.index.take([3, 0, 1])
@@ -1740,7 +1897,8 @@ def _check_all(other):
         result = idx.join(self.index, level='second')
         tm.assert_isinstance(result, MultiIndex)
 
-        self.assertRaises(Exception, self.index.join, self.index, level=1)
+        assertRaisesRegexp(TypeError, "Join.*MultiIndex.*ambiguous",
+                           self.index.join, self.index, level=1)
 
     def test_join_self(self):
         kinds = 'outer', 'inner', 'left', 'right'
@@ -1752,10 +1910,12 @@ def test_join_self(self):
     def test_reindex(self):
         result, indexer = self.index.reindex(list(self.index[:4]))
         tm.assert_isinstance(result, MultiIndex)
+        self.check_level_names(result, self.index[:4].names)
 
         result, indexer = self.index.reindex(list(self.index))
         tm.assert_isinstance(result, MultiIndex)
         self.assert_(indexer is None)
+        self.check_level_names(result, self.index.names)
 
     def test_reindex_level(self):
         idx = Index(['one'])
@@ -1774,11 +1934,12 @@ def test_reindex_level(self):
         exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
         self.assert_(np.array_equal(indexer2, exp_indexer2))
 
-        self.assertRaises(ValueError, self.index.reindex,
-                          self.index, method='pad', level='second')
+        assertRaisesRegexp(TypeError, "Fill method not supported",
+                           self.index.reindex, self.index, method='pad',
+                           level='second')
 
-        self.assertRaises(ValueError, idx.reindex,
-                          idx, method='bfill', level='first')
+        assertRaisesRegexp(TypeError, "Fill method not supported",
+                           idx.reindex, idx, method='bfill', level='first')
 
     def test_has_duplicates(self):
         self.assert_(not self.index.has_duplicates)
@@ -1828,7 +1989,6 @@ def test_get_combined_index():
     result = _get_combined_index([])
     assert(result.equals(Index([])))
 
-
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 7379bf5d148dc..c903af1860421 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -55,9 +55,11 @@ def setUp(self):
                                               lambda x: x.day]).sum()
 
         # use Int64Index, to make sure things work
-        self.ymd.index.levels = [lev.astype('i8')
-                                 for lev in self.ymd.index.levels]
-        self.ymd.index.names = ['year', 'month', 'day']
+        self.ymd.index.set_levels([lev.astype('i8')
+                                   for lev in self.ymd.index.levels],
+                                  inplace=True)
+        self.ymd.index.set_names(['year', 'month', 'day'],
+                                 inplace=True)
 
     def test_append(self):
         a, b = self.frame[:5], self.frame[5:]
@@ -1667,7 +1669,7 @@ def test_drop_preserve_names(self):
         df = DataFrame(np.random.randn(6, 3), index=index)
 
         result = df.drop([(0, 2)])
-        self.assert_(result.index.names == ['one', 'two'])
+        self.assert_(result.index.names == ('one', 'two'))
 
     def test_unicode_repr_issues(self):
         levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]),
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 3d2a5f2e58ded..c5f9f962f4646 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -1227,7 +1227,7 @@ def test_to_frame(self):
         assert_panel_equal(unfiltered.to_panel(), self.panel)
 
         # names
-        self.assertEqual(unfiltered.index.names, ['major', 'minor'])
+        self.assertEqual(unfiltered.index.names, ('major', 'minor'))
 
         # unsorted, round trip
         df = self.panel.to_frame(filter_observations=False)
@@ -1255,7 +1255,8 @@ def test_to_frame_mixed(self):
         lp = panel.to_frame()
         wp = lp.to_panel()
         self.assertEqual(wp['bool'].values.dtype, np.bool_)
-        assert_frame_equal(wp['bool'], panel['bool'])
+        # Previously, this was mutating the underlying index and changing its name
+        assert_frame_equal(wp['bool'], panel['bool'], check_names=False)
 
     def test_to_panel_na_handling(self):
         df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)),
diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py
index 3c6ab18126e8f..eddddb42b680e 100644
--- a/pandas/tests/test_panel4d.py
+++ b/pandas/tests/test_panel4d.py
@@ -898,7 +898,7 @@ def test_to_frame(self):
         # assert_panel_equal(unfiltered.to_panel(), self.panel)
 
         # # names
-        # self.assertEqual(unfiltered.index.names, ['major', 'minor'])
+        # self.assertEqual(unfiltered.index.names, ('major', 'minor'))
 
     def test_to_frame_mixed(self):
         raise nose.SkipTest
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
index effcc3ff7695f..9f497e50df802 100644
--- a/pandas/tools/pivot.py
+++ b/pandas/tools/pivot.py
@@ -217,7 +217,7 @@ def _all_key(key):
         row_names = result.index.names
         result = result.append(margin_dummy)
-        result.index.names = row_names
+        result.index = result.index.set_names(row_names)
 
     return result
diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py
index 1008e23c3ebcd..67adc6bf8e7f2 100644
--- a/pandas/tools/tests/test_merge.py
+++ b/pandas/tools/tests/test_merge.py
@@ -1299,7 +1299,7 @@ def test_concat_multiindex_with_keys(self):
                           columns=Index(['A', 'B', 'C'], name='exp'))
         result = concat([frame, frame], keys=[0, 1], names=['iteration'])
 
-        self.assertEqual(result.index.names, ['iteration'] + index.names)
+        self.assertEqual(result.index.names, ('iteration',) + index.names)
         tm.assert_frame_equal(result.ix[0], frame)
         tm.assert_frame_equal(result.ix[1], frame)
         self.assertEqual(result.index.nlevels, 3)
@@ -1330,14 +1330,14 @@ def test_concat_keys_and_levels(self):
                         keys=[('foo', 'one'), ('foo', 'two'),
                               ('baz', 'one'), ('baz', 'two')],
                         levels=levels)
-        self.assertEqual(result.index.names, [None] * 3)
+        self.assertEqual(result.index.names, (None,) * 3)
 
         # no levels
         result = concat([df, df2, df, df2],
                         keys=[('foo', 'one'), ('foo', 'two'),
                               ('baz', 'one'), ('baz', 'two')],
                         names=['first', 'second'])
-        self.assertEqual(result.index.names, ['first', 'second'] + [None])
+        self.assertEqual(result.index.names, ('first', 'second') + (None,))
         self.assert_(np.array_equal(result.index.levels[0], ['baz', 'foo']))
 
     def test_concat_keys_levels_no_overlap(self):
@@ -1363,7 +1363,9 @@ def test_concat_rename_index(self):
                     names=['lvl0', 'lvl1'])
 
         exp = concat([a, b], keys=['key0', 'key1'], names=['lvl0'])
-        exp.index.names[1] = 'lvl1'
+        names = list(exp.index.names)
+        names[1] = 'lvl1'
+        exp.index.set_names(names, inplace=True)
 
         tm.assert_frame_equal(result, exp)
         self.assertEqual(result.index.names, exp.index.names)
@@ -1391,7 +1393,7 @@ def test_crossed_dtypes_weird_corner(self):
         df2 = DataFrame(np.random.randn(1, 4), index=['b'])
         result = concat(
             [df, df2], keys=['one', 'two'], names=['first', 'second'])
-        self.assertEqual(result.index.names, ['first', 'second'])
+        self.assertEqual(result.index.names, ('first', 'second'))
 
     def test_handle_empty_objects(self):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
index 57e7d2f7f6ae9..1718648f81157 100644
--- a/pandas/tools/tests/test_pivot.py
+++ b/pandas/tools/tests/test_pivot.py
@@ -42,7 +42,7 @@ def test_pivot_table(self):
             pivot_table(self.data, values='D', rows=rows)
 
             if len(rows) > 1:
-                self.assertEqual(table.index.names, rows)
+                self.assertEqual(table.index.names, tuple(rows))
             else:
                 self.assertEqual(table.index.name, rows[0])
@@ -365,7 +365,7 @@ def test_crosstab_margins(self):
         result = crosstab(a, [b, c], rownames=['a'],
                           colnames=('b', 'c'), margins=True)
 
-        self.assertEqual(result.index.names, ['a'])
+        self.assertEqual(result.index.names, ('a',))
         self.assertEqual(result.columns.names, ['b', 'c'])
 
         all_cols = result['All', '']
diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py
index 8c6744cbf2963..d83b1eb778763 100644
--- a/pandas/util/decorators.py
+++ b/pandas/util/decorators.py
@@ -4,8 +4,8 @@
 import warnings
 
 
-def deprecate(name, alternative):
-    alt_name = alternative.__name__
+def deprecate(name, alternative, alt_name=None):
+    alt_name = alt_name or alternative.__name__
 
     def wrapper(*args, **kwargs):
         warnings.warn("%s is deprecated. Use %s instead" % (name, alt_name),
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 82fdf45265e78..8af88895a8b73 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -176,7 +176,9 @@ def assert_almost_equal(a, b, check_less_precise=False):
         np.testing.assert_(isiterable(b))
         na, nb = len(a), len(b)
         assert na == nb, "%s != %s" % (na, nb)
-
+        # TODO: Figure out why I thought this needed instance checks...
+        # if (isinstance(a, np.ndarray) and isinstance(b, np.ndarray) and
+        #         np.array_equal(a, b)):
         if np.array_equal(a, b):
             return True
         else:
@@ -321,6 +323,18 @@ def assert_contains_all(iterable, dic):
     for k in iterable:
         assert k in dic, "Did not contain item: '%r'" % k
 
+
+def assert_copy(iter1, iter2, **eql_kwargs):
+    """
+    iter1, iter2: iterables that produce elements comparable with assert_almost_equal
+
+    Checks that the elements are equal, but not the same object. (Does not
+    check that items in sequences are also not the same object)
+    """
+    for elem1, elem2 in zip(iter1, iter2):
+        assert_almost_equal(elem1, elem2, **eql_kwargs)
+        assert elem1 is not elem2, "Expected object %r and object %r to be different objects, were same." % (
+            type(elem1), type(elem2))
+
 def getCols(k):
     return string.ascii_uppercase[:k]