diff --git a/pandas/core/api.py b/pandas/core/api.py index fde9bc77c4bd9..103fe740cfa36 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,7 @@ from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.core.format import set_eng_float_format -from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex +from pandas.core.index import Index, CategoricalIndex, Int64Index, RangeIndex, Float64Index, MultiIndex from pandas.core.series import Series, TimeSeries from pandas.core.frame import DataFrame diff --git a/pandas/core/common.py b/pandas/core/common.py index 3d23aeff942dc..b888b8721a24d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -86,6 +86,7 @@ def _check(cls, inst): ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",)) ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", ("index", "int64index", + "rangeindex", "float64index", "multiindex", "datetimeindex", @@ -2142,10 +2143,8 @@ def is_bool_indexer(key): def _default_index(n): - from pandas.core.index import Int64Index - values = np.arange(n, dtype=np.int64) - result = Int64Index(values,name=None) - result.is_unique = True + from pandas.core.index import RangeIndex + result = RangeIndex(0, int(n), name=None) return result @@ -2498,6 +2497,11 @@ def is_integer_dtype(arr_or_dtype): not issubclass(tipo, (np.datetime64, np.timedelta64))) +def is_int64_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.int64) + + def is_int_or_datetime_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.integer) or diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 272c401c18761..aa1bcd7dc182a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4826,7 +4826,7 @@ def extract_index(data): % (lengths[0], len(index))) raise ValueError(msg) else: - index = 
Index(np.arange(lengths[0])) + index = _default_index(lengths[0]) return _ensure_index(index) @@ -5043,11 +5043,11 @@ def convert(arr): def _get_names_from_index(data): - index = lrange(len(data)) has_some_name = any([getattr(s, 'name', None) is not None for s in data]) if not has_some_name: - return index + return _default_index(len(data)) + index = lrange(len(data)) count = 0 for i, s in enumerate(data): n = getattr(s, 'name', None) diff --git a/pandas/core/index.py b/pandas/core/index.py index 8b650fea9b440..387bd2c0c98e0 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -7,6 +7,7 @@ from pandas.compat import range, zip, lrange, lzip, u, reduce, filter, map from pandas import compat import numpy as np +from math import ceil, floor from sys import getsizeof import pandas.tslib as tslib @@ -21,7 +22,7 @@ from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, is_object_dtype, _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype, ABCSeries, ABCCategorical, _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype) + is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype, is_int64_dtype) from pandas.core.config import get_option from pandas.io.common import PerformanceWarning @@ -111,9 +112,23 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, if fastpath: return cls._simple_new(data, name) + if isinstance(data, int) and isinstance(dtype, int): + if copy == False: + copy = None + range_constructor = True + elif isinstance(copy, int): + range_constructor = True + + if range_constructor: + return RangeIndex(data, dtype, copy, name) + from pandas.tseries.period import PeriodIndex if isinstance(data, (np.ndarray, Index, ABCSeries)): - if issubclass(data.dtype.type, np.datetime64): + if (isinstance(data, RangeIndex) and + (dtype is None or is_int64_dtype(dtype))): + # copy passed-in RangeIndex + return data.copy(name=name) + 
elif issubclass(data.dtype.type, np.datetime64): from pandas.tseries.index import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: @@ -230,7 +245,7 @@ def is_(self, other): True if both have same underlying data, False otherwise : bool """ # use something other than None to be clearer - return self._id is getattr(other, '_id', Ellipsis) + return self._id is getattr(other, '_id', Ellipsis) and self._id is not None def _reset_identity(self): """Initializes or resets ``_id`` attribute with new object""" @@ -1735,7 +1750,9 @@ def reindex(self, target, method=None, level=None, limit=None): # GH7774: preserve dtype/tz if target is empty and not an Index. target = _ensure_has_len(target) # target may be an iterator - if not isinstance(target, Index) and len(target) == 0: + if isinstance(self, RangeIndex) and len(target) == 0: + target = self._simple_new(0, 0, 1, name=self.name) + elif not isinstance(target, Index) and len(target) == 0: attrs = self._get_attributes_dict() attrs.pop('freq', None) # don't preserve freq target = self._simple_new(np.empty(0, dtype=self.dtype), **attrs) @@ -3299,6 +3316,411 @@ def _wrap_joined_index(self, joined, other): Int64Index._add_logical_methods() +class RangeIndex(Int64Index): + + """ + Immutable Index implementing a monotonic range. RangeIndex is a + memory-saving special case of `Int64Index` limited to representing + monotonic ranges. 
+ + Parameters + ---------- + start : int (default: 0) + stop : int (default: 0) + step : int (default: 1) + name : object, optional + Name to be stored in the index + """ + + _typ = 'rangeindex' + _engine_type = _index.Int64Engine + _attributes = ['name', 'start', 'stop', 'step'] + + def __new__(cls, start=None, stop=None, step=None, name=None, fastpath=False, **kwargs): + if fastpath: + return cls._simple_new(start, stop, step, name=name) + + # cheap check for array input + if len(kwargs) > 0: + return cls._data_passthrough(start, stop, step, name, fastpath, **kwargs) + + # RangeIndex() constructor + if start is None and stop is None and step is None: + return cls._simple_new(0, 0, 1, name=name) + + new_start, new_stop, new_step = None, None, None + # sort the arguments depending on which are provided + if step is None: + new_step = 1 + if stop is None: + new_stop = start + new_start = 0 + + try: + # check validity of inputs + new_start = start if new_start is None else new_start + new_stop = stop if new_stop is None else new_stop + new_step = step if new_step is None else new_step + new_start = cls._ensure_int(new_start) + new_stop = cls._ensure_int(new_stop) + new_step = cls._ensure_int(new_step) + if new_step == 0: + raise ValueError("Step must not be zero") + return cls._simple_new(new_start, new_stop, new_step, name) + except TypeError: + # pass all invalid inputs to Int64Index to handle + return cls._data_passthrough(start, stop, step, name, fastpath, **kwargs) + + @classmethod + def _simple_new(cls, start, stop, step, name=None): + result = object.__new__(cls) + result._start = start + result._stop = stop + result._step = step + result.name = name + return result + + @classmethod + def _data_passthrough(cls, data, dtype, copy, name, fastpath, **kwargs): + kwargs.setdefault('data', data) + kwargs.setdefault('dtype', dtype) + if copy is not None: + kwargs.setdefault('copy', copy) + kwargs.setdefault('name', name) + kwargs.setdefault('fastpath', fastpath) + 
return Int64Index(**kwargs) + + @classmethod + def _ensure_int(cls, value): + try: + int_value = int(value) + # don't allow casting 1-element arrays to int! + if int_value != value or hasattr(value, '__len__'): + raise Exception + except Exception: + raise TypeError("Need to pass integral values") + return int_value + + @cache_readonly + def _data(self): + return np.arange(self.start, self.stop, self.step, dtype=np.int64) + + @cache_readonly + def _int64index(self): + return Int64Index(self._data, name=self.name, fastpath=True) + + @property + def dtype(self): + return np.dtype(np.int64) + + @property + def start(self): + return self._start + + @property + def stop(self): + return self._stop + + @property + def step(self): + return self._step + + @property + def is_unique(self): + """ return if the index has unique values """ + return True + + @property + def has_duplicates(self): + return False + + def tolist(self): + return lrange(self.start, self.stop, self.step) + + def _shallow_copy(self, values=None, **kwargs): + """ create a new Index, don't copy the data, use the same object attributes + with passed in attributes taking precedence """ + if values is None: + return RangeIndex(self.start, self.stop, self.step, + name=self.name, fastpath=True) + else: + kwargs.setdefault('name', self.name) + return self._int64index._shallow_copy(values, **kwargs) + + def copy(self, names=None, name=None, dtype=None, deep=False): + """ + Make a copy of this object. Name and dtype sets those attributes on + the new object. + + Parameters + ---------- + name : string, optional + dtype : numpy dtype or pandas type + + Returns + ------- + copy : Index + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. 
+ """ + if dtype is not None and not is_int64_dtype(dtype): + return super(RangeIndex, self).copy(names, name, dtype, deep) + + if name is None: + name = self.name + return RangeIndex(self.start, self.stop, self.step, name, fastpath=True) + + def argsort(self, *args, **kwargs): + """ + return an ndarray indexer of the underlying data + + See also + -------- + numpy.ndarray.argsort + """ + if self.step > 0: + return np.arange(len(self)) + else: + return np.arange(len(self)-1, -1, -1) + + def __repr__(self): + attrs = [('start', default_pprint(self.start)), + ('stop', default_pprint(self.stop)), + ('step', default_pprint(self.step)), + ('name', default_pprint(self.name))] + + prepr = u(", ").join([u("%s=%s") % (k, v) + for k, v in attrs]) + res = u("%s(%s)") % (self.__class__.__name__, prepr) + + if not compat.PY3: + # needs to be str in Python 2 + encoding = get_option('display.encoding') + res = res.encode(encoding) + return res + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + if self.start != 0 or self.step != 1: + start = u('%s, ') % default_pprint(self.start) + else: + start = u('') + stop = default_pprint(self.stop) + step = u('') if self.step == 1 else u(', %s') % default_pprint(self.step) + if self.name is None: + name = u('') + else: + name = u(', name=%s') % default_pprint(self.name) + + res = u("%s(%s%s%s%s)") % (self.__class__.__name__, + start, stop, step, name) + return res + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. 
+ """ + if isinstance(other, RangeIndex): + return (len(self) == len(other) == 0 + or (self.start == other.start and + self.stop == other.stop and + self.step == other.step) + ) + + return super(RangeIndex, self).equals(other) + + def __reduce__(self): + d = self._get_attributes_dict() + return _new_Index, (self.__class__, d), None + + def view(self, cls=None): + if cls is None or hasattr(cls,'_typ') or is_int64_dtype(cls): + result = self._shallow_copy() + else: + result = self._data.view(cls) + if isinstance(result, Index): + result._id = self._id + return result + + def intersection(self, other): + """ + Form the intersection of two Index objects. Sortedness of the result is + not guaranteed + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + intersection : Index + """ + if not isinstance(other, RangeIndex): + return super(RangeIndex, self).intersection(other) + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(min(self.start, self.stop+1), + min(other.start, other.stop+1)) + int_high = min(max(self.stop, self.start+1), + max(other.stop, other.start+1)) + if int_high <= int_low: + return RangeIndex() + + ### Method hint: linear Diophantine equation + # solve intersection problem + # performance hint: for identical step sizes, could use cheaper alternative + gcd, s, t = self._extended_gcd(self.step, other.step) + + # check whether element sets intersect + if (self.start - other.start) % gcd: + return RangeIndex() + + # calculate parameters for the RangeIndex describing the intersection + # disregarding the lower bounds + tmp_start = self.start + (other.start-self.start)*self.step//gcd*s + new_step = self.step * other.step // gcd + new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) + + # adjust index to limiting interval + new_index._start = new_index._min_fitting_element(int_low) + return new_index + + def _min_fitting_element(self, lower_limit): + """Returns the value of 
the smallest element greater than or equal to the limit""" + round = ceil if self.step > 0 else floor + no_steps = int(round( (float(lower_limit)-self.start) / self.step )) + return self.start + self.step * no_steps + + def _max_fitting_element(self, upper_limit): + """Returns the value of the largest element smaller than or equal to the limit""" + round = floor if self.step > 0 else ceil + no_steps = int(round( (float(upper_limit)-self.start) / self.step )) + return self.start + self.step * no_steps + + def _extended_gcd(self, a, b): + """ + Extended Euclidean algorithm to solve Bezout's identity: + a*x + b*y = gcd(a, b) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t = 1, 0 + r, old_r = b, a + while r: + quotient = old_r // r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def union(self, other): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + union : Index + """ + # note: could return a RangeIndex in some circumstances + return self._int64index.union(other) + + def join(self, other, how='left', level=None, return_indexers=False): + """ + *this is an internal non-public method* + + Compute join_index and indexers to conform data + structures to the new index. 
+ + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : boolean, default False + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + if how == 'outer' and self is not other: + # note: could return RangeIndex in more circumstances + return self._int64index.join(other, how, level, return_indexers) + + return super(RangeIndex, self).join(other, how, level, return_indexers) + + def _mul(self, other): + "__mul__() implementation" + try: + int_input = other == int(other) + if int_input: + other = int(other) + except Exception: + int_input = False + + if int_input == True and other != 0: + return RangeIndex(self.start*other, self.stop*other, self.step*other, + fastpath=True) + else: + return super(RangeIndex, self).__mul__(other) + + def __len__(self): + """ + return the length of the RangeIndex: ceil((stop - start) / step), but never less than zero + """ + return max(0, -((self.start - self.stop) // self.step)) + + @property + def size(self): + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. 
+ """ + super_getitem = super(RangeIndex, self).__getitem__ + + if np.isscalar(key): + n = int(key) + if n != key: + return super_getitem(key) + if n < 0: + n = len(self) + key + if n < 0 or n > len(self)-1: + raise IndexError('index %d is out of bounds for axis 0 with size %d' % (key, len(self))) + return self.start + n * self.step + + if isinstance(key, slice): + start, stop, step = key.indices(len(self)) + + # convert indexes to values + start = self.start + self.step * start + stop = self.start + self.step * stop + step = self.step * step + + return RangeIndex(start, stop, step, self.name, fastpath=True) + + # fall back to Int64Index + return super_getitem(key) + +RangeIndex._add_numeric_methods() +RangeIndex.__mul__ = RangeIndex.__rmul__ = RangeIndex._mul +RangeIndex._add_logical_methods() + + class Float64Index(NumericIndex): """ @@ -4098,9 +4520,13 @@ def get_level_values(self, level): unique = self.levels[num] # .values labels = self.labels[num] filled = com.take_1d(unique.values, labels, fill_value=unique._na_value) - values = unique._simple_new(filled, self.names[num], - freq=getattr(unique, 'freq', None), - tz=getattr(unique, 'tz', None)) + if isinstance(unique, RangeIndex): + _simple_new = Int64Index._simple_new + else: + _simple_new = unique._simple_new + values = _simple_new(filled, self.names[num], + freq=getattr(unique, 'freq', None), + tz=getattr(unique, 'tz', None)) return values def format(self, space=2, sparsify=None, adjoin=True, names=False, diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 75ca44fd1ef3e..33946a29a9dee 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -49,7 +49,7 @@ from pandas.compat import u, PY3 from pandas import ( Timestamp, Period, Series, DataFrame, Panel, Panel4D, - Index, MultiIndex, Int64Index, PeriodIndex, DatetimeIndex, Float64Index, + Index, MultiIndex, Int64Index, RangeIndex, PeriodIndex, DatetimeIndex, Float64Index, NaT ) from pandas.sparse.api import SparseSeries, SparseDataFrame, 
SparsePanel @@ -257,7 +257,14 @@ def encode(obj): tobj = type(obj) if isinstance(obj, Index): - if isinstance(obj, PeriodIndex): + if isinstance(obj, RangeIndex): + return {'typ': 'range_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'start': getattr(obj, 'start', None), + 'stop': getattr(obj, 'stop', None), + 'step': getattr(obj, 'step', None)} + elif isinstance(obj, PeriodIndex): return {'typ': 'period_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), @@ -447,6 +454,8 @@ def decode(obj): data = unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')) return globals()[obj['klass']](data, dtype=dtype, name=obj['name']) + elif typ == 'range_index': + return globals()[obj['klass']](obj['start'], obj['stop'], obj['step'], name=obj['name']) elif typ == 'multi_index': data = unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 03e7a8eae549d..236254856c09e 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -10,7 +10,7 @@ import pandas import pandas as pd -from pandas import (Series, DataFrame, Panel, MultiIndex, Categorical, bdate_range, +from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, TimedeltaIndex, isnull) from pandas.io.pytables import _tables @@ -1541,14 +1541,17 @@ def test_column_multiindex(self): index = MultiIndex.from_tuples([('A','a'), ('A','b'), ('B','a'), ('B','b')], names=['first','second']) df = DataFrame(np.arange(12).reshape(3,4), columns=index) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) with ensure_clean_store(self.path) as store: store.put('df',df) - tm.assert_frame_equal(store['df'],df,check_index_type=True,check_column_type=True) + 
tm.assert_frame_equal(store['df'],expected,check_index_type=True,check_column_type=True) store.put('df1',df,format='table') - tm.assert_frame_equal(store['df1'],df,check_index_type=True,check_column_type=True) + tm.assert_frame_equal(store['df1'],expected,check_index_type=True,check_column_type=True) self.assertRaises(ValueError, store.put, 'df2',df,format='table',data_columns=['A']) self.assertRaises(ValueError, store.put, 'df3',df,format='table',data_columns=True) @@ -1562,11 +1565,14 @@ def test_column_multiindex(self): # non_index_axes name df = DataFrame(np.arange(12).reshape(3,4), columns=Index(list('ABCD'),name='foo')) - + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) + with ensure_clean_store(self.path) as store: store.put('df1',df,format='table') - tm.assert_frame_equal(store['df1'],df,check_index_type=True,check_column_type=True) + tm.assert_frame_equal(store['df1'],expected,check_index_type=True,check_column_type=True) def test_store_multiindex(self): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 3c9dbd2e48cb6..a3ba08bae7d8c 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -13,7 +13,7 @@ from numpy.testing import assert_array_equal from pandas import (period_range, date_range, Categorical, Series, - Index, Float64Index, Int64Index, MultiIndex, + Index, Float64Index, Int64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) from pandas.core.index import InvalidIndexError, NumericIndex from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, @@ -2447,6 +2447,587 @@ def test_slice_keep_name(self): idx = Int64Index([1, 2], name='asdf') self.assertEqual(idx.name, idx[1:].name) +class TestRangeIndex(Numeric, tm.TestCase): + _holder = RangeIndex + ### what does the following do? 
+ #_multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index = RangeIndex(0, 20, 2)) + self.setup_indices() + + def create_index(self): + return RangeIndex(5) + + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) + + def test_constructor(self): + index = RangeIndex(5) + expected = np.arange(5, dtype=np.int64) + tm.assert_isinstance(index, RangeIndex) + self.assertEqual(index.start, 0) + self.assertEqual(index.stop, 5) + self.assertEqual(index.step, 1) + self.assertEqual(index.name, None) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(1, 5) + expected = np.arange(1, 5, dtype=np.int64) + tm.assert_isinstance(index, RangeIndex) + self.assertEqual(index.start, 1) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(1, 5, 2) + expected = np.arange(1, 5, 2, dtype=np.int64) + tm.assert_isinstance(index, RangeIndex) + self.assertEqual(index.step, 2) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex() + expected = np.empty(0, dtype=np.int64) + tm.assert_isinstance(index, RangeIndex) + self.assertEqual(index.start, 0) + self.assertEqual(index.stop, 0) + self.assertEqual(index.step, 1) + self.assert_numpy_array_equal(index, expected) + + index = RangeIndex(name='Foo') + tm.assert_isinstance(index, RangeIndex) + self.assertEqual(index.name, 'Foo') + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + self.assertEqual(index.values.dtype, np.int64) + self.assertTrue(index.equals(arr)) + + # non-int raise Exception + self.assertRaises(TypeError, RangeIndex, '1', '10', '1') + self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) + + def test_copy(self): + i = RangeIndex(5, name='Foo') + i_copy = i.copy() + self.assertTrue(i_copy is not i) + self.assertTrue(i_copy.identical(i)) + self.assertEqual(i_copy.start, 0) + self.assertEqual(i_copy.stop, 
5) + self.assertEqual(i_copy.step, 1) + self.assertEqual(i_copy.name, 'Foo') + + def test_view(self): + super(TestRangeIndex, self).test_view() + + i = RangeIndex(name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view('i8') + tm.assert_index_equal(i, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_index_constructor(self): + arr = Index(0, 5) + tm.assert_isinstance(arr, RangeIndex) + + def test_dtype(self): + self.assertEqual(self.index.dtype, np.int64) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = RangeIndex(4, 0, -1) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = RangeIndex(1, 2) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_equals(self): + same_values = Index(self.index, dtype=object) + self.assertTrue(self.index.equals(same_values)) + self.assertTrue(same_values.equals(self.index)) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(self.index.copy(dtype=object))) + + self.assertFalse(i.identical(self.index)) + self.assertTrue(Index(same_values, name='foo', dtype=object + ).identical(i)) + + self.assertFalse( + self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) + + def test_get_indexer(self): + target = 
RangeIndex(10) + indexer = self.index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + self.assert_numpy_array_equal(indexer, expected) + + def test_join_outer(self): + ### join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]) + elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.int64) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.int64) + + tm.assert_isinstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + tm.assert_isinstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + ### Join with non-RangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx 
= self.index.join(other, how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9]) + eridx = np.array([9, 7]) + + tm.assert_isinstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + + def test_join_left(self): + ### Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], + dtype=np.int64) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + ### Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + + tm.assert_isinstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + ### Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], + dtype=np.int64) + + tm.assert_isinstance(other, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + ### Join withRangeIndex + other = 
RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + + tm.assert_isinstance(other, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + def test_join_non_int_index(self): + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, + 16, 18], dtype=object) + self.assertTrue(outer.equals(outer2)) + self.assertTrue(outer.equals(expected)) + + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index([6, 8, 10], dtype=object) + self.assertTrue(inner.equals(inner2)) + self.assertTrue(inner.equals(expected)) + + left = self.index.join(other, how='left') + self.assertTrue(left.equals(self.index)) + + left2 = other.join(self.index, how='left') + self.assertTrue(left2.equals(other)) + + right = self.index.join(other, how='right') + self.assertTrue(right.equals(other)) + + right2 = other.join(self.index, how='right') + self.assertTrue(right2.equals(self.index)) + + def test_join_non_unique(self): + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = self.index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) + + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_intersection(self): + ### intersect with Int64Index + other = Index(np.arange(1, 6)) + result = self.index.intersection(other) + expected = 
np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + self.assert_numpy_array_equal(result, expected) + + ### intersect with increasing RangeIndex + other = Index(1, 6) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + ### intersect with decreasing RangeIndex + other = Index(5, 0, -1) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + self.assert_numpy_array_equal(result, expected) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + self.assert_numpy_array_equal(result, expected) + + def test_cant_or_shouldnt_cast(self): + # can't + self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + + # shouldn't + self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + + def test_view_Index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def 
test_int_name_format(self): + from pandas import Series, DataFrame + index = Index(0, 3, name=0) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) + repr(s) + repr(df) + + def test_print_unicode_columns(self): + df = pd.DataFrame( + {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_unicode_string_with_unicode(self): + idx = Index(0, 1000) + + if compat.PY3: + str(idx) + else: + compat.text_type(idx) + + def test_bytestring_with_unicode(self): + idx = Index(0, 1000) + if compat.PY3: + bytes(idx) + else: + str(idx) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_numeric_compat(self): + idx = RangeIndex(5) + didx = Index(np.arange(5,dtype='int64')**2) + + # note: special cases of the following could return RangeIndex + # see _mul() example + + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx * idx + tm.assert_index_equal(result, didx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5,dtype='int64') + tm.assert_index_equal(result, Index(np.arange(5,dtype='int64')*5)) + + result = idx * np.arange(5,dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5,dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5,dtype='float64')+0.1) + tm.assert_index_equal(result, + Float64Index(np.arange(5,dtype='float64')*(np.arange(5,dtype='float64')+0.1))) + + # invalid + self.assertRaises(TypeError, lambda : idx * date_range('20130101',periods=5)) + self.assertRaises(ValueError, lambda : idx * self._holder(3)) + self.assertRaises(ValueError, lambda : idx * 
np.array([1,2])) + + def test_explicit_conversions(self): + + # GH 8608 + # add/sub are overriden explicity for Float/Int Index + idx = RangeIndex(5) + + # float conversions + arr = np.arange(5,dtype='int64')*3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx,expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx,expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5,dtype='float64') + result = fidx - a + tm.assert_index_equal(result,expected) + + expected = Float64Index(-arr) + a = np.zeros(5,dtype='float64') + result = a - fidx + tm.assert_index_equal(result,expected) + + def test_duplicates(self): + for ind in self.indices: + if not len(ind): + continue + idx = self.indices[ind] + self.assertTrue(idx.is_unique) + self.assertFalse(idx.has_duplicates) + + def test_ufunc_compat(self): + idx = RangeIndex(5) + result = np.sin(idx) + expected = Float64Index(np.sin(np.arange(5,dtype='int64'))) + tm.assert_index_equal(result, expected) + + def test_extended_gcd(self): + result = self.index._extended_gcd(6, 10) + self.assertEqual(result[0], result[1]*6 + result[2]*10) + self.assertEqual(2, result[0]) + + result = self.index._extended_gcd(10, 6) + self.assertEqual(2, result[1]*10 + result[2]*6) + self.assertEqual(2, result[0]) + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(1, 6)._min_fitting_element(1) + self.assertEqual(1, result) + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + self.assertEqual(1, result) + + def test_max_fitting_element(self): + result = RangeIndex(0, 20, 2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(1, 6)._max_fitting_element(4) + self.assertEqual(4, result) + + result = RangeIndex(18, -2, -2)._max_fitting_element(17) + self.assertEqual(16, 
result) + + result = RangeIndex(5, 0, -1)._max_fitting_element(4) + self.assertEqual(4, result) + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self): + # scalar indexing + res = self.index[1] + expected = 2 + self.assertEqual(res, expected) + + res = self.index[-1] + expected = 18 + self.assertEqual(res, expected) + + ### slicing + # slice value completion + index = self.index[:] + expected = self.index + self.assert_numpy_array_equal(index, expected) + + # positive slice values + index = self.index[7:10:2] + expected = np.array([14, 18]) + self.assert_numpy_array_equal(index, expected) + + # negative slice values + index = self.index[-1:-5:-2] + expected = np.array([18, 14]) + self.assert_numpy_array_equal(index, expected) + + # stop overshoot + index = self.index[2:100:4] + expected = np.array([4, 12]) + self.assert_numpy_array_equal(index, expected) + + def test_len_specialised(self): + # TODO: How to test that len is specialised rather than calling + # the parent classes __len__() (which is slow)? + pass + + def test_size_specialised(self): + # TODO: How to test that size is specialised rather than calling + # the parent classes size property (which is slow)? + pass + class DatetimeLike(Base): def test_view(self):