From 8ab6fbd544c48f0b4a4b891016313abc7e070e91 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 21 Dec 2016 03:00:58 -0500 Subject: [PATCH] ENH: Create and propagate UInt64Index --- doc/source/whatsnew/v0.20.0.txt | 26 +- pandas/api/tests/test_api.py | 2 +- pandas/core/api.py | 3 +- pandas/core/indexing.py | 20 +- pandas/indexes/api.py | 4 +- pandas/indexes/base.py | 72 ++- pandas/indexes/numeric.py | 112 +++- pandas/src/algos_common_helper.pxi.in | 1 + pandas/src/index_class_helper.pxi.in | 1 + pandas/src/join_helper.pxi.in | 5 +- pandas/src/joins_func_helper.pxi.in | 3 +- pandas/tests/frame/test_indexing.py | 58 +- pandas/tests/indexes/common.py | 27 +- pandas/tests/indexes/test_base.py | 71 ++- pandas/tests/indexes/test_numeric.py | 704 ++++++++++++++++--------- pandas/tests/indexing/test_indexing.py | 123 +++-- pandas/tests/types/test_generic.py | 1 + pandas/types/generic.py | 4 +- pandas/util/testing.py | 4 + setup.py | 3 +- 20 files changed, 855 insertions(+), 389 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2a825edd0e98a..0e682874b4b73 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -91,6 +91,25 @@ support for bz2 compression in the python 2 c-engine improved (:issue:`14874`). df = pd.read_table(url, compression='bz2') # explicitly specify compression df.head(2) +.. _whatsnew_0200.enhancements.uint64_support: + +Pandas has significantly improved support for operations involving unsigned, +or purely non-negative, integers. Previously, handling these integers would +result in improper rounding or data-type casting, leading to incorrect results. +Notably, a new numerical index, ``UInt64Index``, has been created (:issue:`14937`) + +.. ipython:: python + + idx = pd.UInt64Index([1, 2, 3]) + df = pd.DataFrame({'A': ['a', 'b', 'c']}, index=idx) + df.index + +- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) +- Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`) +- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`) +- Bug in ``pd.read_csv()`` in which unsigned 64-bit integer elements were being improperly converted to the wrong data types (:issue:`14983`) +- Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`) + .. _whatsnew_0200.enhancements.other: Other enhancements @@ -298,8 +317,6 @@ Bug Fixes - Bug in ``Index`` power operations with reversed operands (:issue:`14973`) - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) -- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`) -- Bug in ``pd.read_csv()`` in which unsigned 64-bit integer elements were being improperly converted to the wrong data types (:issue:`14983`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) - Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) @@ -324,8 +341,6 @@ Bug Fixes -- Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`) -- Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`) @@ -350,7 +365,6 @@ Bug Fixes - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) -- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) - Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) - Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) @@ -369,4 +383,4 @@ Bug Fixes - Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) - Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`) -- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) \ No newline at end of file +- Bug in ``Series.dt.round`` inconsistent behaviour on NAT's with different arguments (:issue:`14940`) diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index b13b4d7de60ca..78dfe46914200 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -53,7 +53,7 @@ class TestPDApi(Base, tm.TestCase): classes = ['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset', 'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Float64Index', 'Grouper', 'HDFStore', 'Index', 'Int64Index', 'MultiIndex', - 'Period', 'PeriodIndex', 'RangeIndex', + 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', 'SparseSeries', 'TimeGrouper', 'Timedelta', 'TimedeltaIndex', 'Timestamp'] diff --git a/pandas/core/api.py b/pandas/core/api.py index b5e1de2063c7e..177e7b31cbd4f 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -10,7 +10,8 @@ from pandas.core.groupby import Grouper from pandas.formats.format import set_eng_float_format from pandas.core.index import (Index, CategoricalIndex, Int64Index, - RangeIndex, Float64Index, MultiIndex) + UInt64Index, RangeIndex, Float64Index, + MultiIndex) from pandas.core.series import Series, TimeSeries from pandas.core.frame import DataFrame diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6970d1891ee63..0db5103a18807 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -860,15 +860,20 @@ def _convert_for_reindex(self, key, axis=0): return labels[key] else: if isinstance(key, Index): - # want Index objects to pass through untouched - keyarr = key + keyarr = labels._convert_index_indexer(key) else: # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) - if is_integer_dtype(keyarr) and not labels.is_integer(): - keyarr = _ensure_platform_int(keyarr) - return labels.take(keyarr) + if is_integer_dtype(keyarr): + # Cast the indexer to uint64 if possible so + # that the values returned from indexing are + # also uint64. + keyarr = labels._convert_arr_indexer(keyarr) + + if not labels.is_integer(): + keyarr = _ensure_platform_int(keyarr) + return labels.take(keyarr) return keyarr @@ -1044,11 +1049,10 @@ def _getitem_iterable(self, key, axis=0): return self.obj.take(inds, axis=axis, convert=False) else: if isinstance(key, Index): - # want Index objects to pass through untouched - keyarr = key + keyarr = labels._convert_index_indexer(key) else: - # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) + keyarr = labels._convert_arr_indexer(keyarr) if is_categorical_dtype(labels): keyarr = labels._shallow_copy(keyarr) diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index 0b81c47488ef4..64992e46613e5 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -4,7 +4,7 @@ from pandas.indexes.category import CategoricalIndex # noqa from pandas.indexes.multi import MultiIndex # noqa from pandas.indexes.numeric import (NumericIndex, Float64Index, # noqa - Int64Index) + Int64Index, UInt64Index) from pandas.indexes.range import RangeIndex # noqa import pandas.core.common as com @@ -13,7 +13,7 @@ # TODO: there are many places that rely on these private methods existing in # pandas.core.index __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index', - 'CategoricalIndex', 'RangeIndex', + 'CategoricalIndex', 'RangeIndex', 'UInt64Index', 'InvalidIndexError', '_new_Index', '_ensure_index', '_get_na_value', '_get_combined_index', diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index b87fb5dc84782..d0bf4edfbc5d2 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -27,6 +27,8 @@ is_object_dtype, is_categorical_dtype, is_bool_dtype, + is_signed_integer_dtype, + is_unsigned_integer_dtype, is_integer_dtype, is_float_dtype, is_datetime64_any_dtype, is_timedelta64_dtype, @@ -199,14 +201,25 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: - # if we are actually all equal to integers + # If we are actually all equal to integers, # then coerce to integer - from .numeric import Int64Index, Float64Index + from .numeric import (Int64Index, UInt64Index, + Float64Index) try: - res = data.astype('i8') + res = data.astype('i8', copy=False) if (res == data).all(): return Int64Index(res, copy=copy, name=name) + except (OverflowError, TypeError, ValueError): + pass + + # Conversion to int64 failed (possibly due to + # overflow), so let's try now with uint64. + try: + res = data.astype('u8', copy=False) + if (res == data).all(): + return UInt64Index(res, copy=copy, + name=name) except (TypeError, ValueError): pass @@ -235,10 +248,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, IncompatibleFrequency) if isinstance(data, PeriodIndex): return PeriodIndex(data, copy=copy, name=name, **kwargs) - if issubclass(data.dtype.type, np.integer): + if is_signed_integer_dtype(data.dtype): from .numeric import Int64Index return Int64Index(data, copy=copy, dtype=dtype, name=name) - elif issubclass(data.dtype.type, np.floating): + elif is_unsigned_integer_dtype(data.dtype): + from .numeric import UInt64Index + return UInt64Index(data, copy=copy, dtype=dtype, name=name) + elif is_float_dtype(data.dtype): from .numeric import Float64Index return Float64Index(data, copy=copy, dtype=dtype, name=name) elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): @@ -254,9 +270,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if dtype is None: inferred = lib.infer_dtype(subarr) if inferred == 'integer': - from .numeric import Int64Index - return Int64Index(subarr.astype('i8'), copy=copy, - name=name) + from .numeric import Int64Index, UInt64Index + try: + return Int64Index(subarr.astype('i8'), copy=copy, + name=name) + except OverflowError: + return UInt64Index(subarr.astype('u8'), copy=copy, + name=name) elif inferred in ['floating', 'mixed-integer-float']: from .numeric import Float64Index return Float64Index(subarr, copy=copy, name=name) @@ -1253,6 +1273,40 @@ def is_int(v): return indexer + _index_shared_docs['_convert_arr_indexer'] = """ + Convert an array-like indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : array-like + Indexer to convert. + + Returns + ------- + converted_keyarr : array-like + """ + + @Appender(_index_shared_docs['_convert_arr_indexer']) + def _convert_arr_indexer(self, keyarr): + return keyarr + + _index_shared_docs['_convert_index_indexer'] = """ + Convert an Index indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : Index (or sub-class) + Indexer to convert. + + Returns + ------- + converted_keyarr : Index (or sub-class) + """ + + @Appender(_index_shared_docs['_convert_index_indexer']) + def _convert_index_indexer(self, keyarr): + return keyarr + def _convert_list_indexer(self, keyarr, kind=None): """ passed a key that is tuplesafe that is integer based @@ -3489,7 +3543,7 @@ def _validate_for_numeric_binop(self, other, op, opstr): raise ValueError("cannot evaluate a numeric op with " "unequal lengths") other = _values_from_object(other) - if other.dtype.kind not in ['f', 'i']: + if other.dtype.kind not in ['f', 'i', 'u']: raise TypeError("cannot evaluate a numeric op " "with a non-numeric dtype") elif isinstance(other, (DateOffset, np.timedelta64, diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index c71abe202226e..0b9b337731d7f 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -8,7 +8,7 @@ is_float_dtype, is_object_dtype, is_integer_dtype, is_scalar) from pandas.types.missing import isnull -from pandas.core.common import _values_from_object +from pandas.core.common import _asarray_tuplesafe, _values_from_object from pandas import compat from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs @@ -73,6 +73,13 @@ def _assert_safe_casting(cls, data, subarr): """ pass + @property + def is_all_dates(self): + """ + Checks that all the labels are datetime objects + """ + return False + _num_index_shared_docs['class_descr'] = """ Immutable ndarray implementing an ordered, sliceable set. The basic object @@ -128,13 +135,6 @@ def asi8(self): # do not cache or you'll create a memory leak return self.values.view('i8') - @property - def is_all_dates(self): - """ - Checks that all the labels are datetime objects - """ - return False - @Appender(_index_shared_docs['_convert_scalar_indexer']) def _convert_scalar_indexer(self, key, kind=None): assert kind in ['ix', 'loc', 'getitem', 'iloc', None] @@ -154,7 +154,7 @@ def _assert_safe_casting(cls, data, subarr): """ Ensure incoming data can be represented as ints. """ - if not issubclass(data.dtype.type, np.integer): + if not issubclass(data.dtype.type, np.signedinteger): if not np.array_equal(data, subarr): raise TypeError('Unsafe NumPy casting, you must ' 'explicitly cast') @@ -162,6 +162,84 @@ def _assert_safe_casting(cls, data, subarr): Int64Index._add_numeric_methods() Int64Index._add_logical_methods() +_uint64_descr_args = dict( + klass='UInt64Index', + ltype='unsigned integer', + dtype='uint64', + extra='' +) + + +class UInt64Index(NumericIndex): + __doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args + + _typ = 'uint64index' + _arrmap = _algos.arrmap_uint64 + _left_indexer_unique = _join.left_join_indexer_unique_uint64 + _left_indexer = _join.left_join_indexer_uint64 + _inner_indexer = _join.inner_join_indexer_uint64 + _outer_indexer = _join.outer_join_indexer_uint64 + + _can_hold_na = False + _na_value = 0 + + _engine_type = _index.UInt64Engine + + _default_dtype = np.uint64 + + @property + def inferred_type(self): + return 'integer' + + @property + def asi8(self): + # do not cache or you'll create a memory leak + return self.values.view('u8') + + @Appender(_index_shared_docs['_convert_scalar_indexer']) + def _convert_scalar_indexer(self, key, kind=None): + assert kind in ['ix', 'loc', 'getitem', 'iloc', None] + + # don't coerce ilocs to integers + if kind != 'iloc': + key = self._maybe_cast_indexer(key) + return (super(UInt64Index, self) + ._convert_scalar_indexer(key, kind=kind)) + + @Appender(_index_shared_docs['_convert_arr_indexer']) + def _convert_arr_indexer(self, keyarr): + # Cast the indexer to uint64 if possible so + # that the values returned from indexing are + # also uint64. + if is_integer_dtype(keyarr): + return _asarray_tuplesafe(keyarr, dtype=np.uint64) + return keyarr + + @Appender(_index_shared_docs['_convert_index_indexer']) + def _convert_index_indexer(self, keyarr): + # Cast the indexer to uint64 if possible so + # that the values returned from indexing are + # also uint64. + if keyarr.is_integer(): + return keyarr.astype(np.uint64) + return keyarr + + def _wrap_joined_index(self, joined, other): + name = self.name if self.name == other.name else None + return UInt64Index(joined, name=name) + + @classmethod + def _assert_safe_casting(cls, data, subarr): + """ + Ensure incoming data can be represented as uints. + """ + if not issubclass(data.dtype.type, np.unsignedinteger): + if not np.array_equal(data, subarr): + raise TypeError('Unsafe NumPy casting, you must ' + 'explicitly cast') + +UInt64Index._add_numeric_methods() +UInt64Index._add_logical_methods() _float64_descr_args = dict( klass='Float64Index', @@ -207,15 +285,6 @@ def astype(self, dtype, copy=True): @Appender(_index_shared_docs['_convert_scalar_indexer']) def _convert_scalar_indexer(self, key, kind=None): - """ - convert a scalar indexer - - Parameters - ---------- - key : label of the slice bound - kind : {'ix', 'loc', 'getitem'} or None - """ - assert kind in ['ix', 'loc', 'getitem', 'iloc', None] if kind == 'iloc': @@ -310,13 +379,6 @@ def get_loc(self, key, method=None, tolerance=None): return super(Float64Index, self).get_loc(key, method=method, tolerance=tolerance) - @property - def is_all_dates(self): - """ - Checks that all the labels are datetime objects - """ - return False - @cache_readonly def is_unique(self): return super(Float64Index, self).is_unique and self._nan_idxs.size < 2 diff --git a/pandas/src/algos_common_helper.pxi.in b/pandas/src/algos_common_helper.pxi.in index c1c190704b4c7..a579a5020f6e7 100644 --- a/pandas/src/algos_common_helper.pxi.in +++ b/pandas/src/algos_common_helper.pxi.in @@ -27,6 +27,7 @@ dtypes = [('float64', 'float64_t', 'np.float64', True, True), ('object', 'object', 'object', True, False), ('int32', 'int32_t', 'np.int32', False, True), ('int64', 'int64_t', 'np.int64', False, True), + ('uint64', 'uint64_t', 'np.uint64', False, True), ('bool', 'uint8_t', 'np.bool', False, True)] def get_dispatch(dtypes): diff --git a/pandas/src/index_class_helper.pxi.in b/pandas/src/index_class_helper.pxi.in index 315dd18009ad4..76c0deef7ebee 100644 --- a/pandas/src/index_class_helper.pxi.in +++ b/pandas/src/index_class_helper.pxi.in @@ -12,6 +12,7 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in # name, dtype, ctype dtypes = [('Float64', 'float64', 'float64_t'), + ('UInt64', 'uint64', 'uint64_t'), ('Int64', 'int64', 'int64_t'), ('Object', 'object', 'object')] }} diff --git a/pandas/src/join_helper.pxi.in b/pandas/src/join_helper.pxi.in index 5b55ec2b1bf6d..feb8cfb76a7f0 100644 --- a/pandas/src/join_helper.pxi.in +++ b/pandas/src/join_helper.pxi.in @@ -15,7 +15,8 @@ dtypes = [('float64', 'float64_t', 'np.float64'), ('float32', 'float32_t', 'np.float32'), ('object', 'object', 'object'), ('int32', 'int32_t', 'np.int32'), - ('int64', 'int64_t', 'np.int64')] + ('int64', 'int64_t', 'np.int64'), + ('uint64', 'uint64_t', 'np.uint64')] def get_dispatch(dtypes): @@ -404,4 +405,4 @@ def outer_join_indexer_{{name}}(ndarray[{{c_type}}] left, return result, lindexer, rindexer -{{endfor}} \ No newline at end of file +{{endfor}} diff --git a/pandas/src/joins_func_helper.pxi.in b/pandas/src/joins_func_helper.pxi.in index 33926a23f7f41..68c376492f8f2 100644 --- a/pandas/src/joins_func_helper.pxi.in +++ b/pandas/src/joins_func_helper.pxi.in @@ -12,7 +12,8 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in {{py: # table_type, by_dtype -by_dtypes = [('PyObjectHashTable', 'object'), ('Int64HashTable', 'int64_t')] +by_dtypes = [('PyObjectHashTable', 'object'), ('Int64HashTable', 'int64_t'), + ('UInt64HashTable', 'uint64_t')] # on_dtype on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t', diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index abe40f7be1d90..02d288bdf6ea8 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2789,7 +2789,63 @@ def test_set_reset(self): result = df.reset_index() self.assertTrue(result['foo'].dtype, 'M8[ns, US/Eastern') - result = result.set_index('foo') + df = result.set_index('foo') + tm.assert_index_equal(df.index, idx) + + def test_transpose(self): + + result = self.df.T + expected = DataFrame(self.df.values.T) + expected.index = ['A', 'B'] + assert_frame_equal(result, expected) + + +class TestDataFrameIndexingUInt64(tm.TestCase, TestData): + + _multiprocess_can_split_ = True + + def setUp(self): + self.ir = Index(np.arange(3), dtype=np.uint64) + self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo') + + self.df = DataFrame({'A': self.idx, 'B': self.ir}) + + def test_setitem(self): + + df = self.df + idx = self.idx + + # setitem + df['C'] = idx + assert_series_equal(df['C'], Series(idx, name='C')) + + df['D'] = 'foo' + df['D'] = idx + assert_series_equal(df['D'], Series(idx, name='D')) + del df['D'] + + # With NaN: because uint64 has no NaN element, + # the column should be cast to object. + df2 = df.copy() + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT + result = df2['B'] + assert_series_equal(notnull(result), Series( + [True, False, True], name='B')) + assert_series_equal(df2.dtypes, Series([np.dtype('uint64'), + np.dtype('O'), np.dtype('O')], + index=['A', 'B', 'C'])) + + def test_set_reset(self): + + idx = self.idx + + # set/reset + df = DataFrame({'A': [0, 1, 2]}, index=idx) + result = df.reset_index() + self.assertEqual(result['foo'].dtype, np.dtype('uint64')) + + df = result.set_index('foo') tm.assert_index_equal(df.index, idx) def test_transpose(self): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1b373baf9b3c1..63e9fe580d73d 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -5,8 +5,8 @@ import numpy as np -from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex, - MultiIndex, CategoricalIndex, DatetimeIndex, +from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, + RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, notnull) from pandas.types.common import needs_i8_conversion from pandas.util.testing import assertRaisesRegexp @@ -470,10 +470,11 @@ def test_where(self): expected = i tm.assert_index_equal(result, expected) - i2 = i.copy() - i2 = pd.Index([np.nan, np.nan] + i[2:].tolist()) - result = i.where(notnull(i2)) - expected = i2 + _nan = i._na_value + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype) + + result = i.where(cond) tm.assert_index_equal(result, expected) def test_setops_errorcases(self): @@ -660,6 +661,12 @@ def test_equals(self): self.assertFalse(idx.equals(list(idx))) self.assertFalse(idx.equals(np.array(idx))) + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(idx, RangeIndex): + same_values = Index(idx, dtype=object) + self.assertTrue(idx.equals(same_values)) + self.assertTrue(same_values.equals(idx)) + if idx.nlevels == 1: # do not test MultiIndex self.assertFalse(idx.equals(pd.Series(idx))) @@ -744,7 +751,7 @@ def test_numpy_ufuncs(self): with tm.assertRaises(Exception): with np.errstate(all='ignore'): func(idx) - elif isinstance(idx, (Float64Index, Int64Index)): + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): # coerces to float (e.g. np.sin) with np.errstate(all='ignore'): result = func(idx) @@ -765,7 +772,7 @@ def test_numpy_ufuncs(self): # raise TypeError or ValueError (PeriodIndex) with tm.assertRaises(Exception): func(idx) - elif isinstance(idx, (Float64Index, Int64Index)): + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): # results in bool array result = func(idx) exp = func(idx.values) @@ -798,7 +805,7 @@ def test_hasnans_isnans(self): continue elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): values[1] = pd.tslib.iNaT - elif isinstance(index, Int64Index): + elif isinstance(index, (Int64Index, UInt64Index)): continue else: values[1] = np.nan @@ -838,7 +845,7 @@ def test_fillna(self): if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): values[1] = pd.tslib.iNaT - elif isinstance(index, Int64Index): + elif isinstance(index, (Int64Index, UInt64Index)): continue else: values[1] = np.nan diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0e6773fd83404..a0f2a090c9a06 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -4,17 +4,18 @@ import pandas.util.testing as tm from pandas.indexes.api import Index, MultiIndex -from .common import Base +from pandas.tests.indexes.common import Base from pandas.compat import (range, lrange, lzip, u, - zip, PY3, PY36) + text_type, zip, PY3, PY36) import operator import os +import nose import numpy as np from pandas import (period_range, date_range, Series, - Float64Index, Int64Index, + DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) from pandas.core.index import _get_combined_index @@ -40,6 +41,7 @@ def setUp(self): periodIndex=tm.makePeriodIndex(100), tdIndex=tm.makeTimedeltaIndex(100), intIndex=tm.makeIntIndex(100), + uintIndex=tm.makeUIntIndex(100), rangeIndex=tm.makeIntIndex(100), floatIndex=tm.makeFloatIndex(100), boolIndex=Index([True, False]), @@ -449,7 +451,7 @@ def test_delete(self): self.assertEqual(result.name, expected.name) with tm.assertRaises((IndexError, ValueError)): - # either depeidnig on numpy version + # either depending on numpy version result = idx.delete(5) def test_identical(self): @@ -2020,3 +2022,64 @@ def test_repeat(self): with tm.assert_produces_warning(FutureWarning): result = idx.repeat(n=repeats) tm.assert_index_equal(result, expected) + + def test_is_monotonic_na(self): + examples = [pd.Index([np.nan]), + pd.Index([np.nan, 1]), + pd.Index([1, 2, np.nan]), + pd.Index(['a', 'b', np.nan]), + pd.to_datetime(['NaT']), + pd.to_datetime(['NaT', '2000-01-01']), + pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), + pd.to_timedelta(['1 day', 'NaT']), ] + for index in examples: + self.assertFalse(index.is_monotonic_increasing) + self.assertFalse(index.is_monotonic_decreasing) + + def test_repr_summary(self): + with cf.option_context('display.max_seq_items', 10): + r = repr(pd.Index(np.arange(1000))) + self.assertTrue(len(r) < 200) + self.assertTrue("..." in r) + + def test_int_name_format(self): + index = Index(['a', 'b', 'c'], name=0) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) + repr(s) + repr(df) + + def test_print_unicode_columns(self): + df = pd.DataFrame({u("\u05d0"): [1, 2, 3], + "\u05d1": [4, 5, 6], + "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_unicode_string_with_unicode(self): + idx = Index(lrange(1000)) + + if PY3: + str(idx) + else: + text_type(idx) + + def test_bytestring_with_unicode(self): + idx = Index(lrange(1000)) + if PY3: + bytes(idx) + else: + str(idx) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index f7f072d5b5d2a..044d3477271ad 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,22 +1,20 @@ # -*- coding: utf-8 -*- from datetime import datetime -from pandas import compat -from pandas.compat import range, lrange, u, PY3 +from pandas.compat import range, PY3 +import nose import numpy as np -from pandas import (date_range, Series, DataFrame, - Index, Float64Index, Int64Index, RangeIndex) -from pandas.util.testing import assertRaisesRegexp +from pandas import (date_range, Series, Index, Float64Index, + Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm -import pandas.core.config as cf import pandas as pd from pandas.lib import Timestamp -from .common import Base +from pandas.tests.indexes.common import Base def full_like(array, value): @@ -64,10 +62,11 @@ def test_numeric_compat(self): result = idx * np.array(5, dtype='int64') tm.assert_index_equal(result, idx * 5) - result = idx * np.arange(5, dtype='int64') + arr_dtype = 'uint64' if isinstance(idx, UInt64Index) else 'int64' + result = idx * np.arange(5, dtype=arr_dtype) tm.assert_index_equal(result, didx) - result = idx * Series(np.arange(5, dtype='int64')) + result = idx * Series(np.arange(5, dtype=arr_dtype)) tm.assert_index_equal(result, didx) result = idx * Series(np.arange(5, dtype='float64') + 0.1) @@ -448,7 +447,183 @@ def test_take_fill_value(self): idx.take(np.array([1, -5])) -class TestInt64Index(Numeric, tm.TestCase): +class NumericInt(Numeric): + + def test_view(self): + super(NumericInt, self).test_view() + + i = self._holder([], name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view(self._dtype) + tm.assert_index_equal(i, self._holder(i_view, name='Foo')) + + i_view = i.view(self._holder) + tm.assert_index_equal(i, self._holder(i_view, name='Foo')) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = self._holder([4, 3, 2, 1]) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = self._holder([1]) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(i)) + + self.assertFalse(i.identical(self.index)) + self.assertTrue(Index(same_values, name='foo', dtype=object).identical( + i)) + + self.assertFalse(self.index.copy(dtype=object) + .identical(self.index.copy(dtype=self._dtype))) + + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) + self.assert_index_equal(joined, exp_joined) + + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = Index(np.concatenate((self.index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(self.index) + expected = Index(np.concatenate((other, self.index))) + tm.assert_index_equal(result, expected) + + def test_cant_or_shouldnt_cast(self): + # can't + data = ['foo', 'bar', 'baz'] + self.assertRaises(TypeError, self._holder, data) + + # shouldn't + data = ['0', '1', '2'] + self.assertRaises(TypeError, self._holder, data) + + def test_view_index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = self._holder([1, 2, 3, 4], name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_take_fill_value(self): + # see gh-12631 + idx = self._holder([1, 2, 3], name='xxx') + result = idx.take(np.array([1, 0, -1])) + expected = self._holder([2, 1, 3], name='xxx') + tm.assert_index_equal(result, expected) + + name = self._holder.__name__ + msg = ("Unable to fill values because " + "{name} cannot contain NA").format(name=name) + + # fill_value=True + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, + fill_value=True) + expected = self._holder([2, 1, 3], name='xxx') + tm.assert_index_equal(result, expected) + + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with tm.assertRaisesRegexp(ValueError, msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with tm.assertRaises(IndexError): + idx.take(np.array([1, -5])) + + def test_slice_keep_name(self): + idx = self._holder([1, 2], name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_ufunc_coercions(self): + idx = self._holder([1, 2, 3, 4, 5], name='x') + + result = np.sqrt(idx) + tm.assertIsInstance(result, Float64Index) + exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') + tm.assert_index_equal(result, exp) + + result = np.divide(idx, 2.) + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + tm.assert_index_equal(result, exp) + + # _evaluate_numeric_binop + result = idx + 2. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([3., 4., 5., 6., 7.], name='x') + tm.assert_index_equal(result, exp) + + result = idx - 2. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([-1., 0., 1., 2., 3.], name='x') + tm.assert_index_equal(result, exp) + + result = idx * 1. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([1., 2., 3., 4., 5.], name='x') + tm.assert_index_equal(result, exp) + + result = idx / 2. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + tm.assert_index_equal(result, exp) + + +class TestInt64Index(NumericInt, tm.TestCase): + _dtype = 'int64' _holder = Int64Index _multiprocess_can_split_ = True @@ -459,12 +634,6 @@ def setUp(self): def create_index(self): return Int64Index(np.arange(5, dtype='int64')) - def test_too_many_names(self): - def testit(): - self.index.names = ["roger", "harold"] - - assertRaisesRegexp(ValueError, "^Length", testit) - def test_constructor(self): # pass list, coerce fine index = Int64Index([-5, 0, 1, 2]) @@ -511,24 +680,6 @@ def test_constructor_corner(self): with tm.assertRaisesRegexp(TypeError, 'casting'): Int64Index(arr_with_floats) - def test_copy(self): - i = Int64Index([], name='Foo') - i_copy = i.copy() - self.assertEqual(i_copy.name, 'Foo') - - def test_view(self): - super(TestInt64Index, self).test_view() - - i = Int64Index([], name='Foo') - i_view = i.view() - self.assertEqual(i_view.name, 'Foo') - - i_view = i.view('i8') - tm.assert_index_equal(i, Int64Index(i_view, name='Foo')) - - i_view = i.view(Int64Index) - tm.assert_index_equal(i, Int64Index(i_view, name='Foo')) - def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) @@ -538,119 +689,33 @@ def test_coerce_list(self): arr = Index([1, 2, 3, 4], dtype=object) tm.assertIsInstance(arr, Index) - def test_dtype(self): - self.assertEqual(self.index.dtype, np.int64) - - def test_is_monotonic(self): - self.assertTrue(self.index.is_monotonic) - self.assertTrue(self.index.is_monotonic_increasing) - self.assertFalse(self.index.is_monotonic_decreasing) - - index = Int64Index([4, 3, 2, 1]) - self.assertFalse(index.is_monotonic) - self.assertTrue(index.is_monotonic_decreasing) - - index = Int64Index([1]) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - self.assertTrue(index.is_monotonic_decreasing) - - def test_is_monotonic_na(self): - examples = [Index([np.nan]), - Index([np.nan, 1]), - Index([1, 2, np.nan]), - Index(['a', 'b', np.nan]), - pd.to_datetime(['NaT']), - pd.to_datetime(['NaT', '2000-01-01']), - pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), - pd.to_timedelta(['1 day', 'NaT']), ] - for index in examples: - self.assertFalse(index.is_monotonic_increasing) - self.assertFalse(index.is_monotonic_decreasing) - - def test_equals(self): - same_values = Index(self.index, dtype=object) - self.assertTrue(self.index.equals(same_values)) - self.assertTrue(same_values.equals(self.index)) - - def test_logical_compat(self): - idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) - - def test_identical(self): - i = Index(self.index.copy()) - self.assertTrue(i.identical(self.index)) - - same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) - - i = self.index.copy(dtype=object) - i = i.rename('foo') - same_values = Index(i, dtype=object) - self.assertTrue(same_values.identical(i)) - - self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', dtype=object).identical( - i)) - - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) - def test_get_indexer(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target) expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) - def test_get_indexer_pad(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target, method='pad') expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) - def test_get_indexer_backfill(self): target = Int64Index(np.arange(10)) indexer = self.index.get_indexer(target, method='backfill') expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected) - def test_join_outer(self): - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - # guarantee of sortedness - res, lidx, ridx = self.index.join(other, how='outer', - return_indexers=True) - noidx_res = self.index.join(other, how='outer') - self.assert_index_equal(res, noidx_res) - - eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) - elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - dtype=np.intp) - eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], - dtype=np.intp) - - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='outer', - return_indexers=True) - noidx_res = self.index.join(other_mono, how='outer') - self.assert_index_equal(res, noidx_res) + def test_intersection(self): + other = Index([1, 2, 3, 4, 5]) + result = self.index.intersection(other) + expected = Index(np.sort(np.intersect1d(self.index.values, + other.values))) + tm.assert_index_equal(result, expected) - elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - dtype=np.intp) - eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], - dtype=np.intp) - tm.assertIsInstance(res, Int64Index) - self.assert_index_equal(res, eres) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) + result = other.intersection(self.index) + expected = Index(np.sort(np.asarray(np.intersect1d(self.index.values, + other.values)))) + tm.assert_index_equal(result, expected) def test_join_inner(self): other = Int64Index([7, 12, 25, 1, 2, 5]) @@ -789,28 +854,92 @@ def test_join_non_int_index(self): right2 = other.join(self.index, how='right') self.assert_index_equal(right2, self.index.astype(object)) - def test_join_non_unique(self): - left = Index([4, 4, 3, 3]) + def test_join_outer(self): + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - joined, lidx, ridx = left.join(left, return_indexers=True) + # not monotonic + # guarantee of sortedness + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assert_index_equal(res, noidx_res) - exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) - self.assert_index_equal(joined, exp_joined) + eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], + dtype=np.intp) + eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], + dtype=np.intp) - exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) - tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assertIsInstance(res, Int64Index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) - exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) - tm.assert_numpy_array_equal(ridx, exp_ridx) + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='outer', + return_indexers=True) + noidx_res = self.index.join(other_mono, how='outer') + self.assert_index_equal(res, noidx_res) - def test_join_self(self): - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], + dtype=np.intp) + eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], + dtype=np.intp) + tm.assertIsInstance(res, Int64Index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +class TestUInt64Index(NumericInt, tm.TestCase): + + _dtype = 'uint64' + _holder = UInt64Index + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, + 2**63 + 20, 2**63 + 25])) + self.setup_indices() + + def create_index(self): + return UInt64Index(np.arange(5, dtype='uint64')) + + def test_constructor(self): + idx = UInt64Index([1, 2, 3]) + res = Index([1, 2, 3], dtype=np.uint64) + tm.assert_index_equal(res, idx) + + idx = UInt64Index([1, 2**63]) + res = Index([1, 2**63], dtype=np.uint64) + tm.assert_index_equal(res, idx) + + idx = UInt64Index([1, 2**63]) + res = Index([1, 2**63]) + tm.assert_index_equal(res, idx) + + def test_get_indexer(self): + target = UInt64Index(np.arange(10).astype('uint64') * 5 + 2**63) + indexer = self.index.get_indexer(target) + expected = np.array([0, -1, 1, 2, 3, 4, + -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype('uint64') * 5 + 2**63) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 2, 3, 4, + 4, 4, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype('uint64') * 5 + 2**63) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 3, 4, + -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) def test_intersection(self): - other = Index([1, 2, 3, 4, 5]) + other = Index([2**63, 2**63 + 5, 2**63 + 10, 2**63 + 15, 2**63 + 20]) result = self.index.intersection(other) expected = Index(np.sort(np.intersect1d(self.index.values, other.values))) @@ -821,147 +950,198 @@ def test_intersection(self): other.values)))) tm.assert_index_equal(result, expected) - def test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + def test_join_inner(self): + other = UInt64Index(2**63 + np.array( + [7, 12, 25, 1, 2, 10], dtype='uint64')) + other_mono = UInt64Index(2**63 + np.array( + [1, 2, 7, 10, 12, 25], dtype='uint64')) - i1 = Index(dt_dates, dtype=object) - i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) + # not monotonic + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) - self.assertEqual(len(res), 0) + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) - def test_union_noncomparable(self): - from datetime import datetime, timedelta - # corner case, non-Int64Index - now = datetime.now() - other = Index([now + timedelta(i) for i in range(4)], dtype=object) - result = self.index.union(other) - expected = Index(np.concatenate((self.index, other))) - tm.assert_index_equal(result, expected) + eres = UInt64Index(2**63 + np.array([10, 25], dtype='uint64')) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([5, 2], dtype=np.intp) - result = other.union(self.index) - expected = Index(np.concatenate((other, self.index))) - tm.assert_index_equal(result, expected) + tm.assertIsInstance(res, UInt64Index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) - def test_cant_or_shouldnt_cast(self): - # can't - data = ['foo', 'bar', 'baz'] - self.assertRaises(TypeError, Int64Index, data) + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='inner', + return_indexers=True) - # shouldn't - data = ['0', '1', '2'] - self.assertRaises(TypeError, Int64Index, data) + res2 = self.index.intersection(other_mono) + self.assert_index_equal(res, res2) - def test_view_Index(self): - self.index.view(Index) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([3, 5], dtype=np.intp) - def test_prevent_casting(self): - result = self.index.astype('O') - self.assertEqual(result.dtype, np.object_) + tm.assertIsInstance(res, UInt64Index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) - def test_take_preserve_name(self): - index = Int64Index([1, 2, 3, 4], name='foo') - taken = index.take([3, 0, 1]) - self.assertEqual(index.name, taken.name) + def test_join_left(self): + other = UInt64Index(2**63 + np.array( + [7, 12, 25, 1, 2, 10], dtype='uint64')) + other_mono = UInt64Index(2**63 + np.array( + [1, 2, 7, 10, 12, 25], dtype='uint64')) - def test_take_fill_value(self): - # GH 12631 - idx = pd.Int64Index([1, 2, 3], name='xxx') - result = idx.take(np.array([1, 0, -1])) - expected = pd.Int64Index([2, 1, 3], name='xxx') - tm.assert_index_equal(result, expected) + # not monotonic + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) - # fill_value - msg = "Unable to fill values because Int64Index cannot contain NA" - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -1]), fill_value=True) + tm.assertIsInstance(res, UInt64Index) + self.assert_index_equal(res, eres) + self.assertIsNone(lidx) + tm.assert_numpy_array_equal(ridx, eridx) - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, - fill_value=True) - expected = pd.Int64Index([2, 1, 3], name='xxx') - tm.assert_index_equal(result, expected) + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='left', + return_indexers=True) + eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) - msg = "Unable to fill values because Int64Index cannot contain NA" - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with tm.assertRaisesRegexp(ValueError, msg): - idx.take(np.array([1, 0, -5]), fill_value=True) + tm.assertIsInstance(res, UInt64Index) + self.assert_index_equal(res, eres) + self.assertIsNone(lidx) + tm.assert_numpy_array_equal(ridx, eridx) - with tm.assertRaises(IndexError): - idx.take(np.array([1, -5])) + # non-unique + idx = UInt64Index(2**63 + np.array([1, 1, 2, 5], dtype='uint64')) + idx2 = UInt64Index(2**63 + np.array([1, 2, 5, 7, 9], dtype='uint64')) + res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True) - def test_int_name_format(self): - index = Index(['a', 'b', 'c'], name=0) - s = Series(lrange(3), index) - df = DataFrame(lrange(3), index=index) - repr(s) - repr(df) - - def test_print_unicode_columns(self): - df = pd.DataFrame({u("\u05d0"): [1, 2, 3], - "\u05d1": [4, 5, 6], - "c": [7, 8, 9]}) - repr(df.columns) # should not raise UnicodeDecodeError - - def test_repr_summary(self): - with cf.option_context('display.max_seq_items', 10): - r = repr(pd.Index(np.arange(1000))) - self.assertTrue(len(r) < 200) - self.assertTrue("..." in r) + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2**63 + np.array( + [1, 1, 2, 5, 7, 9], dtype='uint64')) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - def test_repr_roundtrip(self): - tm.assert_index_equal(eval(repr(self.index)), self.index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) - def test_unicode_string_with_unicode(self): - idx = Index(lrange(1000)) + def test_join_right(self): + other = UInt64Index(2**63 + np.array( + [7, 12, 25, 1, 2, 10], dtype='uint64')) + other_mono = UInt64Index(2**63 + np.array( + [1, 2, 7, 10, 12, 25], dtype='uint64')) - if PY3: - str(idx) - else: - compat.text_type(idx) + # not monotonic + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) - def test_bytestring_with_unicode(self): - idx = Index(lrange(1000)) - if PY3: - bytes(idx) - else: - str(idx) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assertIsInstance(other, UInt64Index) + self.assert_index_equal(res, eres) + self.assertIsNone(ridx) - def test_slice_keep_name(self): - idx = Int64Index([1, 2], name='asdf') - self.assertEqual(idx.name, idx[1:].name) + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='right', + return_indexers=True) + eres = other_mono + elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) - def test_ufunc_coercions(self): - idx = Int64Index([1, 2, 3, 4, 5], name='x') + tm.assertIsInstance(other, UInt64Index) + tm.assert_numpy_array_equal(lidx, elidx) + self.assert_index_equal(res, eres) + self.assertIsNone(ridx) - result = np.sqrt(idx) - tm.assertIsInstance(result, Float64Index) - exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') - tm.assert_index_equal(result, exp) + # non-unique + idx = UInt64Index(2**63 + np.array([1, 1, 2, 5], dtype='uint64')) + idx2 = UInt64Index(2**63 + np.array([1, 2, 5, 7, 9], dtype='uint64')) + res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) - result = np.divide(idx, 2.) - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') - tm.assert_index_equal(result, exp) + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2**63 + np.array( + [1, 1, 2, 5, 7, 9], dtype='uint64')) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) - # _evaluate_numeric_binop - result = idx + 2. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([3., 4., 5., 6., 7.], name='x') - tm.assert_index_equal(result, exp) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) - result = idx - 2. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([-1., 0., 1., 2., 3.], name='x') - tm.assert_index_equal(result, exp) + def test_join_non_int_index(self): + other = Index(2**63 + np.array( + [1, 5, 7, 10, 20], dtype='uint64'), dtype=object) - result = idx * 1. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([1., 2., 3., 4., 5.], name='x') - tm.assert_index_equal(result, exp) + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index(2**63 + np.array( + [0, 1, 5, 7, 10, 15, 20, 25], dtype='uint64')) + self.assert_index_equal(outer, outer2) + self.assert_index_equal(outer, expected) - result = idx / 2. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') - tm.assert_index_equal(result, exp) + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index(2**63 + np.array([10, 20], dtype='uint64')) + self.assert_index_equal(inner, inner2) + self.assert_index_equal(inner, expected) + + left = self.index.join(other, how='left') + self.assert_index_equal(left, self.index.astype(object)) + + left2 = other.join(self.index, how='left') + self.assert_index_equal(left2, other) + + right = self.index.join(other, how='right') + self.assert_index_equal(right, other) + + right2 = other.join(self.index, how='right') + self.assert_index_equal(right2, self.index.astype(object)) + + def test_join_outer(self): + other = UInt64Index(2**63 + np.array( + [7, 12, 25, 1, 2, 10], dtype='uint64')) + other_mono = UInt64Index(2**63 + np.array( + [1, 2, 7, 10, 12, 25], dtype='uint64')) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assert_index_equal(res, noidx_res) + + eres = UInt64Index(2**63 + np.array( + [0, 1, 2, 7, 10, 12, 15, 20, 25], dtype='uint64')) + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) + + tm.assertIsInstance(res, UInt64Index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='outer', + return_indexers=True) + noidx_res = self.index.join(other_mono, how='outer') + self.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) + + tm.assertIsInstance(res, UInt64Index) + self.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 6fc24e41ee914..a50027f1d0343 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -20,7 +20,7 @@ from pandas import option_context from pandas.core.indexing import _non_reducing_slice, _maybe_numeric_slice from pandas.core.api import (DataFrame, Index, Series, Panel, isnull, - MultiIndex, Timestamp, Timedelta) + MultiIndex, Timestamp, Timedelta, UInt64Index) from pandas.formats.printing import pprint_thing from pandas import concat from pandas.core.common import PerformanceWarning, UnsortedIndexError @@ -100,7 +100,8 @@ class TestIndexing(tm.TestCase): _multiprocess_can_split_ = True _objs = set(['series', 'frame', 'panel']) - _typs = set(['ints', 'labels', 'mixed', 'ts', 'floats', 'empty', 'ts_rev']) + _typs = set(['ints', 'uints', 'labels', 'mixed', + 'ts', 'floats', 'empty', 'ts_rev']) def setUp(self): @@ -116,6 +117,16 @@ def setUp(self): major_axis=lrange(0, 12, 3), minor_axis=lrange(0, 16, 4)) + self.series_uints = Series(np.random.rand(4), + index=UInt64Index(lrange(0, 8, 2))) + self.frame_uints = DataFrame(np.random.randn(4, 4), + index=UInt64Index(lrange(0, 8, 2)), + columns=UInt64Index(lrange(0, 12, 3))) + self.panel_uints = Panel(np.random.rand(4, 4, 4), + items=UInt64Index(lrange(0, 8, 2)), + major_axis=UInt64Index(lrange(0, 12, 3)), + minor_axis=UInt64Index(lrange(0, 16, 4))) + self.series_labels = Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) @@ -197,10 +208,6 @@ def _print(result, error=None): pprint_thing(v) try: - # if (name == 'bool' and t == 'empty' and o == 'series' and - # method1 == 'loc'): - # import pdb; pdb.set_trace() - rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) try: @@ -210,6 +217,8 @@ def _print(result, error=None): _print(result) return + detail = None + try: if is_scalar(rs) and is_scalar(xp): self.assertEqual(rs, xp) @@ -220,7 +229,8 @@ def _print(result, error=None): elif xp.ndim == 3: tm.assert_panel_equal(rs, xp) result = 'ok' - except (AssertionError): + except AssertionError as e: + detail = str(e) result = 'fail' # reverse the checks @@ -228,10 +238,9 @@ def _print(result, error=None): if result == 'fail': result = 'ok (fail)' - if not result.startswith('ok'): - raise AssertionError(_print(result)) - _print(result) + if not result.startswith('ok'): + raise AssertionError(detail) except AssertionError: raise @@ -309,16 +318,17 @@ def _check(f, func, values=False): d = getattr(self, o) # iat - _check(d['ints'], 'iat', values=True) + for f in [d['ints'], d['uints']]: + _check(f, 'iat', values=True) + for f in [d['labels'], d['ts'], d['floats']]: if f is not None: self.assertRaises(ValueError, self.check_values, f, 'iat') # at - _check(d['ints'], 'at') - _check(d['labels'], 'at') - _check(d['ts'], 'at') - _check(d['floats'], 'at') + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') def test_at_and_iat_set(self): def _check(f, func, values=False): @@ -334,16 +344,18 @@ def _check(f, func, values=False): d = getattr(self, t) - _check(d['ints'], 'iat', values=True) + # iat + for f in [d['ints'], d['uints']]: + _check(f, 'iat', values=True) + for f in [d['labels'], d['ts'], d['floats']]: if f is not None: self.assertRaises(ValueError, _check, f, 'iat') # at - _check(d['ints'], 'at') - _check(d['labels'], 'at') - _check(d['ts'], 'at') - _check(d['floats'], 'at') + for f in [d['ints'], d['uints'], d['labels'], + d['ts'], d['floats']]: + _check(f, 'at') def test_at_iat_coercion(self): @@ -508,7 +520,7 @@ def test_iloc_getitem_int(self): # integer self.check_result('integer', 'iloc', 2, 'ix', - {0: 4, 1: 6, 2: 8}, typs=['ints']) + {0: 4, 1: 6, 2: 8}, typs=['ints', 'uints']) self.check_result('integer', 'iloc', 2, 'indexer', 2, typs=['labels', 'mixed', 'ts', 'floats', 'empty'], fails=IndexError) @@ -517,7 +529,7 @@ def test_iloc_getitem_neg_int(self): # neg integer self.check_result('neg int', 'iloc', -1, 'ix', - {0: 6, 1: 9, 2: 12}, typs=['ints']) + {0: 6, 1: 9, 2: 12}, typs=['ints', 'uints']) self.check_result('neg int', 'iloc', -1, 'indexer', -1, typs=['labels', 'mixed', 'ts', 'floats', 'empty'], fails=IndexError) @@ -527,9 +539,9 @@ def test_iloc_getitem_list_int(self): # list of ints self.check_result('list int', 'iloc', [0, 1, 2], 'ix', {0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]}, - typs=['ints']) + typs=['ints', 'uints']) self.check_result('list int', 'iloc', [2], 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints']) + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2], typs=['labels', 'mixed', 'ts', 'floats', 'empty'], fails=IndexError) @@ -539,9 +551,9 @@ def test_iloc_getitem_list_int(self): self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', {0: [0, 2, 4], 1: [0, 3, 6], - 2: [0, 4, 8]}, typs=['ints']) + 2: [0, 4, 8]}, typs=['ints', 'uints']) self.check_result('array int', 'iloc', np.array([2]), 'ix', - {0: [4], 1: [6], 2: [8]}, typs=['ints']) + {0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints']) self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', [0, 1, 2], typs=['labels', 'mixed', 'ts', 'floats', 'empty'], @@ -579,7 +591,7 @@ def test_iloc_getitem_dups(self): # no dups in panel (bug?) self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', {0: [0, 2, 2, 6], 1: [0, 3, 3, 9]}, - objs=['series', 'frame'], typs=['ints']) + objs=['series', 'frame'], typs=['ints', 'uints']) # GH 6766 df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) @@ -601,13 +613,13 @@ def test_iloc_getitem_array(self): s = Series(index=lrange(1, 4)) self.check_result('array like', 'iloc', s.index, 'ix', {0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]}, - typs=['ints']) + typs=['ints', 'uints']) def test_iloc_getitem_bool(self): # boolean indexers b = [True, False, True, False, ] - self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints']) + self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints']) self.check_result('bool', 'iloc', b, 'ix', b, typs=['labels', 'mixed', 'ts', 'floats', 'empty'], fails=IndexError) @@ -617,7 +629,7 @@ def test_iloc_getitem_slice(self): # slices self.check_result('slice', 'iloc', slice(1, 3), 'ix', {0: [2, 4], 1: [3, 6], 2: [4, 8]}, - typs=['ints']) + typs=['ints', 'uints']) self.check_result('slice', 'iloc', slice(1, 3), 'indexer', slice(1, 3), typs=['labels', 'mixed', 'ts', 'floats', 'empty'], @@ -1124,14 +1136,14 @@ def check(result, expected): def test_loc_getitem_int(self): # int label - self.check_result('int label', 'loc', 2, 'ix', 2, typs=['ints'], - axes=0) - self.check_result('int label', 'loc', 3, 'ix', 3, typs=['ints'], - axes=1) - self.check_result('int label', 'loc', 4, 'ix', 4, typs=['ints'], - axes=2) - self.check_result('int label', 'loc', 2, 'ix', 2, typs=['label'], - fails=KeyError) + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['ints', 'uints'], axes=0) + self.check_result('int label', 'loc', 3, 'ix', 3, + typs=['ints', 'uints'], axes=1) + self.check_result('int label', 'loc', 4, 'ix', 4, + typs=['ints', 'uints'], axes=2) + self.check_result('int label', 'loc', 2, 'ix', 2, + typs=['label'], fails=KeyError) def test_loc_getitem_label(self): @@ -1150,12 +1162,12 @@ def test_loc_getitem_label_out_of_range(self): # out of range label self.check_result('label range', 'loc', 'f', 'ix', 'f', - typs=['ints', 'labels', 'mixed', 'ts'], + typs=['ints', 'uints', 'labels', 'mixed', 'ts'], fails=KeyError) self.check_result('label range', 'loc', 'f', 'ix', 'f', typs=['floats'], fails=TypeError) self.check_result('label range', 'loc', 20, 'ix', 20, - typs=['ints', 'mixed'], fails=KeyError) + typs=['ints', 'uints', 'mixed'], fails=KeyError) self.check_result('label range', 'loc', 20, 'ix', 20, typs=['labels'], fails=TypeError) self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], @@ -1167,11 +1179,11 @@ def test_loc_getitem_label_list(self): # list of labels self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], - typs=['ints'], axes=0) + typs=['ints', 'uints'], axes=0) self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], - typs=['ints'], axes=1) + typs=['ints', 'uints'], axes=1) self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], - typs=['ints'], axes=2) + typs=['ints', 'uints'], axes=2) self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', ['a', 'b', 'd'], typs=['labels'], axes=0) self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', @@ -1188,27 +1200,27 @@ def test_loc_getitem_label_list(self): self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], typs=['empty'], fails=KeyError) self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], - typs=['ints'], axes=0, fails=KeyError) + typs=['ints', 'uints'], axes=0, fails=KeyError) self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], - typs=['ints'], axes=1, fails=KeyError) + typs=['ints', 'uints'], axes=1, fails=KeyError) self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], - typs=['ints'], axes=2, fails=KeyError) + typs=['ints', 'uints'], axes=2, fails=KeyError) def test_loc_getitem_label_list_fails(self): # fails self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints'], axes=1, fails=KeyError) + typs=['ints', 'uints'], axes=1, fails=KeyError) self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], - typs=['ints'], axes=2, fails=KeyError) + typs=['ints', 'uints'], axes=2, fails=KeyError) def test_loc_getitem_label_array_like(self): # array like self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, - 'ix', [0, 2, 4], typs=['ints'], axes=0) + 'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0) self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, - 'ix', [3, 6, 9], typs=['ints'], axes=1) + 'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1) self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, - 'ix', [4, 8, 12], typs=['ints'], axes=2) + 'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2) def test_loc_getitem_series(self): # GH14730 @@ -1236,7 +1248,8 @@ def test_loc_getitem_bool(self): # boolean indexers b = [True, False, True, False] self.check_result('bool', 'loc', b, 'ix', b, - typs=['ints', 'labels', 'mixed', 'ts', 'floats']) + typs=['ints', 'uints', 'labels', + 'mixed', 'ts', 'floats']) self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], fails=KeyError) @@ -1244,11 +1257,11 @@ def test_loc_getitem_int_slice(self): # ok self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], - typs=['ints'], axes=0) + typs=['ints', 'uints'], axes=0) self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], - typs=['ints'], axes=1) + typs=['ints', 'uints'], axes=1) self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], - typs=['ints'], axes=2) + typs=['ints', 'uints'], axes=2) # GH 3053 # loc should treat integer slices like label slices diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py index 89913de6f6069..28600687e8062 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/types/test_generic.py @@ -24,6 +24,7 @@ class TestABCClasses(tm.TestCase): def test_abc_types(self): self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex) self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index) + self.assertIsInstance(pd.UInt64Index([1, 2, 3]), gt.ABCUInt64Index) self.assertIsInstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index) self.assertIsInstance(self.multi_index, gt.ABCMultiIndex) self.assertIsInstance(self.datetime_index, gt.ABCDatetimeIndex) diff --git a/pandas/types/generic.py b/pandas/types/generic.py index 0d576eed43d45..86d266f4595e2 100644 --- a/pandas/types/generic.py +++ b/pandas/types/generic.py @@ -16,6 +16,8 @@ def _check(cls, inst): ABCIndex = create_pandas_abc_type("ABCIndex", "_typ", ("index", )) ABCInt64Index = create_pandas_abc_type("ABCInt64Index", "_typ", ("int64index", )) +ABCUInt64Index = create_pandas_abc_type("ABCUInt64Index", "_typ", + ("uint64index", )) ABCRangeIndex = create_pandas_abc_type("ABCRangeIndex", "_typ", ("rangeindex", )) ABCFloat64Index = create_pandas_abc_type("ABCFloat64Index", "_typ", @@ -32,7 +34,7 @@ def _check(cls, inst): ("categoricalindex", )) ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", ("index", "int64index", "rangeindex", - "float64index", + "float64index", "uint64index", "multiindex", "datetimeindex", "timedeltaindex", "periodindex", "categoricalindex")) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d96f57f2810e3..d39ce7acf0029 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1573,6 +1573,10 @@ def makeIntIndex(k=10, name=None): return Index(lrange(k), name=name) +def makeUIntIndex(k=10, name=None): + return Index([2**63 + i for i in lrange(k)], name=name) + + def makeRangeIndex(k=10, name=None): return RangeIndex(0, k, 1, name=name) diff --git a/setup.py b/setup.py index 0a84cf527bfb1..a53464f8f7987 100755 --- a/setup.py +++ b/setup.py @@ -490,7 +490,8 @@ def pxd(name): index={'pyxfile': 'index', 'sources': ['pandas/src/datetime/np_datetime.c', 'pandas/src/datetime/np_datetime_strings.c'], - 'pxdfiles': ['src/util']}, + 'pxdfiles': ['src/util'], + 'depends': _pxi_dep['index']}, algos={'pyxfile': 'algos', 'pxdfiles': ['src/util'], 'depends': _pxi_dep['algos']},