Skip to content

Commit d9b9a02

Browse files
ARF, jreback
ARF
authored and committed
Introduction of RangeIndex
`RangeIndex(1, 10, 2)` is a memory-saving alternative to `Index(np.arange(1, 10, 2))`; cf. #939.

This re-implementation is compatible with the current `Index()` API and is a drop-in replacement for `Int64Index()`. It automatically converts to `Int64Index()` when required by operations.

At present the type is preserved for only a small number of operations (e.g. slicing, inner-, left- and right-joins). Most other operations trigger creation of an equivalent Int64Index (or at least an equivalent numpy array) and fall back to its implementation.

This PR also extends the functionality of the `Index()` constructor to allow creation of `RangeIndexes()` with

```
Index(20)
Index(2, 20)
Index(0, 20, 2)
```

in analogy to

```
range(20)
range(2, 20)
range(0, 20, 2)
```

restore Index() fastpath precedence

Various fixes suggested by @jreback and @shoyer

Cache a private Int64Index object the first time it or its values are required.
Restore Index(5) as error. Restore its test. Allow Index(0, 5) and Index(0, 5, 1).
Make RangeIndex immutable. See start, stop, step properties.
In test_constructor(): check class, attributes (possibly including dtype).
In test_copy(): check that the copy is not identical (but equal) to the original.
In test_duplicates(): assert is_unique and has_duplicates return correct values.

fix slicing

fix view

Set RangeIndex as default index
* enh: set RangeIndex as default index
* fix: pandas.io.packers: encode() and decode() for RangeIndex
* enh: array argument pass-through
* fix: reindex
* fix: use _default_index() in pandas.core.frame.extract_index()
* fix: pandas.core.index.Index._is()
* fix: add RangeIndex to ABCIndexClass
* fix: use _default_index() in _get_names_from_index()
* fix: pytables tests
* fix: MultiIndex.get_level_values()
* fix: RangeIndex._shallow_copy()
* fix: null-size RangeIndex equals() comparison
* enh: make RangeIndex.is_unique immutable

enh: various performance optimizations
* optimize argsort()
* optimize tolist()
* comment clean-up
1 parent 449ab6b commit d9b9a02

File tree

7 files changed

+1070
-26
lines changed

7 files changed

+1070
-26
lines changed

pandas/core/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.core.categorical import Categorical
99
from pandas.core.groupby import Grouper
1010
from pandas.core.format import set_eng_float_format
11-
from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex
11+
from pandas.core.index import Index, CategoricalIndex, Int64Index, RangeIndex, Float64Index, MultiIndex
1212

1313
from pandas.core.series import Series, TimeSeries
1414
from pandas.core.frame import DataFrame

pandas/core/common.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def _check(cls, inst):
8585
ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",))
8686
ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", ("index",
8787
"int64index",
88+
"rangeindex",
8889
"float64index",
8990
"multiindex",
9091
"datetimeindex",
@@ -1756,10 +1757,8 @@ def is_bool_indexer(key):
17561757

17571758

17581759
def _default_index(n):
1759-
from pandas.core.index import Int64Index
1760-
values = np.arange(n, dtype=np.int64)
1761-
result = Int64Index(values,name=None)
1762-
result.is_unique = True
1760+
from pandas.core.index import RangeIndex
1761+
result = RangeIndex(0, int(n), name=None)
17631762
return result
17641763

17651764

@@ -2157,6 +2156,11 @@ def is_int64_dtype(arr_or_dtype):
21572156
tipo = _get_dtype_type(arr_or_dtype)
21582157
return issubclass(tipo, np.int64)
21592158

2159+
def is_int64_dtype(arr_or_dtype):
2160+
tipo = _get_dtype_type(arr_or_dtype)
2161+
return issubclass(tipo, np.int64)
2162+
2163+
21602164
def is_int_or_datetime_dtype(arr_or_dtype):
21612165
tipo = _get_dtype_type(arr_or_dtype)
21622166
return (issubclass(tipo, np.integer) or

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -5302,7 +5302,7 @@ def extract_index(data):
53025302
% (lengths[0], len(index)))
53035303
raise ValueError(msg)
53045304
else:
5305-
index = Index(np.arange(lengths[0]))
5305+
index = _default_index(lengths[0])
53065306

53075307
return _ensure_index(index)
53085308

@@ -5519,11 +5519,11 @@ def convert(arr):
55195519

55205520

55215521
def _get_names_from_index(data):
5522-
index = lrange(len(data))
55235522
has_some_name = any([getattr(s, 'name', None) is not None for s in data])
55245523
if not has_some_name:
5525-
return index
5524+
return _default_index(len(data))
55265525

5526+
index = lrange(len(data))
55275527
count = 0
55285528
for i, s in enumerate(data):
55295529
n = getattr(s, 'name', None)

0 commit comments

Comments
 (0)