Skip to content

Commit ef878f0

Browse files
committed
ENH: start building RangeIndex to avoid creating so many np.arange(N) indexes
1 parent 023b1d4 commit ef878f0

File tree

4 files changed

+293
-12
lines changed

4 files changed

+293
-12
lines changed

pandas/core/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -525,8 +525,8 @@ def _is_bool_indexer(key):
525525
return False
526526

527527
def _default_index(n):
528-
from pandas.core.index import Index
529-
return Index(np.arange(n))
528+
from pandas.core.index import RangeIndex
529+
return RangeIndex(n)
530530

531531
def ensure_float(arr):
532532
if issubclass(arr.dtype.type, np.integer):

pandas/core/frame.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
401401
conv_data, columns = _to_sdict(data, columns)
402402
if isinstance(conv_data, dict):
403403
if len(conv_data) == 0 and index is None:
404-
index = np.arange(len(data))
404+
index = _default_index(len(data))
405405
mgr = self._init_dict(conv_data, index, columns,
406406
dtype=dtype)
407407
else:
@@ -876,9 +876,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
876876
result_index = index
877877
elif isinstance(data, dict) and len(data) > 0:
878878
# utilize first element of sdict to get length
879-
result_index = np.arange(len(data.values()[0]))
879+
result_index = _default_index(len(data.values()[0]))
880880
else:
881-
result_index = np.arange(len(data))
881+
result_index = _default_index(len(data))
882882

883883
return cls(sdict, index=result_index, columns=columns)
884884

@@ -2505,7 +2505,7 @@ def _maybe_cast(values):
25052505
values = lib.maybe_convert_objects(values)
25062506
return values
25072507

2508-
new_index = np.arange(len(new_obj))
2508+
new_index = _default_index(len(new_obj))
25092509
if isinstance(self.index, MultiIndex):
25102510
if level is not None:
25112511
if not isinstance(level, (tuple, list)):
@@ -4910,7 +4910,7 @@ def extract_index(data):
49104910
% (lengths[0], len(index)))
49114911
raise ValueError(msg)
49124912
else:
4913-
index = Index(np.arange(lengths[0]))
4913+
index = _default_index(lengths[0])
49144914

49154915
return _ensure_index(index)
49164916

pandas/core/index.py

+135-3
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ def delete(self, loc):
11101110
new_index : Index
11111111
"""
11121112
arr = np.delete(np.asarray(self), loc)
1113-
return Index(arr)
1113+
return Index(arr, name=self.name)
11141114

11151115
def insert(self, loc, item):
11161116
"""
@@ -1151,7 +1151,11 @@ def drop(self, labels):
11511151
return self.delete(indexer)
11521152

11531153

1154-
class Int64Index(Index):
1154+
class IntIndex(object):
    """
    Empty base class for integer indexes; it defines no attributes or
    methods of its own.
    """
1156+
1157+
1158+
class Int64Index(Index, IntIndex):
11551159

11561160
_groupby = _algos.groupby_int64
11571161
_arrmap = _algos.arrmap_int64
@@ -1219,7 +1223,10 @@ def equals(self, other):
12191223
# return False
12201224

12211225
try:
1222-
return np.array_equal(self, other)
1226+
if isinstance(other, Index):
1227+
other = other.values
1228+
1229+
return np.array_equal(self.values, other)
12231230
except TypeError:
12241231
# e.g. fails in numpy 1.6 with DatetimeIndex #1681
12251232
return False
@@ -1229,6 +1236,131 @@ def _wrap_joined_index(self, joined, other):
12291236
return Int64Index(joined, name=name)
12301237

12311238

1239+
class RangeIndex(Int64Index):
    """
    Compactly represents regular integer range (instead of generating a full
    array of integers)

    Only ``start``, ``stop``, ``step`` and ``name`` are stored on the
    instance; the integer labels are generated on demand by ``values``.

    Parameters
    ----------
    start : int, optional
        First label of the range. If ``stop`` is not given, this value is
        used as ``stop`` and the range starts at 0 (like ``range(n)``).
    stop : int, optional
        Exclusive end of the range.
    step : int, default 1
        Distance between consecutive labels; may be negative, never zero.
    name : object, optional
        Name stored on the index.
    """

    def __new__(cls, start=None, stop=None, step=None, name=None):
        if stop is None:
            # Single-bound form: RangeIndex(n) / RangeIndex(n, step=k).
            # With no bounds at all the range is empty.
            stop = 0 if start is None else start
            start = 0
        elif start is None:
            start = 0

        if step is None:
            step = 1
        if step == 0:
            raise ValueError('RangeIndex step must not be zero')

        # The underlying buffer is deliberately empty: the range is fully
        # described by the three attributes set below.
        result = np.array([], dtype=np.int_).view(cls)
        result.start = start
        result.stop = stop
        result.step = step
        result.name = name

        return result

    def __array_finalize__(self, obj):
        if not isinstance(obj, type(self)):
            # Only relevant if array being created from an Index instance
            return

        self.start = getattr(obj, 'start', None)
        self.stop = getattr(obj, 'stop', None)
        self.step = getattr(obj, 'step', None)
        self.name = getattr(obj, 'name', None)

    @property
    def values(self):
        """Materialize the labels as an ndarray via ``np.arange``."""
        return np.arange(self.start, self.stop, self.step)

    def __len__(self):
        # Same counting rule as the builtin range(): measure the distance
        # in the direction of travel and never report a negative length.
        if self.step > 0:
            span, stride = self.stop - self.start, self.step
        else:
            span, stride = self.start - self.stop, -self.step

        if span <= 0:
            return 0
        return (span - 1) // stride + 1

    def __repr__(self):
        if self.step != 1:
            return 'RangeIndex(%d, %d, step=%d)' % (self.start, self.stop,
                                                    self.step)
        else:
            return 'RangeIndex(%d, %d)' % (self.start, self.stop)

    def __getslice__(self, i, j):
        # Python 2 simple-slice hook; share the __getitem__ slice logic.
        return self.__getitem__(slice(i, j))

    def __getitem__(self, key):
        """
        Positional lookup.

        Parameters
        ----------
        key : int, slice, list or ndarray
            An integer returns a single label, a slice returns a new
            RangeIndex, a boolean mask returns an Int64Index and any other
            list/array of positions returns an ndarray of labels.

        Raises
        ------
        IndexError
            If an integer position is out of bounds.
        TypeError
            If ``key`` is of an unsupported type.
        """
        if com.is_integer(key):
            n = len(self)
            # Negative positions count from the last *actual* element, which
            # is not stop - step when stop is not aligned to the step.
            pos = key + n if key < 0 else key
            if pos < 0 or pos >= n:
                raise IndexError('index %d is out of bounds for size %d'
                                 % (key, n))
            return self.start + self.step * pos
        elif isinstance(key, slice):
            # slice.indices resolves None / negative / out-of-range bounds
            # into positions, for forward and backward steps alike.
            kstart, kstop, kstep = key.indices(len(self))
            return RangeIndex(self.start + kstart * self.step,
                              self.start + kstop * self.step,
                              self.step * kstep, name=self.name)
        elif isinstance(key, (list, np.ndarray)):
            key = np.asarray(key)

            if key.dtype == np.bool_:
                return Int64Index(self.values[key], name=self.name)

            if key.size == 0:
                # np.asarray([]) is float; keep the result integer-typed
                key = key.astype(np.int_)

            n = len(self)
            pos = np.where(key < 0, key + n, key)
            if ((pos < 0) | (pos >= n)).any():
                raise IndexError('index out of bounds for size %d' % n)
            return self.start + self.step * pos
        else:  # pragma: no cover
            raise TypeError('Unhandled getitem type: %s' % type(key))

    def insert(self, loc, item):
        # The result is generally no longer a regular range, so go through
        # a materialized Int64Index.
        return Int64Index(self.values, name=self.name).insert(loc, item)

    def delete(self, loc):
        # See insert: deletion generally breaks the regular spacing.
        return Int64Index(self.values, name=self.name).delete(loc)

    def take(self, key):
        # XXX provisional: simply defers to __getitem__, so a list/array of
        # positions yields an ndarray of labels rather than an Index.
        return self[key]

    def equals(self, other):
        """
        Returns True if the indexes are equivalent

        Two RangeIndex objects are compared by the labels they generate
        (without materializing them); anything else is compared against
        the materialized values.

        Parameters
        ----------
        other : Index

        Returns
        -------
        is_equal : True or False
        """
        if isinstance(other, RangeIndex):
            n = len(self)
            if n != len(other):
                return False
            if n == 0:
                return True
            if self.start != other.start:
                return False
            # With a single element the step does not affect the labels.
            return n == 1 or self.step == other.step

        try:
            if isinstance(other, Index):
                other = other.values
            return np.array_equal(self.values, other)
        except TypeError:
            # mirror Int64Index.equals: incomparable inputs are not equal
            return False

    @property
    def _constructor(self):
        return RangeIndex
12321364

12331365

12341366
class MultiIndex(Index):

pandas/tests/test_index.py

+151-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
import numpy as np
1111
from numpy.testing import assert_array_equal
1212

13-
from pandas.core.categorical import Factor
14-
from pandas.core.index import Index, Int64Index, MultiIndex
13+
from pandas.core.index import Index, Int64Index, MultiIndex, RangeIndex
1514
from pandas.util.testing import assert_almost_equal
1615
from pandas.util import py3compat
1716
import pandas.core.common as com
@@ -848,6 +847,156 @@ def test_int_name_format(self):
848847
repr(s)
849848
repr(df)
850849

850+
851+
class TestRangeIndex(unittest.TestCase):
    """Checks for RangeIndex: repr, length, values, indexing and equality."""

    def test_constructor(self):
        # placeholder, nothing asserted yet
        pass

    def test_repr(self):
        # A step of 1 is left out of the repr; any other step is printed.
        cases = [((0, 20, 1), 'RangeIndex(0, 20)'),
                 ((0, 20, 3), 'RangeIndex(0, 20, step=3)')]
        for args, expected in cases:
            self.assertEqual(repr(RangeIndex(*args)), expected)

    def test_len(self):
        # Length must agree with the builtin range for every stop value.
        for stop in range(20):
            expected = len(list(range(0, stop, 3)))
            self.assertEqual(len(RangeIndex(0, stop, 3)), expected)

    def test_values(self):
        assert_almost_equal(RangeIndex(1000).values, np.arange(1000))

    def test_getitem(self):
        index = RangeIndex(0, 20, 2)
        for position, label in [(4, 8), (-1, 18)]:
            self.assertEqual(index[position], label)

    def test_fancy_index(self):
        picked = RangeIndex(0, 20, 2)[[4, 1, 3, -1]]
        assert_almost_equal(picked, np.array([8, 2, 6, 18]))

    def test_boolean_index(self):
        index = RangeIndex(0, 20, 2)

        # True at the odd positions, False at the even ones
        mask = np.arange(len(index)) % 2 == 1

        assert_almost_equal(index[mask], index.values[mask])

    def test_take(self):
        taken = RangeIndex(0, 20, 2).take([4, 1, 3, -1])
        assert_almost_equal(taken, np.array([8, 2, 6, 18]))

    def test_slice(self):
        # Literal slice syntax is used throughout so that each case goes
        # through whichever slicing hook the interpreter dispatches to.
        index = RangeIndex(5, 20, name='foo')

        tail = index[2:]
        self.assertTrue(tail.equals(RangeIndex(7, 20)))
        self.assertTrue(tail.name == 'foo')

        pairs = [(index[:7], RangeIndex(5, 12)),
                 (index[3:10], RangeIndex(8, 15)),
                 (index[-5:], RangeIndex(15, 20)),
                 (index[-5:-2], RangeIndex(15, 18)),
                 (index[2::2], RangeIndex(7, 20, 2))]
        for actual, expected in pairs:
            self.assertTrue(actual.equals(expected))

        # reversing twice must round-trip
        flipped = index[::-1]
        self.assertTrue(flipped.equals(RangeIndex(19, 4, -1)))
        self.assertTrue(flipped[::-1].equals(index))

        # with a step
        index = RangeIndex(5, 41, 2)
        pairs = [(index[2:], RangeIndex(9, 41, 2)),
                 (index[:7], RangeIndex(5, 19, 2)),
                 (index[3:10], RangeIndex(11, 25, 2)),
                 (index[-5:], RangeIndex(31, 41, 2)),
                 (index[-5:-2], RangeIndex(31, 37, 2))]
        for actual, expected in pairs:
            self.assertTrue(actual.equals(expected))

        flipped = index[::-1]
        self.assertTrue(flipped.equals(RangeIndex(39, 3, -2)))
        self.assertTrue(flipped[::-1].equals(index))

        # corner case
        single = RangeIndex(0, 1)
        self.assertTrue(single[:0].equals(RangeIndex(0, 0)))

    def test_insert(self):
        inserted = RangeIndex(5, name='bar').insert(2, 'foo')

        self.assertTrue(inserted.equals(Index([0, 1, 'foo', 2, 3, 4])))
        self.assertEqual(inserted.name, 'bar')

    def test_delete(self):
        remaining = RangeIndex(5, name='bar').delete(2)

        self.assertTrue(remaining.equals(Index([0, 1, 3, 4])))
        self.assertEqual(remaining.name, 'bar')

    def test_union(self):
        # placeholder, nothing asserted yet
        pass

    def test_intersection(self):
        # placeholder, nothing asserted yet
        pass

    def test_pickle(self):
        # placeholder, nothing asserted yet
        pass

    def test_equals(self):
        base = RangeIndex(0, 20)

        self.assertTrue(base.equals(RangeIndex(0, 20)))
        for other in (RangeIndex(0, 10), RangeIndex(0, 20, 2)):
            self.assertFalse(base.equals(other))

        stepped = RangeIndex(0, 20, 2)
        self.assertTrue(stepped.equals(RangeIndex(0, 20, 2)))
999+
8511000
class TestMultiIndex(unittest.TestCase):
8521001

8531002
def setUp(self):

0 commit comments

Comments
 (0)