Skip to content

Commit 11d08f7

Browse files
committed
ENH: improve column reindexing performance in DataFrame by using Cython take
1 parent 0552521 commit 11d08f7

File tree

2 files changed: +25 −20 lines changed

pandas/core/internals.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from pandas.core.index import Index, _ensure_index
77
from pandas.util.decorators import cache_readonly
8-
import pandas.core.common as common
8+
import pandas.core.common as com
99
import pandas._tseries as lib
1010

1111
class Block(object):
@@ -107,8 +107,8 @@ def reindex_axis(self, indexer, mask, needs_masking, axis=0):
107107
Reindex using pre-computed indexer information
108108
"""
109109
if self.values.size > 0:
110-
new_values = common.take_fast(self.values, indexer, mask,
111-
needs_masking, axis=axis)
110+
new_values = com.take_fast(self.values, indexer, mask,
111+
needs_masking, axis=axis)
112112
else:
113113
shape = list(self.shape)
114114
shape[axis] = len(indexer)
@@ -130,7 +130,12 @@ def reindex_items_from(self, new_ref_items):
130130
new_ref_items, indexer = self.items.reindex(new_ref_items)
131131
mask = indexer != -1
132132
masked_idx = indexer[mask]
133-
new_values = self.values.take(masked_idx, axis=0)
133+
134+
if self.values.ndim == 2:
135+
new_values = com.take_2d(self.values, masked_idx, axis=0)
136+
else:
137+
new_values = self.values.take(masked_idx, axis=0)
138+
134139
new_items = self.items.take(masked_idx)
135140
return make_block(new_values, new_items, new_ref_items)
136141

@@ -196,7 +201,7 @@ def split_block_at(self, item):
196201

197202
def fillna(self, value):
198203
new_values = self.values.copy()
199-
mask = common.isnull(new_values.ravel())
204+
mask = com.isnull(new_values.ravel())
200205
new_values.flat[mask] = value
201206
return make_block(new_values, self.items, self.ref_items)
202207

@@ -670,8 +675,8 @@ def reindex_indexer(self, new_axis, indexer, axis=1):
670675
new_axes[axis] = new_axis
671676
new_blocks = []
672677
for blk in self.blocks:
673-
new_values = common.take_fast(blk.values, indexer, None,
674-
False, axis=axis)
678+
new_values = com.take_fast(blk.values, indexer, None,
679+
False, axis=axis)
675680
newb = make_block(new_values, blk.items, self.items)
676681
new_blocks.append(newb)
677682

@@ -729,8 +734,8 @@ def take(self, indexer, axis=1):
729734
new_axes[axis] = self.axes[axis].take(indexer)
730735
new_blocks = []
731736
for blk in self.blocks:
732-
new_values = common.take_fast(blk.values, indexer,
733-
None, False, axis=axis)
737+
new_values = com.take_fast(blk.values, indexer,
738+
None, False, axis=axis)
734739
newb = make_block(new_values, blk.items, self.items)
735740
new_blocks.append(newb)
736741

@@ -1191,21 +1196,21 @@ def _merge_blocks(self, lblk, rblk):
11911196
# is this really faster than assigning to arr.flat?
11921197
if lidx is None:
11931198
# out[:lk] = lblk.values
1194-
common.take_fast(lblk.values, np.arange(n, dtype='i4'),
1195-
None, False,
1196-
axis=self.axis, out=out[:lk])
1199+
com.take_fast(lblk.values, np.arange(n, dtype='i4'),
1200+
None, False,
1201+
axis=self.axis, out=out[:lk])
11971202
else:
11981203
# write out the values to the result array
1199-
common.take_fast(lblk.values, lidx, None, False,
1204+
com.take_fast(lblk.values, lidx, None, False,
12001205
axis=self.axis, out=out[:lk])
12011206
if ridx is None:
12021207
# out[lk:] = rblk.values
1203-
common.take_fast(rblk.values, np.arange(n, dtype='i4'),
1204-
None, False,
1205-
axis=self.axis, out=out[lk:])
1208+
com.take_fast(rblk.values, np.arange(n, dtype='i4'),
1209+
None, False,
1210+
axis=self.axis, out=out[lk:])
12061211
else:
1207-
common.take_fast(rblk.values, ridx, None, False,
1208-
axis=self.axis, out=out[lk:])
1212+
com.take_fast(rblk.values, ridx, None, False,
1213+
axis=self.axis, out=out[lk:])
12091214

12101215
# does not sort
12111216
new_items = lblk.items.append(rblk.items)

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,8 @@
129129

130130
MAJOR = 0
131131
MINOR = 6
132-
MICRO = 1
133-
ISRELEASED = True
132+
MICRO = 2
133+
ISRELEASED = False
134134
VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
135135
QUALIFIER = ''
136136

0 commit comments

Comments
 (0)