Skip to content

Commit 2972f37

Browse files
immerrr authored and jreback committed
Force 64-bit integers to comply with cython buffer specs
1 parent 897a53c commit 2972f37

File tree

3 files changed

+34
-28
lines changed

3 files changed

+34
-28
lines changed

pandas/core/internals.py

+15 -11
Original file line number | Diff line number | Diff line change
@@ -2033,11 +2033,11 @@ def get_ftype_counts(self):
20332033

20342034
def get_dtypes(self):
20352035
dtypes = np.array([blk.dtype for blk in self.blocks])
2036-
return dtypes.take(self._blknos)
2036+
return com.take_1d(dtypes, self._blknos, allow_fill=False)
20372037

20382038
def get_ftypes(self):
20392039
ftypes = np.array([blk.ftype for blk in self.blocks])
2040-
return ftypes.take(self._blknos)
2040+
return com.take_1d(ftypes, self._blknos, allow_fill=False)
20412041

20422042
def __getstate__(self):
20432043
block_values = [b.values for b in self.blocks]
@@ -2322,7 +2322,8 @@ def combine(self, blocks, copy=True):
23222322
new_blocks = []
23232323
for b in blocks:
23242324
b = b.copy(deep=copy)
2325-
b.mgr_locs = inv_indexer.take(b.mgr_locs.as_array)
2325+
b.mgr_locs = com.take_1d(inv_indexer, b.mgr_locs.as_array, axis=0,
2326+
allow_fill=False)
23262327
new_blocks.append(b)
23272328

23282329
new_axes = list(self.axes)
@@ -2666,11 +2667,12 @@ def value_getitem(placement):
26662667
is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
26672668
is_deleted[removed_blknos] = True
26682669

2669-
new_blknos = np.empty(self.nblocks, dtype=np.int_)
2670+
new_blknos = np.empty(self.nblocks, dtype=np.int64)
26702671
new_blknos.fill(-1)
26712672
new_blknos[~is_deleted] = np.arange(self.nblocks -
26722673
len(removed_blknos))
2673-
self._blknos = new_blknos.take(self._blknos, axis=0)
2674+
self._blknos = com.take_1d(new_blknos, self._blknos, axis=0,
2675+
allow_fill=False)
26742676
self.blocks = tuple(blk for i, blk in enumerate(self.blocks)
26752677
if i not in set(removed_blknos))
26762678

@@ -3546,19 +3548,19 @@ def _invert_reordering(reordering, minlength=None):
35463548
array([-1, 0, -1, 1, -1, 2])
35473549
35483550
"""
3549-
reordering = np.asanyarray(reordering)
3551+
reordering = np.asanyarray(reordering, dtype=np.int64)
35503552
if not com.is_integer_dtype(reordering):
35513553
raise ValueError("Only integer indexers are supported")
35523554

3553-
nonneg_indices = reordering[reordering >= 0]
3555+
nonneg_indices = reordering[reordering >= 0].astype(np.int_)
35543556
counts = np.bincount(nonneg_indices, minlength=minlength)
35553557
has_non_unique = (counts > 1).any()
35563558

3557-
dtype = np.dtype(np.object_) if has_non_unique else np.dtype(np.int_)
3559+
dtype = np.dtype(np.object_) if has_non_unique else np.dtype(np.int64)
35583560
inverted = np.empty_like(counts, dtype=dtype)
35593561
inverted.fill(-1)
35603562

3561-
nonneg_positions = np.arange(len(reordering), dtype=np.int_)[reordering >= 0]
3563+
nonneg_positions = np.arange(len(reordering), dtype=np.int64)[reordering >= 0]
35623564
np.put(inverted, nonneg_indices, nonneg_positions)
35633565

35643566
if has_non_unique:
@@ -3585,6 +3587,8 @@ def _get_blkno_placements(blknos, blk_count, group=True):
35853587
35863588
"""
35873589

3590+
blknos = com._ensure_int64(blknos)
3591+
35883592
# FIXME: blk_count is unused, but it may avoid the use of dicts in cython
35893593
for blkno, indexer in lib.get_blkno_indexers(blknos, group):
35903594
yield blkno, BlockPlacement(indexer)
@@ -4076,7 +4080,7 @@ def _fast_count_smallints(arr):
40764080
# Handle empty arr case separately: numpy 1.6 chokes on that.
40774081
return np.empty((0, 2), dtype=arr.dtype)
40784082
else:
4079-
counts = np.bincount(arr)
4083+
counts = np.bincount(arr.astype(np.int_))
40804084
nz = counts.nonzero()[0]
40814085
return np.c_[nz, counts[nz]]
40824086

@@ -4089,7 +4093,7 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
40894093
slice_or_indexer.dtype == np.bool_):
40904094
return 'mask', slice_or_indexer, slice_or_indexer.sum()
40914095
else:
4092-
indexer = np.asanyarray(slice_or_indexer, dtype=np.int_)
4096+
indexer = np.asanyarray(slice_or_indexer, dtype=np.int64)
40934097
if not allow_fill:
40944098
indexer = _maybe_convert_indices(indexer, length)
40954099
return 'fancy', indexer, len(indexer)

pandas/lib.pyx

+6 -4
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem,
2020
PyObject_SetAttrString)
2121

2222
cdef extern from "Python.h":
23+
Py_ssize_t PY_SSIZE_T_MAX
24+
2325
ctypedef struct PySliceObject:
2426
pass
2527

@@ -1411,7 +1413,7 @@ cpdef slice_canonize(slice s):
14111413
return slice(start, stop, step)
14121414

14131415

1414-
cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=INT64_MAX):
1416+
cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
14151417
"""
14161418
Get (start, stop, step, length) tuple for a slice.
14171419
@@ -1430,7 +1432,7 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=INT64_MAX):
14301432
return start, stop, step, length
14311433

14321434

1433-
cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=INT64_MAX) except -1:
1435+
cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1:
14341436
"""
14351437
Get length of a bounded slice.
14361438
@@ -1484,7 +1486,7 @@ def slice_getitem(slice slc not None, ind):
14841486
return slice(s_start, s_stop, s_step)
14851487

14861488
else:
1487-
return np.arange(s_start, s_stop, s_step)[ind]
1489+
return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind]
14881490

14891491

14901492
cdef class BlockPlacement:
@@ -1568,7 +1570,7 @@ cdef class BlockPlacement:
15681570
if not self._has_array:
15691571
start, stop, step, _ = slice_get_indices_ex(self._as_slice)
15701572
self._as_array = np.arange(start, stop, step,
1571-
dtype=np.int_)
1573+
dtype=np.int64)
15721574
self._has_array = True
15731575
return self._as_array
15741576

pandas/tests/test_internals.py

+13 -13
Original file line number | Diff line number | Diff line change
@@ -518,27 +518,27 @@ def _compare(old_mgr, new_mgr):
518518
mgr.set('b', np.array(['2.'] * N, dtype=np.object_))
519519
mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_))
520520
new_mgr = mgr.convert(convert_numeric=True)
521-
self.assertEquals(new_mgr.get('a').dtype.type, np.int64)
522-
self.assertEquals(new_mgr.get('b').dtype.type, np.float64)
523-
self.assertEquals(new_mgr.get('foo').dtype.type, np.object_)
524-
self.assertEquals(new_mgr.get('f').dtype.type, np.int64)
525-
self.assertEquals(new_mgr.get('g').dtype.type, np.float64)
521+
self.assertEquals(new_mgr.get('a').dtype, np.int64)
522+
self.assertEquals(new_mgr.get('b').dtype, np.float64)
523+
self.assertEquals(new_mgr.get('foo').dtype, np.object_)
524+
self.assertEquals(new_mgr.get('f').dtype, np.int64)
525+
self.assertEquals(new_mgr.get('g').dtype, np.float64)
526526

527527
mgr = create_mgr('a,b,foo: object; f: i4; bool: bool; dt: datetime;'
528528
'i: i8; g: f8; h: f2')
529529
mgr.set('a', np.array(['1'] * N, dtype=np.object_))
530530
mgr.set('b', np.array(['2.'] * N, dtype=np.object_))
531531
mgr.set('foo', np.array(['foo.'] * N, dtype=np.object_))
532532
new_mgr = mgr.convert(convert_numeric=True)
533-
self.assertEquals(new_mgr.get('a').dtype.type, np.int64)
534-
self.assertEquals(new_mgr.get('b').dtype.type, np.float64)
535-
self.assertEquals(new_mgr.get('foo').dtype.type, np.object_)
536-
self.assertEquals(new_mgr.get('f').dtype.type, np.int32)
537-
self.assertEquals(new_mgr.get('bool').dtype.type, np.bool_)
533+
self.assertEquals(new_mgr.get('a').dtype, np.int64)
534+
self.assertEquals(new_mgr.get('b').dtype, np.float64)
535+
self.assertEquals(new_mgr.get('foo').dtype, np.object_)
536+
self.assertEquals(new_mgr.get('f').dtype, np.int32)
537+
self.assertEquals(new_mgr.get('bool').dtype, np.bool_)
538538
self.assertEquals(new_mgr.get('dt').dtype.type, np.datetime64)
539-
self.assertEquals(new_mgr.get('i').dtype.type, np.int64)
540-
self.assertEquals(new_mgr.get('g').dtype.type, np.float64)
541-
self.assertEquals(new_mgr.get('h').dtype.type, np.float16)
539+
self.assertEquals(new_mgr.get('i').dtype, np.int64)
540+
self.assertEquals(new_mgr.get('g').dtype, np.float64)
541+
self.assertEquals(new_mgr.get('h').dtype, np.float16)
542542

543543
def test_interleave(self):
544544
pass

0 commit comments

Comments
 (0)