Skip to content

Commit dbfdb07

Browse files
committed
BUG: fix issue calling sort on result of Series.unique, close #1807
1 parent d67f9d6 commit dbfdb07

File tree

5 files changed: +62 additions, -15 deletions (lines changed)

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ pandas 0.8.2
9797
- Fix conversion of array of tz-aware datetime.datetime to DatetimeIndex with
9898
right time zone (#1777)
9999
- Fix DST issues with generating anchored date ranges (#1778)
100+
- Fix issue calling sort on result of Series.unique (#1807)
100101

101102
pandas 0.8.1
102103
============

pandas/src/hashtable.pyx

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,44 +47,73 @@ cdef extern from "kvec.h":
4747
cdef class ObjectVector:
4848

4949
cdef:
50+
bint owndata
5051
kv_object_t vec
5152

52-
def __array__(self):
53+
def __cinit__(self):
54+
self.owndata = 1
55+
56+
def to_array(self, xfer_data=True):
5357
""" Here we use the __array__ method, that is called when numpy
5458
tries to get an array from the object."""
55-
cdef npy_intp shape[1]
59+
cdef:
60+
npy_intp shape[1]
61+
ndarray result
62+
5663
shape[0] = <npy_intp> self.vec.n
5764

5865
# Create a 1D array, of length 'size'
59-
return PyArray_SimpleNewFromData(1, shape, np.NPY_OBJECT, self.vec.a)
66+
result = PyArray_SimpleNewFromData(1, shape,
67+
np.NPY_OBJECT, self.vec.a)
68+
if xfer_data:
69+
self.owndata = 0
70+
util.set_array_owndata(result)
71+
72+
return result
73+
6074

6175
cdef inline append(self, object o):
6276
kv_object_push(&self.vec, <PyObject*> o)
6377

6478
def __dealloc__(self):
65-
kv_object_destroy(&self.vec)
79+
if self.owndata:
80+
kv_object_destroy(&self.vec)
6681

6782

6883
cdef class Int64Vector:
6984

7085
cdef:
86+
bint owndata
7187
kv_int64_t vec
7288

73-
def __array__(self):
89+
def __cinit__(self):
90+
self.owndata = 1
91+
92+
def to_array(self, xfer_data=True):
7493
""" Here we use the __array__ method, that is called when numpy
7594
tries to get an array from the object."""
76-
cdef npy_intp shape[1]
95+
cdef:
96+
npy_intp shape[1]
97+
ndarray result
98+
7799
shape[0] = <npy_intp> self.vec.n
78100

79101
# Create a 1D array, of length 'size'
80-
return PyArray_SimpleNewFromData(1, shape, np.NPY_INT64,
81-
self.vec.a)
102+
result = PyArray_SimpleNewFromData(1, shape, np.NPY_INT64,
103+
self.vec.a)
104+
105+
if xfer_data:
106+
self.owndata = 0
107+
util.set_array_owndata(result)
108+
109+
return result
82110

83111
cdef inline append(self, int64_t x):
84112
kv_int64_push(&self.vec, x)
85113

86114
def __dealloc__(self):
87-
free(self.vec.a)
115+
if self.owndata:
116+
free(self.vec.a)
88117

89118

90119
cdef class HashTable:
@@ -522,9 +551,11 @@ cdef class Int64HashTable(HashTable):
522551
uniques.append(val)
523552
count += 1
524553

525-
result = np.array(uniques, copy=False)
526-
result.base = <PyObject*> uniques
527-
Py_INCREF(uniques)
554+
result = uniques.to_array(xfer_data=True)
555+
556+
# result = np.array(uniques, copy=False)
557+
# result.base = <PyObject*> uniques
558+
# Py_INCREF(uniques)
528559

529560
return result
530561

@@ -797,9 +828,11 @@ cdef class PyObjectHashTable(HashTable):
797828
seen_na = 1
798829
uniques.append(ONAN)
799830

800-
result = np.array(uniques, copy=False)
801-
result.base = <PyObject*> uniques
802-
Py_INCREF(uniques)
831+
result = uniques.to_array(xfer_data=True)
832+
833+
# result = np.array(uniques, copy=False)
834+
# result.base = <PyObject*> uniques
835+
# Py_INCREF(uniques)
803836

804837
return result
805838

pandas/src/numpy_helper.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ PANDAS_INLINE PyObject* floatify(PyObject* str) {
144144
}
145145

146146

147+
void set_array_owndata(PyArrayObject *ao) {
148+
ao->flags |= NPY_OWNDATA;
149+
}
150+
151+
147152
// PANDAS_INLINE PyObject*
148153
// get_base_ndarray(PyObject* ap) {
149154
// // if (!ap || (NULL == ap)) {

pandas/src/util.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ from numpy cimport ndarray
22
cimport numpy as cnp
33

44
cdef extern from "numpy_helper.h":
5+
inline void set_array_owndata(ndarray ao)
6+
57
inline int is_integer_object(object)
68
inline int is_float_object(object)
79
inline int is_complex_object(object)

pandas/tests/test_series.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3081,6 +3081,7 @@ def test_replace(self):
30813081
self.assert_((ser[:5] == -1).all())
30823082
self.assert_((ser[6:10] == -1).all())
30833083
self.assert_((ser[20:30] == -1).all())
3084+
30843085
def test_repeat(self):
30853086
s = Series(np.random.randn(3), index=['a', 'b', 'c'])
30863087

@@ -3094,6 +3095,11 @@ def test_repeat(self):
30943095
index=s.index.values.repeat(to_rep))
30953096
assert_series_equal(reps, exp)
30963097

3098+
def test_unique_data_ownership(self):
3099+
# it works! #1807
3100+
Series(Series(["a","c","b"]).unique()).sort()
3101+
3102+
30973103
if __name__ == '__main__':
30983104
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
30993105
exit=False)

0 commit comments

Comments (0)