Skip to content

Commit 93664ed

Browse files
committed
TST: hacking for talk, dict benchmarks
1 parent 183764d commit 93664ed

File tree

8 files changed

+274
-59
lines changed

8 files changed

+274
-59
lines changed

fake_pyrex/Pyrex/Distutils/__init__.py

-1
This file was deleted.

fake_pyrex/Pyrex/Distutils/build_ext.py

-1
This file was deleted.

fake_pyrex/Pyrex/__init__.py

-1
This file was deleted.

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3483,7 +3483,7 @@ def _bar_plot(self, axes, subplots=False, use_index=True, grid=True,
34833483
if not subplots:
34843484
ax = axes[0]
34853485

3486-
for i, col in enumerate(_try_sort(self.columns)):
3486+
for i, col in enumerate(self.columns):
34873487
empty = self[col].count() == 0
34883488
y = self[col].values if not empty else np.zeros(len(self))
34893489
if subplots:
@@ -3493,7 +3493,7 @@ def _bar_plot(self, axes, subplots=False, use_index=True, grid=True,
34933493
ax.set_title(col)
34943494
else:
34953495
rects.append(ax.bar(xinds + i * 0.5/K, y, 0.5/K,
3496-
bottom=np.zeros(N),
3496+
bottom=np.zeros(N), label=col,
34973497
color=colors[i % len(colors)], **kwds))
34983498
labels.append(col)
34993499

pandas/src/hashtable.pyx

+92-3
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,31 @@ cdef class StringHashTable:
222222
resbuf[i] = -1
223223
return labels
224224

225+
def unique(self, ndarray[object] values):
    """
    Collect the strings of ``values`` that are not yet keys of the table.

    Each element is looked up in ``self.table``; on a miss it is inserted
    as a new key and appended to the result.  Keys that were already in
    the table before the call are therefore not returned.

    Parameters
    ----------
    values : ndarray[object]
        Elements are handed to ``PyString_AsString``.

    Returns
    -------
    list
        The newly inserted values, in order of first occurrence.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        int ret
        object val
        char *buf
        khiter_t k
        list uniques = []

    for i in range(n):
        val = values[i]
        buf = PyString_AsString(val)
        k = kh_get_str(self.table, buf)
        # an iterator equal to n_buckets is this file's test for "missing"
        if k == self.table.n_buckets:
            # The lookup just missed, so this put always creates a new
            # slot; no "key already present" clean-up is required.
            # NOTE(review): the table appears to keep the raw char* taken
            # from val -- presumably val must outlive the table entry;
            # confirm against callers.
            k = kh_put_str(self.table, buf, &ret)
            uniques.append(val)

    return uniques
249+
225250
def factorize(self, ndarray[object] values):
226251
cdef:
227252
Py_ssize_t i, n = len(values)
@@ -476,6 +501,25 @@ cdef class Int64HashTable:
476501

477502
return labels, counts[:count].copy()
478503

504+
def unique(self, ndarray[int64_t] values):
    """
    Collect the integers of ``values`` that are not yet keys of the table.

    Each element is looked up in ``self.table``; on a miss it is inserted
    as a new key and appended to the result.  Keys that were already in
    the table before the call are therefore not returned.

    Parameters
    ----------
    values : ndarray[int64_t]

    Returns
    -------
    list
        The newly inserted values, in order of first occurrence.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        int ret
        int64_t val
        khiter_t k
        list uniques = []

    for i in range(n):
        val = values[i]
        k = kh_get_int64(self.table, val)
        # an iterator equal to n_buckets is this file's test for "missing"
        if k == self.table.n_buckets:
            k = kh_put_int64(self.table, val, &ret)
            uniques.append(val)

    return uniques
522+
479523
cdef class PyObjectHashTable:
480524

481525
cdef:
@@ -571,9 +615,6 @@ cdef class PyObjectHashTable:
571615
def unique(self, ndarray[object] values):
572616
cdef:
573617
Py_ssize_t i, n = len(values)
574-
ndarray[int32_t] labels = np.empty(n, dtype=np.int32)
575-
ndarray[int32_t] counts = np.empty(n, dtype=np.int32)
576-
dict reverse = {}
577618
Py_ssize_t idx, count = 0
578619
int ret
579620
object val
@@ -625,6 +666,22 @@ cdef class PyObjectHashTable:
625666

626667
return labels, counts[:count].copy()
627668

669+
# def unique(self, ndarray[object] values, list uniques):
670+
# cdef:
671+
# Py_ssize_t i, n = len(values)
672+
# Py_ssize_t idx, count = 0
673+
# int ret
674+
# object val
675+
# khiter_t k
676+
677+
# for i in range(n):
678+
# val = values[i]
679+
# k = kh_get_pymap(self.table, <PyObject*>val)
680+
# if k == self.table.n_buckets:
681+
# k = kh_put_pymap(self.table, <PyObject*>val, &ret)
682+
# uniques.append(val)
683+
# count += 1
684+
628685
cdef class Factorizer:
629686

630687
cdef public:
@@ -656,6 +713,10 @@ cdef class Factorizer:
656713
self.count = len(counts)
657714
return labels, counts
658715

716+
def unique(self, ndarray[object] values):
    """
    Delegate to ``self.table.unique`` and return its result unchanged.

    Parameters
    ----------
    values : ndarray[object]
    """
    # just for fun
    uniques = self.table.unique(values)
    return uniques
719+
659720
cdef class Int64Factorizer:
660721

661722
cdef public:
@@ -753,6 +814,34 @@ cdef class DictFactorizer:
753814
self.count = len(counts)
754815
return labels, counts
755816

817+
def unique(self, ndarray[object] values):
    """
    Register the unseen elements of ``values`` and return all uniques.

    Every element that is not yet a key of ``self.table`` is stored there
    with the next integer label and appended to ``self.uniques``.

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    list
        ``self.uniques`` itself (not a copy), including entries added by
        earlier calls.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        Py_ssize_t count = self.count
        object val

    for i in range(n):
        val = values[i]
        if val not in self.table:
            self.table[val] = count
            self.uniques.append(val)
            count += 1

    # Write the running label back so a later call continues numbering
    # from here instead of reusing labels already stored in self.table.
    self.count = count
    return self.uniques
830+
831+
832+
def unique_int64(self, ndarray[int64_t] values):
    """
    Register the unseen integers of ``values`` and return all uniques.

    Every element that is not yet a key of ``self.table`` is stored there
    with the next integer label and appended to ``self.uniques``.

    Parameters
    ----------
    values : ndarray[int64_t]

    Returns
    -------
    list
        ``self.uniques`` itself (not a copy), including entries added by
        earlier calls.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        Py_ssize_t count = self.count
        int64_t val

    for i in range(n):
        val = values[i]
        if val not in self.table:
            self.table[val] = count
            self.uniques.append(val)
            count += 1

    # Write the running label back so a later call continues numbering
    # from here instead of reusing labels already stored in self.table.
    self.count = count
    return self.uniques
756845

757846
def lookup_locations2(ndarray[object] values):
758847
cdef:

pandas/src/skiplist_test.c

-36
This file was deleted.

pandas/tools/merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pandas.core.frame import DataFrame, _merge_doc
88
from pandas.core.groupby import get_group_index
9-
from pandas.core.index import (Index, MultiIndex, _get_combined_index,
9+
from pandas.core.index import (Factor, Index, MultiIndex, _get_combined_index,
1010
_ensure_index)
1111
from pandas.core.internals import (IntBlock, BoolBlock, BlockManager,
1212
make_block, _consolidate)

0 commit comments

Comments
 (0)