@@ -1,17 +1,12 @@
 from copy import copy

 from cython import Py_ssize_t
-from cpython.ref cimport Py_INCREF

 from libc.stdlib cimport malloc, free

 import numpy as np
 cimport numpy as cnp
-from numpy cimport (ndarray,
-                    int64_t,
-                    PyArray_SETITEM,
-                    PyArray_ITER_NEXT, PyArray_ITER_DATA, PyArray_IterNew,
-                    flatiter)
+from numpy cimport ndarray, int64_t
 cnp.import_array()

 from pandas._libs cimport util
@@ -26,146 +21,6 @@ cdef _check_result_array(object obj, Py_ssize_t cnt):
         raise ValueError('Function does not reduce')


-cdef class Reducer:
-    """
-    Performs generic reduction operation on a C or Fortran-contiguous ndarray
-    while avoiding ndarray construction overhead
-    """
-    cdef:
-        Py_ssize_t increment, chunksize, nresults
-        object dummy, f, labels, typ, ityp, index
-        ndarray arr
-
-    def __init__(
-        self, ndarray arr, object f, int axis=1, object dummy=None, object labels=None
-    ):
-        cdef:
-            Py_ssize_t n, k
-
-        n, k = (<object>arr).shape
-
-        if axis == 0:
-            if not arr.flags.f_contiguous:
-                arr = arr.copy('F')
-
-            self.nresults = k
-            self.chunksize = n
-            self.increment = n * arr.dtype.itemsize
-        else:
-            if not arr.flags.c_contiguous:
-                arr = arr.copy('C')
-
-            self.nresults = n
-            self.chunksize = k
-            self.increment = k * arr.dtype.itemsize
-
-        self.f = f
-        self.arr = arr
-        self.labels = labels
-        self.dummy, self.typ, self.index, self.ityp = self._check_dummy(
-            dummy=dummy)
-
-    cdef _check_dummy(self, object dummy=None):
-        cdef:
-            object index = None, typ = None, ityp = None
-
-        if dummy is None:
-            dummy = np.empty(self.chunksize, dtype=self.arr.dtype)
-
-            # our ref is stolen later since we are creating this array
-            # in cython, so increment first
-            Py_INCREF(dummy)
-
-        else:
-
-            # we passed a Series
-            typ = type(dummy)
-            index = dummy.index
-            dummy = dummy.values
-
-            if dummy.dtype != self.arr.dtype:
-                raise ValueError('Dummy array must be same dtype')
-            if len(dummy) != self.chunksize:
-                raise ValueError(f'Dummy array must be length {self.chunksize}')
-
-        return dummy, typ, index, ityp
-
-    def get_result(self):
-        cdef:
-            char* dummy_buf
-            ndarray arr, result, chunk
-            Py_ssize_t i
-            flatiter it
-            object res, name, labels
-            object cached_typ = None
-
-        arr = self.arr
-        chunk = self.dummy
-        dummy_buf = chunk.data
-        chunk.data = arr.data
-        labels = self.labels
-
-        result = np.empty(self.nresults, dtype='O')
-        it = <flatiter>PyArray_IterNew(result)
-        reduction_success = True
-
-        try:
-            for i in range(self.nresults):
-
-                # create the cached type
-                # each time just reassign the data
-                if i == 0:
-
-                    if self.typ is not None:
-                        # In this case, we also have self.index
-                        name = labels[i]
-                        cached_typ = self.typ(
-                            chunk, index=self.index, name=name, dtype=arr.dtype)
-
-                # use the cached_typ if possible
-                if cached_typ is not None:
-                    # In this case, we also have non-None labels
-                    name = labels[i]
-
-                    object.__setattr__(
-                        cached_typ._mgr._block, 'values', chunk)
-                    object.__setattr__(cached_typ, 'name', name)
-                    res = self.f(cached_typ)
-                else:
-                    res = self.f(chunk)
-
-                # TODO: reason for not squeezing here?
-                extracted_res = _extract_result(res, squeeze=False)
-                if i == 0:
-                    # On the first pass, we check the output shape to see
-                    # if this looks like a reduction.
-                    # If it does not, return the computed value to be used by the
-                    # pure python implementation,
-                    # so the function won't be called twice on the same object,
-                    # and side effects would occur twice
-                    try:
-                        _check_result_array(extracted_res, len(self.dummy))
-                    except ValueError as err:
-                        if "Function does not reduce" not in str(err):
-                            # catch only the specific exception
-                            raise
-
-                        reduction_success = False
-                        PyArray_SETITEM(result, PyArray_ITER_DATA(it), copy(res))
-                        break
-
-                PyArray_SETITEM(result, PyArray_ITER_DATA(it), extracted_res)
-                chunk.data = chunk.data + self.increment
-                PyArray_ITER_NEXT(it)
-
-        finally:
-            # so we don't free the wrong memory
-            chunk.data = dummy_buf
-
-        result = maybe_convert_objects(result)
-        return result, reduction_success
-
-
 cdef class _BaseGrouper:
     cdef _check_dummy(self, object dummy):
         # both values and index must be an ndarray!
@@ -610,30 +465,3 @@ cdef class BlockSlider:
             # axis=1 is the frame's axis=0
             arr.data = self.base_ptrs[i]
             arr.shape[1] = 0
-
-
-def compute_reduction(arr: ndarray, f, axis: int = 0, dummy=None, labels=None):
-    """
-
-    Parameters
-    ----------
-    arr : np.ndarray
-    f : function
-    axis : integer axis
-    dummy : type of reduced output (series)
-    labels : Index or None
-    """
-
-    # We either have both dummy and labels, or neither of them
-    if (labels is None) ^ (dummy is None):
-        raise ValueError("Must pass either dummy and labels, or neither")
-
-    if labels is not None:
-        # Caller is responsible for ensuring we don't have MultiIndex
-        assert labels.nlevels == 1
-
-        # pass as an ndarray/ExtensionArray
-        labels = labels._values
-
-    reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
-    return reducer.get_result()
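
The deleted `Reducer.get_result` relied on a pointer-swapping trick: it built one dummy Series up front, then repointed `chunk.data` into the source array and advanced it by `self.increment` (chunk length times itemsize) on every pass, so the user function saw a real Series without a per-chunk ndarray allocation. Below is a rough pure-Python sketch of the same idea, using zero-copy NumPy row views in place of the raw pointer arithmetic; `reduce_rows` is a hypothetical name, not part of this diff, and several subtleties of the real class are dropped.

    import numpy as np
    import pandas as pd

    def reduce_rows(arr, f, labels=None):
        # Sketch of what Reducer computed for axis=1: one result per row
        # of a C-contiguous 2-D array.  arr[i] is a zero-copy view here,
        # standing in for the Cython trick of advancing chunk.data by
        # k * arr.dtype.itemsize on each iteration.
        n, k = arr.shape
        result = np.empty(n, dtype=object)
        for i in range(n):
            chunk = pd.Series(arr[i], copy=False,
                              name=None if labels is None else labels[i])
            res = f(chunk)
            if i == 0 and getattr(res, "shape", None) == (k,):
                # First-pass shape check, loosely mirroring
                # _check_result_array: an output the same length as the
                # chunk means f transforms rather than reduces.
                raise ValueError("Function does not reduce")
            result[i] = res
        return result

Note one deliberate simplification: the real class did not raise on a failed shape check; it returned `(result, reduction_success)` so the pure-Python fallback could reuse the first computed value rather than calling `f` twice on the same object.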
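
For context on the second deletion: `compute_reduction` was the module's public entry point, used by the old `DataFrame.apply` fast path, and its contract required `dummy` and `labels` together or not at all. A hedged sketch of the call pattern a caller might have used before this PR, not verified against any particular build; the shapes follow from `Reducer.__init__` with `axis=1`, where the chunk size is the row length and there is one label per result:

    import numpy as np
    import pandas as pd
    # Removed by this PR; this import only works on builds that predate it.
    from pandas._libs.reduction import compute_reduction

    arr = np.arange(12, dtype=np.float64).reshape(3, 4)
    # dummy: template Series with arr's dtype and length equal to the
    # chunk size (4); labels: one entry per result (3 rows -> 3 labels).
    dummy = pd.Series(np.empty(4, dtype=arr.dtype))
    labels = pd.Index(["a", "b", "c"])
    result, reduction_success = compute_reduction(
        arr, np.sum, axis=1, dummy=dummy, labels=labels
    )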