|
| 1 | +.. _enhancingperf: |
| 2 | + |
| 3 | +.. currentmodule:: pandas |
| 4 | + |
| 5 | +.. ipython:: python |
| 6 | + :suppress: |
| 7 | +
|
| 8 | + import os |
| 9 | + import csv |
| 10 | + from pandas import DataFrame |
| 11 | + import pandas as pd |
| 12 | +
|
| 13 | + import numpy as np |
| 14 | + np.random.seed(123456) |
| 15 | + randn = np.random.randn |
| 16 | + randint = np.random.randint |
| 17 | + np.set_printoptions(precision=4, suppress=True) |
| 18 | +
|
| 19 | +
|
| 20 | +********************* |
| 21 | +Enhancing Performance |
| 22 | +********************* |
| 23 | + |
| 24 | +.. _enhancingperf.cython: |
| 25 | + |
| 26 | +Cython (Writing C extensions for pandas) |
| 27 | +---------------------------------------- |
| 28 | + |
| 29 | +For many use cases writing pandas in pure python and numpy is sufficient. In some |
| 30 | +computationally heavy applications however, it can be possible to achieve sizeable |
| 31 | +speed-ups by offloading work to `cython <http://cython.org/>`_. |
| 32 | + |
| 33 | +This tutorial assumes you have refactored as much as possible in python, for example |
| 34 | +by trying to remove for loops and making use of numpy vectorization; it's always worth |
| 35 | +optimising in python first. |
| 36 | + |
| 37 | +This tutorial walks through a "typical" process of cythonizing a slow computation. |
| 38 | +We use an `example from the cython documentation <http://docs.cython.org/src/quickstart/cythonize.html>`_ |
| 39 | +but in the context of pandas. Our final cythonized solution is around 100 times |
| 40 | +faster than the pure python. |
| 41 | + |
| 42 | +.. _enhancingperf.pure: |
| 43 | + |
| 44 | +Pure python |
| 45 | +~~~~~~~~~~~ |
| 46 | + |
| 47 | +We have a DataFrame to which we want to apply a function row-wise. |
| 48 | + |
| 49 | +.. ipython:: python |
| 50 | +
|
| 51 | + df = DataFrame({'a': randn(1000), 'b': randn(1000),'N': randint(100, 1000, (1000)), 'x': 'x'}) |
| 52 | + df |
| 53 | +
|
| 54 | +Here's the function in pure python: |
| 55 | + |
| 56 | +.. ipython:: python |
| 57 | +
|
| 58 | + def f(x): |
| 59 | + return x * (x - 1) |
| 60 | +
|
| 61 | + def integrate_f(a, b, N): |
| 62 | + s = 0 |
| 63 | + dx = (b - a) / N |
| 64 | + for i in range(N): |
| 65 | + s += f(a + i * dx) |
| 66 | + return s * dx |
| 67 | +
|
| 68 | +We achieve our result by using ``apply`` (row-wise): |
| 69 | + |
| 70 | +.. ipython:: python |
| 71 | + |
| 72 | + %timeit df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1) |
| 73 | +
|
| 74 | +But clearly this isn't fast enough for us. Let's take a look and see where the |
| 75 | +time is spent during this operation (limited to the most time consuming |
| 76 | +four calls) using the `prun ipython magic function <http://ipython.org/ipython-doc/stable/api/generated/IPython.core.magics.execution.html#IPython.core.magics.execution.ExecutionMagics.prun>`_: |
| 77 | + |
| 78 | +.. ipython:: python |
| 79 | +
|
| 80 | + %prun -l 4 df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1) |
| 81 | +
|
| 82 | +By far the majority of time is spent inside either ``integrate_f`` or ``f``, |
| 83 | +hence we'll concentrate our efforts cythonizing these two functions. |
| 84 | + |
| 85 | +.. note:: |
| 86 | + |
| 87 | + In python 2 replacing the ``range`` with its lazy counterpart (``xrange``) |
| 88 | + would mean the ``range`` line would vanish. In python 3 ``range`` is already lazy. |
| 89 | + |
| 90 | +.. _enhancingperf.plain: |
| 91 | + |
| 92 | +Plain cython |
| 93 | +~~~~~~~~~~~~ |
| 94 | + |
| 95 | +First we're going to need to import the cython magic function to ipython: |
| 96 | + |
| 97 | +.. ipython:: python |
| 98 | +
|
| 99 | + %load_ext cythonmagic |
| 100 | +
|
| 101 | +
|
| 102 | +Now, let's simply copy our functions over to cython as is (the suffix |
| 103 | +is here to distinguish between function versions): |
| 104 | + |
| 105 | +.. ipython:: |
| 106 | + |
| 107 | + In [2]: %%cython |
| 108 | + ...: def f_plain(x): |
| 109 | + ...: return x * (x - 1) |
| 110 | + ...: def integrate_f_plain(a, b, N): |
| 111 | + ...: s = 0 |
| 112 | + ...: dx = (b - a) / N |
| 113 | + ...: for i in range(N): |
| 114 | + ...: s += f_plain(a + i * dx) |
| 115 | + ...: return s * dx |
| 116 | + ...: |
| 117 | + |
| 118 | +.. note:: |
| 119 | + |
| 120 | + If you're having trouble pasting the above into your ipython, you may need |
| 121 | + to be using bleeding edge ipython for paste to play well with cell magics. |
| 122 | + |
| 123 | + |
| 124 | +.. ipython:: python |
| 125 | +
|
| 126 | + %timeit df.apply(lambda x: integrate_f_plain(x['a'], x['b'], x['N']), axis=1) |
| 127 | +
|
| 128 | +Already this has shaved a third off, not too bad for a simple copy and paste. |
| 129 | + |
| 130 | +.. _enhancingperf.type: |
| 131 | + |
| 132 | +Adding type |
| 133 | +~~~~~~~~~~~ |
| 134 | + |
| 135 | +We get another huge improvement simply by providing type information: |
| 136 | + |
| 137 | +.. ipython:: |
| 138 | + |
| 139 | + In [3]: %%cython |
| 140 | + ...: cdef double f_typed(double x) except? -2: |
| 141 | + ...: return x * (x - 1) |
| 142 | + ...: cpdef double integrate_f_typed(double a, double b, int N): |
| 143 | + ...: cdef int i |
| 144 | + ...: cdef double s, dx |
| 145 | + ...: s = 0 |
| 146 | + ...: dx = (b - a) / N |
| 147 | + ...: for i in range(N): |
| 148 | + ...: s += f_typed(a + i * dx) |
| 149 | + ...: return s * dx |
| 150 | + ...: |
| 151 | + |
| 152 | +.. ipython:: python |
| 153 | +
|
| 154 | + %timeit df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1) |
| 155 | +
|
| 156 | +Now, we're talking! It's now over ten times faster than the original python |
| 157 | +implementation, and we haven't *really* modified the code. Let's have another |
| 158 | +look at what's eating up time: |
| 159 | + |
| 160 | +.. ipython:: python |
| 161 | +
|
| 162 | + %prun -l 4 df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1) |
| 163 | +
|
| 164 | +.. _enhancingperf.ndarray: |
| 165 | + |
| 166 | +Using ndarray |
| 167 | +~~~~~~~~~~~~~ |
| 168 | + |
| 169 | +It's calling Series... a lot! It's creating a Series from each row, and getting values from both |
| 170 | +the index and the series (three times for each row). Function calls are expensive |
| 171 | +in python, so maybe we could minimise these by cythonizing the apply part. |
| 172 | + |
| 173 | +.. note:: |
| 174 | + |
| 175 | + We are now passing ndarrays into the cython function, fortunately cython plays |
| 176 | + very nicely with numpy. |
| 177 | + |
| 178 | +.. ipython:: |
| 179 | + |
| 180 | + In [4]: %%cython |
| 181 | + ...: cimport numpy as np |
| 182 | + ...: import numpy as np |
| 183 | + ...: cdef double f_typed(double x) except? -2: |
| 184 | + ...: return x * (x - 1) |
| 185 | + ...: cpdef double integrate_f_typed(double a, double b, int N): |
| 186 | + ...: cdef int i |
| 187 | + ...: cdef double s, dx |
| 188 | + ...: s = 0 |
| 189 | + ...: dx = (b - a) / N |
| 190 | + ...: for i in range(N): |
| 191 | + ...: s += f_typed(a + i * dx) |
| 192 | + ...: return s * dx |
| 193 | + ...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b, np.ndarray col_N): |
| 194 | + ...: assert (col_a.dtype == np.float and col_b.dtype == np.float and col_N.dtype == np.int) |
| 195 | + ...: cdef Py_ssize_t i, n = len(col_N) |
| 196 | + ...: assert (len(col_a) == len(col_b) == n) |
| 197 | + ...: cdef np.ndarray[double] res = np.empty(n) |
| 198 | + ...: for i in range(len(col_a)): |
| 199 | + ...: res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i]) |
| 200 | + ...: return res |
| 201 | + ...: |
| 202 | + |
| 203 | + |
| 204 | +The implementation is simple: it creates an empty array and loops over |
| 205 | +the rows, applying our ``integrate_f_typed`` and storing each result in that array. |
| 206 | + |
| 207 | + |
| 208 | +.. note:: |
| 209 | + |
| 210 | + Loops like this would be *extremely* slow in python, but in cython looping over |
| 211 | + numpy arrays is *fast*. |
| 212 | + |
| 213 | +.. ipython:: python |
| 214 | +
|
| 215 | + %timeit apply_integrate_f(df['a'], df['b'], df['N']) |
| 216 | +
|
| 217 | +We've gone another three times faster! Let's check again where the time is spent: |
| 218 | + |
| 219 | +.. ipython:: python |
| 220 | +
|
| 221 | + %prun -l 4 apply_integrate_f(df['a'], df['b'], df['N']) |
| 222 | +
|
| 223 | +As one might expect, the majority of the time is now spent in ``apply_integrate_f``, |
| 224 | +so if we wanted to make any more efficiencies we must continue to concentrate our |
| 225 | +efforts here. |
| 226 | + |
| 227 | +.. _enhancingperf.boundswrap: |
| 228 | + |
| 229 | +More advanced techniques |
| 230 | +~~~~~~~~~~~~~~~~~~~~~~~~ |
| 231 | + |
| 232 | +There is still scope for improvement, here's an example of using some more |
| 233 | +advanced cython techniques: |
| 234 | + |
| 235 | +.. ipython:: |
| 236 | + |
| 237 | + In [5]: %%cython |
| 238 | + ...: cimport cython |
| 239 | + ...: cimport numpy as np |
| 240 | + ...: import numpy as np |
| 241 | + ...: cdef double f_typed(double x) except? -2: |
| 242 | + ...: return x * (x - 1) |
| 243 | + ...: cpdef double integrate_f_typed(double a, double b, int N): |
| 244 | + ...: cdef int i |
| 245 | + ...: cdef double s, dx |
| 246 | + ...: s = 0 |
| 247 | + ...: dx = (b - a) / N |
| 248 | + ...: for i in range(N): |
| 249 | + ...: s += f_typed(a + i * dx) |
| 250 | + ...: return s * dx |
| 251 | + ...: @cython.boundscheck(False) |
| 252 | + ...: @cython.wraparound(False) |
| 253 | + ...: cpdef np.ndarray[double] apply_integrate_f_wrap(np.ndarray[double] col_a, np.ndarray[double] col_b, np.ndarray[Py_ssize_t] col_N): |
| 254 | + ...: cdef Py_ssize_t i, n = len(col_N) |
| 255 | + ...: assert len(col_a) == len(col_b) == n |
| 256 | + ...: cdef np.ndarray[double] res = np.empty(n) |
| 257 | + ...: for i in range(n): |
| 258 | + ...: res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i]) |
| 259 | + ...: return res |
| 260 | + ...: |
| 261 | + |
| 262 | +.. ipython:: python |
| 263 | +
|
| 264 | + %timeit apply_integrate_f_wrap(df['a'], df['b'], df['N']) |
| 265 | +
|
| 266 | +This shaves another third off! |
| 267 | + |
| 268 | +Further topics |
| 269 | +~~~~~~~~~~~~~~ |
| 270 | + |
| 271 | +- Loading C modules into cython. |
| 272 | + |
| 273 | +Read more in the `cython docs <http://docs.cython.org/>`_. |
0 commit comments