diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 9c0791c3eb8ce..1632f5d016439 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -6,7 +6,7 @@ from cython cimport Py_ssize_t import numpy as np cimport numpy as np -from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM, PyArray_SETITEM, +from numpy cimport (ndarray, PyArray_NDIM, PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew, flatiter, NPY_OBJECT, int64_t, @@ -57,8 +57,6 @@ cimport util cdef int64_t NPY_NAT = util.get_nat() from util cimport is_array, _checknull -from libc.math cimport fabs, sqrt - def values_from_object(object o): """ return my values or the object if we are say an ndarray """ @@ -1119,5 +1117,4 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, return result -include "reduce.pyx" include "inference.pyx" diff --git a/pandas/_libs/src/reduce.pyx b/pandas/_libs/reduction.pyx similarity index 97% rename from pandas/_libs/src/reduce.pyx rename to pandas/_libs/reduction.pyx index f0ec8d284ef0e..d51583c7aa473 100644 --- a/pandas/_libs/src/reduce.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,9 +1,24 @@ # -*- coding: utf-8 -*- # cython: profile=False -import numpy as np - from distutils.version import LooseVersion +from cython cimport Py_ssize_t +from cpython cimport Py_INCREF + +from libc.stdlib cimport malloc, free + +import numpy as np +cimport numpy as np +from numpy cimport (ndarray, + int64_t, + PyArray_SETITEM, + PyArray_ITER_NEXT, PyArray_ITER_DATA, PyArray_IterNew, + flatiter) +np.import_array() + +cimport util +from lib import maybe_convert_objects + is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2' diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2f43087f7dff9..4cdec54b9a07a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1,6 +1,6 @@ import numpy as np from pandas import compat -from pandas._libs import lib +from pandas._libs import reduction from pandas.core.dtypes.common import ( is_extension_type, is_sequence) @@ -114,7 +114,7 @@ def apply_empty_result(self): def apply_raw(self): try: - result = lib.reduce(self.values, self.f, axis=self.axis) + result = reduction.reduce(self.values, self.f, axis=self.axis) except Exception: result = np.apply_along_axis(self.f, self.axis, self.values) @@ -150,10 +150,10 @@ def apply_standard(self): try: labels = self.agg_axis - result = lib.reduce(values, self.f, - axis=self.axis, - dummy=dummy, - labels=labels) + result = reduction.reduce(values, self.f, + axis=self.axis, + dummy=dummy, + labels=labels) return Series(result, index=labels) except Exception: pass diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 25e44589488ee..66162af1e7314 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -66,7 +66,9 @@ from pandas.plotting._core import boxplot_frame_groupby -from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT +from pandas._libs import (lib, reduction, + groupby as libgroupby, + Timestamp, NaT, iNaT) from pandas._libs.lib import count_level_2d _doc_template = """ @@ -1981,7 +1983,7 @@ def apply(self, f, data, axis=0): try: values, mutated = splitter.fast_apply(f, group_keys) return group_keys, values, mutated - except (lib.InvalidApply): + except reduction.InvalidApply: # we detect a mutation of some kind # so take slow path pass @@ -2404,8 +2406,8 @@ def _aggregate_series_fast(self, obj, func): obj = obj._take(indexer, convert=False).to_dense() group_index = algorithms.take_nd( group_index, indexer, allow_fill=False) - grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, - dummy) + grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups, + dummy) result, counts = grouper.get_result() return result, counts @@ -2618,7 +2620,7 @@ def groupings(self): def agg_series(self, obj, func): dummy = obj[:0] - grouper = lib.SeriesBinGrouper(obj, func, self.bins, dummy) + grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy) return grouper.get_result() # ---------------------------------------------------------------------- @@ -4758,7 +4760,8 @@ def fast_apply(self, f, names): return [], True sdata = self._get_sorted_data() - results, mutated = lib.apply_frame_axis0(sdata, f, names, starts, ends) + results, mutated = reduction.apply_frame_axis0(sdata, f, names, + starts, ends) return results, mutated diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 8b95455b53d22..979b2f7a539af 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -9,7 +9,7 @@ from pandas import Index, isna from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm -from pandas._libs import lib, groupby +from pandas._libs import lib, groupby, reduction def test_series_grouper(): @@ -19,7 +19,7 @@ def test_series_grouper(): labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) - grouper = lib.SeriesGrouper(obj, np.mean, labels, 2, dummy) + grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy) result, counts = grouper.get_result() expected = np.array([obj[3:6].mean(), obj[6:].mean()]) @@ -36,7 +36,7 @@ def test_series_bin_grouper(): bins = np.array([3, 6]) - grouper = lib.SeriesBinGrouper(obj, np.mean, bins, dummy) + grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy) result, counts = grouper.get_result() expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) @@ -127,26 +127,27 @@ def test_int_index(self): from pandas.core.series import Series arr = np.random.randn(100, 4) - result = lib.reduce(arr, np.sum, labels=Index(np.arange(4))) + result = reduction.reduce(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) - result = lib.reduce(arr, np.sum, axis=1, labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(100)) - result = lib.reduce(arr, np.sum, dummy=dummy, - labels=Index(np.arange(4))) + result = reduction.reduce(arr, np.sum, dummy=dummy, + labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(4)) - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) - result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, - labels=Index(np.arange(100))) + result = reduction.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) assert_almost_equal(result, expected) diff --git a/setup.py b/setup.py index 16ca0c132eaa9..7ade1544ec5cd 100755 --- a/setup.py +++ b/setup.py @@ -309,6 +309,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/interval.pyx', 'pandas/_libs/hashing.pyx', 'pandas/_libs/missing.pyx', + 'pandas/_libs/reduction.pyx', 'pandas/_libs/testing.pyx', 'pandas/_libs/window.pyx', 'pandas/_libs/skiplist.pyx', @@ -506,6 +507,8 @@ def pxd(name): 'pandas/_libs/src/numpy_helper.h'], 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, + '_libs.reduction': { + 'pyxfile': '_libs/reduction'}, '_libs.tslibs.period': { 'pyxfile': '_libs/tslibs/period', 'pxdfiles': ['_libs/src/util',