Skip to content

BLD: split join.pyx from algos.pyx #13925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 6, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if [ "$LINT" ]; then
echo "Linting *.py DONE"

echo "Linting *.pyx"
for path in 'window.pyx'
for path in 'window.pyx' "src/join.pyx"
do
echo "linting -> pandas/$path"
flake8 pandas/$path --filename '*.pyx' --select=E501,E302,E203,E226,E111,E114,E221,E303,E128,E231,E126
Expand Down
3 changes: 0 additions & 3 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1340,10 +1340,7 @@ cdef inline float64_t _median_linear(float64_t* a, int n):
return result


include "join.pyx"

# generated from template
include "algos_common_helper.pxi"
include "algos_groupby_helper.pxi"
include "algos_join_helper.pxi"
include "algos_take_helper.pxi"
9 changes: 5 additions & 4 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pandas.tslib as tslib
import pandas.lib as lib
import pandas._join as _join
import pandas.algos as _algos
import pandas.index as _index
from pandas.lib import Timestamp, Timedelta, is_datetime_array
Expand Down Expand Up @@ -110,10 +111,10 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
# Cython methods
_groupby = _algos.groupby_object
_arrmap = _algos.arrmap_object
_left_indexer_unique = _algos.left_join_indexer_unique_object
_left_indexer = _algos.left_join_indexer_object
_inner_indexer = _algos.inner_join_indexer_object
_outer_indexer = _algos.outer_join_indexer_object
_left_indexer_unique = _join.left_join_indexer_unique_object
_left_indexer = _join.left_join_indexer_object
_inner_indexer = _join.inner_join_indexer_object
_outer_indexer = _join.outer_join_indexer_object
_box_scalars = False

_typ = 'index'
Expand Down
17 changes: 9 additions & 8 deletions pandas/indexes/numeric.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pandas.lib as lib
import pandas._join as _join
import pandas.algos as _algos
import pandas.index as _index

Expand Down Expand Up @@ -114,10 +115,10 @@ class Int64Index(NumericIndex):
_typ = 'int64index'
_groupby = _algos.groupby_int64
_arrmap = _algos.arrmap_int64
_left_indexer_unique = _algos.left_join_indexer_unique_int64
_left_indexer = _algos.left_join_indexer_int64
_inner_indexer = _algos.inner_join_indexer_int64
_outer_indexer = _algos.outer_join_indexer_int64
_left_indexer_unique = _join.left_join_indexer_unique_int64
_left_indexer = _join.left_join_indexer_int64
_inner_indexer = _join.inner_join_indexer_int64
_outer_indexer = _join.outer_join_indexer_int64

_can_hold_na = False

Expand Down Expand Up @@ -211,10 +212,10 @@ class Float64Index(NumericIndex):
_engine_type = _index.Float64Engine
_groupby = _algos.groupby_float64
_arrmap = _algos.arrmap_float64
_left_indexer_unique = _algos.left_join_indexer_unique_float64
_left_indexer = _algos.left_join_indexer_float64
_inner_indexer = _algos.inner_join_indexer_float64
_outer_indexer = _algos.outer_join_indexer_float64
_left_indexer_unique = _join.left_join_indexer_unique_float64
_left_indexer = _join.left_join_indexer_float64
_inner_indexer = _join.inner_join_indexer_float64
_outer_indexer = _join.outer_join_indexer_float64

_default_dtype = np.float64

Expand Down
55 changes: 48 additions & 7 deletions pandas/src/join.pyx
Original file line number Diff line number Diff line change
@@ -1,3 +1,40 @@
# cython: profile=False

from numpy cimport *
cimport numpy as np
import numpy as np

cimport cython

import_array()

cimport util

from numpy cimport NPY_INT8 as NPY_int8
from numpy cimport NPY_INT16 as NPY_int16
from numpy cimport NPY_INT32 as NPY_int32
from numpy cimport NPY_INT64 as NPY_int64
from numpy cimport NPY_FLOAT16 as NPY_float16
from numpy cimport NPY_FLOAT32 as NPY_float32
from numpy cimport NPY_FLOAT64 as NPY_float64

from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
uint32_t, uint64_t, float16_t, float32_t, float64_t)

int8 = np.dtype(np.int8)
int16 = np.dtype(np.int16)
int32 = np.dtype(np.int32)
int64 = np.dtype(np.int64)
float16 = np.dtype(np.float16)
float32 = np.dtype(np.float32)
float64 = np.dtype(np.float64)

cdef double NaN = <double> np.NaN
cdef double nan = NaN

from pandas.algos import groupsort_indexer


def inner_join(ndarray[int64_t] left, ndarray[int64_t] right,
Py_ssize_t max_groups):
cdef:
Expand Down Expand Up @@ -48,6 +85,7 @@ def inner_join(ndarray[int64_t] left, ndarray[int64_t] right,
return (_get_result_indexer(left_sorter, left_indexer),
_get_result_indexer(right_sorter, right_indexer))


def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
Py_ssize_t max_groups, sort=True):
cdef:
Expand Down Expand Up @@ -117,14 +155,13 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
rev, _ = groupsort_indexer(left_indexer, len(left))

if rev.dtype != np.int_:
rev = rev.astype(np.int_)
rev = rev.astype(np.int_)
right_indexer = right_indexer.take(rev)
left_indexer = left_indexer.take(rev)

return left_indexer, right_indexer



def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right,
Py_ssize_t max_groups, # ignored
bint allow_exact_matches=1,
Expand All @@ -140,7 +177,8 @@ def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right,
int64_t tolerance_

# if we are using tolerance, set our objects
if left_values is not None and right_values is not None and tolerance is not None:
if (left_values is not None and right_values is not None and
tolerance is not None):
has_tolerance = 1
left_values_ = left_values
right_values_ = right_values
Expand All @@ -160,10 +198,12 @@ def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right,

# find last position in right whose value is less than (or, when
# allow_exact_matches is set, less than or equal to) left's value
if allow_exact_matches:
while right_pos < right_size and right[right_pos] <= left[left_pos]:
while (right_pos < right_size and
right[right_pos] <= left[left_pos]):
right_pos += 1
else:
while right_pos < right_size and right[right_pos] < left[left_pos]:
while (right_pos < right_size and
right[right_pos] < left[left_pos]):
right_pos += 1
right_pos -= 1

Expand Down Expand Up @@ -243,7 +283,6 @@ def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
_get_result_indexer(right_sorter, right_indexer))



def _get_result_indexer(sorter, indexer):
if indexer.dtype != np.int_:
indexer = indexer.astype(np.int_)
Expand All @@ -258,7 +297,6 @@ def _get_result_indexer(sorter, indexer):
return res



def ffill_indexer(ndarray[int64_t] indexer):
cdef:
Py_ssize_t i, n = len(indexer)
Expand Down Expand Up @@ -301,3 +339,6 @@ def ffill_by_group(ndarray[int64_t] indexer, ndarray[int64_t] group_ids,
last_obs[gid] = val

return result


include "join_helper.pxi"
File renamed without changes.
File renamed without changes.
33 changes: 17 additions & 16 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas.algos as _algos
from pandas.compat import lrange
import pandas.core.algorithms as algos
import pandas._join as _join
import pandas.util.testing as tm
import pandas.hashtable as hashtable
from pandas.compat.numpy import np_array_datetime64_compat
Expand Down Expand Up @@ -303,11 +304,11 @@ class TestIndexer(tm.TestCase):
_multiprocess_can_split_ = True

def test_outer_join_indexer(self):
typemap = [('int32', algos.algos.outer_join_indexer_int32),
('int64', algos.algos.outer_join_indexer_int64),
('float32', algos.algos.outer_join_indexer_float32),
('float64', algos.algos.outer_join_indexer_float64),
('object', algos.algos.outer_join_indexer_object)]
typemap = [('int32', _join.outer_join_indexer_int32),
('int64', _join.outer_join_indexer_int64),
('float32', _join.outer_join_indexer_float32),
('float64', _join.outer_join_indexer_float64),
('object', _join.outer_join_indexer_object)]

for dtype, indexer in typemap:
left = np.arange(3, dtype=dtype)
Expand Down Expand Up @@ -1070,7 +1071,7 @@ def test_left_join_indexer_unique():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([2, 2, 3, 4, 4], dtype=np.int64)

result = _algos.left_join_indexer_unique_int64(b, a)
result = _join.left_join_indexer_unique_int64(b, a)
expected = np.array([1, 1, 2, 3, 3], dtype=np.int64)
assert (np.array_equal(result, expected))

Expand All @@ -1086,7 +1087,7 @@ def test_left_outer_join_bug():
right = np.array([3, 1], dtype=np.int64)
max_groups = 4

lidx, ridx = _algos.left_outer_join(left, right, max_groups, sort=False)
lidx, ridx = _join.left_outer_join(left, right, max_groups, sort=False)

exp_lidx = np.arange(len(left))
exp_ridx = -np.ones(len(left))
Expand All @@ -1101,7 +1102,7 @@ def test_inner_join_indexer():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

index, ares, bres = _algos.inner_join_indexer_int64(a, b)
index, ares, bres = _join.inner_join_indexer_int64(a, b)

index_exp = np.array([3, 5], dtype=np.int64)
assert_almost_equal(index, index_exp)
Expand All @@ -1114,7 +1115,7 @@ def test_inner_join_indexer():
a = np.array([5], dtype=np.int64)
b = np.array([5], dtype=np.int64)

index, ares, bres = _algos.inner_join_indexer_int64(a, b)
index, ares, bres = _join.inner_join_indexer_int64(a, b)
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))
Expand All @@ -1124,7 +1125,7 @@ def test_outer_join_indexer():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

index, ares, bres = _algos.outer_join_indexer_int64(a, b)
index, ares, bres = _join.outer_join_indexer_int64(a, b)

index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64)
assert_almost_equal(index, index_exp)
Expand All @@ -1137,7 +1138,7 @@ def test_outer_join_indexer():
a = np.array([5], dtype=np.int64)
b = np.array([5], dtype=np.int64)

index, ares, bres = _algos.outer_join_indexer_int64(a, b)
index, ares, bres = _join.outer_join_indexer_int64(a, b)
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))
Expand All @@ -1147,7 +1148,7 @@ def test_left_join_indexer():
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

index, ares, bres = _algos.left_join_indexer_int64(a, b)
index, ares, bres = _join.left_join_indexer_int64(a, b)

assert_almost_equal(index, a)

Expand All @@ -1159,7 +1160,7 @@ def test_left_join_indexer():
a = np.array([5], dtype=np.int64)
b = np.array([5], dtype=np.int64)

index, ares, bres = _algos.left_join_indexer_int64(a, b)
index, ares, bres = _join.left_join_indexer_int64(a, b)
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))
Expand All @@ -1169,7 +1170,7 @@ def test_left_join_indexer2():
idx = Index([1, 1, 2, 5])
idx2 = Index([1, 2, 5, 7, 9])

res, lidx, ridx = _algos.left_join_indexer_int64(idx2.values, idx.values)
res, lidx, ridx = _join.left_join_indexer_int64(idx2.values, idx.values)

exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
assert_almost_equal(res, exp_res)
Expand All @@ -1185,7 +1186,7 @@ def test_outer_join_indexer2():
idx = Index([1, 1, 2, 5])
idx2 = Index([1, 2, 5, 7, 9])

res, lidx, ridx = _algos.outer_join_indexer_int64(idx2.values, idx.values)
res, lidx, ridx = _join.outer_join_indexer_int64(idx2.values, idx.values)

exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
assert_almost_equal(res, exp_res)
Expand All @@ -1201,7 +1202,7 @@ def test_inner_join_indexer2():
idx = Index([1, 1, 2, 5])
idx2 = Index([1, 2, 5, 7, 9])

res, lidx, ridx = _algos.inner_join_indexer_int64(idx2.values, idx.values)
res, lidx, ridx = _join.inner_join_indexer_int64(idx2.values, idx.values)

exp_res = np.array([1, 1, 2, 5], dtype=np.int64)
assert_almost_equal(res, exp_res)
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,9 @@ def test_float_panel(self):

@slow
def test_panel4d(self):
self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5,
assert_func=assert_panel4d_equal, binary_comp=3)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5,
assert_func=assert_panel4d_equal, binary_comp=3)

def test_mixed_arithmetic_frame(self):
# TODO: FIGURE OUT HOW TO GET IT TO WORK...
Expand Down
20 changes: 10 additions & 10 deletions pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import pandas.core.common as com
import pandas.types.concat as _concat

import pandas.algos as _algos
import pandas._join as _join
import pandas.hashtable as _hash


Expand Down Expand Up @@ -918,8 +918,8 @@ def get_result(self):
rdata.items, rsuf)

if self.fill_method == 'ffill':
left_join_indexer = _algos.ffill_indexer(left_indexer)
right_join_indexer = _algos.ffill_indexer(right_indexer)
left_join_indexer = _join.ffill_indexer(left_indexer)
right_join_indexer = _join.ffill_indexer(right_indexer)
else:
left_join_indexer = left_indexer
right_join_indexer = right_indexer
Expand Down Expand Up @@ -1094,13 +1094,13 @@ def _get_multiindex_indexer(join_keys, index, sort):
# factorize keys to a dense i8 space
lkey, rkey, count = fkeys(lkey, rkey)

return _algos.left_outer_join(lkey, rkey, count, sort=sort)
return _join.left_outer_join(lkey, rkey, count, sort=sort)


def _get_single_indexer(join_key, index, sort=False):
left_key, right_key, count = _factorize_keys(join_key, index, sort=sort)

left_indexer, right_indexer = _algos.left_outer_join(
left_indexer, right_indexer = _join.left_outer_join(
_ensure_int64(left_key),
_ensure_int64(right_key),
count, sort=sort)
Expand Down Expand Up @@ -1135,15 +1135,15 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):


def _right_outer_join(x, y, max_groups):
right_indexer, left_indexer = _algos.left_outer_join(y, x, max_groups)
right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups)
return left_indexer, right_indexer

_join_functions = {
'inner': _algos.inner_join,
'left': _algos.left_outer_join,
'inner': _join.inner_join,
'left': _join.left_outer_join,
'right': _right_outer_join,
'outer': _algos.full_outer_join,
'asof': _algos.left_outer_asof_join,
'outer': _join.full_outer_join,
'asof': _join.left_outer_asof_join,
}


Expand Down
Loading