Skip to content

Commit 55a0c2e

Browse files
authored
BLD: split join.pyx from algos.pyx (#13925)
closes #13921
1 parent 3186fef commit 55a0c2e

15 files changed

+113
-68
lines changed

ci/lint.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ if [ "$LINT" ]; then
2020
echo "Linting *.py DONE"
2121

2222
echo "Linting *.pyx"
23-
for path in 'window.pyx'
23+
for path in 'window.pyx' "src/join.pyx"
2424
do
2525
echo "linting -> pandas/$path"
2626
flake8 pandas/$path --filename '*.pyx' --select=E501,E302,E203,E226,E111,E114,E221,E303,E128,E231,E126

pandas/algos.pyx

-3
Original file line numberDiff line numberDiff line change
@@ -1340,10 +1340,7 @@ cdef inline float64_t _median_linear(float64_t* a, int n):
13401340
return result
13411341

13421342

1343-
include "join.pyx"
1344-
13451343
# generated from template
13461344
include "algos_common_helper.pxi"
13471345
include "algos_groupby_helper.pxi"
1348-
include "algos_join_helper.pxi"
13491346
include "algos_take_helper.pxi"

pandas/indexes/base.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66
import pandas.tslib as tslib
77
import pandas.lib as lib
8+
import pandas._join as _join
89
import pandas.algos as _algos
910
import pandas.index as _index
1011
from pandas.lib import Timestamp, Timedelta, is_datetime_array
@@ -110,10 +111,10 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
110111
# Cython methods
111112
_groupby = _algos.groupby_object
112113
_arrmap = _algos.arrmap_object
113-
_left_indexer_unique = _algos.left_join_indexer_unique_object
114-
_left_indexer = _algos.left_join_indexer_object
115-
_inner_indexer = _algos.inner_join_indexer_object
116-
_outer_indexer = _algos.outer_join_indexer_object
114+
_left_indexer_unique = _join.left_join_indexer_unique_object
115+
_left_indexer = _join.left_join_indexer_object
116+
_inner_indexer = _join.inner_join_indexer_object
117+
_outer_indexer = _join.outer_join_indexer_object
117118
_box_scalars = False
118119

119120
_typ = 'index'

pandas/indexes/numeric.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
import pandas.lib as lib
3+
import pandas._join as _join
34
import pandas.algos as _algos
45
import pandas.index as _index
56

@@ -114,10 +115,10 @@ class Int64Index(NumericIndex):
114115
_typ = 'int64index'
115116
_groupby = _algos.groupby_int64
116117
_arrmap = _algos.arrmap_int64
117-
_left_indexer_unique = _algos.left_join_indexer_unique_int64
118-
_left_indexer = _algos.left_join_indexer_int64
119-
_inner_indexer = _algos.inner_join_indexer_int64
120-
_outer_indexer = _algos.outer_join_indexer_int64
118+
_left_indexer_unique = _join.left_join_indexer_unique_int64
119+
_left_indexer = _join.left_join_indexer_int64
120+
_inner_indexer = _join.inner_join_indexer_int64
121+
_outer_indexer = _join.outer_join_indexer_int64
121122

122123
_can_hold_na = False
123124

@@ -211,10 +212,10 @@ class Float64Index(NumericIndex):
211212
_engine_type = _index.Float64Engine
212213
_groupby = _algos.groupby_float64
213214
_arrmap = _algos.arrmap_float64
214-
_left_indexer_unique = _algos.left_join_indexer_unique_float64
215-
_left_indexer = _algos.left_join_indexer_float64
216-
_inner_indexer = _algos.inner_join_indexer_float64
217-
_outer_indexer = _algos.outer_join_indexer_float64
215+
_left_indexer_unique = _join.left_join_indexer_unique_float64
216+
_left_indexer = _join.left_join_indexer_float64
217+
_inner_indexer = _join.inner_join_indexer_float64
218+
_outer_indexer = _join.outer_join_indexer_float64
218219

219220
_default_dtype = np.float64
220221

pandas/src/join.pyx

+48-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,40 @@
1+
# cython: profile=False
2+
3+
from numpy cimport *
4+
cimport numpy as np
5+
import numpy as np
6+
7+
cimport cython
8+
9+
import_array()
10+
11+
cimport util
12+
13+
from numpy cimport NPY_INT8 as NPY_int8
14+
from numpy cimport NPY_INT16 as NPY_int16
15+
from numpy cimport NPY_INT32 as NPY_int32
16+
from numpy cimport NPY_INT64 as NPY_int64
17+
from numpy cimport NPY_FLOAT16 as NPY_float16
18+
from numpy cimport NPY_FLOAT32 as NPY_float32
19+
from numpy cimport NPY_FLOAT64 as NPY_float64
20+
21+
from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
22+
uint32_t, uint64_t, float16_t, float32_t, float64_t)
23+
24+
int8 = np.dtype(np.int8)
25+
int16 = np.dtype(np.int16)
26+
int32 = np.dtype(np.int32)
27+
int64 = np.dtype(np.int64)
28+
float16 = np.dtype(np.float16)
29+
float32 = np.dtype(np.float32)
30+
float64 = np.dtype(np.float64)
31+
32+
cdef double NaN = <double> np.NaN
33+
cdef double nan = NaN
34+
35+
from pandas.algos import groupsort_indexer
36+
37+
138
def inner_join(ndarray[int64_t] left, ndarray[int64_t] right,
239
Py_ssize_t max_groups):
340
cdef:
@@ -48,6 +85,7 @@ def inner_join(ndarray[int64_t] left, ndarray[int64_t] right,
4885
return (_get_result_indexer(left_sorter, left_indexer),
4986
_get_result_indexer(right_sorter, right_indexer))
5087

88+
5189
def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
5290
Py_ssize_t max_groups, sort=True):
5391
cdef:
@@ -117,14 +155,13 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
117155
rev, _ = groupsort_indexer(left_indexer, len(left))
118156

119157
if rev.dtype != np.int_:
120-
rev = rev.astype(np.int_)
158+
rev = rev.astype(np.int_)
121159
right_indexer = right_indexer.take(rev)
122160
left_indexer = left_indexer.take(rev)
123161

124162
return left_indexer, right_indexer
125163

126164

127-
128165
def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right,
129166
Py_ssize_t max_groups, # ignored
130167
bint allow_exact_matches=1,
@@ -140,7 +177,8 @@ def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right,
140177
int64_t tolerance_
141178

142179
# if we are using tolerance, set our objects
143-
if left_values is not None and right_values is not None and tolerance is not None:
180+
if (left_values is not None and right_values is not None and
181+
tolerance is not None):
144182
has_tolerance = 1
145183
left_values_ = left_values
146184
right_values_ = right_values
@@ -160,10 +198,12 @@ def left_outer_asof_join(ndarray[int64_t] left, ndarray[int64_t] right,
160198

161199
# find last position in right whose value is less than left's value
162200
if allow_exact_matches:
163-
while right_pos < right_size and right[right_pos] <= left[left_pos]:
201+
while (right_pos < right_size and
202+
right[right_pos] <= left[left_pos]):
164203
right_pos += 1
165204
else:
166-
while right_pos < right_size and right[right_pos] < left[left_pos]:
205+
while (right_pos < right_size and
206+
right[right_pos] < left[left_pos]):
167207
right_pos += 1
168208
right_pos -= 1
169209

@@ -243,7 +283,6 @@ def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right,
243283
_get_result_indexer(right_sorter, right_indexer))
244284

245285

246-
247286
def _get_result_indexer(sorter, indexer):
248287
if indexer.dtype != np.int_:
249288
indexer = indexer.astype(np.int_)
@@ -258,7 +297,6 @@ def _get_result_indexer(sorter, indexer):
258297
return res
259298

260299

261-
262300
def ffill_indexer(ndarray[int64_t] indexer):
263301
cdef:
264302
Py_ssize_t i, n = len(indexer)
@@ -301,3 +339,6 @@ def ffill_by_group(ndarray[int64_t] indexer, ndarray[int64_t] group_ids,
301339
last_obs[gid] = val
302340

303341
return result
342+
343+
344+
include "join_helper.pxi"
File renamed without changes.
File renamed without changes.

pandas/tests/test_algos.py

+17-16
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import pandas.algos as _algos
1313
from pandas.compat import lrange
1414
import pandas.core.algorithms as algos
15+
import pandas._join as _join
1516
import pandas.util.testing as tm
1617
import pandas.hashtable as hashtable
1718
from pandas.compat.numpy import np_array_datetime64_compat
@@ -303,11 +304,11 @@ class TestIndexer(tm.TestCase):
303304
_multiprocess_can_split_ = True
304305

305306
def test_outer_join_indexer(self):
306-
typemap = [('int32', algos.algos.outer_join_indexer_int32),
307-
('int64', algos.algos.outer_join_indexer_int64),
308-
('float32', algos.algos.outer_join_indexer_float32),
309-
('float64', algos.algos.outer_join_indexer_float64),
310-
('object', algos.algos.outer_join_indexer_object)]
307+
typemap = [('int32', _join.outer_join_indexer_int32),
308+
('int64', _join.outer_join_indexer_int64),
309+
('float32', _join.outer_join_indexer_float32),
310+
('float64', _join.outer_join_indexer_float64),
311+
('object', _join.outer_join_indexer_object)]
311312

312313
for dtype, indexer in typemap:
313314
left = np.arange(3, dtype=dtype)
@@ -1070,7 +1071,7 @@ def test_left_join_indexer_unique():
10701071
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
10711072
b = np.array([2, 2, 3, 4, 4], dtype=np.int64)
10721073

1073-
result = _algos.left_join_indexer_unique_int64(b, a)
1074+
result = _join.left_join_indexer_unique_int64(b, a)
10741075
expected = np.array([1, 1, 2, 3, 3], dtype=np.int64)
10751076
assert (np.array_equal(result, expected))
10761077

@@ -1086,7 +1087,7 @@ def test_left_outer_join_bug():
10861087
right = np.array([3, 1], dtype=np.int64)
10871088
max_groups = 4
10881089

1089-
lidx, ridx = _algos.left_outer_join(left, right, max_groups, sort=False)
1090+
lidx, ridx = _join.left_outer_join(left, right, max_groups, sort=False)
10901091

10911092
exp_lidx = np.arange(len(left))
10921093
exp_ridx = -np.ones(len(left))
@@ -1101,7 +1102,7 @@ def test_inner_join_indexer():
11011102
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
11021103
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)
11031104

1104-
index, ares, bres = _algos.inner_join_indexer_int64(a, b)
1105+
index, ares, bres = _join.inner_join_indexer_int64(a, b)
11051106

11061107
index_exp = np.array([3, 5], dtype=np.int64)
11071108
assert_almost_equal(index, index_exp)
@@ -1114,7 +1115,7 @@ def test_inner_join_indexer():
11141115
a = np.array([5], dtype=np.int64)
11151116
b = np.array([5], dtype=np.int64)
11161117

1117-
index, ares, bres = _algos.inner_join_indexer_int64(a, b)
1118+
index, ares, bres = _join.inner_join_indexer_int64(a, b)
11181119
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
11191120
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
11201121
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))
@@ -1124,7 +1125,7 @@ def test_outer_join_indexer():
11241125
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
11251126
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)
11261127

1127-
index, ares, bres = _algos.outer_join_indexer_int64(a, b)
1128+
index, ares, bres = _join.outer_join_indexer_int64(a, b)
11281129

11291130
index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64)
11301131
assert_almost_equal(index, index_exp)
@@ -1137,7 +1138,7 @@ def test_outer_join_indexer():
11371138
a = np.array([5], dtype=np.int64)
11381139
b = np.array([5], dtype=np.int64)
11391140

1140-
index, ares, bres = _algos.outer_join_indexer_int64(a, b)
1141+
index, ares, bres = _join.outer_join_indexer_int64(a, b)
11411142
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
11421143
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
11431144
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))
@@ -1147,7 +1148,7 @@ def test_left_join_indexer():
11471148
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
11481149
b = np.array([0, 3, 5, 7, 9], dtype=np.int64)
11491150

1150-
index, ares, bres = _algos.left_join_indexer_int64(a, b)
1151+
index, ares, bres = _join.left_join_indexer_int64(a, b)
11511152

11521153
assert_almost_equal(index, a)
11531154

@@ -1159,7 +1160,7 @@ def test_left_join_indexer():
11591160
a = np.array([5], dtype=np.int64)
11601161
b = np.array([5], dtype=np.int64)
11611162

1162-
index, ares, bres = _algos.left_join_indexer_int64(a, b)
1163+
index, ares, bres = _join.left_join_indexer_int64(a, b)
11631164
tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
11641165
tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
11651166
tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))
@@ -1169,7 +1170,7 @@ def test_left_join_indexer2():
11691170
idx = Index([1, 1, 2, 5])
11701171
idx2 = Index([1, 2, 5, 7, 9])
11711172

1172-
res, lidx, ridx = _algos.left_join_indexer_int64(idx2.values, idx.values)
1173+
res, lidx, ridx = _join.left_join_indexer_int64(idx2.values, idx.values)
11731174

11741175
exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
11751176
assert_almost_equal(res, exp_res)
@@ -1185,7 +1186,7 @@ def test_outer_join_indexer2():
11851186
idx = Index([1, 1, 2, 5])
11861187
idx2 = Index([1, 2, 5, 7, 9])
11871188

1188-
res, lidx, ridx = _algos.outer_join_indexer_int64(idx2.values, idx.values)
1189+
res, lidx, ridx = _join.outer_join_indexer_int64(idx2.values, idx.values)
11891190

11901191
exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
11911192
assert_almost_equal(res, exp_res)
@@ -1201,7 +1202,7 @@ def test_inner_join_indexer2():
12011202
idx = Index([1, 1, 2, 5])
12021203
idx2 = Index([1, 2, 5, 7, 9])
12031204

1204-
res, lidx, ridx = _algos.inner_join_indexer_int64(idx2.values, idx.values)
1205+
res, lidx, ridx = _join.inner_join_indexer_int64(idx2.values, idx.values)
12051206

12061207
exp_res = np.array([1, 1, 2, 5], dtype=np.int64)
12071208
assert_almost_equal(res, exp_res)

pandas/tests/test_expressions.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,9 @@ def test_float_panel(self):
208208

209209
@slow
210210
def test_panel4d(self):
211-
self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5,
212-
assert_func=assert_panel4d_equal, binary_comp=3)
211+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
212+
self.run_panel(tm.makePanel4D(), np.random.randn() + 0.5,
213+
assert_func=assert_panel4d_equal, binary_comp=3)
213214

214215
def test_mixed_arithmetic_frame(self):
215216
# TODO: FIGURE OUT HOW TO GET IT TO WORK...

pandas/tools/merge.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
import pandas.core.common as com
4040
import pandas.types.concat as _concat
4141

42-
import pandas.algos as _algos
42+
import pandas._join as _join
4343
import pandas.hashtable as _hash
4444

4545

@@ -918,8 +918,8 @@ def get_result(self):
918918
rdata.items, rsuf)
919919

920920
if self.fill_method == 'ffill':
921-
left_join_indexer = _algos.ffill_indexer(left_indexer)
922-
right_join_indexer = _algos.ffill_indexer(right_indexer)
921+
left_join_indexer = _join.ffill_indexer(left_indexer)
922+
right_join_indexer = _join.ffill_indexer(right_indexer)
923923
else:
924924
left_join_indexer = left_indexer
925925
right_join_indexer = right_indexer
@@ -1094,13 +1094,13 @@ def _get_multiindex_indexer(join_keys, index, sort):
10941094
# factorize keys to a dense i8 space
10951095
lkey, rkey, count = fkeys(lkey, rkey)
10961096

1097-
return _algos.left_outer_join(lkey, rkey, count, sort=sort)
1097+
return _join.left_outer_join(lkey, rkey, count, sort=sort)
10981098

10991099

11001100
def _get_single_indexer(join_key, index, sort=False):
11011101
left_key, right_key, count = _factorize_keys(join_key, index, sort=sort)
11021102

1103-
left_indexer, right_indexer = _algos.left_outer_join(
1103+
left_indexer, right_indexer = _join.left_outer_join(
11041104
_ensure_int64(left_key),
11051105
_ensure_int64(right_key),
11061106
count, sort=sort)
@@ -1135,15 +1135,15 @@ def _left_join_on_index(left_ax, right_ax, join_keys, sort=False):
11351135

11361136

11371137
def _right_outer_join(x, y, max_groups):
1138-
right_indexer, left_indexer = _algos.left_outer_join(y, x, max_groups)
1138+
right_indexer, left_indexer = _join.left_outer_join(y, x, max_groups)
11391139
return left_indexer, right_indexer
11401140

11411141
_join_functions = {
1142-
'inner': _algos.inner_join,
1143-
'left': _algos.left_outer_join,
1142+
'inner': _join.inner_join,
1143+
'left': _join.left_outer_join,
11441144
'right': _right_outer_join,
1145-
'outer': _algos.full_outer_join,
1146-
'asof': _algos.left_outer_asof_join,
1145+
'outer': _join.full_outer_join,
1146+
'asof': _join.left_outer_asof_join,
11471147
}
11481148

11491149

0 commit comments

Comments
 (0)