Skip to content

Commit fa96250

Browse files
mrocklin authored and jorisvandenbossche committed
Implement sjoin within with contains (geopandas#575)
We swap the two sides of the join and use the converse predicate (`contains` instead of `within`), then swap the results back. This seems to be significantly faster in the common case.
1 parent 5805073 commit fa96250

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

geopandas/tools/sjoin.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,11 +206,23 @@ def sjoin(left_df, right_df, op='intersects', how='inner',
206206
raise ValueError("'right_df' should be GeoDataFrame, got {}".format(
207207
type(right_df)))
208208

209-
allowed_hows = ['left', 'right', 'inner']
209+
allowed_hows = ('left', 'right', 'inner')
210210
if how not in allowed_hows:
211211
raise ValueError("`how` was \"%s\" but is expected to be in %s" %
212212
(how, allowed_hows))
213213

214+
original_op = op
215+
original_how = how
216+
if op == "within":
217+
# within implemented as the inverse of contains; swap names
218+
# This is done for efficiency reasons
219+
op = 'contains'
220+
left_df, right_df = right_df, left_df
221+
if how == 'left':
222+
how = 'right'
223+
elif how == 'right':
224+
how = 'left'
225+
214226
if left_df.crs != right_df.crs:
215227
warn(
216228
('CRS of frames being joined does not match!'
@@ -240,6 +252,11 @@ def sjoin(left_df, right_df, op='intersects', how='inner',
240252
left = left_df.take(left_indices)
241253
right = right_df.take(right_indices)
242254

255+
if original_op == 'within': # switch back
256+
left, right = right, left
257+
n_left, n_right = n_right, n_left
258+
how = original_how
259+
243260
if how in ('inner', 'left'):
244261
del right[right._geometry_column_name]
245262
index = left.index

0 commit comments

Comments
 (0)