Skip to content

Commit 07996a1

Browse files
committed
task: removed scheduled task support, which at some point was introduced to improve performance, but which now hinders performance, besides being unnecessary ;)
1 parent 57a4e09 commit 07996a1

File tree

2 files changed

+11
-65
lines changed

2 files changed

+11
-65
lines changed

lib/git/async/pool.py

+11-32
Original file line numberDiff line numberDiff line change
@@ -80,27 +80,21 @@ def read(self, count=0, block=True, timeout=None):
8080
self._pre_cb()
8181
# END pre callback
8282

83-
# if we have count items, don't do any queue preparation - if someone
84-
# depletes the queue in the meanwhile, the channel will close and
85-
# we will unblock naturally
86-
# PROBLEM: If there are multiple consumers of this channel, we might
87-
# run out of items without being replenished == block forever in the
88-
# worst case. task.min_count could have triggered to produce more ...
89-
# usually per read with n items, we put n items on to the queue,
90-
# so we wouldn't check this
91-
# Even if we have just one consumer ( we could determine that with
92-
# the reference count ), it could be that in one moment we don't yet
93-
# have an item, but its currently being produced by some worker.
94-
# This is why we:
95-
# * make no assumptions if there are multiple consumers
96-
# *
83+
# NOTE: we always queue the operation that would give us count items
84+
# as tracking the scheduled items or testing the channels size
85+
# is inherently unsafe depending on the design of the task network
86+
# If we put tasks onto the queue for every request, we are sure
87+
# to always produce enough items, even if the task.min_count actually
88+
# provided enough - it's better to have some possibly empty task runs
89+
# than having an empty queue that blocks.
90+
91+
# NOTE: TODO: that case is only possible if one Task could be connected
92+
# to multiple input channels in a manner known by the system. Currently
93+
# this is not possible, but should be implemented at some point
9794

9895
# if the user tries to use us to read from a done task, we will never
9996
# compute as all produced items are already in the channel
10097
skip_compute = self._task.is_done() or self._task.error()
101-
#if count > 0:
102-
# skip_compute = self._task.scheduled_item_count() >= count or self._wc._queue.qsize() >= count
103-
# END
10498

10599
########## prepare ##############################
106100
if not skip_compute:
@@ -249,13 +243,6 @@ def _prepare_channel_read(self, task, count):
249243
# raise AssertionError("Shouldn't have consumed tasks on the pool, they delete themselves, what happened?")
250244
# END skip processing
251245

252-
# if the task does not have the required output on its queue, schedule
253-
# it for processing. If we should process all, we don't care about the
254-
# amount as it should process until its all done.
255-
#if count > 1 and task._out_wc.size() >= count:
256-
# continue
257-
# END skip if we have enough
258-
259246
# but use the actual count to produce the output, we may produce
260247
# more than requested
261248
numchunks = 1
@@ -283,33 +270,26 @@ def _prepare_channel_read(self, task, count):
283270
queue = self._queue
284271
if numchunks > 1:
285272
for i in xrange(numchunks):
286-
# schedule them as early as we know about them
287-
task.add_scheduled_items(chunksize)
288273
queue.put((task.process, chunksize))
289274
# END for each chunk to put
290275
else:
291-
task.add_scheduled_items(chunksize)
292276
queue.put((task.process, chunksize))
293277
# END try efficient looping
294278

295279
if remainder:
296-
task.add_scheduled_items(remainder)
297280
queue.put((task.process, remainder))
298281
# END handle chunksize
299282
else:
300283
# no workers, so we have to do the work ourselves
301284
if numchunks > 1:
302285
for i in xrange(numchunks):
303-
task.add_scheduled_items(chunksize)
304286
task.process(chunksize)
305287
# END for each chunk to put
306288
else:
307-
task.add_scheduled_items(chunksize)
308289
task.process(chunksize)
309290
# END try efficient looping
310291

311292
if remainder:
312-
task.add_scheduled_items(remainder)
313293
task.process(remainder)
314294
# END handle chunksize
315295
# END handle serial mode
@@ -452,7 +432,6 @@ def add_task(self, task):
452432
# This brings about 15% more performance, but sacrifices thread-safety
453433
# when reading from multiple threads.
454434
if self.size() == 0:
455-
task._slock = DummyLock()
456435
wctype = SerialWChannel
457436
# END improve locks
458437

lib/git/async/task.py

-33
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ class OutputChannelTask(Node):
2323
'_out_wc', # output write channel
2424
'_exc', # exception caught
2525
'_done', # True if we are done
26-
'_scheduled_items', # amount of scheduled items that will be processed in total
27-
'_slock', # lock for scheduled items
2826
'fun', # function to call with items read
2927
'min_count', # minimum amount of items to produce, None means no override
3028
'max_chunksize', # maximum amount of items to process per process call
@@ -37,8 +35,6 @@ def __init__(self, id, fun, apply_single=True, min_count=None, max_chunksize=0):
3735
self._out_wc = None # to be set later
3836
self._exc = None
3937
self._done = False
40-
self._scheduled_items = 0
41-
self._slock = threading.Lock()
4238
self.fun = fun
4339
self.min_count = None
4440
self.max_chunksize = 0 # not set
@@ -72,21 +68,6 @@ def error(self):
7268
""":return: Exception caught during last processing or None"""
7369
return self._exc
7470

75-
def add_scheduled_items(self, count):
76-
"""Add the given amount of scheduled items to this task"""
77-
self._slock.acquire()
78-
self._scheduled_items += count
79-
self._slock.release()
80-
81-
def scheduled_item_count(self):
82-
""":return: amount of scheduled items for this task"""
83-
self._slock.acquire()
84-
try:
85-
return self._scheduled_items
86-
finally:
87-
self._slock.release()
88-
# END threadsafe return
89-
9071
def process(self, count=0):
9172
"""Process count items and send the result individually to the output channel"""
9273
items = self._read(count)
@@ -101,19 +82,12 @@ def process(self, count=0):
10182
if self.apply_single:
10283
for item in items:
10384
rval = self.fun(item)
104-
# decrement afterwards, so it's unscheduled once it's produced
105-
self._slock.acquire()
106-
self._scheduled_items -= 1
107-
self._slock.release()
10885
wc.write(rval)
10986
# END for each item
11087
else:
11188
# shouldn't apply single be the default anyway ?
11289
# The task designers should chunk them up in advance
11390
rvals = self.fun(items)
114-
self._slock.acquire()
115-
self._scheduled_items -= len(items)
116-
self._slock.release()
11791
for rval in rvals:
11892
wc.write(rval)
11993
# END handle single apply
@@ -122,13 +96,6 @@ def process(self, count=0):
12296

12397
# be sure our task is not scheduled again
12498
self.set_done()
125-
# unschedule all, we don't know how many have been produced actually
126-
# but only if we don't apply single please
127-
if not self.apply_single:
128-
self._slock.acquire()
129-
self._scheduled_items -= len(items)
130-
self._slock.release()
131-
# END unschedule all
13299

133100
# PROBLEM: We have failed to create at least one item, hence it's not
134101
# guaranteed that enough items will be produced for a possibly blocking

0 commit comments

Comments
 (0)