Skip to content

Commit ad3428e

Browse files
committed
Merge tag '1.41.0' into cdb
Version 1.41.0 2016-10-21 Announcements: * Stop migrating old queues by default. Bug fixes: * Fix some scenarios where batch queries got stuck waiting for available slots.
2 parents 10776a0 + 17ab40f commit ad3428e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+2455
-652
lines changed

NEWS.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,43 @@
1+
1.41.0 - 2016-10-21
2+
-------------------
3+
4+
Announcements:
5+
* Stop migrating old queues by default.
6+
7+
Bug fixes:
8+
* Fix some scenarios where batch queries got stuck waiting for available slots.
9+
10+
11+
1.40.0 - 2016-10-20
12+
-------------------
13+
14+
New features:
15+
* Batch queries are handled per db host.
16+
- There is a scheduler controlling how many queries are run and in what order.
17+
- Priority is based on: number of queries already run, and oldest user in queue.
18+
* Batch queries capacity: allows configuring how many jobs to run per db host.
19+
20+
21+
1.39.1 - 2016-10-17
22+
-------------------
23+
24+
Enhancements:
25+
* Log creation and waiting time for fallback jobs' queries.
26+
27+
28+
1.39.0 - 2016-10-17
29+
-------------------
30+
31+
Enhancements:
32+
* Use just one Redis pool across the whole application.
33+
34+
New features:
35+
* Batch queries use per-user queues.
36+
* Batch queries queues can limit the number of queued jobs per user.
37+
- Default is 64 jobs.
38+
- Configuration key `batch_max_queued_jobs` allows modifying the limit.
39+
40+
141
1.38.2 - 2016-10-13
242
-------------------
343

app/server.js

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ var _ = require('underscore');
2323
var LRU = require('lru-cache');
2424

2525
var RedisPool = require('redis-mpool');
26+
var cartodbRedis = require('cartodb-redis');
2627
var UserDatabaseService = require('./services/user_database_service');
2728
var JobPublisher = require('../batch/pubsub/job-publisher');
2829
var JobQueue = require('../batch/job_queue');
@@ -53,14 +54,15 @@ function App() {
5354

5455
var app = express();
5556

56-
var redisConfig = {
57+
var redisPool = new RedisPool({
58+
name: 'sql-api',
5759
host: global.settings.redis_host,
5860
port: global.settings.redis_port,
5961
max: global.settings.redisPool,
6062
idleTimeoutMillis: global.settings.redisIdleTimeoutMillis,
6163
reapIntervalMillis: global.settings.redisReapIntervalMillis
62-
};
63-
var metadataBackend = require('cartodb-redis')(redisConfig);
64+
});
65+
var metadataBackend = cartodbRedis({ pool: redisPool });
6466

6567

6668
// Set default configuration
@@ -181,8 +183,7 @@ function App() {
181183

182184
var userDatabaseService = new UserDatabaseService(metadataBackend);
183185

184-
var redisPoolPublisher = new RedisPool(_.extend(redisConfig, { name: 'job-publisher'}));
185-
var jobPublisher = new JobPublisher(redisPoolPublisher);
186+
var jobPublisher = new JobPublisher(redisPool);
186187
var jobQueue = new JobQueue(metadataBackend, jobPublisher);
187188
var jobBackend = new JobBackend(metadataBackend, jobQueue);
188189
var userDatabaseMetadataService = new UserDatabaseMetadataService(metadataBackend);
@@ -212,7 +213,7 @@ function App() {
212213
if (global.settings.environment !== 'test' && isBatchProcess) {
213214
var batchName = global.settings.api_hostname || 'batch';
214215
app.batch = batchFactory(
215-
metadataBackend, redisConfig, batchName, statsd_client, global.settings.batch_log_filename
216+
metadataBackend, redisPool, batchName, statsd_client, global.settings.batch_log_filename
216217
);
217218
app.batch.start();
218219
}

batch/README.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,40 @@
11
# Batch Queries
22

3-
This document describes the currently supported query types, and what they are missing in terms of features.
3+
This document describes the features of Batch Queries; it also details some internals that might be useful for maintainers
4+
and developers.
5+
6+
7+
## Redis data structures
8+
9+
### Jobs definition
10+
11+
Redis Hash: `batch:jobs:{UUID}`.
12+
13+
Redis DB: 5.
14+
15+
It stores the job definition, the user, and some metadata like the final status, the failure reason, and so on.
16+
17+
### Job queues
18+
19+
Redis List: `batch:queue:{username}`.
20+
21+
Redis DB: 5.
22+
23+
It stores a pending list of jobs per user. It points to a job definition with the `{UUID}`.
24+
25+
### Job notifications
26+
27+
Redis Pub/Sub channel: `batch:users`.
28+
29+
Redis DB: 0.
30+
31+
In order to notify new jobs, it uses a Pub/Sub channel where the username for the queued job is published.
32+
433

534
## Job types
635

36+
Format for the currently supported query types, and what they are missing in terms of features.
37+
738
### Simple
839

940
```json

batch/batch.js

Lines changed: 47 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
var util = require('util');
44
var EventEmitter = require('events').EventEmitter;
55
var debug = require('./util/debug')('batch');
6-
var forever = require('./util/forever');
76
var queue = require('queue-async');
8-
var Locker = require('./leader/locker');
7+
var HostScheduler = require('./scheduler/host-scheduler');
98

10-
function Batch(name, jobSubscriber, jobQueue, jobRunner, jobService, jobPublisher, redisConfig, logger) {
9+
var EMPTY_QUEUE = true;
10+
11+
function Batch(name, jobSubscriber, jobQueue, jobRunner, jobService, jobPublisher, redisPool, logger) {
1112
EventEmitter.call(this);
1213
this.name = name || 'batch';
1314
this.jobSubscriber = jobSubscriber;
@@ -16,10 +17,10 @@ function Batch(name, jobSubscriber, jobQueue, jobRunner, jobService, jobPublishe
1617
this.jobService = jobService;
1718
this.jobPublisher = jobPublisher;
1819
this.logger = logger;
19-
this.locker = Locker.create('redis-distlock', { redisConfig: redisConfig });
20+
this.hostScheduler = new HostScheduler(name, { run: this.processJob.bind(this) }, redisPool);
2021

21-
// map: host => jobId
22-
this.workingQueues = {};
22+
// map: user => jobId. Will be used for draining jobs.
23+
this.workInProgressJobs = {};
2324
}
2425
util.inherits(Batch, EventEmitter);
2526

@@ -29,33 +30,16 @@ Batch.prototype.start = function () {
2930
var self = this;
3031

3132
this.jobSubscriber.subscribe(
32-
function onJobHandler(host) {
33-
if (self.isProcessingHost(host)) {
34-
return debug('%s is already processing host=%s', self.name, host);
35-
}
36-
37-
// do forever, it does not throw a stack overflow
38-
forever(
39-
function (next) {
40-
self.locker.lock(host, function(err) {
41-
// we didn't get the lock for the host
42-
if (err) {
43-
debug('Could not lock host=%s from %s. Reason: %s', host, self.name, err.message);
44-
return next(err);
45-
}
46-
debug('Locked host=%s from %s', host, self.name);
47-
self.processNextJob(host, next);
48-
});
49-
},
50-
function (err) {
51-
if (err) {
52-
debug(err.name === 'EmptyQueue' ? err.message : err);
53-
}
54-
55-
self.finishedProcessingHost(host);
56-
self.locker.unlock(host, debug);
33+
function onJobHandler(user, host) {
34+
debug('[%s] onJobHandler(%s, %s)', self.name, user, host);
35+
self.hostScheduler.add(host, user, function(err) {
36+
if (err) {
37+
return debug(
38+
'Could not schedule host=%s user=%s from %s. Reason: %s',
39+
host, user, self.name, err.message // order must match the placeholders: host=%s user=%s from %s. Reason: %s
40+
);
5741
}
58-
);
42+
});
5943
},
6044
function onJobSubscriberReady(err) {
6145
if (err) {
@@ -67,50 +51,49 @@ Batch.prototype.start = function () {
6751
);
6852
};
6953

70-
Batch.prototype.processNextJob = function (host, callback) {
54+
Batch.prototype.processJob = function (user, callback) {
7155
var self = this;
72-
self.jobQueue.dequeue(host, function (err, jobId) {
56+
self.jobQueue.dequeue(user, function (err, jobId) {
7357
if (err) {
74-
return callback(err);
58+
return callback(new Error('Could not get job from "' + user + '". Reason: ' + err.message), !EMPTY_QUEUE);
7559
}
7660

7761
if (!jobId) {
78-
var emptyQueueError = new Error('Queue ' + host + ' is empty');
79-
emptyQueueError.name = 'EmptyQueue';
80-
return callback(emptyQueueError);
62+
debug('Queue empty user=%s', user);
63+
return callback(null, EMPTY_QUEUE);
8164
}
8265

83-
self.setProcessingJobId(host, jobId);
84-
66+
self.setWorkInProgressJob(user, jobId);
8567
self.jobRunner.run(jobId, function (err, job) {
86-
self.setProcessingJobId(host, null);
68+
self.clearWorkInProgressJob(user);
8769

8870
if (err) {
8971
debug(err);
9072
if (err.name === 'JobNotRunnable') {
91-
return callback();
73+
return callback(null, !EMPTY_QUEUE);
9274
}
93-
return callback(err);
75+
return callback(err, !EMPTY_QUEUE);
9476
}
9577

96-
debug('Job[%s] status=%s in host=%s (failed_reason=%s)', jobId, job.data.status, host, job.failed_reason);
78+
debug(
79+
'[%s] Job=%s status=%s user=%s (failed_reason=%s)',
80+
self.name, jobId, job.data.status, user, job.failed_reason
81+
);
9782

9883
self.logger.log(job);
9984

100-
self.emit('job:' + job.data.status, jobId);
101-
102-
callback();
85+
return callback(null, !EMPTY_QUEUE);
10386
});
10487
});
10588
};
10689

10790
Batch.prototype.drain = function (callback) {
10891
var self = this;
109-
var workingHosts = this.getWorkingHosts();
110-
var batchQueues = queue(workingHosts.length);
92+
var workingUsers = this.getWorkInProgressUsers();
93+
var batchQueues = queue(workingUsers.length);
11194

112-
workingHosts.forEach(function (host) {
113-
batchQueues.defer(self._drainJob.bind(self), host);
95+
workingUsers.forEach(function (user) {
96+
batchQueues.defer(self._drainJob.bind(self), user);
11497
});
11598

11699
batchQueues.awaitAll(function (err) {
@@ -124,9 +107,9 @@ Batch.prototype.drain = function (callback) {
124107
});
125108
};
126109

127-
Batch.prototype._drainJob = function (host, callback) {
110+
Batch.prototype._drainJob = function (user, callback) {
128111
var self = this;
129-
var job_id = this.getProcessingJobId(host);
112+
var job_id = this.getWorkInProgressJob(user);
130113

131114
if (!job_id) {
132115
return process.nextTick(function () {
@@ -143,7 +126,7 @@ Batch.prototype._drainJob = function (host, callback) {
143126
return callback(err);
144127
}
145128

146-
self.jobQueue.enqueueFirst(job_id, host, callback);
129+
self.jobQueue.enqueueFirst(user, job_id, callback);
147130
});
148131
};
149132

@@ -152,22 +135,21 @@ Batch.prototype.stop = function (callback) {
152135
this.jobSubscriber.unsubscribe(callback);
153136
};
154137

155-
Batch.prototype.isProcessingHost = function(host) {
156-
return this.workingQueues.hasOwnProperty(host);
157-
};
158138

159-
Batch.prototype.getWorkingHosts = function() {
160-
return Object.keys(this.workingQueues);
139+
/* Work in progress jobs */
140+
141+
Batch.prototype.setWorkInProgressJob = function(user, jobId) {
142+
this.workInProgressJobs[user] = jobId;
161143
};
162144

163-
Batch.prototype.setProcessingJobId = function(host, jobId) {
164-
this.workingQueues[host] = jobId;
145+
Batch.prototype.getWorkInProgressJob = function(user) {
146+
return this.workInProgressJobs[user];
165147
};
166148

167-
Batch.prototype.getProcessingJobId = function(host) {
168-
return this.workingQueues[host];
149+
Batch.prototype.clearWorkInProgressJob = function(user) {
150+
delete this.workInProgressJobs[user];
169151
};
170152

171-
Batch.prototype.finishedProcessingHost = function(host) {
172-
delete this.workingQueues[host];
153+
Batch.prototype.getWorkInProgressUsers = function() {
154+
return Object.keys(this.workInProgressJobs);
173155
};

batch/index.js

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
'use strict';
22

3-
var RedisPool = require('redis-mpool');
4-
var _ = require('underscore');
53
var JobRunner = require('./job_runner');
64
var QueryRunner = require('./query_runner');
75
var JobCanceller = require('./job_canceller');
@@ -14,18 +12,18 @@ var JobService = require('./job_service');
1412
var BatchLogger = require('./batch-logger');
1513
var Batch = require('./batch');
1614

17-
module.exports = function batchFactory (metadataBackend, redisConfig, name, statsdClient, loggerPath) {
18-
var pubSubRedisPool = new RedisPool(_.extend({ name: 'batch-pubsub'}, redisConfig));
19-
var jobSubscriber = new JobSubscriber(pubSubRedisPool);
20-
var jobPublisher = new JobPublisher(pubSubRedisPool);
15+
module.exports = function batchFactory (metadataBackend, redisPool, name, statsdClient, loggerPath) {
16+
var userDatabaseMetadataService = new UserDatabaseMetadataService(metadataBackend);
17+
18+
var jobSubscriber = new JobSubscriber(redisPool, userDatabaseMetadataService);
19+
var jobPublisher = new JobPublisher(redisPool);
2120

2221
var jobQueue = new JobQueue(metadataBackend, jobPublisher);
2322
var jobBackend = new JobBackend(metadataBackend, jobQueue);
24-
var userDatabaseMetadataService = new UserDatabaseMetadataService(metadataBackend);
2523
var queryRunner = new QueryRunner(userDatabaseMetadataService);
2624
var jobCanceller = new JobCanceller(userDatabaseMetadataService);
2725
var jobService = new JobService(jobBackend, jobCanceller);
28-
var jobRunner = new JobRunner(jobService, jobQueue, queryRunner, statsdClient);
26+
var jobRunner = new JobRunner(jobService, jobQueue, queryRunner, metadataBackend, statsdClient);
2927
var logger = new BatchLogger(loggerPath);
3028

3129
return new Batch(
@@ -35,7 +33,7 @@ module.exports = function batchFactory (metadataBackend, redisConfig, name, stat
3533
jobRunner,
3634
jobService,
3735
jobPublisher,
38-
redisConfig,
36+
redisPool,
3937
logger
4038
);
4139
};

0 commit comments

Comments
 (0)