diff --git a/README.md b/README.md index 75723530..e397e1b2 100755 --- a/README.md +++ b/README.md @@ -180,6 +180,8 @@ To update the existing challengeId data on submissions in DynamoDB to v5 challen ```bash SUBMISSION_TABLE_NAME // Table name of the submission records. Defaults to 'Submission' -UPDATE_V5_CHALLENGE_BATCH_SIZE // Number of records that are updated simultaneously. Defaults to 250 +UPDATE_V5_CHALLENGE_BATCH_SIZE // Number of records that are updated simultaneously. Defaults to 100 +FETCH_CREATED_DATE_START // The earliest creation date of the challenges to fetch. Defaults to '2021-01-01' +FETCH_PAGE_SIZE // The page size of each API request. Defaults to 500 ``` diff --git a/config/default.js b/config/default.js index cfef076f..fa8090f6 100755 --- a/config/default.js +++ b/config/default.js @@ -37,7 +37,9 @@ module.exports = { PAGE_SIZE: process.env.PAGE_SIZE || 20, MAX_PAGE_SIZE: parseInt(process.env.MAX_PAGE_SIZE) || 100, ES_BATCH_SIZE: process.env.ES_BATCH_SIZE || 250, - UPDATE_V5_CHALLENGE_BATCH_SIZE: process.env.UPDATE_V5_CHALLENGE_BATCH_SIZE || 250, + UPDATE_V5_CHALLENGE_BATCH_SIZE: process.env.UPDATE_V5_CHALLENGE_BATCH_SIZE || 100, SUBMISSION_TABLE_NAME: process.env.SUBMISSION_TABLE_NAME || 'Submission', - AUTH0_PROXY_SERVER_URL: process.env.AUTH0_PROXY_SERVER_URL + AUTH0_PROXY_SERVER_URL: process.env.AUTH0_PROXY_SERVER_URL, + FETCH_CREATED_DATE_START: process.env.FETCH_CREATED_DATE_START || '2021-01-01', + FETCH_PAGE_SIZE: process.env.FETCH_PAGE_SIZE || 500 } diff --git a/scripts/updateToV5ChallengeId.js b/scripts/updateToV5ChallengeId.js index 326e824d..617f17f4 100644 --- a/scripts/updateToV5ChallengeId.js +++ b/scripts/updateToV5ChallengeId.js @@ -13,17 +13,10 @@ const helper = require('../src/common/helper') * Update Submission's challenge id to v5 * @param {Object} submission The submission record * @param {Array} failedContainer The failed records container + * @param {String} v5challengeId The v5 challenge id * @returns {Promise} */ -function * updateRecord (submission, failedContainer) { - let v5challengeId - try { - v5challengeId = yield 
helper.getV5ChallengeId(submission.challengeId) - } catch (err) { - logger.error(`fetching the details of the challenge(${submission.challengeId}) failed, ${err.message}`) - failedContainer.push(submission) - return - } +function * updateRecord (submission, failedContainer, v5challengeId) { const record = { TableName: 'Submission', Key: { @@ -35,13 +28,11 @@ function * updateRecord (submission, failedContainer) { ':l': submission.challengeId } } - if (!v5challengeId) { - logger.warn(`the challengeId: ${submission.challengeId} is not having a v5 challengeId`) - failedContainer.push(submission) - } else if (v5challengeId === submission.challengeId) { - logger.info(`the challengeId: ${submission.challengeId} is already a v5 challengeId`) - } else { + try { yield dbhelper.updateRecord(record) + } catch (err) { + logger.error(`update submission record error: ${err.message}`) + failedContainer.push(submission) } } @@ -53,23 +44,34 @@ function * updateRecords () { const tableName = config.SUBMISSION_TABLE_NAME const promises = [] const failedRecords = [] - const params = { - TableName: tableName - } // Process until all the records from DB is fetched - while (true) { - const records = yield dbhelper.scanRecords(params) - const totalRecords = records.Items.length - logger.debug(`Number of ${tableName}s fetched from DB - ${totalRecords}. 
More fetch iterations may follow (pagination in progress)`) - for (let i = 0; i < totalRecords; i++) { - const record = records.Items[i] - promises.push(updateRecord(record, failedRecords)) + const challengeIds = yield helper.getLatestChallenges() + logger.debug(`Total number of challenges fetched from api - ${challengeIds.length}.`) + const batchIds = _.chunk(challengeIds, config.UPDATE_V5_CHALLENGE_BATCH_SIZE) + for (const cId of batchIds) { + const queryParams = _.fromPairs(_.map(cId, (c, i) => [`:challengeId${i}`, c.legacyId])) + const params = { + TableName: tableName, + FilterExpression: `#challengeId IN (${_.join(_.keys(queryParams), ',')})`, + ExpressionAttributeNames: { + '#challengeId': 'challengeId' + }, + ExpressionAttributeValues: queryParams } - // Continue fetching the remaining records from Database - if (typeof records.LastEvaluatedKey !== 'undefined') { - params.ExclusiveStartKey = records.LastEvaluatedKey - } else { - break // If there are no more records to process, exit the loop + while (true) { + const records = yield dbhelper.scanRecords(params) + const totalRecords = records.Items.length + logger.debug(`Number of ${tableName}s fetched from DB - ${totalRecords}. More fetch iterations may follow (pagination in progress)`) + for (let i = 0; i < totalRecords; i++) { + const record = records.Items[i] + promises.push(updateRecord(record, failedRecords, _.find(cId, ['legacyId', record.challengeId]).id)) + } + // Continue fetching the remaining records from Database + if (typeof records.LastEvaluatedKey !== 'undefined') { + params.ExclusiveStartKey = records.LastEvaluatedKey + } else { + break // If there are no more records to process, exit the loop + } } } logger.debug(`All records fetched. 
Proceeding to update them in batches of ${config.UPDATE_V5_CHALLENGE_BATCH_SIZE}`) diff --git a/src/common/helper.js b/src/common/helper.js index 67fad8fa..f3170bf9 100755 --- a/src/common/helper.js +++ b/src/common/helper.js @@ -769,6 +769,32 @@ function adjustSubmissionChallengeId (submission) { } } +/** + * Get the latest challenges that have a legacyId, from the given page onwards + * @param {Number} page the page index to start from, defaults to 1 + * @returns {Array} an array of challenges, each holding only id and legacyId + */ +function * getLatestChallenges (page) { + page = page || 1 + const token = yield getM2Mtoken() + const url = `${config.CHALLENGEAPI_V5_URL}?createdDateStart=${config.FETCH_CREATED_DATE_START}&page=${page}&perPage=${config.FETCH_PAGE_SIZE}&isLightweight=true` + try { + const response = yield request.get(url) + .set('Authorization', `Bearer ${token}`) + .set('Content-Type', 'application/json') + const challenges = _.map(_.filter(_.get(response, 'body'), 'legacyId'), c => _.pick(c, 'id', 'legacyId')) + logger.debug(`Fetched ${challenges.length} challenges in this iteration. More may follow...`) + if (_.get(response, 'headers.x-total-pages') > page) { + const leftChallenges = yield getLatestChallenges(page + 1) + challenges.push(...leftChallenges) + } + return challenges + } catch (err) { + logger.error(`Error while accessing ${url}, message: ${err.message}`) + return [] + } +} + module.exports = { wrapExpress, autoWrapExpress, @@ -786,5 +812,6 @@ module.exports = { cleanseReviews, getRoleIdToRoleNameMap, getV5ChallengeId, - adjustSubmissionChallengeId + adjustSubmissionChallengeId, + getLatestChallenges }