diff --git a/config/default.js b/config/default.js index 726c40fa..d90ae6c0 100644 --- a/config/default.js +++ b/config/default.js @@ -176,8 +176,6 @@ module.exports = { ROLE_MATCHING_RATE: process.env.ROLE_MATCHING_RATE || 0.70, // member groups representing Wipro or TopCoder employee INTERNAL_MEMBER_GROUPS: process.env.INTERNAL_MEMBER_GROUPS || ['20000000', '20000001', '20000003', '20000010', '20000015'], - // Topcoder skills cache time in minutes - TOPCODER_SKILLS_CACHE_TIME: process.env.TOPCODER_SKILLS_CACHE_TIME || 60, // payment scheduler config PAYMENT_PROCESSING: { // switch off actual API calls in Payment Scheduler diff --git a/docs/Topcoder-bookings-api.postman_collection.json b/docs/Topcoder-bookings-api.postman_collection.json index b37011a5..479ddceb 100644 --- a/docs/Topcoder-bookings-api.postman_collection.json +++ b/docs/Topcoder-bookings-api.postman_collection.json @@ -1,6 +1,6 @@ { "info": { - "_postman_id": "d413d21d-272f-454f-b26a-0d7e3bf926d9", + "_postman_id": "18310e1b-429d-49db-8555-f4a54404271f", "name": "Topcoder-bookings-api", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" }, @@ -21165,221 +21165,6 @@ } ] }, - { - "name": "Get Skills by Job Description", - "item": [ - { - "name": "get skills successfully", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test('Status code is 200', function () {\r", - " pm.response.to.have.status(200);\r", - "});" - ], - "type": "text/javascript" - } - } - ], - "request": { - "method": "POST", - "header": [ - { - "key": "Authorization", - "type": "text", - "value": "Bearer {{token_administrator}}" - }, - { - "key": "Content-Type", - "type": "text", - "value": "application/json" - } - ], - "body": { - "mode": "raw", - "raw": "{ \"description\": \"Description A global leading healthcare company is seeking a strong Databricks Engineer to join their development team as they build their new Databricks workspace. 
Development efforts will contribute to the migration of data from Hadoop to Databricks to prepare data for visualization. Candidate must be well-versed in Databricks components and best practices, be an excellent problem solver and be comfortable working in a fast-moving, rapidly changing, and dynamic environment via Agile, SCRUM, and DevOps. PREFERRED QUALIFICATIONS: 2+ years of Azure Data Stack experience: Azure Data Services using ADF, ADLS, Databricks with PySpark, Azure DevOps & Azure Key Vault. Strong knowledge of various data warehousing methodologies and data modeling concepts. Hands-on experience using Azure, Azure data lake, Azure functions & Databricks Minimum 2-3+ years of Python experience (PySpark) Design & Develop Azure native solutions for Data Platform Minimum 3+ years of experience using Big Data ecosystem (Cloudera/Hortonworks) using Oozie, Hive, Impala, and Spark Expert in SQL and performance tuning\" }", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{URL}}/taas-teams/getSkillsByJobDescription", - "host": [ - "{{URL}}" - ], - "path": [ - "taas-teams", - "getSkillsByJobDescription" - ] - } - }, - "response": [] - }, - { - "name": "get skills by invalid token", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test('Status code is 401', function () {\r", - " pm.response.to.have.status(401);\r", - " const response = pm.response.json()\r", - " pm.expect(response.message).to.eq(\"Invalid Token.\")\r", - "});" - ], - "type": "text/javascript" - } - } - ], - "request": { - "method": "POST", - "header": [ - { - "key": "Authorization", - "type": "text", - "value": "Bearer invalid_token" - }, - { - "key": "Content-Type", - "type": "text", - "value": "application/json" - } - ], - "body": { - "mode": "raw", - "raw": "{ \"description\": \"Description A global leading healthcare company is seeking a strong Databricks Engineer to join their development team as they build their new Databricks workspace. 
Development efforts will contribute to the migration of data from Hadoop to Databricks to prepare data for visualization. Candidate must be well-versed in Databricks components and best practices, be an excellent problem solver and be comfortable working in a fast-moving, rapidly changing, and dynamic environment via Agile, SCRUM, and DevOps. PREFERRED QUALIFICATIONS: 2+ years of Azure Data Stack experience: Azure Data Services using ADF, ADLS, Databricks with PySpark, Azure DevOps & Azure Key Vault. Strong knowledge of various data warehousing methodologies and data modeling concepts. Hands-on experience using Azure, Azure data lake, Azure functions & Databricks Minimum 2-3+ years of Python experience (PySpark) Design & Develop Azure native solutions for Data Platform Minimum 3+ years of experience using Big Data ecosystem (Cloudera/Hortonworks) using Oozie, Hive, Impala, and Spark Expert in SQL and performance tuning\" }", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{URL}}/taas-teams/getSkillsByJobDescription", - "host": [ - "{{URL}}" - ], - "path": [ - "taas-teams", - "getSkillsByJobDescription" - ] - } - }, - "response": [] - }, - { - "name": "get skills by invalid field", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test('Status code is 400', function () {\r", - " pm.response.to.have.status(400);\r", - " const response = pm.response.json()\r", - " pm.expect(response.message).to.eq(\"\\\"data.description\\\" is not allowed to be empty\")\r", - "});" - ], - "type": "text/javascript" - } - } - ], - "request": { - "method": "POST", - "header": [ - { - "key": "Authorization", - "type": "text", - "value": "Bearer {{token_administrator}}" - }, - { - "key": "Content-Type", - "type": "text", - "value": "application/json" - } - ], - "body": { - "mode": "raw", - "raw": "{ \"description\": \"\" }", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": 
"{{URL}}/taas-teams/getSkillsByJobDescription", - "host": [ - "{{URL}}" - ], - "path": [ - "taas-teams", - "getSkillsByJobDescription" - ] - } - }, - "response": [] - }, - { - "name": "get skills by missing field", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test('Status code is 400', function () {\r", - " pm.response.to.have.status(400);\r", - " const response = pm.response.json()\r", - " pm.expect(response.message).to.eq(\"\\\"data.description\\\" is required\")\r", - "});" - ], - "type": "text/javascript" - } - } - ], - "request": { - "method": "POST", - "header": [ - { - "key": "Authorization", - "type": "text", - "value": "Bearer {{token_administrator}}" - }, - { - "key": "Content-Type", - "type": "text", - "value": "application/json" - } - ], - "body": { - "mode": "raw", - "raw": "{}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{URL}}/taas-teams/getSkillsByJobDescription", - "host": [ - "{{URL}}" - ], - "path": [ - "taas-teams", - "getSkillsByJobDescription" - ] - } - }, - "response": [] - } - ] - }, { "name": "GET /taas-teams", "request": { @@ -21863,6 +21648,44 @@ }, "response": [] }, + { + "name": "POST /taas-teams/getSkillsByJobDescription", + "request": { + "method": "POST", + "header": [ + { + "key": "Authorization", + "type": "text", + "value": "Bearer {{token_member}}" + }, + { + "key": "Content-Type", + "type": "text", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"description\": \"nodejs react c++ hello\"\n}", + "options": { + "raw": { + "language": "json" + } + } + }, + "url": { + "raw": "{{URL}}/taas-teams/getSkillsByJobDescription", + "host": [ + "{{URL}}" + ], + "path": [ + "taas-teams", + "getSkillsByJobDescription" + ] + } + }, + "response": [] + }, { "name": "GET /taas-teams/:id/members", "request": { diff --git a/docs/stopWords.json b/docs/stopWords.json deleted file mode 100644 index dded6681..00000000 --- a/docs/stopWords.json +++ /dev/null 
@@ -1,574 +0,0 @@ -[ - "dr", - "dra", - "mr", - "ms", - "a", - "a's", - "able", - "about", - "above", - "according", - "accordingly", - "across", - "actually", - "after", - "afterwards", - "again", - "against", - "ain't", - "all", - "allow", - "allows", - "almost", - "alone", - "along", - "already", - "also", - "although", - "always", - "am", - "among", - "amongst", - "an", - "and", - "another", - "any", - "anybody", - "anyhow", - "anyone", - "anything", - "anyway", - "anyways", - "anywhere", - "apart", - "appear", - "appreciate", - "appropriate", - "are", - "aren't", - "around", - "as", - "aside", - "ask", - "asking", - "associated", - "at", - "available", - "away", - "awfully", - "b", - "be", - "became", - "because", - "become", - "becomes", - "becoming", - "been", - "before", - "beforehand", - "behind", - "being", - "believe", - "below", - "beside", - "besides", - "best", - "better", - "between", - "beyond", - "both", - "brief", - "but", - "by", - "c'mon", - "c's", - "came", - "can", - "can't", - "cannot", - "cant", - "cause", - "causes", - "certain", - "certainly", - "changes", - "clearly", - "co", - "come", - "comes", - "concerning", - "consequently", - "consider", - "considering", - "contain", - "containing", - "contains", - "corresponding", - "could", - "couldn't", - "course", - "currently", - "d", - "definitely", - "described", - "despite", - "did", - "didn't", - "different", - "do", - "does", - "doesn't", - "doing", - "don't", - "done", - "down", - "downwards", - "during", - "e", - "each", - "edu", - "eg", - "eight", - "either", - "else", - "elsewhere", - "enough", - "entirely", - "especially", - "et", - "etc", - "even", - "ever", - "every", - "everybody", - "everyone", - "everything", - "everywhere", - "ex", - "exactly", - "example", - "except", - "f", - "far", - "few", - "fifth", - "first", - "five", - "followed", - "following", - "follows", - "for", - "former", - "formerly", - "forth", - "four", - "from", - "further", - "furthermore", - "g", - "get", - 
"gets", - "getting", - "given", - "gives", - "goes", - "going", - "gone", - "got", - "gotten", - "greetings", - "h", - "had", - "hadn't", - "happens", - "hardly", - "has", - "hasn't", - "have", - "haven't", - "having", - "he", - "he's", - "hello", - "help", - "hence", - "her", - "here", - "here's", - "hereafter", - "hereby", - "herein", - "hereupon", - "hers", - "herself", - "hi", - "him", - "himself", - "his", - "hither", - "hopefully", - "how", - "howbeit", - "however", - "i", - "i'd", - "i'll", - "i'm", - "i've", - "ie", - "if", - "ignored", - "immediate", - "in", - "inasmuch", - "inc", - "indeed", - "indicate", - "indicated", - "indicates", - "inner", - "insofar", - "instead", - "into", - "inward", - "is", - "isn't", - "it", - "it'd", - "it'll", - "it's", - "its", - "itself", - "j", - "just", - "k", - "keep", - "keeps", - "kept", - "know", - "knows", - "known", - "l", - "last", - "lately", - "later", - "latter", - "latterly", - "least", - "lest", - "let", - "let's", - "like", - "liked", - "likely", - "little", - "look", - "looking", - "looks", - "ltd", - "m", - "mainly", - "many", - "may", - "maybe", - "me", - "mean", - "meanwhile", - "merely", - "might", - "more", - "moreover", - "most", - "mostly", - "much", - "must", - "my", - "myself", - "n", - "name", - "namely", - "nd", - "near", - "nearly", - "necessary", - "need", - "needs", - "neither", - "never", - "nevertheless", - "new", - "next", - "nine", - "no", - "nobody", - "non", - "none", - "noone", - "nor", - "normally", - "not", - "nothing", - "novel", - "now", - "nowhere", - "o", - "obviously", - "of", - "off", - "often", - "oh", - "ok", - "okay", - "old", - "on", - "once", - "one", - "ones", - "only", - "onto", - "or", - "other", - "others", - "otherwise", - "ought", - "our", - "ours", - "ourselves", - "out", - "outside", - "over", - "overall", - "own", - "p", - "particular", - "particularly", - "per", - "perhaps", - "placed", - "please", - "plus", - "point", - "possible", - "presumably", - "probably", - 
"provides", - "q", - "que", - "quite", - "qv", - "rather", - "rd", - "re", - "really", - "reasonably", - "regarding", - "regardless", - "regards", - "relatively", - "respectively", - "right", - "s", - "said", - "same", - "saw", - "say", - "saying", - "says", - "second", - "secondly", - "see", - "seeing", - "seem", - "seemed", - "seeming", - "seems", - "seen", - "self", - "selves", - "sensible", - "sent", - "serious", - "seriously", - "seven", - "several", - "shall", - "she", - "should", - "shouldn't", - "since", - "six", - "so", - "some", - "somebody", - "somehow", - "someone", - "something", - "sometime", - "sometimes", - "somewhat", - "somewhere", - "soon", - "sorry", - "specified", - "specify", - "specifying", - "still", - "strong", - "sub", - "such", - "sup", - "sure", - "t", - "t's", - "take", - "taken", - "tell", - "tends", - "th", - "than", - "thank", - "thanks", - "thanx", - "that", - "that's", - "thats", - "the", - "their", - "theirs", - "them", - "themselves", - "then", - "thence", - "there", - "there's", - "thereafter", - "thereby", - "therefore", - "therein", - "theres", - "thereupon", - "these", - "they", - "they'd", - "they'll", - "they're", - "they've", - "think", - "third", - "this", - "thorough", - "thoroughly", - "those", - "though", - "three", - "through", - "throughout", - "thru", - "thus", - "to", - "together", - "too", - "took", - "toward", - "towards", - "tried", - "tries", - "truly", - "try", - "trying", - "twice", - "two", - "u", - "un", - "under", - "unfortunately", - "unless", - "unlikely", - "until", - "unto", - "up", - "upon", - "us", - "use", - "used", - "useful", - "uses", - "using", - "usually", - "uucp", - "v", - "value", - "various", - "very", - "via", - "viz", - "vs", - "w", - "want", - "wants", - "was", - "wasn't", - "way", - "we", - "we'd", - "we'll", - "we're", - "we've", - "welcome", - "well", - "went", - "were", - "weren't", - "what", - "what's", - "whatever", - "when", - "whence", - "whenever", - "where", - "where's", - 
"whereafter", - "whereas", - "whereby", - "wherein", - "whereupon", - "wherever", - "whether", - "which", - "while", - "whither", - "who", - "who's", - "whoever", - "whole", - "whom", - "whose", - "why", - "will", - "willing", - "wish", - "with", - "within", - "without", - "won't", - "wonder", - "would", - "would", - "wouldn't", - "x", - "y", - "yes", - "yet", - "you", - "you'd", - "you'll", - "you're", - "you've", - "your", - "yours", - "yourself", - "yourselves", - "z", - "zero" -] \ No newline at end of file diff --git a/docs/swagger.yaml b/docs/swagger.yaml index e6cb5988..cab269c9 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -3279,6 +3279,12 @@ paths: application/json: schema: $ref: "#/components/schemas/Error" + "403": + description: Forbidden + content: + application/json: + schema: + $ref: "#/components/schemas/Error" "500": description: Internal Server Error content: @@ -3759,13 +3765,10 @@ components: properties: tag: type: string - example: "Java" type: type: string - example: "taas_skill" source: type: string - example: "taas-jd-parser" Job: required: diff --git a/src/services/TeamService.js b/src/services/TeamService.js index af006603..c32b2776 100644 --- a/src/services/TeamService.js +++ b/src/services/TeamService.js @@ -15,10 +15,8 @@ const ResourceBookingService = require('./ResourceBookingService') const HttpStatus = require('http-status-codes') const { Op } = require('sequelize') const models = require('../models') -const stopWords = require('../../docs/stopWords.json') const Role = models.Role const RoleSearchRequest = models.RoleSearchRequest -const topcoderSkills = {} const emailTemplates = _.mapValues(emailTemplateConfig, (template) => { return { @@ -62,73 +60,6 @@ async function _getJobsByProjectIds (currentUser, projectIds) { return result } -/** - * Gets topcoder skills and stores their name and compiled - * regex patters according to Levenshtein distance <=1 - */ -async function _reloadCachedTopcoderSkills () { - // do not 
reload if cache time is not expired - if (!_.isUndefined(topcoderSkills.time)) { - const cacheTime = config.TOPCODER_SKILLS_CACHE_TIME * 60 * 1000 - if (new Date().getTime() - topcoderSkills.time < cacheTime) { - return - } - } - // collect all skills - const skills = await helper.getAllTopcoderSkills() - // set the last cached time - topcoderSkills.time = new Date().getTime() - topcoderSkills.skills = [] - // store skill names and compiled regex paterns - _.each(skills, skill => { - topcoderSkills.skills.push({ - name: skill.name, - pattern: _compileRegexPatternForSkillName(skill.name) - }) - }) -} - -/** - * Prepares the regex pattern for the given word - * according to Levenshtein distance of 1 (insertions, deletions or substitutions) - * @param {String} skillName the name of the skill - * @returns {RegExp} the compiled regex pattern - */ -function _compileRegexPatternForSkillName (skillName) { - // split the name into its chars - let chars = _.split(skillName, '') - // escape characters reserved to regex - chars = _.map(chars, _.escapeRegExp) - // Its not a good idea to apply tolerance according to - // Levenshtein distance for the words have less than 3 letters - // We expect the skill names have 1 or 2 letters to take place - // in job description as how they are exactly spelled - if (chars.length < 3) { - return new RegExp(`^(?:${_.join(chars, '')})$`, 'i') - } - - const res = [] - // include the skill name itself - res.push(_.join(chars, '')) - // include every transposition combination - // E.g. java => ajva, jvaa, jaav - for (let i = 0; i < chars.length - 1; i++) { - res.push(_.join(_.slice(chars, 0, i), '') + chars[i + 1] + chars[i] + _.join(_.slice(chars, i + 2), '')) - } - // include every insertion combination - // E.g. java => .java, j.ava, ja.va, jav.a, java. - for (let i = 0; i <= chars.length; i++) { - res.push(_.join(_.slice(chars, 0, i), '') + '.' + _.join(_.slice(chars, i), '')) - } - // include every deletion/substitution combination - // E.g. 
java => .?ava, j.?va, ja.?a, jav.? - for (let i = 0; i < chars.length; i++) { - res.push(_.join(_.slice(chars, 0, i), '') + '.?' + _.join(_.slice(chars, i + 1), '')) - } - // return the regex pattern - return new RegExp(`^(?:${_.join(res, '|')})$`, 'i') -} - /** * List teams * @param {Object} currentUser the user who perform this operation @@ -764,7 +695,9 @@ async function roleSearchRequest (currentUser, data) { } else { // if only job description is provided, collect skill names from description const tags = await getSkillsByJobDescription(currentUser, { description: data.jobDescription }) - const skills = _.map(tags, 'tag') + // collected tags from description has inconsistency with topcoder skills + // we need to filter invalid skills + const skills = await getSkillNamesByNames(_.map(tags, 'tag')) // find the best matching role role = await getRoleBySkills(skills) } @@ -830,40 +763,7 @@ getRoleBySkills.schema = Joi.object() * @returns {Object} the result */ async function getSkillsByJobDescription (currentUser, data) { - // load topcoder skills if needed. Using cached skills helps to avoid - // unnecessary api calls which is extremely time comsuming. - await _reloadCachedTopcoderSkills() - // replace markdown tags with spaces - let description = _.replace(data.description, /[`|^[\]{}~/,:-]|#{2,}|
/gi, ' ')
-  // replace all whitespace characters with single space
-  description = _.replace(description, /\s\s+/g, ' ')
-  // extract words from description
-  let words = _.split(description, ' ')
-  // remove stopwords from description
-  words = _.filter(words, word => stopWords.indexOf(word.toLowerCase()) === -1)
-  let foundSkills = []
-  const result = []
-  // try to match each word with skill names
-  // using pre-compiled regex pattern
-  _.each(words, word => {
-    _.each(topcoderSkills.skills, skill => {
-      // do not stop searching after a match in order to detect more lookalikes
-      if (skill.pattern.test(word)) {
-        foundSkills.push(skill.name)
-      }
-    })
-  })
-  foundSkills = _.uniq(foundSkills)
-  // apply desired template
-  _.each(foundSkills, skill => {
-    result.push({
-      tag: skill,
-      type: 'taas_skill',
-      source: 'taas-jd-parser'
-    })
-  })
-
-  return result
+  return helper.getTags(data.description)
 }
 
 getSkillsByJobDescription.schema = Joi.object()
@@ -928,6 +828,36 @@ getSkillIdsByNames.schema = Joi.object()
     skills: Joi.array().items(Joi.string().required()).required()
   }).required()
 
+/**
+ * Filters invalid skills from given skill names
+ *
+ * Only the names that exactly match (case insensitive) the name of a skill
+ * returned by `helper.getAllTopcoderSkills` are kept. The returned names are
+ * the ones reported by that helper, not the ones given by the caller.
+ *
+ * @param {Array} skills the array of skill names
+ * @returns {Array} the array of valid skill names, empty if none of them is valid
+ */
+async function getSkillNamesByNames (skills) {
+  // remove duplicates, leading and trailing whitespaces, empties.
+  const cleanedSkills = _.uniq(_.filter(_.map(skills, skill => _.trim(skill)), skill => !_.isEmpty(skill)))
+  // nothing left to validate: the intersection below would be empty anyway,
+  // so skip the skills lookup entirely
+  if (_.isEmpty(cleanedSkills)) {
+    return []
+  }
+  const result = await helper.getAllTopcoderSkills({ name: _.join(cleanedSkills, ',') })
+  const skillNames = _.map(result, 'name')
+  // endpoint returns the partial matched skills
+  // we need to filter by exact match case insensitive
+  return _.intersectionBy(skillNames, cleanedSkills, _.toLower)
+}
+
+getSkillNamesByNames.schema = Joi.object()
+  .keys({
+    skills: Joi.array().items(Joi.string().required()).required()
+  }).required()
+
 /**
  * Creates the role search request
  *
@@ -1133,6 +1063,7 @@ module.exports = {
   getSkillsByJobDescription,
   getSkillNamesByIds,
   getSkillIdsByNames,
+  getSkillNamesByNames,
   createRoleSearchRequest,
   isExternalMember,
   createTeam,