diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index 1459fd940..689cebe38 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -14,5 +14,8 @@ module.exports = { eofBeforeEndTagName: 'eof-before-end-tag-name', cdataInHtmlContent: 'cdata-in-html-content', malformedComment: 'malformed-comment', - eofInScriptHtmlComment: 'eof-in-script-html-comment' + eofInScriptHtmlComment: 'eof-in-script-html-comment', + nestedComment: 'nested-comment', + abruptComment: 'abrupt-comment', + eofInComment: 'eof-in-comment' }; diff --git a/lib/tokenizer/index.js b/lib/tokenizer/index.js index 00660674a..3112e6c44 100644 --- a/lib/tokenizer/index.js +++ b/lib/tokenizer/index.js @@ -71,6 +71,10 @@ var DATA_STATE = 'DATA_STATE', COMMENT_START_STATE = 'COMMENT_START_STATE', COMMENT_START_DASH_STATE = 'COMMENT_START_DASH_STATE', COMMENT_STATE = 'COMMENT_STATE', + COMMENT_LESS_THAN_SIGN_STATE = 'COMMENT_LESS_THAN_SIGN_STATE', + COMMENT_LESS_THAN_SIGN_BANG_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_STATE', + COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE', + COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE', COMMENT_END_DASH_STATE = 'COMMENT_END_DASH_STATE', COMMENT_END_STATE = 'COMMENT_END_STATE', COMMENT_END_BANG_STATE = 'COMMENT_END_BANG_STATE', @@ -1573,25 +1577,14 @@ _[COMMENT_START_STATE] = function commentStartState(cp) { if (cp === $.HYPHEN_MINUS) this.state = COMMENT_START_DASH_STATE; - else if (cp === $.NULL) { - this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER; - this.state = COMMENT_STATE; - } - else if (cp === $.GREATER_THAN_SIGN) { + this._err(ERR.abruptComment); this.state = DATA_STATE; this._emitCurrentToken(); } - else if (cp === $.EOF) { - this._emitCurrentToken(); - this._emitEOFToken(); - } - - else { - this.currentToken.data += toChar(cp); - this.state = COMMENT_STATE; - } + else + this._reconsumeInState(COMMENT_STATE); }; @@ -1601,26 +1594,21 @@ 
_[COMMENT_START_DASH_STATE] = function commentStartDashState(cp) { if (cp === $.HYPHEN_MINUS) this.state = COMMENT_END_STATE; - else if (cp === $.NULL) { - this.currentToken.data += '-'; - this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER; - this.state = COMMENT_STATE; - } - else if (cp === $.GREATER_THAN_SIGN) { + this._err(ERR.abruptComment); this.state = DATA_STATE; this._emitCurrentToken(); } else if (cp === $.EOF) { + this._err(ERR.eofInComment); this._emitCurrentToken(); this._emitEOFToken(); } else { this.currentToken.data += '-'; - this.currentToken.data += toChar(cp); - this.state = COMMENT_STATE; + this._reconsumeInState(COMMENT_STATE); } }; @@ -1631,10 +1619,18 @@ _[COMMENT_STATE] = function commentState(cp) { if (cp === $.HYPHEN_MINUS) this.state = COMMENT_END_DASH_STATE; - else if (cp === $.NULL) + else if (cp === $.LESS_THAN_SIGN) { + this.currentToken.data += '<'; + this.state = COMMENT_LESS_THAN_SIGN_STATE; + } + + else if (cp === $.NULL) { + this._err(ERR.unexpectedNullCharacter); this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER; + } else if (cp === $.EOF) { + this._err(ERR.eofInComment); this._emitCurrentToken(); this._emitEOFToken(); } @@ -1644,18 +1640,60 @@ _[COMMENT_STATE] = function commentState(cp) { }; +// Comment less-than sign state +//------------------------------------------------------------------ +_[COMMENT_LESS_THAN_SIGN_STATE] = function commentLessThanSignState(cp) { + if (cp === $.EXCLAMATION_MARK) { + this.currentToken.data += '!'; + this.state = COMMENT_LESS_THAN_SIGN_BANG_STATE; + } + + else if (cp === $.LESS_THAN_SIGN) + this.currentToken.data += '<'; + + else + this._reconsumeInState(COMMENT_STATE); +}; + + +// Comment less-than sign bang state +//------------------------------------------------------------------ +_[COMMENT_LESS_THAN_SIGN_BANG_STATE] = function commentLessThanSignBangState(cp) { + if (cp === $.HYPHEN_MINUS) + this.state = COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE; + + else + 
this._reconsumeInState(COMMENT_STATE); +}; + + +// Comment less-than sign bang dash state +//------------------------------------------------------------------ +_[COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE] = function commentLessThanSignBangDashState(cp) { + if (cp === $.HYPHEN_MINUS) + this.state = COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE; + + else + this._reconsumeInState(COMMENT_END_DASH_STATE); +}; + + +// Comment less-than sign bang dash dash state +//------------------------------------------------------------------ +_[COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE] = function commentLessThanSignBangDashDashState(cp) { + if (cp !== $.GREATER_THAN_SIGN && cp !== $.EOF) + this._err(ERR.nestedComment); + + this._reconsumeInState(COMMENT_END_STATE); +}; + + // Comment end dash state //------------------------------------------------------------------ _[COMMENT_END_DASH_STATE] = function commentEndDashState(cp) { if (cp === $.HYPHEN_MINUS) this.state = COMMENT_END_STATE; - else if (cp === $.NULL) { - this.currentToken.data += '-'; - this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER; - this.state = COMMENT_STATE; - } - else if (cp === $.EOF) { this._emitCurrentToken(); this._emitEOFToken(); @@ -1663,8 +1701,7 @@ _[COMMENT_END_DASH_STATE] = function commentEndDashState(cp) { else { this.currentToken.data += '-'; - this.currentToken.data += toChar(cp); - this.state = COMMENT_STATE; + this._reconsumeInState(COMMENT_STATE); } }; @@ -1683,12 +1720,6 @@ _[COMMENT_END_STATE] = function commentEndState(cp) { else if (cp === $.HYPHEN_MINUS) this.currentToken.data += '-'; - else if (cp === $.NULL) { - this.currentToken.data += '--'; - this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER; - this.state = COMMENT_STATE; - } - else if (cp === $.EOF) { this._emitCurrentToken(); this._emitEOFToken(); @@ -1696,8 +1727,7 @@ _[COMMENT_END_STATE] = function commentEndState(cp) { else { this.currentToken.data += '--'; - this.currentToken.data += toChar(cp); - this.state = 
COMMENT_STATE; + this._reconsumeInState(COMMENT_STATE); } }; @@ -1715,12 +1745,6 @@ _[COMMENT_END_BANG_STATE] = function commentEndBangState(cp) { this._emitCurrentToken(); } - else if (cp === $.NULL) { - this.currentToken.data += '--!'; - this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER; - this.state = COMMENT_STATE; - } - else if (cp === $.EOF) { this._emitCurrentToken(); this._emitEOFToken(); @@ -1728,8 +1752,7 @@ _[COMMENT_END_BANG_STATE] = function commentEndBangState(cp) { else { this.currentToken.data += '--!'; - this.currentToken.data += toChar(cp); - this.state = COMMENT_STATE; + this._reconsumeInState(COMMENT_STATE); } };