Skip to content

Commit 9b3bb56

Browse files
authored
Merge pull request #2 from HTMLParseErrorWG/comments-parse-errors
Comments parse errors
2 parents a68c520 + f2af6cb commit 9b3bb56

File tree

2 files changed

+74
-48
lines changed

2 files changed

+74
-48
lines changed

lib/common/error_codes.js

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,8 @@ module.exports = {
1414
eofBeforeEndTagName: 'eof-before-end-tag-name',
1515
cdataInHtmlContent: 'cdata-in-html-content',
1616
malformedComment: 'malformed-comment',
17-
eofInScriptHtmlComment: 'eof-in-script-html-comment'
17+
eofInScriptHtmlComment: 'eof-in-script-html-comment',
18+
nestedComment: 'nested-comment',
19+
abruptComment: 'abrupt-comment',
20+
eofInComment: 'eof-in-comment'
1821
};

lib/tokenizer/index.js

Lines changed: 70 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ var DATA_STATE = 'DATA_STATE',
7171
COMMENT_START_STATE = 'COMMENT_START_STATE',
7272
COMMENT_START_DASH_STATE = 'COMMENT_START_DASH_STATE',
7373
COMMENT_STATE = 'COMMENT_STATE',
74+
COMMENT_LESS_THAN_SIGN_STATE = 'COMMENT_LESS_THAN_SIGN_STATE',
75+
COMMENT_LESS_THAN_SIGN_BANG_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_STATE',
76+
COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE',
77+
COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE = 'COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE',
7478
COMMENT_END_DASH_STATE = 'COMMENT_END_DASH_STATE',
7579
COMMENT_END_STATE = 'COMMENT_END_STATE',
7680
COMMENT_END_BANG_STATE = 'COMMENT_END_BANG_STATE',
@@ -1573,25 +1577,14 @@ _[COMMENT_START_STATE] = function commentStartState(cp) {
15731577
if (cp === $.HYPHEN_MINUS)
15741578
this.state = COMMENT_START_DASH_STATE;
15751579

1576-
else if (cp === $.NULL) {
1577-
this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER;
1578-
this.state = COMMENT_STATE;
1579-
}
1580-
15811580
else if (cp === $.GREATER_THAN_SIGN) {
1581+
this._err(ERR.abruptComment);
15821582
this.state = DATA_STATE;
15831583
this._emitCurrentToken();
15841584
}
15851585

1586-
else if (cp === $.EOF) {
1587-
this._emitCurrentToken();
1588-
this._emitEOFToken();
1589-
}
1590-
1591-
else {
1592-
this.currentToken.data += toChar(cp);
1593-
this.state = COMMENT_STATE;
1594-
}
1586+
else
1587+
this._reconsumeInState(COMMENT_STATE);
15951588
};
15961589

15971590

@@ -1601,26 +1594,21 @@ _[COMMENT_START_DASH_STATE] = function commentStartDashState(cp) {
16011594
if (cp === $.HYPHEN_MINUS)
16021595
this.state = COMMENT_END_STATE;
16031596

1604-
else if (cp === $.NULL) {
1605-
this.currentToken.data += '-';
1606-
this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER;
1607-
this.state = COMMENT_STATE;
1608-
}
1609-
16101597
else if (cp === $.GREATER_THAN_SIGN) {
1598+
this._err(ERR.abruptComment);
16111599
this.state = DATA_STATE;
16121600
this._emitCurrentToken();
16131601
}
16141602

16151603
else if (cp === $.EOF) {
1604+
this._err(ERR.eofInComment);
16161605
this._emitCurrentToken();
16171606
this._emitEOFToken();
16181607
}
16191608

16201609
else {
16211610
this.currentToken.data += '-';
1622-
this.currentToken.data += toChar(cp);
1623-
this.state = COMMENT_STATE;
1611+
this._reconsumeInState(COMMENT_STATE);
16241612
}
16251613
};
16261614

@@ -1631,10 +1619,18 @@ _[COMMENT_STATE] = function commentState(cp) {
16311619
if (cp === $.HYPHEN_MINUS)
16321620
this.state = COMMENT_END_DASH_STATE;
16331621

1634-
else if (cp === $.NULL)
1622+
else if (cp === $.LESS_THAN_SIGN) {
1623+
this.currentToken.data += '<';
1624+
this.state = COMMENT_LESS_THAN_SIGN_STATE;
1625+
}
1626+
1627+
else if (cp === $.NULL) {
1628+
this._err(ERR.unexpectedNullCharacter);
16351629
this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER;
1630+
}
16361631

16371632
else if (cp === $.EOF) {
1633+
this._err(ERR.eofInComment);
16381634
this._emitCurrentToken();
16391635
this._emitEOFToken();
16401636
}
@@ -1644,27 +1640,68 @@ _[COMMENT_STATE] = function commentState(cp) {
16441640
};
16451641

16461642

1643+
// Comment less-than sign state
//------------------------------------------------------------------
// Entered from the comment state right after a '<' has been appended to the
// comment data. Tracks a possible nested "<!--" opener:
//   '!'  -> append it and switch to the "comment less-than sign bang" state
//   '<'  -> append it and stay here (a run of '<' may still be followed by '!')
//   else -> reconsume the code point in the comment state
_[COMMENT_LESS_THAN_SIGN_STATE] = function commentLessThanSignState(cp) {
    if (cp === $.EXCLAMATION_MARK) {
        this.currentToken.data += '!';
        this.state = COMMENT_LESS_THAN_SIGN_BANG_STATE;
    }

    else if (cp === $.LESS_THAN_SIGN)
        // The consumed character is '<', so that is what belongs in the comment data.
        this.currentToken.data += '<';

    else
        this._reconsumeInState(COMMENT_STATE);
};
1657+
1658+
1659+
// Comment less-than sign bang state
//------------------------------------------------------------------
// Reached after "<!" has been seen inside a comment. A '-' keeps following the
// possible nested "<!--" opener; any other code point is reconsumed in the
// comment state.
_[COMMENT_LESS_THAN_SIGN_BANG_STATE] = function commentLessThanSignBangState(cp) {
    if (cp !== $.HYPHEN_MINUS) {
        this._reconsumeInState(COMMENT_STATE);
        return;
    }

    this.state = COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE;
};
1668+
1669+
1670+
// Comment less-than sign bang dash state
//------------------------------------------------------------------
// Reached after "<!-" has been seen inside a comment. A second '-' moves to the
// "bang dash dash" state; any other code point is reconsumed in the comment
// end dash state.
_[COMMENT_LESS_THAN_SIGN_BANG_DASH_STATE] = function commentLessThanSignBangDashState(cp) {
    if (cp !== $.HYPHEN_MINUS) {
        this._reconsumeInState(COMMENT_END_DASH_STATE);
        return;
    }

    this.state = COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE;
};
1679+
1680+
1681+
// Comment less-than sign bang dash dash state
//------------------------------------------------------------------
// Reached after "<!--" has been seen inside a comment. Unless the next code
// point is '>' or EOF, a nested-comment error is reported. In every case the
// code point is then reconsumed in the comment end state.
_[COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH_STATE] = function commentLessThanSignBangDashDashState(cp) {
    var endsComment = cp === $.GREATER_THAN_SIGN || cp === $.EOF;

    if (!endsComment) {
        this._err(ERR.nestedComment);
    }

    this._reconsumeInState(COMMENT_END_STATE);
};
1689+
1690+
16471691
// Comment end dash state
//------------------------------------------------------------------
// Reached after a single '-' has been seen inside a comment:
//   '-'  -> switch to the comment end state
//   EOF  -> report eof-in-comment, emit the comment token, then the EOF token
//   else -> the '-' was ordinary comment data: append it and reconsume the
//           code point in the comment state
_[COMMENT_END_DASH_STATE] = function commentEndDashState(cp) {
    if (cp === $.HYPHEN_MINUS)
        this.state = COMMENT_END_STATE;

    else if (cp === $.EOF) {
        // Report the unterminated comment, as the other comment states do on EOF.
        this._err(ERR.eofInComment);
        this._emitCurrentToken();
        this._emitEOFToken();
    }

    else {
        this.currentToken.data += '-';
        this._reconsumeInState(COMMENT_STATE);
    }
};
16701707

@@ -1683,21 +1720,14 @@ _[COMMENT_END_STATE] = function commentEndState(cp) {
16831720
else if (cp === $.HYPHEN_MINUS)
16841721
this.currentToken.data += '-';
16851722

1686-
else if (cp === $.NULL) {
1687-
this.currentToken.data += '--';
1688-
this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER;
1689-
this.state = COMMENT_STATE;
1690-
}
1691-
16921723
else if (cp === $.EOF) {
16931724
this._emitCurrentToken();
16941725
this._emitEOFToken();
16951726
}
16961727

16971728
else {
16981729
this.currentToken.data += '--';
1699-
this.currentToken.data += toChar(cp);
1700-
this.state = COMMENT_STATE;
1730+
this._reconsumeInState(COMMENT_STATE);
17011731
}
17021732
};
17031733

@@ -1715,21 +1745,14 @@ _[COMMENT_END_BANG_STATE] = function commentEndBangState(cp) {
17151745
this._emitCurrentToken();
17161746
}
17171747

1718-
else if (cp === $.NULL) {
1719-
this.currentToken.data += '--!';
1720-
this.currentToken.data += UNICODE.REPLACEMENT_CHARACTER;
1721-
this.state = COMMENT_STATE;
1722-
}
1723-
17241748
else if (cp === $.EOF) {
17251749
this._emitCurrentToken();
17261750
this._emitEOFToken();
17271751
}
17281752

17291753
else {
17301754
this.currentToken.data += '--!';
1731-
this.currentToken.data += toChar(cp);
1732-
this.state = COMMENT_STATE;
1755+
this._reconsumeInState(COMMENT_STATE);
17331756
}
17341757
};
17351758

0 commit comments

Comments
 (0)