Skip to content

Commit

Permalink
Merge pull request #30 from HTMLParseErrorWG/before-head-errs
Browse files Browse the repository at this point in the history
Parse errors for a bunch of insertion modes with a word "head" in them
  • Loading branch information
inikulin authored Dec 10, 2017
2 parents 41f08b5 + a3e2c2b commit db23e29
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 18 deletions.
10 changes: 9 additions & 1 deletion lib/common/error_codes.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,13 @@ module.exports = {
invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name',
duplicateAttribute: 'duplicate-attribute',
nonConformingDoctype: 'non-conforming-doctype',
missingDoctype: 'missing-doctype'
missingDoctype: 'missing-doctype',
misplacedDoctype: 'misplaced-doctype',
endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element',
closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements',
disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head',
openElementsLeftAfterEof: 'open-elements-left-after-eof',
abandonedHeadElementChild: 'abandoned-head-element-child',
misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element',
nestedNoscriptInHead: 'nested-noscript-in-head'
};
14 changes: 11 additions & 3 deletions lib/extensions/location_info/tokenizer_mixin.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,17 @@ LocationInfoTokenizerMixin.prototype._getOverriddenMethods = function (mxn, orig
this.currentCharacterToken.location.endOffset = ctLoc.startOffset;
}

ctLoc.endLine = mxn.posTracker.line;
ctLoc.endCol = mxn.posTracker.col + 1;
ctLoc.endOffset = mxn.posTracker.offset + 1;
if (this.currentToken.type === Tokenizer.EOF_TOKEN) {
ctLoc.endLine = ctLoc.startLine;
ctLoc.endCol = ctLoc.startCol;
ctLoc.endOffset = ctLoc.startOffset;
}

else {
ctLoc.endLine = mxn.posTracker.line;
ctLoc.endCol = mxn.posTracker.col + 1;
ctLoc.endOffset = mxn.posTracker.offset + 1;
}

orig._emitCurrentToken.call(this);
},
Expand Down
68 changes: 54 additions & 14 deletions lib/parser/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ _[BEFORE_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] =
_[BEFORE_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHead;
_[BEFORE_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken;
_[BEFORE_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment;
_[BEFORE_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
_[BEFORE_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype;
_[BEFORE_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagBeforeHead;
_[BEFORE_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagBeforeHead;
_[BEFORE_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenBeforeHead;
Expand All @@ -122,7 +122,7 @@ _[IN_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] =
_[IN_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHead;
_[IN_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
_[IN_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment;
_[IN_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
_[IN_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype;
_[IN_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagInHead;
_[IN_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagInHead;
_[IN_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenInHead;
Expand All @@ -132,7 +132,7 @@ _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.CHARACTER_TOKEN] =
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHeadNoScript;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.COMMENT_TOKEN] = appendComment;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.START_TAG_TOKEN] = startTagInHeadNoScript;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.END_TAG_TOKEN] = endTagInHeadNoScript;
_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.EOF_TOKEN] = tokenInHeadNoScript;
Expand All @@ -142,7 +142,7 @@ _[AFTER_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] =
_[AFTER_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterHead;
_[AFTER_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters;
_[AFTER_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment;
_[AFTER_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken;
_[AFTER_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype;
_[AFTER_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterHead;
_[AFTER_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterHead;
_[AFTER_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenAfterHead;
Expand Down Expand Up @@ -1025,6 +1025,10 @@ function ignoreToken() {
//NOTE: do nothing =)
}

// Token handler for a DOCTYPE token that shows up where none is allowed.
// It only reports the `misplacedDoctype` parse error through the parser's
// error hook; the token itself is not inserted anywhere.
//
// p - parser instance; must expose `_err(code)`.
function misplacedDoctype(p) {
    var code = ERR.misplacedDoctype;

    p._err(code);
}

// Token handler that attaches a comment token to the tree.
// The parent is the open-element stack's `currentTmplContent` when that is
// truthy, otherwise the stack's `current` element.
//
// p     - parser instance; must expose `openElements` and `_appendCommentNode`.
// token - the comment token to append.
function appendComment(p, token) {
    var stack = p.openElements;
    var parent = stack.currentTmplContent || stack.current;

    p._appendCommentNode(token, parent);
}
Expand Down Expand Up @@ -1117,6 +1121,8 @@ function endTagBeforeHead(p, token) {

if (tn === $.HEAD || tn === $.BODY || tn === $.HTML || tn === $.BR)
tokenBeforeHead(p, token);
else
p._err(ERR.endTagWithoutMatchingOpenElement);
}

function tokenBeforeHead(p, token) {
Expand Down Expand Up @@ -1167,7 +1173,10 @@ function startTagInHead(p, token) {
p._pushTmplInsertionMode(IN_TEMPLATE_MODE);
}

else if (tn !== $.HEAD)
else if (tn === $.HEAD)
p._err(ERR.misplacedStartTagForHeadElement);

else
tokenInHead(p, token);
}

Expand All @@ -1182,13 +1191,25 @@ function endTagInHead(p, token) {
else if (tn === $.BODY || tn === $.BR || tn === $.HTML)
tokenInHead(p, token);

else if (tn === $.TEMPLATE && p.openElements.tmplCount > 0) {
p.openElements.generateImpliedEndTags();
p.openElements.popUntilTagNamePopped($.TEMPLATE);
p.activeFormattingElements.clearToLastMarker();
p._popTmplInsertionMode();
p._resetInsertionMode();
else if (tn === $.TEMPLATE) {
if (p.openElements.tmplCount > 0) {
p.openElements.generateImpliedEndTagsThoroughly();

if (p.openElements.currentTagName !== $.TEMPLATE)
p._err(ERR.closingOfElementWithOpenChildElements);

p.openElements.popUntilTagNamePopped($.TEMPLATE);
p.activeFormattingElements.clearToLastMarker();
p._popTmplInsertionMode();
p._resetInsertionMode();
}

else
p._err(ERR.endTagWithoutMatchingOpenElement);
}

else
p._err(ERR.endTagWithoutMatchingOpenElement);
}

function tokenInHead(p, token) {
Expand All @@ -1206,10 +1227,14 @@ function startTagInHeadNoScript(p, token) {
if (tn === $.HTML)
startTagInBody(p, token);

else if (tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META || tn === $.NOFRAMES || tn === $.STYLE)
else if (tn === $.BASEFONT || tn === $.BGSOUND || tn === $.HEAD || tn === $.LINK ||
tn === $.META || tn === $.NOFRAMES || tn === $.STYLE)
startTagInHead(p, token);

else if (tn !== $.HEAD && tn !== $.NOSCRIPT)
else if (tn === $.NOSCRIPT)
p._err(ERR.nestedNoscriptInHead);

else
tokenInHeadNoScript(p, token);
}

Expand All @@ -1223,9 +1248,17 @@ function endTagInHeadNoScript(p, token) {

else if (tn === $.BR)
tokenInHeadNoScript(p, token);

else
p._err(ERR.endTagWithoutMatchingOpenElement);
}

function tokenInHeadNoScript(p, token) {
var errCode = token.type === Tokenizer.EOF_TOKEN ?
ERR.openElementsLeftAfterEof :
ERR.disallowedContentInNoscriptInHead;

p._err(errCode);
p.openElements.pop();
p.insertionMode = IN_HEAD_MODE;
p._processToken(token);
Expand All @@ -1252,12 +1285,16 @@ function startTagAfterHead(p, token) {

else if (tn === $.BASE || tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META ||
tn === $.NOFRAMES || tn === $.SCRIPT || tn === $.STYLE || tn === $.TEMPLATE || tn === $.TITLE) {
p._err(ERR.abandonedHeadElementChild);
p.openElements.push(p.headElement);
startTagInHead(p, token);
p.openElements.remove(p.headElement);
}

else if (tn !== $.HEAD)
else if (tn === $.HEAD)
p._err(ERR.misplacedStartTagForHeadElement);

else
tokenAfterHead(p, token);
}

Expand All @@ -1269,6 +1306,9 @@ function endTagAfterHead(p, token) {

else if (tn === $.TEMPLATE)
endTagInHead(p, token);

else
p._err(ERR.endTagWithoutMatchingOpenElement);
}

function tokenAfterHead(p, token) {
Expand Down
33 changes: 33 additions & 0 deletions lib/parser/open_element_stack.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,34 @@ function isImpliedEndTagRequired(tn) {
return false;
}

// Tells whether `tn` is one of the tag names popped by
// `generateImpliedEndTagsThoroughly`:
// $.P, $.RB, $.RP, $.RT, $.DD, $.DT, $.LI, $.TD, $.TH, $.TR, $.RTC,
// $.TBODY, $.TFOOT, $.THEAD, $.OPTION, $.CAPTION, $.OPTGROUP, $.COLGROUP.
//
// Candidates are bucketed by string length so that a name is only compared
// against constants it could possibly equal.
//
// tn - tag name string (its `length` is read, so it must not be null/undefined).
// Returns true for a name in the list above, false otherwise.
function isImpliedEndTagRequiredThoroughly(tn) {
    var len = tn.length;

    if (len === 1)
        return tn === $.P;

    if (len === 2) {
        return tn === $.RB || tn === $.RP || tn === $.RT ||
               tn === $.DD || tn === $.DT || tn === $.LI ||
               tn === $.TD || tn === $.TH || tn === $.TR;
    }

    if (len === 3)
        return tn === $.RTC;

    if (len === 5)
        return tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD;

    if (len === 6)
        return tn === $.OPTION;

    if (len === 7)
        return tn === $.CAPTION;

    if (len === 8)
        return tn === $.OPTGROUP || tn === $.COLGROUP;

    return false;
}

function isScopingElement(tn, ns) {
switch (tn.length) {
case 2:
Expand Down Expand Up @@ -389,6 +417,11 @@ OpenElementStack.prototype.generateImpliedEndTags = function () {
this.pop();
};

// Pops elements off the stack for as long as the current tag name satisfies
// `isImpliedEndTagRequiredThoroughly`. Stops at the first element that
// does not.
OpenElementStack.prototype.generateImpliedEndTagsThoroughly = function () {
    var stack = this;

    while (isImpliedEndTagRequiredThoroughly(stack.currentTagName)) {
        stack.pop();
    }
};

OpenElementStack.prototype.generateImpliedEndTagsWithExclusion = function (exclusionTagName) {
while (isImpliedEndTagRequired(this.currentTagName) && this.currentTagName !== exclusionTagName)
this.pop();
Expand Down
2 changes: 2 additions & 0 deletions test/data/tree_construction/tests18.dat
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
42: Bad start tag in “plaintext” in “head”.
54: End of file seen and there were open elements.
42: Unclosed element “plaintext”.
#new-errors
(1:32-1:43) disallowed-content-in-noscript-in-head
#document
| <!DOCTYPE html>
| <html>
Expand Down

0 comments on commit db23e29

Please sign in to comment.