From ba0e634374d355c025f87009e7d61fc9dbfd67db Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Thu, 28 Sep 2017 14:52:45 +0100 Subject: [PATCH 1/7] Add before head insertion mode parse errors --- lib/common/error_codes.js | 4 +++- lib/parser/index.js | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index 7aeefc918..e5b1d0fa4 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -51,5 +51,7 @@ module.exports = { invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name', duplicateAttribute: 'duplicate-attribute', nonConformingDoctype: 'non-conforming-doctype', - missingDoctype: 'missing-doctype' + missingDoctype: 'missing-doctype', + misplacedDoctype: 'misplaced-doctype', + endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element' }; diff --git a/lib/parser/index.js b/lib/parser/index.js index b7be8d5d7..61490e329 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -112,7 +112,7 @@ _[BEFORE_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = _[BEFORE_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenBeforeHead; _[BEFORE_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = ignoreToken; _[BEFORE_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[BEFORE_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[BEFORE_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype; _[BEFORE_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagBeforeHead; _[BEFORE_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagBeforeHead; _[BEFORE_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenBeforeHead; @@ -1025,6 +1025,10 @@ function ignoreToken() { //NOTE: do nothing =) } +function misplacedDoctype(p) { + p._err(ERR.misplacedDoctype); +} + function appendComment(p, token) { p._appendCommentNode(token, p.openElements.currentTmplContent || p.openElements.current); } @@ -1117,6 +1121,8 @@ function endTagBeforeHead(p, token) { if (tn === $.HEAD || tn === $.BODY || tn === $.HTML || tn === $.BR) tokenBeforeHead(p, token); + else + p._err(ERR.endTagWithoutMatchingOpenElement); } function tokenBeforeHead(p, token) { From 1d749fad516d3e96ffa0210bf7b2ef824e02b16c Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Fri, 29 Sep 2017 01:24:04 +0100 Subject: [PATCH 2/7] Add in head insertion mode errors --- lib/common/error_codes.js | 3 ++- lib/parser/index.js | 26 ++++++++++++++++++------- lib/parser/open_element_stack.js | 33 ++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index e5b1d0fa4..a6f69d4bd 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -53,5 +53,6 @@ module.exports = { nonConformingDoctype: 'non-conforming-doctype', missingDoctype: 'missing-doctype', misplacedDoctype: 'misplaced-doctype', - endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element' + endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element', + closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements' }; diff --git a/lib/parser/index.js b/lib/parser/index.js index 61490e329..e138f813b 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -122,7 +122,7 @@ _[IN_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = _[IN_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHead; _[IN_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; _[IN_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype; _[IN_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagInHead; _[IN_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagInHead; _[IN_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenInHead; @@ -1188,13 +1188,25 @@ function endTagInHead(p, token) { else if (tn === $.BODY || tn === $.BR || tn === $.HTML) tokenInHead(p, token); - else if (tn === $.TEMPLATE && p.openElements.tmplCount > 0) { - p.openElements.generateImpliedEndTags(); - p.openElements.popUntilTagNamePopped($.TEMPLATE); - p.activeFormattingElements.clearToLastMarker(); - p._popTmplInsertionMode(); - p._resetInsertionMode(); + else if (tn === $.TEMPLATE) { + if (p.openElements.tmplCount > 0) { + p.openElements.generateImpliedEndTagsThoroughly(); + + if (p.openElements.currentTagName !== $.TEMPLATE) + p._err(ERR.closingOfElementWithOpenChildElements); + + p.openElements.popUntilTagNamePopped($.TEMPLATE); + p.activeFormattingElements.clearToLastMarker(); + p._popTmplInsertionMode(); + p._resetInsertionMode(); + } + + else + p._err(ERR.endTagWithoutMatchingOpenElement); } + + else + p._err(ERR.endTagWithoutMatchingOpenElement); } function tokenInHead(p, token) { diff --git a/lib/parser/open_element_stack.js b/lib/parser/open_element_stack.js index 0ba89f408..80958ec64 100644 --- a/lib/parser/open_element_stack.js +++ b/lib/parser/open_element_stack.js @@ -31,6 +31,34 @@ function isImpliedEndTagRequired(tn) { return false; } +function isImpliedEndTagRequiredThoroughly(tn) { + switch (tn.length) { + case 1: + return tn === $.P; + + case 2: + return tn === $.RB || tn === $.RP || tn === $.RT || tn === $.DD || + tn === $.DT || tn === $.LI || tn === $.TD || tn === $.TH || tn === $.TR; + + case 3: + return tn === $.RTC; + + case 5: + return tn === $.TBODY || tn === $.TFOOT || tn === $.THEAD; + + case 6: + return tn === $.OPTION; + + case 7: + return tn === $.CAPTION; + + case 8: + return tn === $.OPTGROUP || tn === $.COLGROUP; + } + + return false; +} + function isScopingElement(tn, ns) { switch (tn.length) { case 2: @@ -389,6 +417,11 @@ OpenElementStack.prototype.generateImpliedEndTags = function () { this.pop(); }; +OpenElementStack.prototype.generateImpliedEndTagsThoroughly = function () { + while (isImpliedEndTagRequiredThoroughly(this.currentTagName)) + this.pop(); +}; + OpenElementStack.prototype.generateImpliedEndTagsWithExclusion = function (exclusionTagName) { while (isImpliedEndTagRequired(this.currentTagName) && this.currentTagName !== exclusionTagName) this.pop(); From e0da303bce0fda0f7a196d6f3a3234a17ec71ff4 Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Sat, 30 Sep 2017 23:11:42 +0100 Subject: [PATCH 3/7] Add in head noscript insertion mode errors --- lib/common/error_codes.js | 4 +++- lib/extensions/location_info/tokenizer_mixin.js | 14 +++++++++++--- lib/parser/index.js | 10 +++++++++- test/data/tree_construction/tests18.dat | 2 ++ 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index a6f69d4bd..c4dd51a8a 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -54,5 +54,7 @@ module.exports = { missingDoctype: 'missing-doctype', misplacedDoctype: 'misplaced-doctype', endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element', - closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements' + closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements', + disallowedContentInNoscript: 'disallowed-content-in-noscript', + openElementsLeftAfterEof: 'open-elements-left-after-eof' }; diff --git a/lib/extensions/location_info/tokenizer_mixin.js b/lib/extensions/location_info/tokenizer_mixin.js index 09a1371a9..50c320334 100644 --- a/lib/extensions/location_info/tokenizer_mixin.js +++ b/lib/extensions/location_info/tokenizer_mixin.js @@ -99,9 +99,17 @@ LocationInfoTokenizerMixin.prototype._getOverriddenMethods = function (mxn, orig this.currentCharacterToken.location.endOffset = ctLoc.startOffset; } - ctLoc.endLine = mxn.posTracker.line; - ctLoc.endCol = mxn.posTracker.col + 1; - ctLoc.endOffset = mxn.posTracker.offset + 1; + if (this.currentToken.type === Tokenizer.EOF_TOKEN) { + ctLoc.endLine = ctLoc.startLine; + ctLoc.endCol = ctLoc.startCol; + ctLoc.endOffset = ctLoc.startOffset; + } + + else { + ctLoc.endLine = mxn.posTracker.line; + ctLoc.endCol = mxn.posTracker.col + 1; + ctLoc.endOffset = mxn.posTracker.offset + 1; + } orig._emitCurrentToken.call(this); }, diff --git a/lib/parser/index.js b/lib/parser/index.js index e138f813b..a440070dc 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -132,7 +132,7 @@ _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.CHARACTER_TOKEN] = _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenInHeadNoScript; _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype; _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.START_TAG_TOKEN] = startTagInHeadNoScript; _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.END_TAG_TOKEN] = endTagInHeadNoScript; _[IN_HEAD_NO_SCRIPT_MODE][Tokenizer.EOF_TOKEN] = tokenInHeadNoScript; @@ -1241,9 +1241,17 @@ function endTagInHeadNoScript(p, token) { else if (tn === $.BR) tokenInHeadNoScript(p, token); + + else + p._err(ERR.endTagWithoutMatchingOpenElement); } function tokenInHeadNoScript(p, token) { + var errCode = token.type === Tokenizer.EOF_TOKEN ? + ERR.openElementsLeftAfterEof : + ERR.disallowedContentInNoscript; + + p._err(errCode); p.openElements.pop(); p.insertionMode = IN_HEAD_MODE; p._processToken(token); diff --git a/test/data/tree_construction/tests18.dat b/test/data/tree_construction/tests18.dat index 3ce39fc6b..ca135a7bb 100644 --- a/test/data/tree_construction/tests18.dat +++ b/test/data/tree_construction/tests18.dat @@ -56,6 +56,8 @@ 42: Bad start tag in “plaintext” in “head”. 54: End of file seen and there were open elements. 42: Unclosed element “plaintext”. +#new-errors +(1:32-1:43) disallowed-content-in-noscript #document | | From 78e68d568c2138b45597c20336316a2efa5ae305 Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Sun, 1 Oct 2017 19:11:33 +0100 Subject: [PATCH 4/7] Add after head insertion mode errors --- lib/common/error_codes.js | 4 +++- lib/parser/index.js | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index c4dd51a8a..27f05b82f 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -56,5 +56,7 @@ module.exports = { endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element', closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements', disallowedContentInNoscript: 'disallowed-content-in-noscript', - openElementsLeftAfterEof: 'open-elements-left-after-eof' + openElementsLeftAfterEof: 'open-elements-left-after-eof', + abandonedHeadElementChild: 'abandoned-head-element-child', + misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element' }; diff --git a/lib/parser/index.js b/lib/parser/index.js index a440070dc..56d05653b 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -142,7 +142,7 @@ _[AFTER_HEAD_MODE][Tokenizer.CHARACTER_TOKEN] = _[AFTER_HEAD_MODE][Tokenizer.NULL_CHARACTER_TOKEN] = tokenAfterHead; _[AFTER_HEAD_MODE][Tokenizer.WHITESPACE_CHARACTER_TOKEN] = insertCharacters; _[AFTER_HEAD_MODE][Tokenizer.COMMENT_TOKEN] = appendComment; -_[AFTER_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = ignoreToken; +_[AFTER_HEAD_MODE][Tokenizer.DOCTYPE_TOKEN] = misplacedDoctype; _[AFTER_HEAD_MODE][Tokenizer.START_TAG_TOKEN] = startTagAfterHead; _[AFTER_HEAD_MODE][Tokenizer.END_TAG_TOKEN] = endTagAfterHead; _[AFTER_HEAD_MODE][Tokenizer.EOF_TOKEN] = tokenAfterHead; @@ -1173,7 +1173,10 @@ function startTagInHead(p, token) { p._pushTmplInsertionMode(IN_TEMPLATE_MODE); } - else if (tn !== $.HEAD) + else if (tn === $.HEAD) + p._err(ERR.misplacedStartTagForHeadElement); + + else tokenInHead(p, token); } @@ -1278,12 +1281,16 @@ function startTagAfterHead(p, token) { else if (tn === $.BASE || tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META || tn === $.NOFRAMES || tn === $.SCRIPT || tn === $.STYLE || tn === $.TEMPLATE || tn === $.TITLE) { + p._err(ERR.abandonedHeadElementChild); p.openElements.push(p.headElement); startTagInHead(p, token); p.openElements.remove(p.headElement); } - else if (tn !== $.HEAD) + else if (tn === $.HEAD) + p._err(ERR.misplacedStartTagForHeadElement); + + else tokenAfterHead(p, token); } @@ -1295,6 +1302,9 @@ function endTagAfterHead(p, token) { else if (tn === $.TEMPLATE) endTagInHead(p, token); + + else + p._err(ERR.endTagWithoutMatchingOpenElement); } function tokenAfterHead(p, token) { From 4451b771543c11392e379bcaea8ec043673257e0 Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Thu, 9 Nov 2017 20:11:07 +0000 Subject: [PATCH 5/7] disallowed-content-in-noscript -> disallowed-content-in-noscript-in-head --- lib/common/error_codes.js | 2 +- lib/parser/index.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index 27f05b82f..65b01807e 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -55,7 +55,7 @@ module.exports = { misplacedDoctype: 'misplaced-doctype', endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element', closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements', - disallowedContentInNoscript: 'disallowed-content-in-noscript', + disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head', openElementsLeftAfterEof: 'open-elements-left-after-eof', abandonedHeadElementChild: 'abandoned-head-element-child', misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element' diff --git a/lib/parser/index.js b/lib/parser/index.js index 56d05653b..69a420120 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -1252,7 +1252,7 @@ function endTagInHeadNoScript(p, token) { function tokenInHeadNoScript(p, token) { var errCode = token.type === Tokenizer.EOF_TOKEN ? ERR.openElementsLeftAfterEof : - ERR.disallowedContentInNoscript; + ERR.disallowedContentInNoscriptInHead; p._err(errCode); p.openElements.pop(); From 91dc070cbef76e142b58eeb099a2f02a4c4176c7 Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Thu, 9 Nov 2017 21:33:55 +0000 Subject: [PATCH 6/7] Process head in noscript in head using rules for head --- lib/parser/index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/parser/index.js b/lib/parser/index.js index 69a420120..d550fb6ae 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -1227,10 +1227,11 @@ function startTagInHeadNoScript(p, token) { if (tn === $.HTML) startTagInBody(p, token); - else if (tn === $.BASEFONT || tn === $.BGSOUND || tn === $.LINK || tn === $.META || tn === $.NOFRAMES || tn === $.STYLE) + else if (tn === $.BASEFONT || tn === $.BGSOUND || tn === $.HEAD || tn === $.LINK || + tn === $.META || tn === $.NOFRAMES || tn === $.STYLE) startTagInHead(p, token); - else if (tn !== $.HEAD && tn !== $.NOSCRIPT) + else if (tn !== $.NOSCRIPT) tokenInHeadNoScript(p, token); } From a3e2c2bac16b7a9732c12433e525438f7bcd3811 Mon Sep 17 00:00:00 2001 From: Ivan Nikulin Date: Thu, 9 Nov 2017 21:47:45 +0000 Subject: [PATCH 7/7] nested-noscript-in-head error --- lib/common/error_codes.js | 3 ++- lib/parser/index.js | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/common/error_codes.js b/lib/common/error_codes.js index 65b01807e..c4e2defa0 100644 --- a/lib/common/error_codes.js +++ b/lib/common/error_codes.js @@ -58,5 +58,6 @@ module.exports = { disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head', openElementsLeftAfterEof: 'open-elements-left-after-eof', abandonedHeadElementChild: 'abandoned-head-element-child', - misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element' + misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element', + nestedNoscriptInHead: 'nested-noscript-in-head' }; diff --git a/lib/parser/index.js b/lib/parser/index.js index d550fb6ae..bda2ca623 100644 --- a/lib/parser/index.js +++ b/lib/parser/index.js @@ -1231,7 +1231,10 @@ function startTagInHeadNoScript(p, token) { tn === $.META || tn === $.NOFRAMES || tn === $.STYLE) startTagInHead(p, token); - else if (tn !== $.NOSCRIPT) + else if (tn === $.NOSCRIPT) + p._err(ERR.nestedNoscriptInHead); + + else tokenInHeadNoScript(p, token); }