Skip to content

Commit

Permalink
Merge pull request #24 from HTMLParseErrorWG/review-fixes2
Browse the repository at this point in the history
Character error fixes
  • Loading branch information
inikulin authored May 31, 2017
2 parents 8eb950a + 5d2483f commit 8e457b9
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 14 deletions.
9 changes: 5 additions & 4 deletions lib/common/error_codes.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

module.exports = {
controlCharacterInInputStream: 'control-character-in-input-stream',
undefinedCharacterInInputStream: 'undefined-character-in-input-stream',
nonUnicodeCharacterInInputStream: 'non-unicode-character-in-input-stream',
noncharacterInInputStream: 'noncharacter-in-input-stream',
surrogateInInputStream: 'surrogate-in-input-stream',
nonVoidHtmlElementStartTagWithTrailingSolidus: 'non-void-html-element-start-tag-with-trailing-solidus',
endTagWithAttributes: 'end-tag-with-attributes',
endTagWithTrailingSolidus: 'end-tag-with-trailing-solidus',
Expand Down Expand Up @@ -42,9 +42,10 @@ module.exports = {
eofInCdata: 'eof-in-cdata',
absenceOfDigitsInNumericCharacterReference: 'absence-of-digits-in-numeric-character-reference',
nullCharacterReference: 'null-character-reference',
nonUnicodeCharacterReference: 'non-unicode-character-reference',
surrogateCharacterReference: 'surrogate-character-reference',
characterReferenceOutsideUnicodeRange: 'character-reference-outside-unicode-range',
controlCharacterReference: 'control-character-reference',
undefinedCharacterReference: 'undefined-character-reference',
noncharacterCharacterReference: 'noncharacter-character-reference',
missingWhitespaceBeforeDoctypeName: 'missing-whitespace-before-doctype-name',
missingDoctypeName: 'missing-doctype-name',
invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name',
Expand Down
6 changes: 1 addition & 5 deletions lib/common/unicode.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ exports.CODE_POINT_SEQUENCES = {


//Surrogates
var isSurrogate = exports.isSurrogate = function (cp) {
exports.isSurrogate = function (cp) {
return cp >= 0xD800 && cp <= 0xDFFF;
};

Expand All @@ -77,7 +77,3 @@ exports.isControlCodePoint = function (cp) {
exports.isUndefinedCodePoint = function (cp) {
return cp >= 0xFDD0 && cp <= 0xFDEF || UNDEFINED_CODE_POINTS.indexOf(cp) > -1;
};

exports.isNonUnicodeCodePoint = function (cp) {
return isSurrogate(cp) || cp > 0x10FFFF;
};
11 changes: 8 additions & 3 deletions lib/tokenizer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2515,13 +2515,18 @@ _[NUMERIC_CHARACTER_REFERENCE_END_STATE] = function numericCharacterReferenceEnd
this.charRefCode = $.REPLACEMENT_CHARACTER;
}

else if (unicode.isNonUnicodeCodePoint(this.charRefCode)) {
this._err(ERR.nonUnicodeCharacterReference);
else if (this.charRefCode > 0x10FFFF) {
this._err(ERR.characterReferenceOutsideUnicodeRange);
this.charRefCode = $.REPLACEMENT_CHARACTER;
}

else if (unicode.isSurrogate(this.charRefCode)) {
this._err(ERR.surrogateCharacterReference);
this.charRefCode = $.REPLACEMENT_CHARACTER;
}

else if (unicode.isUndefinedCodePoint(this.charRefCode))
this._err(ERR.undefinedCharacterReference);
this._err(ERR.noncharacterCharacterReference);

else if (unicode.isControlCodePoint(this.charRefCode) || this.charRefCode === $.CARRIAGE_RETURN) {
this._err(ERR.controlCharacterReference);
Expand Down
4 changes: 2 additions & 2 deletions lib/tokenizer/preprocessor.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Preprocessor.prototype._processSurrogate = function (cp) {
}

//NOTE: isolated surrogate
this._err(ERR.nonUnicodeCharacterInInputStream);
this._err(ERR.surrogateInInputStream);

return cp;
};
Expand Down Expand Up @@ -141,7 +141,7 @@ Preprocessor.prototype._checkForProblematicCharacters = function (cp) {
this._err(ERR.controlCharacterInInputStream);

else if (unicode.isUndefinedCodePoint(cp))
this._err(ERR.undefinedCharacterInInputStream);
this._err(ERR.noncharacterInInputStream);
};

Preprocessor.prototype.retreat = function () {
Expand Down

0 comments on commit 8e457b9

Please sign in to comment.