diff --git a/docs/options.md b/docs/options.md index 3f90a0139b..de00c9f903 100644 --- a/docs/options.md +++ b/docs/options.md @@ -50,6 +50,8 @@ You can provide an object of options as the last argument to [`katex.render` and - `trust`: `boolean` or `function` (default: `false`). If `false` (do not trust input), prevent any commands like `\includegraphics` that could enable adverse behavior, rendering them instead in `errorColor`. If `true` (trust input), allow all such commands. Provide a custom function `handler(context)` to customize behavior depending on the context (command, arguments e.g. a URL, etc.). A list of possible contexts: - `{command: "\\url", url, protocol}` + where `protocol` is a lowercased string like `"http"` or `"https"` + that appears before a colon, or `"_relative"` for relative URLs. - `{command: "\\href", url, protocol}` - `{command: "\\includegraphics", url, protocol}` - `{command: "\\htmlClass", class}` diff --git a/src/Settings.js b/src/Settings.js index 9c5336a3ba..304a7d1e6a 100644 --- a/src/Settings.js +++ b/src/Settings.js @@ -346,7 +346,11 @@ export default class Settings { */ isTrusted(context: AnyTrustContext): boolean { if (context.url && !context.protocol) { - context.protocol = utils.protocolFromUrl(context.url); + const protocol = utils.protocolFromUrl(context.url); + if (protocol == null) { + return false; + } + context.protocol = protocol; } const trust = typeof this.trust === "function" ? this.trust(context) diff --git a/src/utils.js b/src/utils.js index 534c0f58f5..dfb197cfa0 100644 --- a/src/utils.js +++ b/src/utils.js @@ -93,11 +93,30 @@ export const assert = function<T>(value: ?T): T { /** * Return the protocol of a URL, or "_relative" if the URL does not specify a - * protocol (and thus is relative). + * protocol (and thus is relative), or `null` if URL has invalid protocol + * (so should be outright rejected). 
*/ -export const protocolFromUrl = function(url: string): string { - const protocol = /^\s*([^\\/#]*?)(?::|&#0*58|&#x0*3a)/i.exec(url); - return (protocol != null ? protocol[1] : "_relative"); +export const protocolFromUrl = function(url: string): string | null { + // Check for possible leading protocol. + // https://url.spec.whatwg.org/#url-parsing strips leading whitespace + // (U+20) or C0 control (U+00-U+1F) characters. + // eslint-disable-next-line no-control-regex + const protocol = /^[\x00-\x20]*([^\\/#?]*?)(:|&#0*58|&#x0*3a|&colon)/i + .exec(url); + if (!protocol) { + return "_relative"; + } + // Reject weird colons + if (protocol[2] !== ":") { + return null; + } + // Reject invalid characters in scheme according to + // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 + if (!/^[a-zA-Z][a-zA-Z0-9+\-.]*$/.test(protocol[1])) { + return null; + } + // Lowercase the protocol + return protocol[1].toLowerCase(); }; export default { diff --git a/test/__snapshots__/katex-spec.js.snap b/test/__snapshots__/katex-spec.js.snap index 3546ca8560..0795a111eb 100755 --- a/test/__snapshots__/katex-spec.js.snap +++ b/test/__snapshots__/katex-spec.js.snap @@ -1468,7 +1468,50 @@ exports[`href and url commands should not affect spacing around 1`] = ` ] `; -exports[`href and url commands should not allow explicitly disallow protocols 1`] = ` +exports[`href and url commands should not allow explicitly disallowed protocols 1`] = ` +[ + { + "type": "color", + "body": [ + { + "type": "text", + "body": [ + { + "type": "textord", + "mode": "text", + "text": "\\\\" + }, + { + "type": "textord", + "mode": "text", + "text": "h" + }, + { + "type": "textord", + "mode": "text", + "text": "r" + }, + { + "type": "textord", + "mode": "text", + "text": "e" + }, + { + "type": "textord", + "mode": "text", + "text": "f" + } + ], + "mode": "math" + } + ], + "color": "#cc0000", + "mode": "math" + } +] +`; + +exports[`href and url commands should not allow explicitly uppercased disallowed protocols 
1`] = ` [ { "type": "color", diff --git a/test/katex-spec.js b/test/katex-spec.js index 89cb389a2b..438be76c8e 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -3005,13 +3005,46 @@ describe("href and url commands", function() { expect(parsed).toMatchSnapshot(); }); - it("should not allow explicitly disallow protocols", () => { + it("should not allow explicitly disallowed protocols", () => { const parsed = getParsed( "\\href{javascript:alert('x')}{foo}", new Settings({trust: context => context.protocol !== "javascript"}), ); expect(parsed).toMatchSnapshot(); }); + + it("should not allow explicitly uppercased disallowed protocols", () => { + const parsed = getParsed( + "\\href{JavaScript:alert('x')}{foo}", + new Settings({trust: context => context.protocol !== "javascript"}), + ); + expect(parsed).toMatchSnapshot(); + }); + + function getProtocolViaTrust(url) { + let protocol; + getParsed(`\\url{${url}}`, new Settings({ + trust: context => protocol = context.protocol, + })); + return protocol; + } + + it("should get protocols correctly", () => { + expect(getProtocolViaTrust("foo")).toBe("_relative"); + expect(getProtocolViaTrust("Foo:")).toBe("foo"); + expect(getProtocolViaTrust("Foo:bar")).toBe("foo"); + expect(getProtocolViaTrust("JavaScript:")).toBe("javascript"); + expect(getProtocolViaTrust("JavaScript:code")).toBe("javascript"); + expect(getProtocolViaTrust("!:")).toBeUndefined(); + expect(getProtocolViaTrust("foo&colon;")).toBeUndefined(); + expect(getProtocolViaTrust("?query=string&colon=")).toBe("_relative"); + expect(getProtocolViaTrust("#query=string&colon=")).toBe("_relative"); + expect(getProtocolViaTrust("dir/file&colon")).toBe("_relative"); + expect(getProtocolViaTrust("//foo")).toBe("_relative"); + expect(getProtocolViaTrust("://foo")).toBeUndefined(); + expect(getProtocolViaTrust(" \t http://")).toBe("http"); + expect(getProtocolViaTrust(" \t http://foo")).toBe("http"); + }); }); describe("A raw text parser", function() {