fix #2215: lower regexp literals to new RegExp()

evanw · May 27, 2022 · 14d9de5 · 14d9de5
1 parent d189b2e
commit 14d9de5
Show file tree

Hide file tree

Showing 8 changed files with 339 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -29,6 +29,25 @@
     import k2 from "keep";
     ```
 
+* Avoid regular expression syntax errors in older browsers ([#2215](https://github.com/evanw/esbuild/issues/2215))
+
+    Previously esbuild always passed JavaScript regular expression literals through unmodified from the input to the output. This is undesirable when the regular expression uses newer features that the configured target environment doesn't support. For example, the `d` flag (i.e. the [match indices feature](https://v8.dev/features/regexp-match-indices)) is new in ES2022 and doesn't work in older browsers. If esbuild generated a regular expression literal containing the `d` flag, then older browsers would consider esbuild's output to be a syntax error and none of the code would run.
+
+    With this release, esbuild now detects when an unsupported feature is being used and converts the regular expression literal into a `new RegExp()` constructor instead. One consequence of this is that the syntax error is transformed into a run-time error, which allows the output code to run (and to potentially handle the run-time error). Another consequence of this is that it allows you to include a polyfill that overwrites the `RegExp` constructor in older browsers with one that supports modern features. Note that esbuild does not handle polyfills for you, so you will need to include a `RegExp` polyfill yourself if you want one.
+
+    ```js
+    // Original code
+    console.log(/b/d.exec('abc').indices)
+
+    // New output (with --target=chrome90)
+    console.log(/b/d.exec("abc").indices);
+
+    // New output (with --target=chrome89)
+    console.log(new RegExp("b", "d").exec("abc").indices);
+    ```
+
+    This is currently done transparently without a warning. If you would like to debug this transformation to see where in your code esbuild is transforming regular expression literals and why, you can pass `--log-level=debug` to esbuild and review the information present in esbuild's debug logs.
+
 * Add Opera to more internal feature compatibility tables ([#2247](https://github.com/evanw/esbuild/issues/2247), [#2252](https://github.com/evanw/esbuild/pull/2252))
 
     The internal compatibility tables that esbuild uses to determine which environments support which features are derived from multiple sources. Most of it is automatically derived from [these ECMAScript compatibility tables](https://kangax.github.io/compat-table/), but missing information is manually copied from [MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/), GitHub PR comments, and various other websites. Version 0.14.35 of esbuild introduced Opera as a possible target environment which was automatically picked up by the compatibility table script, but the manually-copied information wasn't updated to include Opera. This release fixes this omission so Opera feature compatibility should now be accurate.

diff --git a/internal/bundler/bundler_lower_test.go b/internal/bundler/bundler_lower_test.go
@@ -2280,3 +2280,21 @@ func TestStaticClassBlockES2021(t *testing.T) {
 		},
 	})
 }
+
+func TestLowerRegExpNameCollision(t *testing.T) {
+	lower_suite.expectBundled(t, bundled{
+		files: map[string]string{
+			"/entry.js": `
+				export function foo(RegExp) {
+					return new RegExp(/./d, 'd')
+				}
+			`,
+		},
+		entryPaths: []string{"/entry.js"},
+		options: config.Options{
+			Mode:                  config.ModeBundle,
+			AbsOutputFile:         "/out.js",
+			UnsupportedJSFeatures: es(2021),
+		},
+	})
+}
diff --git a/internal/bundler/snapshots/snapshots_lower.txt b/internal/bundler/snapshots/snapshots_lower.txt
@@ -1650,6 +1650,17 @@ export {
   Foo
 };
 
+================================================================================
+TestLowerRegExpNameCollision
+---------- /out.js ----------
+// entry.js
+function foo(RegExp2) {
+  return new RegExp2(new RegExp(".", "d"), "d");
+}
+export {
+  foo
+};
+
 ================================================================================
 TestLowerStaticAsyncArrowSuperES2016
 ---------- /out.js ----------

diff --git a/internal/compat/js_table.go b/internal/compat/js_table.go
@@ -84,6 +84,12 @@ const (
 	ObjectRestSpread
 	OptionalCatchBinding
 	OptionalChain
+	RegExpDotAllFlag
+	RegExpLookbehindAssertions
+	RegExpMatchIndices
+	RegExpNamedCaptureGroups
+	RegExpStickyAndUnicodeFlags
+	RegExpUnicodePropertyEscapes
 	RestArgument
 	TemplateLiteral
 	TopLevelAwait
@@ -436,7 +442,7 @@ var jsTable = map[JSFeature]map[Engine][]versionRange{
 		Firefox: {{start: v{2, 0, 0}}},
 		IE:      {{start: v{9, 0, 0}}},
 		IOS:     {{start: v{6, 0, 0}}},
-		Node:    {{start: v{0, 4, 0}}},
+		Node:    {{start: v{0, 10, 0}}},
 		Opera:   {{start: v{10, 10, 0}}},
 		Safari:  {{start: v{3, 1, 0}}},
 	},
@@ -477,6 +483,63 @@ var jsTable = map[JSFeature]map[Engine][]versionRange{
 		Opera:   {{start: v{77, 0, 0}}},
 		Safari:  {{start: v{13, 1, 0}}},
 	},
+	RegExpDotAllFlag: {
+		Chrome:  {{start: v{62, 0, 0}}},
+		Edge:    {{start: v{79, 0, 0}}},
+		ES:      {{start: v{2018, 0, 0}}},
+		Firefox: {{start: v{78, 0, 0}}},
+		IOS:     {{start: v{11, 3, 0}}},
+		Node:    {{start: v{8, 10, 0}}},
+		Opera:   {{start: v{49, 0, 0}}},
+		Safari:  {{start: v{11, 1, 0}}},
+	},
+	RegExpLookbehindAssertions: {
+		Chrome:  {{start: v{62, 0, 0}}},
+		Edge:    {{start: v{79, 0, 0}}},
+		ES:      {{start: v{2018, 0, 0}}},
+		Firefox: {{start: v{78, 0, 0}}},
+		Node:    {{start: v{8, 10, 0}}},
+		Opera:   {{start: v{49, 0, 0}}},
+	},
+	RegExpMatchIndices: {
+		Chrome:  {{start: v{90, 0, 0}}},
+		Edge:    {{start: v{90, 0, 0}}},
+		ES:      {{start: v{2022, 0, 0}}},
+		Firefox: {{start: v{88, 0, 0}}},
+		IOS:     {{start: v{15, 0, 0}}},
+		Opera:   {{start: v{76, 0, 0}}},
+		Safari:  {{start: v{15, 0, 0}}},
+	},
+	RegExpNamedCaptureGroups: {
+		Chrome:  {{start: v{64, 0, 0}}},
+		Edge:    {{start: v{79, 0, 0}}},
+		ES:      {{start: v{2018, 0, 0}}},
+		Firefox: {{start: v{78, 0, 0}}},
+		IOS:     {{start: v{11, 3, 0}}},
+		Node:    {{start: v{10, 0, 0}}},
+		Opera:   {{start: v{51, 0, 0}}},
+		Safari:  {{start: v{11, 1, 0}}},
+	},
+	RegExpStickyAndUnicodeFlags: {
+		Chrome:  {{start: v{50, 0, 0}}},
+		Edge:    {{start: v{13, 0, 0}}},
+		ES:      {{start: v{2015, 0, 0}}},
+		Firefox: {{start: v{46, 0, 0}}},
+		IOS:     {{start: v{12, 0, 0}}},
+		Node:    {{start: v{6, 0, 0}}},
+		Opera:   {{start: v{37, 0, 0}}},
+		Safari:  {{start: v{12, 0, 0}}},
+	},
+	RegExpUnicodePropertyEscapes: {
+		Chrome:  {{start: v{64, 0, 0}}},
+		Edge:    {{start: v{79, 0, 0}}},
+		ES:      {{start: v{2018, 0, 0}}},
+		Firefox: {{start: v{78, 0, 0}}},
+		IOS:     {{start: v{11, 3, 0}}},
+		Node:    {{start: v{10, 0, 0}}},
+		Opera:   {{start: v{51, 0, 0}}},
+		Safari:  {{start: v{11, 1, 0}}},
+	},
 	RestArgument: {
 		Chrome:  {{start: v{47, 0, 0}}},
 		Edge:    {{start: v{12, 0, 0}}},

diff --git a/internal/js_lexer/js_lexer.go b/internal/js_lexer/js_lexer.go
@@ -2208,7 +2208,7 @@ func (lexer *Lexer) ScanRegExp() {
 			bits := uint32(0)
 			for IsIdentifierContinue(lexer.codePoint) {
 				switch lexer.codePoint {
-				case 'g', 'i', 'm', 's', 'u', 'y':
+				case 'd', 'g', 'i', 'm', 's', 'u', 'y':
 					bit := uint32(1) << uint32(lexer.codePoint-'a')
 					if (bit & bits) != 0 {
 						// Reject duplicate flags

diff --git a/internal/js_parser/js_parser.go b/internal/js_parser/js_parser.go
@@ -200,6 +200,7 @@ type parser struct {
 	moduleRef                js_ast.Ref
 	importMetaRef            js_ast.Ref
 	promiseRef               js_ast.Ref
+	regExpRef                js_ast.Ref
 	runtimePublicFieldImport js_ast.Ref
 	superCtorRef             js_ast.Ref
 
@@ -1510,6 +1511,14 @@ func (p *parser) makePromiseRef() js_ast.Ref {
 	return p.promiseRef
 }
 
+// makeRegExpRef returns the ref used for the "RegExp" identifier in lowered
+// regular expression literals. The symbol is created as an unbound symbol on
+// first use, recorded in the module scope's generated symbols, and cached in
+// "p.regExpRef" so later calls return the same ref.
+func (p *parser) makeRegExpRef() js_ast.Ref {
+	// Fast path: the symbol was already created by an earlier call
+	if p.regExpRef != js_ast.InvalidRef {
+		return p.regExpRef
+	}
+
+	ref := p.newSymbol(js_ast.SymbolUnbound, "RegExp")
+	p.regExpRef = ref
+	p.moduleScope.Generated = append(p.moduleScope.Generated, ref)
+	return ref
+}
+
 // The name is temporarily stored in the ref until the scope traversal pass
 // happens, at which point a symbol will be generated and the ref will point
 // to the symbol instead.
@@ -11526,6 +11535,144 @@ func containsClosingScriptTag(text string) bool {
 	return false
 }
 
+// isUnsupportedRegularExpression splits the regular expression literal "value"
+// (of the form "/pattern/flags", located at "loc") into its pattern and flags
+// and reports whether the literal uses a syntax feature or a flag that is
+// marked as unsupported in "p.options.unsupportedJSFeatures".
+//
+// When "isUnsupported" is true, a debug-level message naming the offending
+// feature has been logged and the caller is expected to replace the literal
+// with a "new RegExp()" constructor call built from "pattern" and "flags".
+//
+// An unbalanced ")" in the pattern is logged as an error; in that case the
+// function returns early with "isUnsupported" set to false.
+func (p *parser) isUnsupportedRegularExpression(loc logger.Loc, value string) (pattern string, flags string, isUnsupported bool) {
+	var feature compat.JSFeature
+	var what string
+	var r logger.Range
+
+	// The flags can never contain "/", so the last "/" always ends the pattern
+	end := strings.LastIndexByte(value, '/')
+	pattern = value[1:end]
+	flags = value[end+1:]
+	isUnicode := strings.IndexByte(flags, 'u') >= 0
+	parenDepth := 0
+	i := 0
+
+	// Do a simple scan for unsupported features assuming the regular expression
+	// is valid. This doesn't do a full validation of the regular expression
+	// because regular expression grammar is complicated. If it contains a syntax
+	// error that we don't catch, then we will just generate output code with a
+	// syntax error. Garbage in, garbage out.
+pattern:
+	for i < len(pattern) {
+		c := pattern[i]
+		i++
+
+		switch c {
+		case '[':
+			// Skip over the whole character class up to the closing "]".
+			// Parentheses inside a class are literal characters and must not
+			// be seen by the group handling below, so this loop has to keep
+			// going until the class actually ends.
+		class:
+			for i < len(pattern) {
+				c := pattern[i]
+				i++
+
+				switch c {
+				case ']':
+					break class
+
+				case '\\':
+					i++ // Skip the escaped character
+				}
+			}
+
+		case '(':
+			tail := pattern[i:]
+
+			if strings.HasPrefix(tail, "?<=") || strings.HasPrefix(tail, "?<!") {
+				if p.options.unsupportedJSFeatures.Has(compat.RegExpLookbehindAssertions) {
+					feature = compat.RegExpLookbehindAssertions
+					what = "Lookbehind assertions in regular expressions are not available"
+					// "i" indexes into "pattern", which starts one byte after "loc"
+					r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i) + 1}, Len: 3}
+					isUnsupported = true
+					break pattern
+				}
+			} else if strings.HasPrefix(tail, "?<") {
+				if p.options.unsupportedJSFeatures.Has(compat.RegExpNamedCaptureGroups) {
+					if end := strings.IndexByte(tail, '>'); end >= 0 {
+						feature = compat.RegExpNamedCaptureGroups
+						what = "Named capture groups in regular expressions are not available"
+						r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i) + 1}, Len: int32(end) + 1}
+						isUnsupported = true
+						break pattern
+					}
+				}
+			}
+
+			parenDepth++
+
+		case ')':
+			if parenDepth == 0 {
+				r := logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i)}, Len: 1}
+				p.log.Add(logger.Error, &p.tracker, r, "Unexpected \")\" in regular expression")
+				return
+			}
+
+			parenDepth--
+
+		case '\\':
+			tail := pattern[i:]
+
+			// "\p{...}" and "\P{...}" are only property escapes in unicode mode
+			if isUnicode && (strings.HasPrefix(tail, "p{") || strings.HasPrefix(tail, "P{")) {
+				if p.options.unsupportedJSFeatures.Has(compat.RegExpUnicodePropertyEscapes) {
+					if end := strings.IndexByte(tail, '}'); end >= 0 {
+						feature = compat.RegExpUnicodePropertyEscapes
+						what = "Unicode property escapes in regular expressions are not available"
+						// The range covers the backslash through the closing "}"
+						r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i)}, Len: int32(end) + 2}
+						isUnsupported = true
+						break pattern
+					}
+				}
+			}
+
+			i++ // Skip the escaped character
+		}
+	}
+
+	// Only inspect the flags if the pattern itself didn't already force lowering
+	if !isUnsupported {
+		for i, c := range flags {
+			switch c {
+			case 'g', 'i', 'm':
+				continue // These are part of ES5 and are always supported
+
+			case 's':
+				if !p.options.unsupportedJSFeatures.Has(compat.RegExpDotAllFlag) {
+					continue // This is part of ES2018
+				}
+				feature = compat.RegExpDotAllFlag
+
+			case 'y', 'u':
+				if !p.options.unsupportedJSFeatures.Has(compat.RegExpStickyAndUnicodeFlags) {
+					continue // These are part of ES2015
+				}
+				feature = compat.RegExpStickyAndUnicodeFlags
+
+			case 'd':
+				if !p.options.unsupportedJSFeatures.Has(compat.RegExpMatchIndices) {
+					continue // This is part of ES2022
+				}
+				feature = compat.RegExpMatchIndices
+
+			default:
+				// Unknown flags are never supported
+			}
+
+			// Report the first unsupported flag and stop scanning the rest
+			r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(end+1) + int32(i)}, Len: 1}
+			what = fmt.Sprintf("The regular expression flag \"%c\" is not available", c)
+			isUnsupported = true
+			break
+		}
+	}
+
+	if isUnsupported {
+		where, notes := p.prettyPrintTargetEnvironment(feature)
+		p.log.AddWithNotes(logger.Debug, &p.tracker, r, fmt.Sprintf("%s in %s", what, where), append(notes, logger.MsgData{
+			Text: "This regular expression literal has been converted to a \"new RegExp()\" constructor " +
+				"to avoid generating code with a syntax error. However, you will need to include a " +
+				"polyfill for \"RegExp\" for your code to have the correct behavior at run-time."}))
+	}
+
+	return
+}
+
 // This function takes "exprIn" as input from the caller and produces "exprOut"
 // for the caller to pass along extra data. This is mostly for optional chaining.
 func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprOut) {
@@ -11534,9 +11681,29 @@ func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprO
 	}
 
 	switch e := expr.Data.(type) {
-	case *js_ast.ENull, *js_ast.ESuper,
-		*js_ast.EBoolean, *js_ast.EBigInt,
-		*js_ast.ERegExp, *js_ast.EUndefined:
+	case *js_ast.ENull, *js_ast.ESuper, *js_ast.EBoolean, *js_ast.EBigInt, *js_ast.EUndefined:
+
+	case *js_ast.ERegExp:
+		// "/pattern/flags" => "new RegExp('pattern', 'flags')"
+		if pattern, flags, ok := p.isUnsupportedRegularExpression(expr.Loc, e.Value); ok {
+			args := []js_ast.Expr{{
+				Loc:  logger.Loc{Start: expr.Loc.Start + 1},
+				Data: &js_ast.EString{Value: helpers.StringToUTF16(pattern)},
+			}}
+			if flags != "" {
+				args = append(args, js_ast.Expr{
+					Loc:  logger.Loc{Start: expr.Loc.Start + int32(len(pattern)) + 2},
+					Data: &js_ast.EString{Value: helpers.StringToUTF16(flags)},
+				})
+			}
+			regExpRef := p.makeRegExpRef()
+			p.recordUsage(regExpRef)
+			return js_ast.Expr{Loc: expr.Loc, Data: &js_ast.ENew{
+				Target:        js_ast.Expr{Loc: expr.Loc, Data: &js_ast.EIdentifier{Ref: regExpRef}},
+				Args:          args,
+				CloseParenLoc: logger.Loc{Start: expr.Loc.Start + int32(len(e.Value))},
+			}}, exprOut{}
+		}
 
 	case *js_ast.ENewTarget:
 		if !p.fnOnlyDataVisit.isNewTargetAllowed {
@@ -14952,6 +15119,7 @@ func newParser(log logger.Log, source logger.Source, lexer js_lexer.Lexer, optio
 		options:                  *options,
 		runtimeImports:           make(map[string]js_ast.Ref),
 		promiseRef:               js_ast.InvalidRef,
+		regExpRef:                js_ast.InvalidRef,
 		afterArrowBodyLoc:        logger.Loc{Start: -1},
 		importMetaRef:            js_ast.InvalidRef,
 		runtimePublicFieldImport: js_ast.InvalidRef,