Skip to content

Commit

Permalink
fix #2215: lower regexp literals to new RegExp()
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed May 27, 2022
1 parent d189b2e commit 14d9de5
Show file tree
Hide file tree
Showing 8 changed files with 339 additions and 5 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,25 @@
import k2 from "keep";
```

* Avoid regular expression syntax errors in older browsers ([#2215](https://github.com/evanw/esbuild/issues/2215))

Previously esbuild always passed JavaScript regular expression literals through unmodified from the input to the output. This is undesirable when the regular expression uses newer features that the configured target environment doesn't support. For example, the `d` flag (i.e. the [match indices feature](https://v8.dev/features/regexp-match-indices)) is new in ES2022 and doesn't work in older browsers. If esbuild generated a regular expression literal containing the `d` flag, then older browsers would consider esbuild's output to be a syntax error and none of the code would run.

With this release, esbuild now detects when an unsupported feature is being used and converts the regular expression literal into a `new RegExp()` constructor instead. One consequence of this is that the syntax error is transformed into a run-time error, which allows the output code to run (and to potentially handle the run-time error). Another consequence of this is that it allows you to include a polyfill that overwrites the `RegExp` constructor in older browsers with one that supports modern features. Note that esbuild does not handle polyfills for you, so you will need to include a `RegExp` polyfill yourself if you want one.

```js
// Original code
console.log(/b/d.exec('abc').indices)

// New output (with --target=chrome90)
console.log(/b/d.exec("abc").indices);

// New output (with --target=chrome89)
console.log(new RegExp("b", "d").exec("abc").indices);
```

This is currently done transparently without a warning. If you would like to debug this transformation to see where in your code esbuild is transforming regular expression literals and why, you can pass `--log-level=debug` to esbuild and review the information present in esbuild's debug logs.

* Add Opera to more internal feature compatibility tables ([#2247](https://github.com/evanw/esbuild/issues/2247), [#2252](https://github.com/evanw/esbuild/pull/2252))

The internal compatibility tables that esbuild uses to determine which environments support which features are derived from multiple sources. Most of it is automatically derived from [these ECMAScript compatibility tables](https://kangax.github.io/compat-table/), but missing information is manually copied from [MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/), GitHub PR comments, and various other websites. Version 0.14.35 of esbuild introduced Opera as a possible target environment which was automatically picked up by the compatibility table script, but the manually-copied information wasn't updated to include Opera. This release fixes this omission so Opera feature compatibility should now be accurate.
Expand Down
18 changes: 18 additions & 0 deletions internal/bundler/bundler_lower_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2280,3 +2280,21 @@ func TestStaticClassBlockES2021(t *testing.T) {
},
})
}

// TestLowerRegExpNameCollision bundles a module whose function parameter is
// itself named "RegExp" and whose body contains a regular expression literal
// using the "d" flag. The recorded snapshot for this test shows the literal
// rewritten to a "new RegExp(...)" call while the user's parameter is renamed
// to "RegExp2", so the generated reference does not get captured by the local
// binding.
func TestLowerRegExpNameCollision(t *testing.T) {
lower_suite.expectBundled(t, bundled{
files: map[string]string{
"/entry.js": `
export function foo(RegExp) {
return new RegExp(/./d, 'd')
}
`,
},
entryPaths: []string{"/entry.js"},
options: config.Options{
Mode: config.ModeBundle,
AbsOutputFile: "/out.js",
// NOTE(review): es(2021) presumably marks every feature newer than ES2021
// (including the ES2022 "d" flag) as unsupported; confirm against the helper.
UnsupportedJSFeatures: es(2021),
},
})
}
11 changes: 11 additions & 0 deletions internal/bundler/snapshots/snapshots_lower.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1650,6 +1650,17 @@ export {
Foo
};

================================================================================
TestLowerRegExpNameCollision
---------- /out.js ----------
// entry.js
function foo(RegExp2) {
return new RegExp2(new RegExp(".", "d"), "d");
}
export {
foo
};

================================================================================
TestLowerStaticAsyncArrowSuperES2016
---------- /out.js ----------
Expand Down
65 changes: 64 additions & 1 deletion internal/compat/js_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ const (
ObjectRestSpread
OptionalCatchBinding
OptionalChain
RegExpDotAllFlag
RegExpLookbehindAssertions
RegExpMatchIndices
RegExpNamedCaptureGroups
RegExpStickyAndUnicodeFlags
RegExpUnicodePropertyEscapes
RestArgument
TemplateLiteral
TopLevelAwait
Expand Down Expand Up @@ -436,7 +442,7 @@ var jsTable = map[JSFeature]map[Engine][]versionRange{
Firefox: {{start: v{2, 0, 0}}},
IE: {{start: v{9, 0, 0}}},
IOS: {{start: v{6, 0, 0}}},
Node: {{start: v{0, 4, 0}}},
Node: {{start: v{0, 10, 0}}},
Opera: {{start: v{10, 10, 0}}},
Safari: {{start: v{3, 1, 0}}},
},
Expand Down Expand Up @@ -477,6 +483,63 @@ var jsTable = map[JSFeature]map[Engine][]versionRange{
Opera: {{start: v{77, 0, 0}}},
Safari: {{start: v{13, 1, 0}}},
},
RegExpDotAllFlag: {
Chrome: {{start: v{62, 0, 0}}},
Edge: {{start: v{79, 0, 0}}},
ES: {{start: v{2018, 0, 0}}},
Firefox: {{start: v{78, 0, 0}}},
IOS: {{start: v{11, 3, 0}}},
Node: {{start: v{8, 10, 0}}},
Opera: {{start: v{49, 0, 0}}},
Safari: {{start: v{11, 1, 0}}},
},
RegExpLookbehindAssertions: {
Chrome: {{start: v{62, 0, 0}}},
Edge: {{start: v{79, 0, 0}}},
ES: {{start: v{2018, 0, 0}}},
Firefox: {{start: v{78, 0, 0}}},
Node: {{start: v{8, 10, 0}}},
Opera: {{start: v{49, 0, 0}}},
},
RegExpMatchIndices: {
Chrome: {{start: v{90, 0, 0}}},
Edge: {{start: v{90, 0, 0}}},
ES: {{start: v{2022, 0, 0}}},
Firefox: {{start: v{88, 0, 0}}},
IOS: {{start: v{15, 0, 0}}},
Opera: {{start: v{76, 0, 0}}},
Safari: {{start: v{15, 0, 0}}},
},
RegExpNamedCaptureGroups: {
Chrome: {{start: v{64, 0, 0}}},
Edge: {{start: v{79, 0, 0}}},
ES: {{start: v{2018, 0, 0}}},
Firefox: {{start: v{78, 0, 0}}},
IOS: {{start: v{11, 3, 0}}},
Node: {{start: v{10, 0, 0}}},
Opera: {{start: v{51, 0, 0}}},
Safari: {{start: v{11, 1, 0}}},
},
RegExpStickyAndUnicodeFlags: {
Chrome: {{start: v{50, 0, 0}}},
Edge: {{start: v{13, 0, 0}}},
ES: {{start: v{2015, 0, 0}}},
Firefox: {{start: v{46, 0, 0}}},
IOS: {{start: v{12, 0, 0}}},
Node: {{start: v{6, 0, 0}}},
Opera: {{start: v{37, 0, 0}}},
Safari: {{start: v{12, 0, 0}}},
},
RegExpUnicodePropertyEscapes: {
Chrome: {{start: v{64, 0, 0}}},
Edge: {{start: v{79, 0, 0}}},
ES: {{start: v{2018, 0, 0}}},
Firefox: {{start: v{78, 0, 0}}},
IOS: {{start: v{11, 3, 0}}},
Node: {{start: v{10, 0, 0}}},
Opera: {{start: v{51, 0, 0}}},
Safari: {{start: v{11, 1, 0}}},
},
RestArgument: {
Chrome: {{start: v{47, 0, 0}}},
Edge: {{start: v{12, 0, 0}}},
Expand Down
2 changes: 1 addition & 1 deletion internal/js_lexer/js_lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2208,7 +2208,7 @@ func (lexer *Lexer) ScanRegExp() {
bits := uint32(0)
for IsIdentifierContinue(lexer.codePoint) {
switch lexer.codePoint {
case 'g', 'i', 'm', 's', 'u', 'y':
case 'd', 'g', 'i', 'm', 's', 'u', 'y':
bit := uint32(1) << uint32(lexer.codePoint-'a')
if (bit & bits) != 0 {
// Reject duplicate flags
Expand Down
174 changes: 171 additions & 3 deletions internal/js_parser/js_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ type parser struct {
moduleRef js_ast.Ref
importMetaRef js_ast.Ref
promiseRef js_ast.Ref
regExpRef js_ast.Ref
runtimePublicFieldImport js_ast.Ref
superCtorRef js_ast.Ref

Expand Down Expand Up @@ -1510,6 +1511,14 @@ func (p *parser) makePromiseRef() js_ast.Ref {
return p.promiseRef
}

// makeRegExpRef returns the ref used for generated references to "RegExp".
// The symbol is created lazily on first use as an unbound symbol named
// "RegExp", appended to the module scope's generated symbols, and cached on
// the parser so later calls return the same ref.
func (p *parser) makeRegExpRef() js_ast.Ref {
	// Fast path: the symbol has already been created
	if p.regExpRef != js_ast.InvalidRef {
		return p.regExpRef
	}

	ref := p.newSymbol(js_ast.SymbolUnbound, "RegExp")
	p.moduleScope.Generated = append(p.moduleScope.Generated, ref)
	p.regExpRef = ref
	return ref
}

// The name is temporarily stored in the ref until the scope traversal pass
// happens, at which point a symbol will be generated and the ref will point
// to the symbol instead.
Expand Down Expand Up @@ -11526,6 +11535,144 @@ func containsClosingScriptTag(text string) bool {
return false
}

// isUnsupportedRegularExpression inspects the text of a regular expression
// literal and reports whether it uses a feature that the configured target
// environment does not support.
//
// Parameters:
//   - loc: the source location of the start of the literal (the leading "/").
//     It is only used to compute ranges for log messages.
//   - value: the full literal text in the form "/pattern/flags".
//
// Returns:
//   - pattern: the text between the leading "/" and the last "/".
//   - flags: the text after the last "/".
//   - isUnsupported: true if an unsupported pattern feature or flag was found.
//
// When something unsupported is found, a debug-level message is logged that
// explains why the literal is being converted to a "new RegExp()" call. If an
// unbalanced ")" is found, an error is logged instead and the function returns
// immediately with isUnsupported set to false.
func (p *parser) isUnsupportedRegularExpression(loc logger.Loc, value string) (pattern string, flags string, isUnsupported bool) {
	var feature compat.JSFeature
	var what string
	var r logger.Range

	end := strings.LastIndexByte(value, '/')
	pattern = value[1:end]
	flags = value[end+1:]
	isUnicode := strings.IndexByte(flags, 'u') >= 0
	parenDepth := 0
	i := 0

	// Do a simple scan for unsupported features assuming the regular expression
	// is valid. This doesn't do a full validation of the regular expression
	// because regular expression grammar is complicated. If it contains a syntax
	// error that we don't catch, then we will just generate output code with a
	// syntax error. Garbage in, garbage out.
pattern:
	for i < len(pattern) {
		c := pattern[i]
		i++

		switch c {
		case '[':
			// Skip over the whole character class. Characters such as "(" and ")"
			// are literals inside a class, so the scan must continue until the
			// closing "]" instead of stopping after the first character.
		class:
			for i < len(pattern) {
				c := pattern[i]
				i++

				switch c {
				case ']':
					break class

				case '\\':
					i++ // Skip the escaped character
				}
			}

		case '(':
			tail := pattern[i:]

			if strings.HasPrefix(tail, "?<=") || strings.HasPrefix(tail, "?<!") {
				if p.options.unsupportedJSFeatures.Has(compat.RegExpLookbehindAssertions) {
					feature = compat.RegExpLookbehindAssertions
					what = "Lookbehind assertions in regular expressions are not available"
					// "i" indexes into "pattern", which starts one byte after "loc"
					r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i) + 1}, Len: 3}
					isUnsupported = true
					break pattern
				}
			} else if strings.HasPrefix(tail, "?<") {
				if p.options.unsupportedJSFeatures.Has(compat.RegExpNamedCaptureGroups) {
					if end := strings.IndexByte(tail, '>'); end >= 0 {
						feature = compat.RegExpNamedCaptureGroups
						what = "Named capture groups in regular expressions are not available"
						r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i) + 1}, Len: int32(end) + 1}
						isUnsupported = true
						break pattern
					}
				}
			}

			parenDepth++

		case ')':
			if parenDepth == 0 {
				r := logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i)}, Len: 1}
				p.log.Add(logger.Error, &p.tracker, r, "Unexpected \")\" in regular expression")
				return
			}

			parenDepth--

		case '\\':
			tail := pattern[i:]

			// Property escapes are only looked for when the "u" flag is present
			if isUnicode && (strings.HasPrefix(tail, "p{") || strings.HasPrefix(tail, "P{")) {
				if p.options.unsupportedJSFeatures.Has(compat.RegExpUnicodePropertyEscapes) {
					if end := strings.IndexByte(tail, '}'); end >= 0 {
						feature = compat.RegExpUnicodePropertyEscapes
						what = "Unicode property escapes in regular expressions are not available"
						// The range covers the backslash through the closing "}"
						r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(i)}, Len: int32(end) + 2}
						isUnsupported = true
						break pattern
					}
				}
			}

			i++ // Skip the escaped character
		}
	}

	if !isUnsupported {
		for i, c := range flags {
			switch c {
			case 'g', 'i', 'm':
				continue // These are part of ES5 and are always supported

			case 's':
				if !p.options.unsupportedJSFeatures.Has(compat.RegExpDotAllFlag) {
					continue // This is part of ES2018
				}
				feature = compat.RegExpDotAllFlag

			case 'y', 'u':
				if !p.options.unsupportedJSFeatures.Has(compat.RegExpStickyAndUnicodeFlags) {
					continue // These are part of ES2015
				}
				feature = compat.RegExpStickyAndUnicodeFlags

			case 'd':
				if !p.options.unsupportedJSFeatures.Has(compat.RegExpMatchIndices) {
					continue // This is part of ES2022
				}
				feature = compat.RegExpMatchIndices

			default:
				// Unknown flags are never supported
			}

			// Only the first unsupported flag is reported
			r = logger.Range{Loc: logger.Loc{Start: loc.Start + int32(end+1) + int32(i)}, Len: 1}
			what = fmt.Sprintf("The regular expression flag \"%c\" is not available", c)
			isUnsupported = true
			break
		}
	}

	if isUnsupported {
		where, notes := p.prettyPrintTargetEnvironment(feature)
		p.log.AddWithNotes(logger.Debug, &p.tracker, r, fmt.Sprintf("%s in %s", what, where), append(notes, logger.MsgData{
			Text: "This regular expression literal has been converted to a \"new RegExp()\" constructor " +
				"to avoid generating code with a syntax error. However, you will need to include a " +
				"polyfill for \"RegExp\" for your code to have the correct behavior at run-time."}))
	}

	return
}

// This function takes "exprIn" as input from the caller and produces "exprOut"
// for the caller to pass along extra data. This is mostly for optional chaining.
func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprOut) {
Expand All @@ -11534,9 +11681,29 @@ func (p *parser) visitExprInOut(expr js_ast.Expr, in exprIn) (js_ast.Expr, exprO
}

switch e := expr.Data.(type) {
case *js_ast.ENull, *js_ast.ESuper,
*js_ast.EBoolean, *js_ast.EBigInt,
*js_ast.ERegExp, *js_ast.EUndefined:
case *js_ast.ENull, *js_ast.ESuper, *js_ast.EBoolean, *js_ast.EBigInt, *js_ast.EUndefined:

case *js_ast.ERegExp:
// "/pattern/flags" => "new RegExp('pattern', 'flags')"
if pattern, flags, ok := p.isUnsupportedRegularExpression(expr.Loc, e.Value); ok {
args := []js_ast.Expr{{
Loc: logger.Loc{Start: expr.Loc.Start + 1},
Data: &js_ast.EString{Value: helpers.StringToUTF16(pattern)},
}}
if flags != "" {
args = append(args, js_ast.Expr{
Loc: logger.Loc{Start: expr.Loc.Start + int32(len(pattern)) + 2},
Data: &js_ast.EString{Value: helpers.StringToUTF16(flags)},
})
}
regExpRef := p.makeRegExpRef()
p.recordUsage(regExpRef)
return js_ast.Expr{Loc: expr.Loc, Data: &js_ast.ENew{
Target: js_ast.Expr{Loc: expr.Loc, Data: &js_ast.EIdentifier{Ref: regExpRef}},
Args: args,
CloseParenLoc: logger.Loc{Start: expr.Loc.Start + int32(len(e.Value))},
}}, exprOut{}
}

case *js_ast.ENewTarget:
if !p.fnOnlyDataVisit.isNewTargetAllowed {
Expand Down Expand Up @@ -14952,6 +15119,7 @@ func newParser(log logger.Log, source logger.Source, lexer js_lexer.Lexer, optio
options: *options,
runtimeImports: make(map[string]js_ast.Ref),
promiseRef: js_ast.InvalidRef,
regExpRef: js_ast.InvalidRef,
afterArrowBodyLoc: logger.Loc{Start: -1},
importMetaRef: js_ast.InvalidRef,
runtimePublicFieldImport: js_ast.InvalidRef,
Expand Down
Loading

0 comments on commit 14d9de5

Please sign in to comment.