-
Notifications
You must be signed in to change notification settings - Fork 30k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
3x faster setImmediate #6436
3x faster setImmediate #6436
Changes from all commits
44243ad
d005b81
ad3bcb8
1f82a29
49b2611
c12ba47
6dcbd83
197aac2
4b54cd7
8179d32
77e0443
0e75f1f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
'use strict'; | ||
var common = require('../common.js'); | ||
|
||
var bench = common.createBenchmark(main, { | ||
thousands: [2000], | ||
type: ['depth', 'depth1', 'breadth', 'breadth1', 'breadth4', 'clear'] | ||
}); | ||
|
||
function main(conf) { | ||
var N = +conf.thousands * 1e3; | ||
switch (conf.type) { | ||
case 'depth': | ||
depth(N); | ||
break; | ||
case 'depth1': | ||
depth1(N); | ||
break; | ||
case 'breadth': | ||
breadth(N); | ||
break; | ||
case 'breadth1': | ||
breadth1(N); | ||
break; | ||
case 'breadth4': | ||
breadth4(N); | ||
break; | ||
case 'clear': | ||
clear(N); | ||
break; | ||
} | ||
} | ||
|
||
// setImmediate tail recursion, 0 arguments | ||
function depth(N) { | ||
var n = 0; | ||
bench.start(); | ||
setImmediate(cb); | ||
function cb() { | ||
n++; | ||
if (n === N) | ||
bench.end(N / 1e3); | ||
else | ||
setImmediate(cb); | ||
} | ||
} | ||
|
||
// setImmediate tail recursion, 1 argument | ||
function depth1(N) { | ||
var n = 0; | ||
bench.start(); | ||
setImmediate(cb, 1); | ||
function cb(a1) { | ||
n++; | ||
if (n === N) | ||
bench.end(N / 1e3); | ||
else | ||
setImmediate(cb, 1); | ||
} | ||
} | ||
|
||
// concurrent setImmediate, 0 arguments | ||
function breadth(N) { | ||
var n = 0; | ||
bench.start(); | ||
function cb() { | ||
n++; | ||
if (n === N) | ||
bench.end(N / 1e3); | ||
} | ||
for (var i = 0; i < N; i++) { | ||
setImmediate(cb); | ||
} | ||
} | ||
|
||
// concurrent setImmediate, 1 argument | ||
function breadth1(N) { | ||
var n = 0; | ||
bench.start(); | ||
function cb(a1) { | ||
n++; | ||
if (n === N) | ||
bench.end(N / 1e3); | ||
} | ||
for (var i = 0; i < N; i++) { | ||
setImmediate(cb, 1); | ||
} | ||
} | ||
|
||
// concurrent setImmediate, 4 arguments | ||
function breadth4(N) { | ||
var n = 0; | ||
bench.start(); | ||
function cb(a1, a2, a3, a4) { | ||
n++; | ||
if (n === N) | ||
bench.end(N / 1e3); | ||
} | ||
for (var i = 0; i < N; i++) { | ||
setImmediate(cb, 1, 2, 3, 4); | ||
} | ||
} | ||
|
||
function clear(N) { | ||
bench.start(); | ||
function cb(a1) { | ||
if (a1 === 2) | ||
bench.end(N / 1e3); | ||
} | ||
for (var i = 0; i < N; i++) { | ||
clearImmediate(setImmediate(cb, 1)); | ||
} | ||
setImmediate(cb, 2); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -502,24 +502,26 @@ Timeout.prototype.close = function() { | |
}; | ||
|
||
|
||
var immediateQueue = {}; | ||
L.init(immediateQueue); | ||
var immediateQueue = L.create(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really understand why this change is needed or is helpful but I guess it does clean things up a little. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prevents a hidden class change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. at top-level it was changed to match processImmediate (and it cleans things up a bit). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I bet the object ends up being access-optimized after enough runs anyway. Not to mention we can make objects access-optimized explicitly. That said - this change improves style anyway and is better coding. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably, though depends on how many immediates are queued in an event loop cycle. Out of curiosity, what are the ways of creating access-optimized objects? I know about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @andrasq conveniently, here is a StackOverflow answer I wrote about a technique petkaantonov used in bluebird (with making an object as a prototype of a function). This also works with Object.create so I guess that's another one.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to be explicit - this specific change LGTM, even if it's not faster but it probably is given the old code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's a great writeup, and a handy reference, thanks! |
||
|
||
|
||
function processImmediate() { | ||
var queue = immediateQueue; | ||
const queue = immediateQueue; | ||
var domain, immediate; | ||
|
||
immediateQueue = {}; | ||
L.init(immediateQueue); | ||
immediateQueue = L.create(); | ||
|
||
while (L.isEmpty(queue) === false) { | ||
immediate = L.shift(queue); | ||
domain = immediate.domain; | ||
|
||
if (!immediate._onImmediate) | ||
continue; | ||
|
||
if (domain) | ||
domain.enter(); | ||
|
||
immediate._callback = immediate._onImmediate; | ||
tryOnImmediate(immediate, queue); | ||
|
||
if (domain) | ||
|
@@ -540,7 +542,8 @@ function processImmediate() { | |
function tryOnImmediate(immediate, queue) { | ||
var threw = true; | ||
try { | ||
immediate._onImmediate(); | ||
// make the actual call outside the try/catch to allow it to be optimized | ||
runCallback(immediate); | ||
threw = false; | ||
} finally { | ||
if (threw && !L.isEmpty(queue)) { | ||
|
@@ -554,67 +557,77 @@ function tryOnImmediate(immediate, queue) { | |
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a breaking change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, nvm, #6206 has not landed yet There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this would be breaking if #6206 had landed, as it doesn't get rid of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That code changed since the comment was left here. |
||
|
||
function runCallback(timer) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we sure we don't do this optimization anywhere else in the code more generically? It sounds like something that would go in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I use this construct in other projects, and found it hard to make generic; That being said, I agree, an optimized call invoker is a nice utility to have. |
||
const argv = timer._argv; | ||
const argc = argv ? argv.length : 0; | ||
switch (argc) { | ||
// fast-path callbacks with 0-3 arguments | ||
case 0: | ||
return timer._callback(); | ||
case 1: | ||
return timer._callback(argv[0]); | ||
case 2: | ||
return timer._callback(argv[0], argv[1]); | ||
case 3: | ||
return timer._callback(argv[0], argv[1], argv[2]); | ||
// more than 3 arguments run slower with .apply | ||
default: | ||
return timer._callback.apply(timer, argv); | ||
} | ||
} | ||
|
||
function Immediate() { } | ||
|
||
Immediate.prototype.domain = undefined; | ||
Immediate.prototype._onImmediate = undefined; | ||
Immediate.prototype._idleNext = undefined; | ||
Immediate.prototype._idlePrev = undefined; | ||
|
||
function Immediate() { | ||
// assigning the callback here can cause optimize/deoptimize thrashing | ||
// so have caller annotate the object (node v6.0.0, v8 5.0.71.35) | ||
this._idleNext = null; | ||
this._idlePrev = null; | ||
this._callback = null; | ||
this._argv = null; | ||
this._onImmediate = null; | ||
this.domain = process.domain; | ||
} | ||
|
||
exports.setImmediate = function(callback, arg1, arg2, arg3) { | ||
if (typeof callback !== 'function') { | ||
throw new TypeError('"callback" argument must be a function'); | ||
} | ||
|
||
var i, args; | ||
var len = arguments.length; | ||
var immediate = new Immediate(); | ||
|
||
L.init(immediate); | ||
|
||
switch (len) { | ||
switch (arguments.length) { | ||
// fast cases | ||
case 0: | ||
case 1: | ||
immediate._onImmediate = callback; | ||
break; | ||
case 2: | ||
immediate._onImmediate = function() { | ||
callback.call(immediate, arg1); | ||
}; | ||
args = [arg1]; | ||
break; | ||
case 3: | ||
immediate._onImmediate = function() { | ||
callback.call(immediate, arg1, arg2); | ||
}; | ||
args = [arg1, arg2]; | ||
break; | ||
case 4: | ||
immediate._onImmediate = function() { | ||
callback.call(immediate, arg1, arg2, arg3); | ||
}; | ||
args = [arg1, arg2, arg3]; | ||
break; | ||
// slow case | ||
default: | ||
args = new Array(len - 1); | ||
for (i = 1; i < len; i++) | ||
args = [arg1, arg2, arg3]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not for places where performance matters; the Array.of() function call is 40x slower. Array.of is a native function, and native (C++) functions are often slower than v8 javascript. The speed limit to native functions is the js - C++ divide; crossing from js into C++ is slow, |
||
for (i = 4; i < arguments.length; i++) | ||
// extend array dynamically, makes .apply run much faster in v6.0.0 | ||
args[i - 1] = arguments[i]; | ||
|
||
immediate._onImmediate = function() { | ||
callback.apply(immediate, args); | ||
}; | ||
break; | ||
} | ||
// declaring it `const immediate` causes v6.0.0 to deoptimize this function | ||
var immediate = new Immediate(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mscdex also suggested that, but making it
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @andrasq Could you add a comment about that then? Thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good idea; fixed |
||
immediate._callback = callback; | ||
immediate._argv = args; | ||
immediate._onImmediate = callback; | ||
|
||
if (!process._needImmediateCallback) { | ||
process._needImmediateCallback = true; | ||
process._immediateCallback = processImmediate; | ||
} | ||
|
||
if (process.domain) | ||
immediate.domain = process.domain; | ||
|
||
L.append(immediateQueue, immediate); | ||
|
||
return immediate; | ||
|
@@ -626,9 +639,5 @@ exports.clearImmediate = function(immediate) { | |
|
||
immediate._onImmediate = undefined; | ||
|
||
L.remove(immediate); | ||
|
||
if (L.isEmpty(immediateQueue)) { | ||
process._needImmediateCallback = false; | ||
} | ||
// leave queued, much faster overall to skip it later in processImmediate | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't think this is true, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the result is very reproducible, it's 3x faster to set/clear without the remove. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think it's ever possible to even have this show 1% in a v8 profile of an actual application, could you compile some benchmark results for us on these? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would expect actual applications make very few calls to clearImmediate The output of test from benchmark/timers/immediate.js
This test creates 2m cleared immediates and shows 254% speedup. |
||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,3 +103,8 @@ assert.equal(C, L.shift(list)); | |
// list | ||
assert.ok(L.isEmpty(list)); | ||
|
||
const list2 = L.create(); | ||
const list3 = L.create(); | ||
assert.ok(L.isEmpty(list2)); | ||
assert.ok(L.isEmpty(list3)); | ||
assert.ok(list2 != list3); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe also add: L.init(list);
assert.deepEqual(list2, list); (I think that should pass) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally, we'd block-scope the tests and the variables. For a later PR I suppose. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the assertion failed... which makes sense in hindsight, circular lists point to themselves, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would the common case of no trailing list element make it better to check these in the opposite order?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
linkedlist.js implements a circular list, the expected case is that both will be set or not set together.
This is just a fast-path optimization for newly created list nodes that have never been linked.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, right. Forgot about that.