Skip to content

Commit

Permalink
Compiler: Optimize k*n and k+n when types are known (#1529)
Browse files Browse the repository at this point in the history
When type information is specified, we can compile k*n and k+n into
MULK/ADDK forms that are faster to execute, as long as we think n is a
number. Since we generally restrict type aware optimizations to O2, this
does that as well.

This makes trig benchmark ~4% faster on Apple M2 in VM, and also a tiny
improvement on scimark (~0.1%) can be observed. The optimization only
affects interpreted execution, as NCG already can synthesize optimal
code here.

If the type information is not truthful (e.g. user annotates type as a
number and it's not), the worst case scenario is flipped arguments to
metamethods like __add/__mul for constant left hand side.

Fixes #626 (the fix requires type information or NCG but I doubt any
further work on this is warranted)

---------

Co-authored-by: vegorov-rbx <[email protected]>
  • Loading branch information
zeux and vegorov-rbx authored Nov 19, 2024
1 parent c2e4ee0 commit b1b21f3
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 8 deletions.
19 changes: 19 additions & 0 deletions Compiler/src/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ LUAU_FASTINTVARIABLE(LuauCompileInlineThresholdMaxBoost, 300)
LUAU_FASTINTVARIABLE(LuauCompileInlineDepth, 5)

LUAU_FASTFLAG(LuauNativeAttribute)
LUAU_FASTFLAGVARIABLE(LuauCompileOptimizeRevArith)

namespace Luau
{
Expand Down Expand Up @@ -1623,6 +1624,24 @@ struct Compiler
return;
}
}
else if (FFlag::LuauCompileOptimizeRevArith && options.optimizationLevel >= 2 && (expr->op == AstExprBinary::Add || expr->op == AstExprBinary::Mul))
{
// Optimization: replace k*r with r*k when r is known to be a number (otherwise metamethods may be called)
if (LuauBytecodeType* ty = exprTypes.find(expr); ty && *ty == LBC_TYPE_NUMBER)
{
int32_t lc = getConstantNumber(expr->left);

if (lc >= 0 && lc <= 255)
{
uint8_t rr = compileExprAuto(expr->right, rs);

bytecode.emitABC(getBinaryOpArith(expr->op, /* k= */ true), target, rr, uint8_t(lc));

hintTemporaryExprRegType(expr->right, rr, LBC_TYPE_NUMBER, /* instLength */ 1);
return;
}
}
}

uint8_t rl = compileExprAuto(expr->left, rs);
uint8_t rr = compileExprAuto(expr->right, rs);
Expand Down
47 changes: 39 additions & 8 deletions tests/Compiler.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,17 @@ LUAU_FASTINT(LuauCompileLoopUnrollThresholdMaxBoost)
LUAU_FASTINT(LuauRecursionLimit)
LUAU_FASTFLAG(LuauUserDefinedTypeFunctionsSyntax2)
LUAU_FASTFLAG(LuauCompileVectorTypeInfo)
LUAU_FASTFLAG(LuauCompileOptimizeRevArith)

using namespace Luau;

static std::string compileFunction(const char* source, uint32_t id, int optimizationLevel = 1, bool enableVectors = false)
static std::string compileFunction(const char* source, uint32_t id, int optimizationLevel = 1, int typeInfoLevel = 0, bool enableVectors = false)
{
Luau::BytecodeBuilder bcb;
bcb.setDumpFlags(Luau::BytecodeBuilder::Dump_Code);
Luau::CompileOptions options;
options.optimizationLevel = optimizationLevel;
options.typeInfoLevel = typeInfoLevel;
if (enableVectors)
{
options.vectorLib = "Vector3";
Expand Down Expand Up @@ -4931,32 +4933,32 @@ L0: RETURN R3 -1

TEST_CASE("VectorLiterals")
{
CHECK_EQ("\n" + compileFunction("return Vector3.new(1, 2, 3)", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("return Vector3.new(1, 2, 3)", 0, 2, 0, /*enableVectors*/ true), R"(
LOADK R0 K0 [1, 2, 3]
RETURN R0 1
)");

CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3))", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3))", 0, 2, 0, /*enableVectors*/ true), R"(
GETIMPORT R0 1 [print]
LOADK R1 K2 [1, 2, 3]
CALL R0 1 0
RETURN R0 0
)");

CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3, 4))", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("print(Vector3.new(1, 2, 3, 4))", 0, 2, 0, /*enableVectors*/ true), R"(
GETIMPORT R0 1 [print]
LOADK R1 K2 [1, 2, 3, 4]
CALL R0 1 0
RETURN R0 0
)");

CHECK_EQ("\n" + compileFunction("return Vector3.new(0, 0, 0), Vector3.new(-0, 0, 0)", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("return Vector3.new(0, 0, 0), Vector3.new(-0, 0, 0)", 0, 2, 0, /*enableVectors*/ true), R"(
LOADK R0 K0 [0, 0, 0]
LOADK R1 K1 [-0, 0, 0]
RETURN R0 2
)");

CHECK_EQ("\n" + compileFunction("return type(Vector3.new(0, 0, 0))", 0, 2, /*enableVectors*/ true), R"(
CHECK_EQ("\n" + compileFunction("return type(Vector3.new(0, 0, 0))", 0, 2, 0, /*enableVectors*/ true), R"(
LOADK R0 K0 ['vector']
RETURN R0 1
)");
Expand Down Expand Up @@ -8845,8 +8847,9 @@ RETURN R0 1

TEST_CASE("ArithRevK")
{
// - and / have special optimized form for reverse constants; in the future, + and * will likely get compiled to ADDK/MULK
// other operators are not important enough to optimize reverse constant forms for
ScopedFastFlag sff(FFlag::LuauCompileOptimizeRevArith, true);

// - and / have special optimized form for reverse constants; in absence of type information, we can't optimize other ops
CHECK_EQ(
"\n" + compileFunction0(R"(
local x: number = unknown
Expand All @@ -8867,6 +8870,34 @@ IDIV R6 R7 R0
LOADN R8 2
POW R7 R8 R0
RETURN R1 7
)"
);

// the same code with type information can optimize commutative operators (+ and *) as well
// other operators are not important enough to optimize reverse constant forms for
CHECK_EQ(
"\n" + compileFunction(
R"(
local x: number = unknown
return 2 + x, 2 - x, 2 * x, 2 / x, 2 % x, 2 // x, 2 ^ x
)",
0,
2,
1
),
R"(
GETIMPORT R0 1 [unknown]
ADDK R1 R0 K2 [2]
SUBRK R2 K2 [2] R0
MULK R3 R0 K2 [2]
DIVRK R4 K2 [2] R0
LOADN R6 2
MOD R5 R6 R0
LOADN R7 2
IDIV R6 R7 R0
LOADN R8 2
POW R7 R8 R0
RETURN R1 7
)"
);
}
Expand Down

0 comments on commit b1b21f3

Please sign in to comment.