Skip to content

Commit

Permalink
wazevo: implements linear reg alloc algorithm (#1829)
Browse files Browse the repository at this point in the history
Signed-off-by: Takeshi Yoneda <[email protected]>
  • Loading branch information
mathetake authored Nov 16, 2023
1 parent 73d6c3b commit 374d6ff
Show file tree
Hide file tree
Showing 34 changed files with 2,209 additions and 3,295 deletions.
1,055 changes: 568 additions & 487 deletions internal/engine/wazevo/backend/backend_test.go

Large diffs are not rendered by default.

20 changes: 6 additions & 14 deletions internal/engine/wazevo/backend/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ func newCompiler(ctx context.Context, mach Machine, builder ssa.Builder) *compil
// Compiler is the backend of wazevo which takes ssa.Builder and Machine,
// use the information there to emit the final machine code.
type Compiler interface {
// SSABuilder returns the ssa.Builder used by this compiler.
SSABuilder() ssa.Builder

// Compile executes the following steps:
// 1. Lower()
// 2. RegAlloc()
Expand Down Expand Up @@ -66,9 +69,6 @@ type Compiler interface {
// Init initializes the internal state of the compiler for the next compilation.
Init()

// ResolveSignature returns the ssa.Signature of the given ssa.SignatureID.
ResolveSignature(id ssa.SignatureID) *ssa.Signature

// AllocateVReg allocates a new virtual register of the given type.
AllocateVReg(typ ssa.Type) regalloc.VReg

Expand Down Expand Up @@ -102,9 +102,6 @@ type Compiler interface {

// Emit4Bytes appends 4 bytes to the buffer. Used during the code emission.
Emit4Bytes(b uint32)

// LoopNestingForestRoots returns the roots of the loop nesting forest.
LoopNestingForestRoots() []ssa.BasicBlock
}

// RelocationInfo represents the relocation information for a call instruction.
Expand Down Expand Up @@ -357,9 +354,9 @@ func (c *compiler) MatchInstrOneOf(def *SSAValueDefinition, opcodes []ssa.Opcode
return ssa.OpcodeInvalid
}

// SSABuilder implements Compiler.SSABuilder.
//
// It exposes the ssa.Builder this compiler was constructed with so that
// backends can query SSA-level information (e.g. signatures, loop nesting)
// directly instead of going through per-method forwarders on Compiler.
func (c *compiler) SSABuilder() ssa.Builder {
return c.ssaBuilder
}

// AddSourceOffsetInfo implements Compiler.AddSourceOffsetInfo.
Expand Down Expand Up @@ -392,8 +389,3 @@ func (c *compiler) Emit4Bytes(b uint32) {
func (c *compiler) Buf() []byte {
return c.buf
}

// LoopNestingForestRoots implements Compiler.LoopNestingForestRoots.
// This is a thin delegate to the underlying ssa.Builder.
func (c *compiler) LoopNestingForestRoots() []ssa.BasicBlock {
	roots := c.ssaBuilder.LoopNestingForestRoots()
	return roots
}
2 changes: 1 addition & 1 deletion internal/engine/wazevo/backend/isa/arm64/abi.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ func (m *machine) lowerCall(si *ssa.Instruction) {
} else {
indirectCalleePtr, sigID, args = si.CallIndirectData()
}
calleeABI := m.getOrCreateABIImpl(m.compiler.ResolveSignature(sigID))
calleeABI := m.getOrCreateABIImpl(m.compiler.SSABuilder().ResolveSignature(sigID))

stackSlotSize := calleeABI.alignedArgResultStackSlotSize()
if m.maxRequiredStackSizeForCalls < stackSlotSize+16 {
Expand Down
129 changes: 76 additions & 53 deletions internal/engine/wazevo/backend/isa/arm64/instr.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,27 @@ type (
instructionKind int
)

// IsCall implements regalloc.Instr IsCall.
// Only the direct-call kind counts; indirect calls are reported by IsIndirectCall.
func (i *instruction) IsCall() bool {
	switch i.kind {
	case call:
		return true
	default:
		return false
	}
}

// IsIndirectCall implements regalloc.Instr IsIndirectCall.
// True only for the register-indirect call kind (callInd).
func (i *instruction) IsIndirectCall() bool {
	switch i.kind {
	case callInd:
		return true
	default:
		return false
	}
}

// IsReturn implements regalloc.Instr IsReturn.
// True only for the function-return instruction kind.
func (i *instruction) IsReturn() bool {
	switch i.kind {
	case ret:
		return true
	default:
		return false
	}
}

type defKind byte

const (
defKindNone defKind = iota + 1
defKindRD
defKindCall
defKindVecRRR
)

var defKinds = [numInstructionKinds]defKind{
Expand Down Expand Up @@ -112,7 +126,8 @@ var defKinds = [numInstructionKinds]defKind{
vecTbl: defKindRD,
vecTbl2: defKindRD,
vecPermute: defKindRD,
vecRRR: defKindVecRRR,
vecRRR: defKindRD,
vecRRRRewrite: defKindNone,
fpuToInt: defKindRD,
intToFpu: defKindRD,
cCmpImm: defKindNone,
Expand All @@ -121,34 +136,30 @@ var defKinds = [numInstructionKinds]defKind{
emitSourceOffsetInfo: defKindNone,
}

// defs returns the list of regalloc.VReg that are defined by the instruction.
// Defs returns the list of regalloc.VReg that are defined by the instruction.
// In order to reduce the number of allocations, the caller can pass the slice to be used.
func (i *instruction) defs(regs []regalloc.VReg) []regalloc.VReg {
func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {
*regs = (*regs)[:0]
switch defKinds[i.kind] {
case defKindNone:
case defKindRD:
regs = append(regs, i.rd.nr())
*regs = append(*regs, i.rd.nr())
case defKindCall:
regs = append(regs, i.abi.retRealRegs...)
case defKindVecRRR:
if vecOp(i.u1) != vecOpBsl {
regs = append(regs, i.rd.nr())
}
*regs = append(*regs, i.abi.retRealRegs...)
default:
panic(fmt.Sprintf("defKind for %v not defined", i))
}
return regs
return *regs
}

func (i *instruction) assignDef(reg regalloc.VReg) {
// AssignDef implements regalloc.Instr AssignDef.
func (i *instruction) AssignDef(reg regalloc.VReg) {
switch defKinds[i.kind] {
case defKindNone:
case defKindRD:
i.rd = i.rd.assignReg(reg)
case defKindCall:
panic("BUG: call instructions shouldn't be assigned")
case defKindVecRRR:
i.rd = i.rd.assignReg(reg)
default:
panic(fmt.Sprintf("defKind for %v not defined", i))
}
Expand All @@ -168,7 +179,7 @@ const (
useKindAMode
useKindRNAMode
useKindCond
useKindVecRRR
useKindVecRRRRewrite
)

var useKinds = [numInstructionKinds]useKind{
Expand Down Expand Up @@ -237,7 +248,8 @@ var useKinds = [numInstructionKinds]useKind{
vecShiftImm: useKindRN,
vecTbl: useKindRNRM,
vecTbl2: useKindRNRN1RM,
vecRRR: useKindVecRRR,
vecRRR: useKindRNRM,
vecRRRRewrite: useKindVecRRRRewrite,
vecPermute: useKindRNRM,
fpuToInt: useKindRN,
intToFpu: useKindRN,
Expand All @@ -247,84 +259,79 @@ var useKinds = [numInstructionKinds]useKind{
emitSourceOffsetInfo: useKindNone,
}

// uses returns the list of regalloc.VReg that are used by the instruction.
// Uses returns the list of regalloc.VReg that are used by the instruction.
// In order to reduce the number of allocations, the caller can pass the slice to be used.
func (i *instruction) uses(regs []regalloc.VReg) []regalloc.VReg {
func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
*regs = (*regs)[:0]
switch useKinds[i.kind] {
case useKindNone:
case useKindRN:
if rn := i.rn.reg(); rn.Valid() {
regs = append(regs, rn)
*regs = append(*regs, rn)
}
case useKindRNRM:
if rn := i.rn.reg(); rn.Valid() {
regs = append(regs, rn)
*regs = append(*regs, rn)
}
if rm := i.rm.reg(); rm.Valid() {
regs = append(regs, rm)
*regs = append(*regs, rm)
}
case useKindRNRMRA:
if rn := i.rn.reg(); rn.Valid() {
regs = append(regs, rn)
*regs = append(*regs, rn)
}
if rm := i.rm.reg(); rm.Valid() {
regs = append(regs, rm)
*regs = append(*regs, rm)
}
if ra := i.ra.reg(); ra.Valid() {
regs = append(regs, ra)
*regs = append(*regs, ra)
}
case useKindRNRN1RM:
if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() {
rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
regs = append(regs, rn, rn1)
*regs = append(*regs, rn, rn1)
}
if rm := i.rm.reg(); rm.Valid() {
regs = append(regs, rm)
*regs = append(*regs, rm)
}
case useKindRet:
regs = append(regs, i.abi.retRealRegs...)
*regs = append(*regs, i.abi.retRealRegs...)
case useKindAMode:
if amodeRN := i.amode.rn; amodeRN.Valid() {
regs = append(regs, amodeRN)
*regs = append(*regs, amodeRN)
}
if amodeRM := i.amode.rm; amodeRM.Valid() {
regs = append(regs, amodeRM)
*regs = append(*regs, amodeRM)
}
case useKindRNAMode:
regs = append(regs, i.rn.reg())
*regs = append(*regs, i.rn.reg())
if amodeRN := i.amode.rn; amodeRN.Valid() {
regs = append(regs, amodeRN)
*regs = append(*regs, amodeRN)
}
if amodeRM := i.amode.rm; amodeRM.Valid() {
regs = append(regs, amodeRM)
*regs = append(*regs, amodeRM)
}
case useKindCond:
cnd := cond(i.u1)
if cnd.kind() != condKindCondFlagSet {
regs = append(regs, cnd.register())
*regs = append(*regs, cnd.register())
}
case useKindCall:
regs = append(regs, i.abi.argRealRegs...)
*regs = append(*regs, i.abi.argRealRegs...)
case useKindCallInd:
regs = append(regs, i.rn.nr())
regs = append(regs, i.abi.argRealRegs...)
case useKindVecRRR:
if rn := i.rn.reg(); rn.Valid() {
regs = append(regs, rn)
}
if rm := i.rm.reg(); rm.Valid() {
regs = append(regs, rm)
}
if vecOp(i.u1) == vecOpBsl {
regs = append(regs, i.rd.reg())
}
*regs = append(*regs, i.rn.nr())
*regs = append(*regs, i.abi.argRealRegs...)
case useKindVecRRRRewrite:
*regs = append(*regs, i.rn.reg())
*regs = append(*regs, i.rm.reg())
*regs = append(*regs, i.rd.reg())
default:
panic(fmt.Sprintf("useKind for %v not defined", i))
}
return regs
return *regs
}

func (i *instruction) assignUse(index int, reg regalloc.VReg) {
func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
switch useKinds[i.kind] {
case useKindNone:
case useKindRN:
Expand All @@ -341,7 +348,7 @@ func (i *instruction) assignUse(index int, reg regalloc.VReg) {
i.rm = i.rm.assignReg(reg)
}
}
case useKindVecRRR:
case useKindVecRRRRewrite:
if index == 0 {
if rn := i.rn.reg(); rn.Valid() {
i.rn = i.rn.assignReg(reg)
Expand Down Expand Up @@ -834,19 +841,21 @@ func (i *instruction) asMove32(rd, rn regalloc.VReg) {
i.rn, i.rd = operandNR(rn), operandNR(rd)
}

func (i *instruction) asMove64(rd, rn regalloc.VReg) {
func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction {
i.kind = mov64
i.rn, i.rd = operandNR(rn), operandNR(rd)
return i
}

// asFpuMov64 encodes this instruction as a 64-bit FPU register move (fpuMov64),
// copying rn into rd.
func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {
	i.kind = fpuMov64
	i.rd = operandNR(rd)
	i.rn = operandNR(rn)
}

func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) {
func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction {
i.kind = fpuMov128
i.rn, i.rd = operandNR(rn), operandNR(rd)
return i
}

func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) {
Expand Down Expand Up @@ -948,7 +957,16 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement)
i.u2 = uint64(arr)
}

func (i *instruction) isCopy() bool {
// asVecRRRRewrite encodes a vector instruction that rewrites (modifies in place)
// the destination register, so rd's prior contents influence the result.
// IMPORTANT: the destination register must be already defined before this instruction.
func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecRRRRewrite
	i.u1, i.u2 = uint64(op), uint64(arr)
	i.rd = rd
	i.rn = rn
	i.rm = rm
}

func (i *instruction) IsCopy() bool {
op := i.kind
// We do not include mov32 as it is not a copy instruction in the sense that it does not preserve the upper 32 bits,
// and it is only used in the translation of IReduce, not the actual copy indeed.
Expand Down Expand Up @@ -1269,7 +1287,7 @@ func (i *instruction) String() (str string) {
)
case vecMiscNarrow:
panic("TODO")
case vecRRR:
case vecRRR, vecRRRRewrite:
str = fmt.Sprintf("%s %s, %s, %s",
vecOp(i.u1),
formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
Expand Down Expand Up @@ -1556,6 +1574,11 @@ const (
vecMiscNarrow
// vecRRR represents a vector ALU operation.
vecRRR
// vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register.
// For example, BSL instruction rewrites the destination register, and the existing value influences the result.
// Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive
// the instruction while this instruction doesn't have "def" in the context of register allocation.
vecRRRRewrite
// vecMisc represents a vector two register miscellaneous instruction.
vecMisc
// vecLanes represents a vector instruction across lanes.
Expand Down
5 changes: 5 additions & 0 deletions internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,11 @@ func (i *instruction) encode(c backend.Compiler) {
i.u3 == 1,
))
case vecRRR:
if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal {
panic(fmt.Sprintf("vecOp %s must use vecRRRRewrite instead of vecRRR", op.String()))
}
fallthrough
case vecRRRRewrite:
c.Emit4Bytes(encodeVecRRR(
vecOp(i.u1),
regNumberInEncoding[i.rd.realReg()],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ func TestInstruction_encode(t *testing.T) {
i.asVecPermute(vecOpZip1, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement2D)
}},
{want: "411ca32e", setup: func(i *instruction) {
i.asVecRRR(vecOpBit, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B)
i.asVecRRRRewrite(vecOpBit, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B)
}},
{want: "411ca36e", setup: func(i *instruction) {
i.asVecRRR(vecOpBit, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B)
i.asVecRRRRewrite(vecOpBit, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B)
}},
{want: "411c236e", setup: func(i *instruction) {
i.asVecRRR(vecOpEOR, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B)
Expand Down Expand Up @@ -1495,10 +1495,10 @@ func TestInstruction_encode(t *testing.T) {
i.asVecRRR(vecOpBic, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B)
}},
{want: "411c632e", setup: func(i *instruction) {
i.asVecRRR(vecOpBsl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B)
i.asVecRRRRewrite(vecOpBsl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement8B)
}},
{want: "411c636e", setup: func(i *instruction) {
i.asVecRRR(vecOpBsl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B)
i.asVecRRRRewrite(vecOpBsl, operandNR(v1VReg), operandNR(v2VReg), operandNR(v3VReg), vecArrangement16B)
}},
{want: "4158202e", setup: func(i *instruction) {
i.asVecMisc(vecOpNot, operandNR(v1VReg), operandNR(v2VReg), vecArrangement8B)
Expand Down
8 changes: 4 additions & 4 deletions internal/engine/wazevo/backend/isa/arm64/instr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ func TestInstruction_String(t *testing.T) {
}

func TestInstruction_isCopy(t *testing.T) {
require.False(t, (&instruction{kind: mov32}).isCopy())
require.True(t, (&instruction{kind: mov64}).isCopy())
require.True(t, (&instruction{kind: fpuMov64}).isCopy())
require.True(t, (&instruction{kind: fpuMov128}).isCopy())
require.False(t, (&instruction{kind: mov32}).IsCopy())
require.True(t, (&instruction{kind: mov64}).IsCopy())
require.True(t, (&instruction{kind: fpuMov64}).IsCopy())
require.True(t, (&instruction{kind: fpuMov128}).IsCopy())
}
Loading

0 comments on commit 374d6ff

Please sign in to comment.