Skip to content

Commit

Permalink
[X86][SchedModels] Fix missing ReadAdvance for MULX and ADCX/ADOX (PR51494)
Browse files Browse the repository at this point in the history

Before this patch, instructions MULX32rm and MULX64rm were missing a ReadAdvance
for the implicit read of register EDX/RDX.  This patch fixes the issue, and it
also introduces a new SchedWrite for the two variants of MULX. The general idea
behind this last change is to eventually decrease the number of InstRW in the
scheduling models.

This patch also adds a ReadAdvance for the implicit read of EFLAGS in ADCX/ADOX.

Differential Revision: https://reviews.llvm.org/D108372
  • Loading branch information
adibiagio committed Aug 20, 2021
1 parent 5cf5df8 commit 35d4292
Show file tree
Hide file tree
Showing 20 changed files with 137 additions and 144 deletions.
19 changes: 14 additions & 5 deletions llvm/lib/Target/X86/X86InstrArithmetic.td
Original file line number Diff line number Diff line change
Expand Up @@ -1502,8 +1502,12 @@ let hasSideEffects = 0 in {
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),

[]>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
[]>, T8XD, VEX_4V,
Sched<[sched.Folded, WriteIMulH,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Implicit read of EDX/RDX
sched.ReadAfterFold]>;

// Pseudo instructions to be used when the low result isn't used. The
// instruction is defined to keep the high result if both destinations are the
// same.
Expand All @@ -1518,9 +1522,9 @@ let hasSideEffects = 0 in {

let Predicates = [HasBMI2] in {
let Uses = [EDX] in
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, VEX_W;
}

//===----------------------------------------------------------------------===//
Expand All @@ -1547,7 +1551,12 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
} // SchedRW

let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
let mayLoad = 1,
SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
// Memory operand.
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Implicit read of EFLAGS
WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86SchedBroadwell.td
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,11 @@ defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
defm : BWWriteResPair<WriteMULX32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 4, [1,1], 2>;
defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86SchedHaswell.td
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,11 @@ defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
defm : HWWriteResPair<WriteMULX32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 4, [1,1], 2>;
defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86SchedSandyBridge.td
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,11 @@ defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 4, [1,1], 2>;
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86SchedSkylakeClient.td
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,11 @@ defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
defm : SKLWriteResPair<WriteMULX32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86SchedSkylakeServer.td
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,11 @@ defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86Schedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by reg
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
defm WriteMULX32 : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
defm WriteMULX64 : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by MULX).

def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86ScheduleAtom.td
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12]
defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/X86/X86ScheduleBdVer2.td
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,11 @@ defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX

// BMI2 MULX
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86ScheduleBtVer2.td
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,9 @@ defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/X86/X86ScheduleSLM.td
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteResPairUnsupported<WriteMULX32>;
defm : X86WriteResPairUnsupported<WriteMULX64>;

defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
Expand Down
35 changes: 7 additions & 28 deletions llvm/lib/Target/X86/X86ScheduleZnver1.td
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,9 @@ defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;

// IMULH
def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
let Latency = 4;
def : WriteRes<WriteIMulH, [ZnMultiplier]>{
let Latency = 3;
let NumMicroOps = 0;
}

// Floating point operations
Expand Down Expand Up @@ -659,32 +660,10 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
}
def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;

// MULX.
// r32,r32,r32.
def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;

// r32,r32,m32.
def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
let ResourceCycles = [1, 2, 2];
}
def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;

// r64,r64,r64.
def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> {
let Latency = 3;
}
def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;

// r64,r64,m64.
def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
// MULX
// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;

//-- Control transfer instructions --//

Expand Down
33 changes: 6 additions & 27 deletions llvm/lib/Target/X86/X86ScheduleZnver2.td
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;

// IMULH
def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
let Latency = 4;
def : WriteRes<WriteIMulH, [Zn2Multiplier]>{
let Latency = 3;
let NumMicroOps = 0;
}

// Floating point operations
Expand Down Expand Up @@ -658,31 +659,9 @@ def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;

// MULX.
// r32,r32,r32.
def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
let Latency = 3;
let ResourceCycles = [1, 2];
}
def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;

// r32,r32,m32.
def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
let Latency = 7;
let ResourceCycles = [1, 2, 2];
}
def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;

// r64,r64,r64.
def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
let Latency = 3;
}
def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;

// r64,r64,m64.
def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
let Latency = 7;
}
def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
defm : Zn2WriteResPair<WriteMULX32, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
defm : Zn2WriteResPair<WriteMULX64, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;

//-- Control transfer instructions --//

Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/X86/X86ScheduleZnver3.td
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,7 @@ defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 4, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.

def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
let Latency = 4;
Expand All @@ -630,11 +631,14 @@ def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
let ResourceCycles = [1, 1, 2];
let NumMicroOps = Zn3MULX32rr.NumMicroOps;
}
def : InstRW<[Zn3MULX32rm, WriteIMulH], (instrs MULX32rm)>;
def : InstRW<[Zn3MULX32rm, WriteIMulH,
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadAfterLd], (instrs MULX32rm)>;

defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 4, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.

def Zn3MULX64rr : SchedWriteRes<[Zn3Multiplier]> {
let Latency = 4;
Expand All @@ -648,7 +652,9 @@ def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
let ResourceCycles = [1, 1, 2];
let NumMicroOps = Zn3MULX64rr.NumMicroOps;
}
def : InstRW<[Zn3MULX64rm, WriteIMulH], (instrs MULX64rm)>;
def : InstRW<[Zn3MULX64rm, WriteIMulH,
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadAfterLd], (instrs MULX64rm)>;

defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ adox (%rdi), %rcx

# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 17
# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 6

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.35
# CHECK-NEXT: IPC: 0.12
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.17
# CHECK-NEXT: Block RThroughput: 0.8

# CHECK: Instruction Info:
Expand Down Expand Up @@ -55,11 +55,11 @@ adox (%rdi), %rcx
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - adcxq (%rdi), %rcx

# CHECK: Timeline view:
# CHECK-NEXT: 0123456
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeeeeeER. .. adcxq (%rdi), %rcx
# CHECK-NEXT: [1,0] .D======eeeeeeeER adcxq (%rdi), %rcx
# CHECK: [0,0] DeeeeeeeER.. adcxq (%rdi), %rcx
# CHECK-NEXT: [1,0] .D=eeeeeeeER adcxq (%rdi), %rcx

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -68,18 +68,18 @@ adox (%rdi), %rcx
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.0 0.5 0.0 adcxq (%rdi), %rcx
# CHECK-NEXT: 0. 2 1.5 0.5 0.0 adcxq (%rdi), %rcx

# CHECK: [1] Code Region

# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 17
# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 6

# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.35
# CHECK-NEXT: IPC: 0.12
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.17
# CHECK-NEXT: Block RThroughput: 0.8

# CHECK: Instruction Info:
Expand Down Expand Up @@ -114,11 +114,11 @@ adox (%rdi), %rcx
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 0.50 0.50 - adoxq (%rdi), %rcx

# CHECK: Timeline view:
# CHECK-NEXT: 0123456
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeeeeeER. .. adoxq (%rdi), %rcx
# CHECK-NEXT: [1,0] .D======eeeeeeeER adoxq (%rdi), %rcx
# CHECK: [0,0] DeeeeeeeER.. adoxq (%rdi), %rcx
# CHECK-NEXT: [1,0] .D=eeeeeeeER adoxq (%rdi), %rcx

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
Expand All @@ -127,4 +127,4 @@ adox (%rdi), %rcx
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.0 0.5 0.0 adoxq (%rdi), %rcx
# CHECK-NEXT: 0. 2 1.5 0.5 0.0 adoxq (%rdi), %rcx
Loading

0 comments on commit 35d4292

Please sign in to comment.