Skip to content

Commit

Permalink
[LCSSA] Handle case with single new PHI faster.
Browse files Browse the repository at this point in the history
If there is only a single available value, all uses must be dominated by
the single value and there is no need to search for a reaching
definition.

This drastically speeds up LCSSA in some cases. For the test case
from PR37202, it makes LCSSA construction about 4 times faster.

Time-passes without this patch for test case from PR37202:

    Total Execution Time: 29.9285 seconds (29.9276 wall clock)

    ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
    5.2786 ( 17.7%)   0.0021 (  1.2%)   5.2806 ( 17.6%)   5.2808 ( 17.6%)  Unswitch loops
    4.3739 ( 14.7%)   0.0303 ( 18.1%)   4.4042 ( 14.7%)   4.4042 ( 14.7%)  Loop-Closed SSA Form Pass
    4.2658 ( 14.3%)   0.0192 ( 11.5%)   4.2850 ( 14.3%)   4.2851 ( 14.3%)  Loop-Closed SSA Form Pass #2
    2.2307 (  7.5%)   0.0013 (  0.8%)   2.2320 (  7.5%)   2.2318 (  7.5%)  Loop Invariant Code Motion
    2.0888 (  7.0%)   0.0012 (  0.7%)   2.0900 (  7.0%)   2.0897 (  7.0%)  Unroll loops
    1.6761 (  5.6%)   0.0013 (  0.8%)   1.6774 (  5.6%)   1.6774 (  5.6%)  Value Propagation
    1.3686 (  4.6%)   0.0029 (  1.8%)   1.3716 (  4.6%)   1.3714 (  4.6%)  Induction Variable Simplification
    1.1457 (  3.8%)   0.0010 (  0.6%)   1.1468 (  3.8%)   1.1468 (  3.8%)  Loop-Closed SSA Form Pass #4
    1.1384 (  3.8%)   0.0005 (  0.3%)   1.1389 (  3.8%)   1.1389 (  3.8%)  Loop-Closed SSA Form Pass #6
    1.1360 (  3.8%)   0.0027 (  1.6%)   1.1387 (  3.8%)   1.1387 (  3.8%)  Loop-Closed SSA Form Pass #5
    1.1331 (  3.8%)   0.0010 (  0.6%)   1.1341 (  3.8%)   1.1340 (  3.8%)  Loop-Closed SSA Form Pass #3

Time-passes with this patch for test case from PR37202:

  Total Execution Time: 19.2802 seconds (19.2813 wall clock)

   ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
   4.4234 ( 23.2%)   0.0038 (  2.0%)   4.4272 ( 23.0%)   4.4273 ( 23.0%)  Unswitch loops
   2.3828 ( 12.5%)   0.0020 (  1.1%)   2.3848 ( 12.4%)   2.3847 ( 12.4%)  Unroll loops
   1.8714 (  9.8%)   0.0020 (  1.1%)   1.8734 (  9.7%)   1.8735 (  9.7%)  Loop Invariant Code Motion
   1.7973 (  9.4%)   0.0022 (  1.2%)   1.7995 (  9.3%)   1.8003 (  9.3%)  Value Propagation
   1.4010 (  7.3%)   0.0033 (  1.8%)   1.4043 (  7.3%)   1.4044 (  7.3%)  Induction Variable Simplification
   0.9978 (  5.2%)   0.0244 ( 13.1%)   1.0222 (  5.3%)   1.0224 (  5.3%)  Loop-Closed SSA Form Pass #2
   0.9611 (  5.0%)   0.0257 ( 13.8%)   0.9868 (  5.1%)   0.9868 (  5.1%)  Loop-Closed SSA Form Pass
   0.5856 (  3.1%)   0.0015 (  0.8%)   0.5871 (  3.0%)   0.5869 (  3.0%)  Unroll loops #2
   0.4132 (  2.2%)   0.0012 (  0.7%)   0.4145 (  2.1%)   0.4143 (  2.1%)  Loop Invariant Code Motion #3

Reviewers: efriedma, davide, mzolotukhin

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D57033

llvm-svn: 352960
  • Loading branch information
fhahn committed Feb 2, 2019
1 parent ffe93a1 commit dd2ef0a
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 35 deletions.
20 changes: 17 additions & 3 deletions llvm/lib/Transforms/Utils/LCSSA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,17 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
continue;
}

// If we added a single PHI, it must dominate all uses and we can directly
// rename it.
if (AddedPHIs.size() == 1) {
// Tell the VHs that the uses changed. This updates SCEV's caches.
// We might call ValueIsRAUWd multiple times for the same value.
if (UseToRewrite->get()->hasValueHandle())
ValueHandleBase::ValueIsRAUWd(*UseToRewrite, AddedPHIs[0]);
UseToRewrite->set(AddedPHIs[0]);
continue;
}

// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UseToRewrite);
}
Expand All @@ -210,9 +221,12 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
BasicBlock *UserBB = DVI->getParent();
if (InstBB == UserBB || L->contains(UserBB))
continue;
// We currently only handle debug values residing in blocks where we have
// inserted a PHI instruction.
if (Value *V = SSAUpdate.FindValueForBlock(UserBB))
// We currently only handle debug values residing in blocks that were
// traversed while rewriting the uses. If we inserted just a single PHI,
// we will handle all relevant debug values.
Value *V = AddedPHIs.size() == 1 ? AddedPHIs[0]
: SSAUpdate.FindValueForBlock(UserBB);
if (V)
DVI->setOperand(0, MetadataAsValue::get(Ctx, ValueAsMetadata::get(V)));
}

Expand Down
132 changes: 100 additions & 32 deletions llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll
Original file line number Diff line number Diff line change
@@ -1,54 +1,118 @@
; RUN: opt -S -lcssa < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Reproducer for PR39019.
;
; Verify that the llvm.dbg.value in the %for.cond.cleanup2 block is rewritten
; to use the PHI node for %add that is created by LCSSA.
; Verify that the llvm.dbg.values are updated to use the PHI nodes inserted by
; LCSSA.

; CHECK-LABEL: for.cond.cleanup2:
; CHECK-NEXT: [[PN:%[^ ]*]] = phi i32 [ %add.lcssa, %for.cond.cleanup1 ]
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR:![0-9]+]], metadata !DIExpression())
; CHECK-NEXT: call void @bar(i32 [[PN]])
; For the test case @single_exit, we can rewrite all llvm.dbg.value calls
; to use the inserted PHI.

; CHECK-LABEL: for.body:
; CHECK-LABEL: @single_exit(

; CHECK-LABEL: inner.body:
; CHECK: %add = add nsw i32 0, 2
; CHECK: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR]], metadata !DIExpression())
; CHECK: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR:![0-9]+]], metadata !DIExpression())

; CHECK: [[VAR]] = !DILocalVariable(name: "sum",

; Function Attrs: nounwind
define void @foo() #0 !dbg !6 {
; CHECK-LABEL: outer.exit:
; CHECK-NEXT: [[PN:%[^ ]*]] = phi i32 [ %add.lcssa, %outer.latch ]
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR]], metadata !DIExpression())
; CHECK-NEXT: call void @bar(i32 [[PN]])

; CHECK-LABEL: exit:
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR]], metadata !DIExpression())

define void @single_exit() !dbg !6 {
entry:
br label %for.cond.preheader, !dbg !12
br label %outer.header, !dbg !12

for.cond.preheader: ; preds = %for.cond.cleanup1, %entry
br label %for.body, !dbg !12
outer.header: ; preds = %outer.latch, %entry
br label %inner.body, !dbg !12

for.cond.cleanup2: ; preds = %for.cond.cleanup1
inner.body: ; preds = %inner.body, %outer.header
%add = add nsw i32 0, 2, !dbg !12
call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
tail call void @bar(i32 %add) #0, !dbg !12
ret void, !dbg !12
br i1 false, label %inner.body, label %inner.exit, !dbg !12

for.cond.cleanup1: ; preds = %for.body
br i1 false, label %for.cond.preheader, label %for.cond.cleanup2, !dbg !12
inner.exit: ; preds = %inner.body
br label %outer.latch

for.body: ; preds = %for.body, %for.cond.preheader
%add = add nsw i32 0, 2, !dbg !12
outer.latch: ; preds = %inner.exit
br i1 false, label %outer.header, label %outer.exit, !dbg !12

outer.exit: ; preds = %outer.latch
call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
tail call void @bar(i32 %add), !dbg !12
br label %exit

exit: ; preds = %outer.exit
call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
br i1 false, label %for.body, label %for.cond.cleanup1, !dbg !12
ret void, !dbg !12
}

; Function Attrs: nounwind
declare void @bar(i32) #0
; For the test case @multi_exit, we cannot update the llvm.dbg.value call in exit,
; because LCSSA did not insert a PHI node in %exit, as there is no non-debug
; use.

; CHECK-LABEL: @multi_exit()

; CHECK-LABEL: for.header:
; CHECK-NEXT: %add = add nsw i32 0, 2
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR2:![0-9]+]], metadata !DIExpression())

; CHECK-LABEL: for.exit1:
; CHECK-NEXT: [[PN1:%[^ ]*]] = phi i32 [ %add, %for.header ]
; CHECK-NEXT: br label %for.exit1.succ

; CHECK-LABEL: for.exit1.succ:
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN1]], metadata [[VAR2]], metadata !DIExpression())
; CHECK-NEXT: call void @bar(i32 [[PN1]])

; CHECK-LABEL: for.exit2:
; CHECK-NEXT: [[PN2:%[^ ]*]] = phi i32 [ %add, %for.latch ]
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN2]], metadata [[VAR2]], metadata !DIExpression())
; CHECK-NEXT: call void @bar(i32 [[PN2]])

; CHECK-LABEL: exit:
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR2]], metadata !DIExpression())

; Test input with a loop that has two exiting blocks: for.header branches to
; for.exit1 and for.latch branches to for.exit2. %add is defined inside the
; loop and is used outside of it by @bar calls and llvm.dbg.value calls in
; for.exit1.succ and for.exit2, and by an llvm.dbg.value call only in the
; common successor block exit.
define void @multi_exit() !dbg !13 {
entry:
br label %for.header, !dbg !14

for.header: ; preds = %for.latch, %entry
; Value defined in the loop whose out-of-loop uses are checked by this test.
%add = add nsw i32 0, 2, !dbg !14
call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !14
br i1 false, label %for.latch, label %for.exit1, !dbg !14

for.latch: ; preds = %for.header
br i1 false, label %for.header, label %for.exit2, !dbg !14

; First exit block. It has no use of %add itself; the CHECK lines for this
; function expect a PHI of %add to appear here.
for.exit1: ; preds = %for.header
br label %for.exit1.succ

; Uses %add (debug and non-debug) one block past the first exit; the CHECK
; lines expect both uses to be rewritten to the PHI from for.exit1.
for.exit1.succ: ; preds = %for.exit1
call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !14
tail call void @bar(i32 %add), !dbg !14
br label %exit

; Second exit block, using %add directly; the CHECK lines expect a PHI here
; and both uses rewritten to it.
for.exit2: ; preds = %for.latch
call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !14
tail call void @bar(i32 %add), !dbg !14
br label %exit

; Join of both exit paths with only a debug use of %add; the CHECK lines expect
; this llvm.dbg.value operand to remain %add.
exit: ; preds = %for.exit2, %for.exit1.succ
call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !14
ret void, !dbg !14
}

; CHECK: [[VAR]] = !DILocalVariable(name: "sum",
; CHECK: [[VAR2]] = !DILocalVariable(name: "sum2",

; Function Attrs: nounwind readnone speculatable
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
declare void @bar(i32)

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
declare void @llvm.dbg.value(metadata, metadata, metadata)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
Expand All @@ -60,10 +124,14 @@ attributes #1 = { nounwind readnone speculatable }
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{!"clang version 8.0.0"}
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 10, type: !7, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !0, retainedNodes: !8)
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 10, type: !7, scopeLine: 10, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
!7 = !DISubroutineType(types: !2)
!8 = !{!9}
!9 = !DILocalVariable(name: "sum", scope: !10, file: !1, line: 11, type: !11)
!10 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 0)
!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!12 = !DILocation(line: 0, scope: !10)
!13 = distinct !DISubprogram(name: "multi_exit", scope: !1, file: !1, line: 10, type: !7, scopeLine: 10, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
!14 = !DILocation(line: 0, scope: !15)
!15 = !DILexicalBlockFile(scope: !13, file: !1, discriminator: 0)
!16 = !DILocalVariable(name: "sum2", scope: !15, file: !1, line: 11, type: !11)

0 comments on commit dd2ef0a

Please sign in to comment.