Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[late-gc-lowering] null-out GC frame slots for dead objects #52935

Merged
merged 1 commit into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/llvm-gc-interface-passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,10 +353,11 @@ struct LateLowerGCFrame: private JuliaPassContext {
State LocalScan(Function &F);
void ComputeLiveness(State &S);
void ComputeLiveSets(State &S);
SmallVector<int, 0> ColorRoots(const State &S);
std::pair<SmallVector<int, 0>, int> ColorRoots(const State &S);
void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame);
void PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, int PreAssignedColors, Value *GCFrame);
void PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
void PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAssignedColors, State &S, std::map<Value *, std::pair<int, int>>);
void CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified);
bool CleanupIR(Function &F, State *S, bool *CFGModified);
void NoteUseChain(State &S, BBState &BBS, User *TheUser);
Expand Down
36 changes: 29 additions & 7 deletions src/llvm-late-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1820,7 +1820,7 @@ JL_USED_FUNC static void dumpColorAssignments(const State &S, const ArrayRef<int
}
}

SmallVector<int, 0> LateLowerGCFrame::ColorRoots(const State &S) {
std::pair<SmallVector<int, 0>, int> LateLowerGCFrame::ColorRoots(const State &S) {
SmallVector<int, 0> Colors;
Colors.resize(S.MaxPtrNumber + 1, -1);
PEOIterator Ordering(S.Neighbors);
Expand Down Expand Up @@ -1862,7 +1862,7 @@ SmallVector<int, 0> LateLowerGCFrame::ColorRoots(const State &S) {
NewColor += PreAssignedColors;
Colors[ActiveElement] = NewColor;
}
return Colors;
return {Colors, PreAssignedColors};
}

// Size of T is assumed to be `sizeof(void*)`
Expand Down Expand Up @@ -2292,8 +2292,21 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
new StoreInst(Val, slotAddress, InsertBefore);
}

void LateLowerGCFrame::PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot,
ArrayRef<int> Colors, Value *GCFrame,
Instruction *InsertBefore) {
// Get the slot address.
auto slotAddress = CallInst::Create(
getOrDeclare(jl_intrinsics::getGCFrameSlot),
{GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
"gc_slot_addr_" + StringRef(std::to_string(Colors[R] + MinColorRoot)), InsertBefore);
// Reset the slot to NULL.
Value *Val = ConstantPointerNull::get(T_prjlvalue);
new StoreInst(Val, slotAddress, InsertBefore);
}

void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
ArrayRef<int> Colors, Value *GCFrame)
ArrayRef<int> Colors, int PreAssignedColors, Value *GCFrame)
{
for (auto &BB : *S.F) {
const BBState &BBS = S.BBStates[&BB];
Expand All @@ -2306,6 +2319,15 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
for(auto rit = BBS.Safepoints.rbegin();
rit != BBS.Safepoints.rend(); ++rit ) {
const LargeSparseBitVector &NowLive = S.LiveSets[*rit];
// reset slots which are no longer alive
for (int Idx : *LastLive) {
if (Idx >= PreAssignedColors && !HasBitSet(NowLive, Idx)) {
PlaceGCFrameReset(S, Idx, MinColorRoot, Colors, GCFrame,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to compute which slot colors transition? LLVM can merge the stores, but it's a bit of an expensive computation for it to do that.

Copy link
Member

@vtjnash vtjnash Nov 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That might be interesting. Especially since we we would know if skipped slots are already null (and thus can be written with a larger store), but maybe as a later followup. I think our bigger problem right now is how extensively double-rooted everything is due to lack of any stack coloring

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me.

S.ReverseSafepointNumbering[*rit]);
}
}
// store values which are alive in this safepoint but
// haven't been stored in the GC frame before
for (int Idx : NowLive) {
if (!HasBitSet(*LastLive, Idx)) {
PlaceGCFrameStore(S, Idx, MinColorRoot, Colors, GCFrame,
Expand All @@ -2317,7 +2339,7 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
}
}

void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, State &S,
void LateLowerGCFrame::PlaceRootsAndUpdateCalls(ArrayRef<int> Colors, int PreAssignedColors, State &S,
std::map<Value *, std::pair<int, int>>) {
auto F = S.F;
auto T_int32 = Type::getInt32Ty(F->getContext());
Expand Down Expand Up @@ -2439,7 +2461,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
pushGcframe->setArgOperand(1, NRoots);

// Insert GC frame stores
PlaceGCFrameStores(S, AllocaSlot - 2, Colors, gcframe);
PlaceGCFrameStores(S, AllocaSlot - 2, Colors, PreAssignedColors, gcframe);
// Insert GCFrame pops
for (auto &BB : *F) {
if (isa<ReturnInst>(BB.getTerminator())) {
Expand All @@ -2464,9 +2486,9 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {

State S = LocalScan(F);
ComputeLiveness(S);
SmallVector<int, 0> Colors = ColorRoots(S);
auto Colors = ColorRoots(S);
std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
PlaceRootsAndUpdateCalls(Colors, S, CallFrames);
PlaceRootsAndUpdateCalls(Colors.first, Colors.second, S, CallFrames);
CleanupIR(F, &S, CFGModified);
return true;
}
Expand Down
22 changes: 22 additions & 0 deletions test/compiler/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1003,3 +1003,25 @@ end
@test f55768(Vector)
@test f55768(Vector{T} where T)
@test !f55768(Vector{S} where S)

# test that values get rooted correctly over throw
for a in ((@noinline Ref{Int}(2)),
(@noinline Ref{Int}(3)),
5,
(@noinline Ref{Int}(4)),
6)
@test a[] != 0
try
b = (@noinline Ref{Int}(5),
@noinline Ref{Int}(6),
@noinline Ref{Int}(7),
@noinline Ref{Int}(8),
@noinline Ref{Int}(9),
@noinline Ref{Int}(10),
@noinline Ref{Int}(11))
GC.gc(true)
GC.@preserve b throw(a)
catch ex
@test ex === a
end
end
9 changes: 7 additions & 2 deletions test/llvmpasses/returnstwicegc.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; This file is a part of Julia. License is MIT: https://julialang.org/license

; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s --check-prefixes=OPAQUE
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame,FinalLowerGC)' -S %s | FileCheck %s


declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
Expand All @@ -13,7 +13,12 @@ declare void @one_arg_boxed({} addrspace(10)*)
define void @try_catch(i64 %a, i64 %b)
{
; Because of the returns_twice function, we need to keep aboxed live everywhere
; OPAQUE: %gcframe = alloca ptr addrspace(10), i32 4
; CHECK: %gcframe = alloca ptr addrspace(10), i32 4
; CHECK: store ptr addrspace(10) %aboxed, ptr [[slot_0:%.*]],
; CHECK-NOT: store {{.*}} ptr [[slot_0]]
; CHECK: store ptr addrspace(10) %bboxed, ptr {{%.*}}
; CHECK-NOT: store {{.*}} ptr [[slot_0]]

top:
%sigframe = alloca [208 x i8], align 16
%sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0
Expand Down