From 2517da33e80e4b4de3b7bd3a77ee36cc94f88cf9 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Mon, 29 Jul 2024 18:43:07 -0400 Subject: [PATCH 1/8] AllocOpt: Fix stack lowering where object contains both julia and non-julia objects --- src/llvm-alloc-helpers.cpp | 11 ++++++++- src/llvm-alloc-helpers.h | 7 ++++++ src/llvm-alloc-opt.cpp | 11 +++++++++ test/llvmpasses/alloc-opt-bits.ll | 37 +++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 test/llvmpasses/alloc-opt-bits.ll diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index 953ecc1830142..f4ca0bdf3e029 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -88,6 +88,9 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset, memop.isaggr = isa(elty) || isa(elty) || isa(elty); memop.isobjref = hasObjref(elty); auto &field = getField(offset, size, elty); + + field.second.hasbits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); + if (field.second.hasobjref != memop.isobjref) field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits if (!isstore) @@ -198,6 +201,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r auto elty = inst->getType(); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.has_unknown_bits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } else if (!required.use_info.addMemOp(inst, 0, cur.offset, inst->getType(), @@ -289,6 +293,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r auto elty = storev->getType(); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.has_unknown_bits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } else if (!required.use_info.addMemOp(inst, use->getOperandNo(), cur.offset, storev->getType(), @@ -310,10 +315,14 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r } required.use_info.hasload = true; auto storev = isa(inst) ? cast(inst)->getNewValOperand() : cast(inst)->getValOperand(); + Type *elty = storev->getType(); if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(), - cur.offset, storev->getType(), + cur.offset, elty, true, required.DL)) { LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n"); + required.use_info.has_unknown_objref |= hasObjref(elty); + required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); + required.use_info.has_unknown_bits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } required.use_info.refload = true; diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h index 49c3b15332a56..c567d8e218790 100644 --- a/src/llvm-alloc-helpers.h +++ b/src/llvm-alloc-helpers.h @@ -46,6 +46,8 @@ namespace jl_alloc { bool hasaggr:1; bool multiloc:1; bool hasload:1; + // The alloc has a non-julia object at this offset. + bool hasbits:1; llvm::Type *elty; llvm::SmallVector accesses; Field(uint32_t size, llvm::Type *elty) @@ -54,6 +56,7 @@ namespace jl_alloc { hasaggr(false), multiloc(false), hasload(false), + hasbits(false), elty(elty) { } @@ -95,6 +98,9 @@ namespace jl_alloc { // The alloc has an aggregate Julia object reference not in an explicit field. bool has_unknown_objrefaggr:1; + // The alloc has a non-julia object at an unknown offset. + bool has_unknown_bits:1; + void reset() { escaped = false; @@ -110,6 +116,7 @@ namespace jl_alloc { allockind = llvm::AllocFnKind::Unknown; has_unknown_objref = false; has_unknown_objrefaggr = false; + has_unknown_bits = false; uses.clear(); preserves.clear(); memops.clear(); diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index e0cde7206b6b9..0f2b5663b0ba9 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -252,10 +252,12 @@ void Optimizer::optimizeAll() removeAlloc(orig); continue; } + bool has_bits = use_info.has_unknown_bits; bool has_ref = use_info.has_unknown_objref; bool has_refaggr = use_info.has_unknown_objrefaggr; for (auto memop: use_info.memops) { auto &field = memop.second; + has_bits |= field.hasbits; if (field.hasobjref) { has_ref = true; // This can be relaxed a little based on hasload @@ -284,6 +286,15 @@ void Optimizer::optimizeAll() splitOnStack(orig); continue; } + if (has_bits && has_ref) { + REMARK([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) + << "GC allocation could not be split and contained both julia object and non-julia-object data, unable to move to stack " << ore::NV("GC Allocation", orig); + }); + if (use_info.hastypeof) + optimizeTag(orig); + continue; + } REMARK([&](){ return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig) << "GC allocation moved to stack " << ore::NV("GC Allocation", orig); diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll new file mode 100644 index 0000000000000..68d17fec2df0a --- /dev/null +++ b/test/llvmpasses/alloc-opt-bits.ll @@ -0,0 +1,37 @@ +; This file is a part of Julia. License is MIT: https://julialang.org/license + +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE + + +@tag = external addrspace(10) global {} + +@glob = external addrspace(10) global {} + +; Test that the gc_preserve intrinsics are deleted directly. + +; CHECK-LABEL: @ptr_and_bits +; CHECK-NOT: alloca +; CHECK: call noalias ptr addrspace(10) @julia.gc_alloc_obj + +define void @ptr_and_bits(ptr %fptr, i1 %b, i1 %b2, i32 %idx) { + %pgcstack = call ptr @julia.get_pgcstack() + %ptls = call ptr @julia.ptls_states() + %ptls_i8 = bitcast ptr %ptls to ptr + %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 16, ptr addrspace(10) @tag) + + %g0 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 1 + store ptr addrspace(10) @glob, ptr addrspace(10) %g0 + + %g1 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 0 + store i64 7, ptr addrspace(10) %g1 + + %res = load ptr addrspace(10), ptr addrspace(10) %g0 + %res2 = load i64, ptr addrspace(10) %g1 + ret void +} + +declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) + +declare ptr @julia.ptls_states() + +declare ptr @julia.get_pgcstack() From 398f31859628291b0ecd134e6dbc08b783ec5a82 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Wed, 31 Jul 2024 17:02:52 -0400 Subject: [PATCH 2/8] fix test --- test/llvmpasses/alloc-opt-bits.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll index 68d17fec2df0a..e19093f46f815 100644 --- a/test/llvmpasses/alloc-opt-bits.ll +++ b/test/llvmpasses/alloc-opt-bits.ll @@ -1,6 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s @tag = external addrspace(10) global {} From f012c57ef79ace2074784e63bd30481730ad2562 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Thu, 1 Aug 2024 13:36:48 -0400 Subject: [PATCH 3/8] rewrite --- src/llvm-alloc-helpers.cpp | 8 ++++---- src/llvm-alloc-helpers.h | 8 ++++---- src/llvm-alloc-opt.cpp | 10 +++++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index f4ca0bdf3e029..f215281821313 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -89,7 +89,7 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset, memop.isobjref = hasObjref(elty); auto &field = getField(offset, size, elty); - field.second.hasbits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); + field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); if (field.second.hasobjref != memop.isobjref) field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits @@ -201,7 +201,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r auto elty = inst->getType(); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); - required.use_info.has_unknown_bits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); + required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } else if (!required.use_info.addMemOp(inst, 0, cur.offset, inst->getType(), @@ -293,7 +293,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r auto elty = storev->getType(); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); - required.use_info.has_unknown_bits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); + required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } else if (!required.use_info.addMemOp(inst, use->getOperandNo(), cur.offset, storev->getType(), @@ -322,7 +322,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n"); required.use_info.has_unknown_objref |= hasObjref(elty); required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa(elty); - required.use_info.has_unknown_bits |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); + required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); required.use_info.hasunknownmem = true; } required.use_info.refload = true; diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h index c567d8e218790..b235bcca8bafa 100644 --- a/src/llvm-alloc-helpers.h +++ b/src/llvm-alloc-helpers.h @@ -47,7 +47,7 @@ namespace jl_alloc { bool multiloc:1; bool hasload:1; // The alloc has a non-julia object at this offset. - bool hasbits:1; + bool hasunboxed:1; llvm::Type *elty; llvm::SmallVector accesses; Field(uint32_t size, llvm::Type *elty) @@ -56,7 +56,7 @@ namespace jl_alloc { hasaggr(false), multiloc(false), hasload(false), - hasbits(false), + hasunboxed(false), elty(elty) { } @@ -99,7 +99,7 @@ namespace jl_alloc { bool has_unknown_objrefaggr:1; // The alloc has a non-julia object at an unknown offset. - bool has_unknown_bits:1; + bool has_unknown_unboxed:1; void reset() { @@ -116,7 +116,7 @@ namespace jl_alloc { allockind = llvm::AllocFnKind::Unknown; has_unknown_objref = false; has_unknown_objrefaggr = false; - has_unknown_bits = false; + has_unknown_unboxed = false; uses.clear(); preserves.clear(); memops.clear(); diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 0f2b5663b0ba9..91ac12710bd92 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -252,12 +252,12 @@ void Optimizer::optimizeAll() removeAlloc(orig); continue; } - bool has_bits = use_info.has_unknown_bits; + bool has_unboxed = use_info.has_unknown_bits; bool has_ref = use_info.has_unknown_objref; bool has_refaggr = use_info.has_unknown_objrefaggr; for (auto memop: use_info.memops) { auto &field = memop.second; - has_bits |= field.hasbits; + has_unboxed |= field.hasbits; if (field.hasobjref) { has_ref = true; // This can be relaxed a little based on hasload @@ -286,7 +286,11 @@ void Optimizer::optimizeAll() splitOnStack(orig); continue; } - if (has_bits && has_ref) { + // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine + // if all objects are jlvalue_t's. However, if part of the allocation is not a julia object (e.g. it is a { float, jlvaluet }), + // then the moveToStackCAll will create a [2 x jlvaluet] bitcast to { float, jlvaluet }. The float clearly is not a GC + // value and will result in segfaults and other problems. + if (has_unboxed && has_ref) { REMARK([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) << "GC allocation could not be split and contained both julia object and non-julia-object data, unable to move to stack " << ore::NV("GC Allocation", orig); From 4c07af95fc6103a0341752b02c22fa5e87dd2717 Mon Sep 17 00:00:00 2001 From: William Moses Date: Fri, 2 Aug 2024 10:40:19 -0400 Subject: [PATCH 4/8] Update llvm-alloc-helpers.h --- src/llvm-alloc-helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h index b235bcca8bafa..6c1550ffff5e7 100644 --- a/src/llvm-alloc-helpers.h +++ b/src/llvm-alloc-helpers.h @@ -98,7 +98,7 @@ namespace jl_alloc { // The alloc has an aggregate Julia object reference not in an explicit field. bool has_unknown_objrefaggr:1; - // The alloc has a non-julia object at an unknown offset. + // The alloc has an unboxed object at an unknown offset. bool has_unknown_unboxed:1; void reset() From 1eb8b91976ea0b572fb15544da50bb5e210c02d6 Mon Sep 17 00:00:00 2001 From: William Moses Date: Fri, 2 Aug 2024 10:40:52 -0400 Subject: [PATCH 5/8] Update llvm-alloc-helpers.h --- src/llvm-alloc-helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h index 6c1550ffff5e7..20e9132d10b4c 100644 --- a/src/llvm-alloc-helpers.h +++ b/src/llvm-alloc-helpers.h @@ -46,7 +46,7 @@ namespace jl_alloc { bool hasaggr:1; bool multiloc:1; bool hasload:1; - // The alloc has a non-julia object at this offset. + // The alloc has a unboxed object at this offset. bool hasunboxed:1; llvm::Type *elty; llvm::SmallVector accesses; From 45567197e2bd17f8fd5b5221a3ab930a90dd0c1f Mon Sep 17 00:00:00 2001 From: William Moses Date: Fri, 2 Aug 2024 17:18:48 -0400 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Valentin Churavy --- src/llvm-alloc-opt.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 91ac12710bd92..258fb6cf3e4bd 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -287,13 +287,13 @@ void Optimizer::optimizeAll() continue; } // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine - // if all objects are jlvalue_t's. However, if part of the allocation is not a julia object (e.g. it is a { float, jlvaluet }), - // then the moveToStackCAll will create a [2 x jlvaluet] bitcast to { float, jlvaluet }. The float clearly is not a GC - // value and will result in segfaults and other problems. + // if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }), + // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }. + // This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value if (has_unboxed && has_ref) { REMARK([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig) - << "GC allocation could not be split and contained both julia object and non-julia-object data, unable to move to stack " << ore::NV("GC Allocation", orig); + << "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", orig); }); if (use_info.hastypeof) optimizeTag(orig); From 3ed59fdc92bb76f6ad5fcacb4b42fabfaccfeb9d Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sun, 4 Aug 2024 14:37:31 +0200 Subject: [PATCH 7/8] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mosè Giordano --- src/llvm-alloc-helpers.cpp | 1 - src/llvm-alloc-opt.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index f215281821313..9d2fba832839c 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -88,7 +88,6 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset, memop.isaggr = isa(elty) || isa(elty) || isa(elty); memop.isobjref = hasObjref(elty); auto &field = getField(offset, size, elty); - field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa(elty)); if (field.second.hasobjref != memop.isobjref) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 258fb6cf3e4bd..22c4ebaf93d58 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -288,7 +288,7 @@ void Optimizer::optimizeAll() } // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine // if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }), - // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }. + // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }. // This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value if (has_unboxed && has_ref) { REMARK([&]() { From 80b1a2707e69ddd25948bf1dcb7a505ddca0bab6 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Sun, 4 Aug 2024 13:25:43 -0400 Subject: [PATCH 8/8] fixup --- src/llvm-alloc-opt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 22c4ebaf93d58..5984ad55d221c 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -252,12 +252,12 @@ void Optimizer::optimizeAll() removeAlloc(orig); continue; } - bool has_unboxed = use_info.has_unknown_bits; + bool has_unboxed = use_info.has_unknown_unboxed; bool has_ref = use_info.has_unknown_objref; bool has_refaggr = use_info.has_unknown_objrefaggr; for (auto memop: use_info.memops) { auto &field = memop.second; - has_unboxed |= field.hasbits; + has_unboxed |= field.hasunboxed; if (field.hasobjref) { has_ref = true; // This can be relaxed a little based on hasload