Skip to content

Commit

Permalink
optimizer: cancel finalizer registration rather than inserting `final…
Browse files Browse the repository at this point in the history
…ize`

This seems to perform well.

```julia
mutable struct AtomicCounter
    @atomic count::Int
end
const counter = AtomicCounter(0)

function withfinalizer(x)
    xs = finalizer(Ref(x)) do obj
        Base.@assume_effects :nothrow :notaskstate
        @atomic counter.count += obj[]
    end
    println(devnull, xs[])
    xs[] = xs[]
    println(devnull, xs[])
    return xs[]
end

@benchmark withfinalizer(100)
```

> master
```
BenchmarkTools.Trial: 10000 samples with 867 evaluations.
 Range (min … max):  135.717 ns …  18.617 μs  ┊ GC (min … max):  0.00% … 47.51%
 Time  (median):     144.992 ns               ┊ GC (median):     0.00%
 Time  (mean ± σ):   192.949 ns ± 687.513 ns  ┊ GC (mean ± σ):  11.57% ±  3.23%

     ▇  ▃█▅▂
  ▁▁▂█▇▂████▇▆▅▄▄▄▅▄▄▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▂
  136 ns           Histogram: frequency by time          201 ns <

 Memory estimate: 208 bytes, allocs estimate: 7.
```

> this commit
```
BenchmarkTools.Trial: 10000 samples with 964 evaluations.
 Range (min … max):   83.506 ns …   3.408 μs  ┊ GC (min … max):  0.00% … 96.85%
 Time  (median):      88.780 ns               ┊ GC (median):     0.00%
 Time  (mean ± σ):   101.797 ns ± 137.067 ns  ┊ GC (mean ± σ):  10.26% ±  7.31%

    ▄█▃▅
  ▄▃████▅▅▆▅▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▂▁▂▂▁▂▂ ▃
  83.5 ns          Histogram: frequency by time          145 ns <

 Memory estimate: 208 bytes, allocs estimate: 7.
```
  • Loading branch information
aviatesk committed Oct 10, 2024
1 parent fecbc3e commit bf08f1e
Show file tree
Hide file tree
Showing 13 changed files with 138 additions and 37 deletions.
2 changes: 1 addition & 1 deletion base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ export
nfields, throw, tuple, ===, isdefined, eval,
# access to globals
getglobal, setglobal!, swapglobal!, modifyglobal!, replaceglobal!, setglobalonce!,
# ifelse, sizeof, finalize # not exported, to avoid conflicting with Base
# ifelse, sizeof # not exported, to avoid conflicting with Base
# type reflection
<:, typeof, isa, typeassert,
# method reflection
Expand Down
4 changes: 2 additions & 2 deletions base/compiler/abstractinterpretation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2279,10 +2279,10 @@ function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any},
finalizer_argvec = Any[argtypes[2], argtypes[3]]
call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)::Future
return Future{CallMeta}(call, interp, sv) do call, interp, sv
return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
return CallMeta(Int, Any, Effects(), FinalizerInfo(call.info, call.effects))
end
end
return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo()))
return Future(CallMeta(Int, Any, Effects(), NoCallInfo()))
end

function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
Expand Down
38 changes: 27 additions & 11 deletions base/compiler/ssair/passes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1626,11 +1626,9 @@ function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int,
insert_bb != 0 || return nothing # verify post-dominator of all uses exists

# Figure out the exact statement where we're going to inline the finalizer.
loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
attach_after = insert_idx !== nothing
flag = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL
finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt]

current_task_ssa = nothing
if !OptimizationParams(inlining.interp).assume_fatal_throw
# Collect all reachable blocks between the finalizer registration and the
# insertion point
Expand All @@ -1655,15 +1653,25 @@ function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int,

# An exception may be thrown between the finalizer registration and the point
# where the object’s lifetime ends (`insert_idx`): In such cases, we can’t
# remove the finalizer registration, but we can still insert a `Core.finalize`
# call at `insert_idx` while leaving the registration intact.
newinst = add_flag(NewInstruction(Expr(:call, GlobalRef(Core, :finalize), finalizer_stmt.args[3]), Nothing), flag)
insert_node!(ir, loc, newinst, attach_after)
return nothing
# remove the finalizer registration, but we can still inline the finalizer
# with inserting `Core._cancel_finalizer` at the end.
# Here, prepare a reference to the current task object that should be passed to
# `Core._cancel_finalizer` and insert it into `Core.finalizer` so that the
# finalizer is added to the ptls of the current task.
current_task_stmt = Expr(:foreigncall, QuoteNode(:jl_get_current_task),
Core.Ref{Core.Task}, Core.svec(), 0, QuoteNode(:ccall))
newinst = NewInstruction(current_task_stmt, Core.Task)
current_task_ssa = insert_node!(ir, finalizer_idx, newinst)
push!(finalizer_stmt.args, current_task_ssa)
break
end
end

argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]]
loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
attach_after = insert_idx !== nothing
flag = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL
alloc_obj = finalizer_stmt.args[3]
argexprs = Any[finalizer_stmt.args[2], alloc_obj]
if length(finalizer_stmt.args) >= 4
inline = finalizer_stmt.args[4]
if inline === nothing
Expand All @@ -1681,8 +1689,16 @@ function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int,
newinst = add_flag(NewInstruction(Expr(:call, argexprs...), Nothing), flag)
insert_node!(ir, loc, newinst, attach_after)
end
# Erase the call to `finalizer`
ir[SSAValue(finalizer_idx)][:stmt] = nothing
cancel_registration = current_task_ssa !== nothing
if cancel_registration
lookup_idx_ssa = SSAValue(finalizer_idx)
finalize_call = Expr(:call, GlobalRef(Core, :_cancel_finalizer), alloc_obj, current_task_ssa, lookup_idx_ssa)
newinst = add_flag(NewInstruction(finalize_call, Nothing), flag)
insert_node!(ir, loc, newinst, attach_after)
else
# Erase the call to `finalizer`
ir[SSAValue(finalizer_idx)][:stmt] = nothing
end
return nothing
end

Expand Down
17 changes: 11 additions & 6 deletions base/compiler/tfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,13 @@ add_tfunc(donotdelete, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->No
end
end
add_tfunc(compilerbarrier, 2, 2, compilerbarrier_tfunc, 5)
add_tfunc(Core.finalizer, 2, 4, @nospecs((𝕃::AbstractLattice, args...)->Nothing), 5)
add_tfunc(Core.finalize, 1, 1, @nospecs((𝕃::AbstractLattice, o)->Nothing), 100)
add_tfunc(Core.finalizer, 2, 5, @nospecs((𝕃::AbstractLattice, args...)->Int), 5)
@nospecs function _cancel_finalizer_tfunc(𝕃::AbstractLattice, o, ct, lookup_idx)
hasintersect(widenconst(ct), Task) || return Bottom
hasintersect(widenconst(lookup_idx), Int) || return Bottom
return Nothing
end
add_tfunc(Core._cancel_finalizer, 3, 3, _cancel_finalizer_tfunc, 5)

@nospecs function compilerbarrier_nothrow(setting, val)
return isa(setting, Const) && contains_is((:type, :const, :conditional), setting.val)
Expand Down Expand Up @@ -2288,12 +2293,12 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
elseif f === donotdelete
return true
elseif f === Core.finalizer
2 <= na <= 4 || return false
2 <= na <= 5 || return false
# `Core.finalizer` does no error checking - that's done in Base.finalizer
return true
elseif f === Core.finalize
na == 2 || return false
return true # `Core.finalize` does no error checking
elseif f === Core._cancel_finalizer
na == 3 || return false
return argtypes[2] Task && argtypes[3] Int
elseif f === Core.compilerbarrier
na == 2 || return false
return compilerbarrier_nothrow(argtypes[1], nothing)
Expand Down
2 changes: 1 addition & 1 deletion base/gcutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ end
Immediately run finalizers registered for object `x`.
"""
finalize(@nospecialize(o)) = Core.finalize(o)
finalize(@nospecialize(o)) = (:jl_finalize_th, Cvoid, (Any, Any,), current_task(), o)

"""
Base.GC
Expand Down
2 changes: 1 addition & 1 deletion src/builtin_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ DECLARE_BUILTIN(_apply_pure);
DECLARE_BUILTIN(_call_in_world);
DECLARE_BUILTIN(_call_in_world_total);
DECLARE_BUILTIN(_call_latest);
DECLARE_BUILTIN(_cancel_finalizer);
DECLARE_BUILTIN(_compute_sparams);
DECLARE_BUILTIN(_expr);
DECLARE_BUILTIN(_svec_ref);
Expand All @@ -37,7 +38,6 @@ DECLARE_BUILTIN(compilerbarrier);
DECLARE_BUILTIN(current_scope);
DECLARE_BUILTIN(donotdelete);
DECLARE_BUILTIN(fieldtype);
DECLARE_BUILTIN(finalize);
DECLARE_BUILTIN(finalizer);
DECLARE_BUILTIN(getfield);
DECLARE_BUILTIN(getglobal);
Expand Down
20 changes: 13 additions & 7 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -2004,16 +2004,22 @@ JL_CALLABLE(jl_f_compilerbarrier)
JL_CALLABLE(jl_f_finalizer)
{
// NOTE the compiler may temporarily insert additional argument for the later inlining pass
JL_NARGS(finalizer, 2, 4);
JL_NARGS(finalizer, 2, 5);
if (nargs == 5 && jl_is_task(args[4]))
// There are cases where the compiler inserts `current_task` as the 5th argument,
// and in such cases, the finalizer is added to the `ptls` of that task.
// This task is later referenced by `_cancel_finalizer`.
return jl_box_long(jl_gc_add_finalizer_(((jl_task_t*)args[4])->ptls, args[1], args[0]));
jl_task_t *ct = jl_current_task;
jl_gc_add_finalizer_(ct->ptls, args[1], args[0]);
return jl_nothing;
return jl_box_long(jl_gc_add_finalizer_(ct->ptls, args[1], args[0]));
}

JL_CALLABLE(jl_f_finalize)
JL_CALLABLE(jl_f__cancel_finalizer)
{
JL_NARGS(finalize, 1, 1);
jl_finalize(args[0]);
JL_NARGS(_cancel_finalizer, 3, 3);
JL_TYPECHK(_cancel_finalizer, task, args[1])
JL_TYPECHK(_cancel_finalizer, long, args[2])
jl_cancel_finalizer((jl_value_t*)args[0], (jl_task_t*)args[1], jl_unbox_long(args[2]));
return jl_nothing;
}

Expand Down Expand Up @@ -2449,7 +2455,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete);
jl_builtin_compilerbarrier = add_builtin_func("compilerbarrier", jl_f_compilerbarrier);
add_builtin_func("finalizer", jl_f_finalizer);
add_builtin_func("finalize", jl_f_finalize);
add_builtin_func("_cancel_finalizer", jl_f__cancel_finalizer);
add_builtin_func("_compute_sparams", jl_f__compute_sparams);
add_builtin_func("_svec_ref", jl_f__svec_ref);
jl_builtin_current_scope = add_builtin_func("current_scope", jl_f_current_scope);
Expand Down
2 changes: 1 addition & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1646,7 +1646,7 @@ static const auto &builtin_func_map() {
{ jl_f_donotdelete_addr, new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
{ jl_f_compilerbarrier_addr, new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} },
{ jl_f_finalizer_addr, new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} },
{ jl_f_finalize_addr, new JuliaFunction<>{XSTR(jl_f_finalize), get_func_sig, get_func_attrs} },
{ jl_f__cancel_finalizer_addr, new JuliaFunction<>{XSTR(jl_f__cancel_finalizer), get_func_sig, get_func_attrs} },
{ jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} },
{ jl_f_current_scope_addr, new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} },
};
Expand Down
45 changes: 42 additions & 3 deletions src/gc-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ void run_finalizer(jl_task_t *ct, void *o, void *ff)
// if `need_sync` is true, the `list` is the `finalizers` list of another
// thread and we need additional synchronizations
static void finalize_object(arraylist_t *list, jl_value_t *o,
arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
{
// The acquire load makes sure that the first `len` objects are valid.
// If `need_sync` is true, all mutations of the content should be limited
Expand Down Expand Up @@ -387,7 +387,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
run_finalizers(ct, 1);
}

void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
size_t jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
{
assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
arraylist_t *a = &ptls->finalizers;
Expand All @@ -413,6 +413,7 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
items[oldlen] = v;
items[oldlen + 1] = f;
jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
return oldlen;
}

JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
Expand Down Expand Up @@ -459,8 +460,9 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
if (ptls2 != NULL) {
finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
}
}
finalize_object(&finalizer_list_marked, o, &copied_list, 0);
if (copied_list.len > 0) {
Expand All @@ -478,6 +480,43 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o)
jl_finalize_th(jl_current_task, o);
}

// Remove the finalizer for `o` from `ct->ptls->finalizers` and cancel that finalizer.
// `lookup_idx` is the index at which the finalizer was registered in `ct->ptls->finalizers`
// by `jl_gc_add_finalizer_` (used for the fast path).
// Note that it is assumed that only a single finalizer exists for `o`.
void jl_cancel_finalizer(jl_value_t *o, jl_task_t *ct, int lookup_idx)
{
arraylist_t *list = &ct->ptls->finalizers;
size_t len = list->len;
void **items = list->items;
if (0 <= lookup_idx && lookup_idx < len) {
void *v = items[lookup_idx];
if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
for (size_t i = lookup_idx + 2; i < len; i += 2) {
items[i-2] = items[i];
items[i-1] = items[i+1];
}
list->len = len - 2;
return;
}
}
// slow path for erasing the finalizer
int finalizer_erased = 0;
for (size_t i = 0; i < len; i += 2) {
void *v = items[i];
if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
for (size_t j = i + 2; j < len; j += 2) {
items[j-2] = items[j];
items[j-1] = items[j+1];
}
list->len = len - 2;
finalizer_erased = 1;
break;
}
}
assert(finalizer_erased);
}

// =========================================================================== //
// Threading
// =========================================================================== //
Expand Down
2 changes: 2 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -2326,6 +2326,8 @@ JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct);
JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))

void jl_cancel_finalizer(jl_value_t *o, jl_task_t *ct, int lookup_idx);

extern JL_DLLIMPORT int jl_task_gcstack_offset;
extern JL_DLLIMPORT int jl_task_ptls_offset;

Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT;
void jl_gc_run_all_finalizers(jl_task_t *ct);
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
size_t jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;

void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT;
Expand Down
2 changes: 1 addition & 1 deletion src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ static const jl_fptr_args_t id_to_fptrs[] = {
&jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type,
&jl_f_opaque_closure_call, &jl_f_donotdelete, &jl_f_compilerbarrier,
&jl_f_getglobal, &jl_f_setglobal, &jl_f_swapglobal, &jl_f_modifyglobal, &jl_f_replaceglobal, &jl_f_setglobalonce,
&jl_f_finalizer, &jl_f_finalize, &jl_f__compute_sparams, &jl_f__svec_ref,
&jl_f_finalizer, &jl_f__cancel_finalizer, &jl_f__compute_sparams, &jl_f__svec_ref,
&jl_f_current_scope,
NULL };

Expand Down
37 changes: 35 additions & 2 deletions test/compiler/inline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1622,7 +1622,7 @@ let src = code_typed1((Int,)) do x
@test count(iscall((src, setfield!)), src.code) == 1
end

# early `finalize` insertion
# finalizer inlining with `Core._cancel_finalizer`
let src = code_typed1((Int,)) do x
xs = finalizer(Ref(x)) do obj
Base.@assume_effects :nothrow :notaskstate
Expand All @@ -1632,9 +1632,42 @@ let src = code_typed1((Int,)) do x
return xs[]
end
@test count(iscall((src, Core.finalizer)), src.code) == 1
@test count(iscall((src, Core.finalize)), src.code) == 1
@test count(iscall((src, Core._cancel_finalizer)), src.code) == 1
end

# validate the runtime implementation
mutable struct AtomicCounter
@atomic count::Int
end
const counter = AtomicCounter(0)
const _throw_or_not = Ref(false)
@noinline throw_or_not() = _throw_or_not[] ? error("") : nothing
function with_finalizer_cancellation(x)
xs = finalizer(Ref(x)) do obj
Base.@assume_effects :nothrow :notaskstate
@atomic counter.count += obj[]
end
throw_or_not()
return xs[] += 1
end
let src = code_typed1(with_finalizer_cancellation, (Int,))
@test count(iscall((src, Core.finalizer)), src.code) == 1
@test count(iscall((src, Core._cancel_finalizer)), src.code) == 1
end

# successful case
with_finalizer_cancellation(0)
@test counter.count == 1

# error case: check if the finalizer is still registered
let prev = GC.enable(false)
_throw_or_not[] = true
try with_finalizer_cancellation(1) catch end
GC.enable(prev)
GC.gc()
end
@test counter.count == 2

# optimize `[push!|pushfirst!](::Vector{Any}, x...)`
@testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!]
@eval begin
Expand Down

0 comments on commit bf08f1e

Please sign in to comment.