diff --git a/src/cluster.jl b/src/cluster.jl index d8cc052..2444695 100644 --- a/src/cluster.jl +++ b/src/cluster.jl @@ -92,7 +92,7 @@ mutable struct WorkerConfig end end -@enum WorkerState W_CREATED W_CONNECTED W_TERMINATING W_TERMINATED +@enum WorkerState W_CREATED W_CONNECTED W_TERMINATING W_TERMINATED W_UNKNOWN_STATE mutable struct Worker id::Int msg_lock::Threads.ReentrantLock # Lock for del_msgs, add_msgs, and gcflag diff --git a/src/managers.jl b/src/managers.jl index 57f5859..b2b655a 100644 --- a/src/managers.jl +++ b/src/managers.jl @@ -450,6 +450,11 @@ Launch `np` workers on the local host using the in-built `LocalManager`. Local workers inherit the current package environment (i.e., active project, [`LOAD_PATH`](@ref), and [`DEPOT_PATH`](@ref)) from the main process. +!!! warning + Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize + their global state (such as command-line switches, global variables, new method definitions, and loaded modules) with any + of the other running processes. + **Keyword arguments**: - `restrict::Bool`: if `true` (default) binding is restricted to `127.0.0.1`. - `dir`, `exename`, `exeflags`, `env`, `topology`, `lazy`, `enable_threaded_blas`: same effect diff --git a/src/process_messages.jl b/src/process_messages.jl index e68e05b..b5c60c3 100644 --- a/src/process_messages.jl +++ b/src/process_messages.jl @@ -210,8 +210,7 @@ function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool) handle_msg(msg, header, r_stream, w_stream, version) end catch e - werr = worker_from_id(wpid) - oldstate = werr.state + oldstate = W_UNKNOWN_STATE # Check again as it may have been set in a message handler but not propagated to the calling block above if wpid < 1 @@ -222,6 +221,8 @@ function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool) println(stderr, e, CapturedException(e, catch_backtrace())) println(stderr, "Process($(myid())) - Unknown remote, closing connection.") elseif !(wpid in map_del_wrkr) + werr = worker_from_id(wpid) + oldstate = werr.state set_worker_state(werr, W_TERMINATED) # If unhandleable error occurred talking to pid 1, exit