From 3cc75d0227c3d8a5cf606a0be9bc3c5fd9314ef4 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Sun, 30 Jul 2023 16:01:40 +0200 Subject: [PATCH 01/13] Move landlock out of thread into process; add landlock exceptions --- node/core/pvf/common/src/worker/security.rs | 9 ++- node/core/pvf/execute-worker/src/lib.rs | 78 +++++++++++---------- node/core/pvf/prepare-worker/src/lib.rs | 65 ++++++++++------- 3 files changed, 86 insertions(+), 66 deletions(-) diff --git a/node/core/pvf/common/src/worker/security.rs b/node/core/pvf/common/src/worker/security.rs index 5ba42915238c..49260e942e3c 100644 --- a/node/core/pvf/common/src/worker/security.rs +++ b/node/core/pvf/common/src/worker/security.rs @@ -110,18 +110,21 @@ pub mod landlock { /// Tries to restrict the current thread with the following landlock access controls: /// - /// 1. all global filesystem access - /// 2. ... more may be supported in the future. + /// 1. all global filesystem access restricted, with optional exceptions + /// 2. ... more sandbox types (e.g. networking) may be supported in the future. /// /// If landlock is not supported in the current environment this is simply a noop. /// /// # Returns /// /// The status of the restriction (whether it was fully, partially, or not-at-all enforced). - pub fn try_restrict_thread() -> Result { + pub fn try_restrict_thread( + fs_exceptions: impl Iterator, RulesetError>>, + ) -> Result { let status = Ruleset::new() .handle_access(AccessFs::from_all(LANDLOCK_ABI))? .create()? + .add_rules(fs_exceptions)? .restrict_self()?; Ok(status.ruleset) } diff --git a/node/core/pvf/execute-worker/src/lib.rs b/node/core/pvf/execute-worker/src/lib.rs index b2714b60a6ee..897663c9d26b 100644 --- a/node/core/pvf/execute-worker/src/lib.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -37,7 +37,7 @@ use polkadot_node_core_pvf_common::{ }; use polkadot_parachain::primitives::ValidationResult; use std::{ - path::PathBuf, + path::{Path, PathBuf}, sync::{mpsc::channel, Arc}, time::Duration, }; @@ -121,10 +121,38 @@ async fn send_response(stream: &mut UnixStream, response: Response) -> io::Resul /// `node_version`, if `Some`, is checked against the worker version. A mismatch results in /// immediate worker termination. `None` is used for tests and in other situations when version /// check is not necessary. -pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { +pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>, artifact_dir: &Path) { worker_event_loop("execute", socket_path, node_version, |mut stream| async move { let worker_pid = std::process::id(); + // Try to enable landlock. + { + #[cfg(target_os = "linux")] + let landlock_status = { + use polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread; + + try_restrict_thread(landlock::path_beneath_rules( + &[artifact_dir], + landlock::AccessFs::from_read(abi), + )) + .map(LandlockStatus::from_ruleset_status) + .map_err(|e| e.to_string()) + }; + #[cfg(not(target_os = "linux"))] + let landlock_status: Result = Ok(LandlockStatus::NotEnforced); + + // TODO: return an error? + // Log if landlock threw an error. + if let Err(err) = landlock_status { + gum::warn!( + target: LOG_TARGET, + %worker_pid, + "error enabling landlock: {}", + err + ); + } + } + let handshake = recv_handshake(&mut stream).await?; let executor = Executor::new(handshake.executor_params).map_err(|e| { io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e)) @@ -139,9 +167,11 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { artifact_path.display(), ); + if !artifact_path.starts_with(artifact_dir) { + return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {artifact_path:?} that does not belong to expected artifact dir {artifact_dir:?}"))) + } + // Get the artifact bytes. - // - // We do this outside the thread so that we can lock down filesystem access there. let compiled_artifact_blob = match std::fs::read(artifact_path) { Ok(bytes) => bytes, Err(err) => { @@ -172,22 +202,11 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { let execute_thread = thread::spawn_worker_thread_with_stack_size( "execute thread", move || { - // Try to enable landlock. - #[cfg(target_os = "linux")] - let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread() - .map(LandlockStatus::from_ruleset_status) - .map_err(|e| e.to_string()); - #[cfg(not(target_os = "linux"))] - let landlock_status: Result = Ok(LandlockStatus::NotEnforced); - - ( - validate_using_artifact( - &compiled_artifact_blob, - ¶ms, - executor_2, - cpu_time_start, - ), - landlock_status, + validate_using_artifact( + &compiled_artifact_blob, + ¶ms, + executor_2, + cpu_time_start, ) }, Arc::clone(&condvar), @@ -200,22 +219,9 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { let response = match outcome { WaitOutcome::Finished => { let _ = cpu_time_monitor_tx.send(()); - let (result, landlock_status) = execute_thread.join().unwrap_or_else(|e| { - ( - Response::Panic(stringify_panic_payload(e)), - Ok(LandlockStatus::Unavailable), - ) - }); - - // Log if landlock threw an error. - if let Err(err) = landlock_status { - gum::warn!( - target: LOG_TARGET, - %worker_pid, - "error enabling landlock: {}", - err - ); - } + let result = execute_thread + .join() + .unwrap_or_else(|e| Response::Panic(stringify_panic_payload(e))); result }, diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index 6f3cb18b4280..4a78f07a20ed 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -45,7 +45,7 @@ use polkadot_node_core_pvf_common::{ }; use polkadot_primitives::ExecutorParams; use std::{ - path::PathBuf, + path::{Path, PathBuf}, sync::{mpsc::channel, Arc}, time::Duration, }; @@ -116,10 +116,38 @@ async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Re /// /// 7. Send the result of preparation back to the host. If any error occurred in the above steps, we /// send that in the `PrepareResult`. -pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { +pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>, temp_artifact_dir: &Path) { worker_event_loop("prepare", socket_path, node_version, |mut stream| async move { let worker_pid = std::process::id(); + // Try to enable landlock. + { + #[cfg(target_os = "linux")] + let landlock_status = { + use polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread; + + try_restrict_thread(landlock::path_beneath_rules( + &[temp_artifact_dir], + landlock::AccessFs::from_write(abi), + )) + .map(LandlockStatus::from_ruleset_status) + .map_err(|e| e.to_string()) + }; + #[cfg(not(target_os = "linux"))] + let landlock_status: Result = Ok(LandlockStatus::NotEnforced); + + // TODO: return an error? + // Log if landlock threw an error. + if let Err(err) = landlock_status { + gum::warn!( + target: LOG_TARGET, + %worker_pid, + "error enabling landlock: {}", + err + ); + } + } + loop { let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?; gum::debug!( @@ -128,6 +156,10 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { "worker: preparing artifact", ); + if !temp_artifact_dest.starts_with(temp_artifact_dir) { + return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected artifact dir {temp_artifact_dir:?}"))) + } + let preparation_timeout = pvf.prep_timeout(); let prepare_job_kind = pvf.prep_kind(); let executor_params = (*pvf.executor_params()).clone(); @@ -157,14 +189,6 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { let prepare_thread = thread::spawn_worker_thread( "prepare thread", move || { - // Try to enable landlock. - #[cfg(target_os = "linux")] - let landlock_status = polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread() - .map(LandlockStatus::from_ruleset_status) - .map_err(|e| e.to_string()); - #[cfg(not(target_os = "linux"))] - let landlock_status: Result = Ok(LandlockStatus::NotEnforced); - #[allow(unused_mut)] let mut result = prepare_artifact(pvf, cpu_time_start); @@ -183,7 +207,7 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { }); } - (result, landlock_status) + result }, Arc::clone(&condvar), WaitOutcome::Finished, @@ -196,16 +220,13 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { let _ = cpu_time_monitor_tx.send(()); match prepare_thread.join().unwrap_or_else(|err| { - ( - Err(PrepareError::Panic(stringify_panic_payload(err))), - Ok(LandlockStatus::Unavailable), - ) + Err(PrepareError::Panic(stringify_panic_payload(err))) }) { - (Err(err), _) => { + Err(err) => { // Serialized error will be written into the socket. Err(err) }, - (Ok(ok), landlock_status) => { + Ok(ok) => { #[cfg(not(target_os = "linux"))] let (artifact, cpu_time_elapsed) = ok; #[cfg(target_os = "linux")] @@ -221,16 +242,6 @@ pub fn worker_entrypoint(socket_path: &str, node_version: Option<&str>) { max_rss: extract_max_rss_stat(max_rss, worker_pid), }; - // Log if landlock threw an error. - if let Err(err) = landlock_status { - gum::warn!( - target: LOG_TARGET, - %worker_pid, - "error enabling landlock: {}", - err - ); - } - // Write the serialized artifact into a temp file. // // PVF host only keeps artifacts statuses in its memory, successfully From f34cec479452633830deeead31c26cb105824156 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 4 Aug 2023 11:34:35 +0200 Subject: [PATCH 02/13] Add --cache-path cli param - [X] Add check for whether --socket-path or --cache-path are `None` (both are required) --- node/core/pvf/common/src/worker/mod.rs | 10 +++++++--- node/core/pvf/execute-worker/src/lib.rs | 8 ++++---- node/core/pvf/prepare-worker/src/lib.rs | 8 ++++---- node/core/pvf/src/execute/queue.rs | 8 ++++++++ node/core/pvf/src/execute/worker_intf.rs | 8 +++++++- node/core/pvf/src/host.rs | 1 + node/core/pvf/src/prepare/pool.rs | 13 +++++++++++-- node/core/pvf/src/prepare/worker_intf.rs | 8 +++++++- node/core/pvf/src/testing.rs | 12 +++++++++--- node/core/pvf/src/worker_intf.rs | 2 ++ node/core/pvf/tests/it/worker_common.rs | 20 +++++++++++++++++++- 11 files changed, 79 insertions(+), 19 deletions(-) diff --git a/node/core/pvf/common/src/worker/mod.rs b/node/core/pvf/common/src/worker/mod.rs index 8dd99fc762d8..5cd829b98e2b 100644 --- a/node/core/pvf/common/src/worker/mod.rs +++ b/node/core/pvf/common/src/worker/mod.rs @@ -70,17 +70,21 @@ macro_rules! decl_worker_main { } let mut node_version = None; - let mut socket_path: &str = ""; + let mut socket_path = None; + let mut cache_path = None; for i in (2..args.len()).step_by(2) { match args[i].as_ref() { - "--socket-path" => socket_path = args[i + 1].as_str(), + "--socket-path" => socket_path = Some(args[i + 1].as_str()), "--node-impl-version" => node_version = Some(args[i + 1].as_str()), + "--cache-path" => cache_path = Some(args[i + 1].as_str()), arg => panic!("Unexpected argument found: {}", arg), } } + let socket_path = socket_path.expect("the --socket-path argument is required"); + let cache_path = cache_path.expect("the --cache-path argument is required"); - $entrypoint(&socket_path, node_version, Some($worker_version)); + $entrypoint(&socket_path, node_version, Some($worker_version), cache_path); } }; } diff --git a/node/core/pvf/execute-worker/src/lib.rs b/node/core/pvf/execute-worker/src/lib.rs index f34ee3b93873..2d90445d0955 100644 --- a/node/core/pvf/execute-worker/src/lib.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -125,7 +125,7 @@ pub fn worker_entrypoint( socket_path: &str, node_version: Option<&str>, worker_version: Option<&str>, - artifact_dir: &Path, + cache_path: &Path, ) { worker_event_loop( "execute", @@ -142,7 +142,7 @@ pub fn worker_entrypoint( use polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread; try_restrict_thread(landlock::path_beneath_rules( - &[artifact_dir], + &[cache_path], landlock::AccessFs::from_read(abi), )) .map(LandlockStatus::from_ruleset_status) @@ -177,8 +177,8 @@ pub fn worker_entrypoint( artifact_path.display(), ); - if !artifact_path.starts_with(artifact_dir) { - return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {artifact_path:?} that does not belong to expected artifact dir {artifact_dir:?}"))) + if !artifact_path.starts_with(cache_path) { + return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {artifact_path:?} that does not belong to expected artifact dir {cache_path:?}"))) } // Get the artifact bytes. diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index 6210b9835312..8fbf7d4a16bd 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -120,7 +120,7 @@ pub fn worker_entrypoint( socket_path: &str, node_version: Option<&str>, worker_version: Option<&str>, - temp_artifact_dir: &Path, + cache_path: &Path, ) { worker_event_loop( "prepare", @@ -137,7 +137,7 @@ pub fn worker_entrypoint( use polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread; try_restrict_thread(landlock::path_beneath_rules( - &[temp_artifact_dir], + &[cache_path], landlock::AccessFs::from_write(abi), )) .map(LandlockStatus::from_ruleset_status) @@ -166,8 +166,8 @@ pub fn worker_entrypoint( "worker: preparing artifact", ); - if !temp_artifact_dest.starts_with(temp_artifact_dir) { - return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected artifact dir {temp_artifact_dir:?}"))) + if !temp_artifact_dest.starts_with(cache_path) { + return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}"))) } let preparation_timeout = pvf.prep_timeout(); diff --git a/node/core/pvf/src/execute/queue.rs b/node/core/pvf/src/execute/queue.rs index 33a1c6f89709..cdadfa035429 100644 --- a/node/core/pvf/src/execute/queue.rs +++ b/node/core/pvf/src/execute/queue.rs @@ -141,6 +141,7 @@ struct Queue { program_path: PathBuf, spawn_timeout: Duration, node_version: Option, + cache_path: PathBuf, /// The queue of jobs that are waiting for a worker to pick up. queue: VecDeque, @@ -155,6 +156,7 @@ impl Queue { worker_capacity: usize, spawn_timeout: Duration, node_version: Option, + cache_path: PathBuf, to_queue_rx: mpsc::Receiver, ) -> Self { Self { @@ -162,6 +164,7 @@ impl Queue { program_path, spawn_timeout, node_version, + cache_path, to_queue_rx, queue: VecDeque::new(), mux: Mux::new(), @@ -408,6 +411,7 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) { job, queue.spawn_timeout, queue.node_version.clone(), + queue.cache_path.clone(), ) .boxed(), ); @@ -425,6 +429,7 @@ async fn spawn_worker_task( job: ExecuteJob, spawn_timeout: Duration, node_version: Option, + cache_path: PathBuf, ) -> QueueEvent { use futures_timer::Delay; @@ -434,6 +439,7 @@ async fn spawn_worker_task( job.executor_params.clone(), spawn_timeout, node_version.as_deref(), + &cache_path, ) .await { @@ -498,6 +504,7 @@ pub fn start( worker_capacity: usize, spawn_timeout: Duration, node_version: Option, + cache_path: PathBuf, ) -> (mpsc::Sender, impl Future) { let (to_queue_tx, to_queue_rx) = mpsc::channel(20); let run = Queue::new( @@ -506,6 +513,7 @@ pub fn start( worker_capacity, spawn_timeout, node_version, + cache_path, to_queue_rx, ) .run(); diff --git a/node/core/pvf/src/execute/worker_intf.rs b/node/core/pvf/src/execute/worker_intf.rs index 9d8b61d10447..9759b071a27f 100644 --- a/node/core/pvf/src/execute/worker_intf.rs +++ b/node/core/pvf/src/execute/worker_intf.rs @@ -46,11 +46,17 @@ pub async fn spawn( executor_params: ExecutorParams, spawn_timeout: Duration, node_version: Option<&str>, + cache_path: &Path, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let mut extra_args = vec!["execute-worker"]; + let cache_path = match cache_path.to_str() { + Some(a) => a, + None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())), + }; + let mut extra_args = vec!["execute-worker", "--cache-path", cache_path]; if let Some(node_version) = node_version { extra_args.extend_from_slice(&["--node-impl-version", node_version]); } + let (mut idle_worker, worker_handle) = spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?; send_handshake(&mut idle_worker.stream, Handshake { executor_params }) diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index a5772e34e16e..0afdb5bcb0cc 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -232,6 +232,7 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future { @@ -260,11 +266,14 @@ async fn spawn_worker_task( program_path: PathBuf, spawn_timeout: Duration, node_version: Option, + cache_path: PathBuf, ) -> PoolEvent { use futures_timer::Delay; loop { - match worker_intf::spawn(&program_path, spawn_timeout, node_version.as_deref()).await { + match worker_intf::spawn(&program_path, spawn_timeout, node_version.as_deref(), &cache_path) + .await + { Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle), Err(err) => { gum::warn!(target: LOG_TARGET, "failed to spawn a prepare worker: {:?}", err); diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index d0d9a026dda7..3508d5700f9a 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -46,11 +46,17 @@ pub async fn spawn( program_path: &Path, spawn_timeout: Duration, node_version: Option<&str>, + cache_path: &Path, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let mut extra_args = vec!["prepare-worker"]; + let cache_path = match cache_path.to_str() { + Some(a) => a, + None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())), + }; + let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path]; if let Some(node_version) = node_version { extra_args.extend_from_slice(&["--node-impl-version", node_version]); } + spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await } diff --git a/node/core/pvf/src/testing.rs b/node/core/pvf/src/testing.rs index 3cd1ce304ab8..3fd8cbb18f42 100644 --- a/node/core/pvf/src/testing.rs +++ b/node/core/pvf/src/testing.rs @@ -76,17 +76,23 @@ macro_rules! decl_puppet_worker_main { }; let mut node_version = None; - let mut socket_path: &str = ""; + let mut socket_path = None; + let mut cache_path = None; for i in (2..args.len()).step_by(2) { match args[i].as_ref() { - "--socket-path" => socket_path = args[i + 1].as_str(), + "--socket-path" => socket_path = Some(args[i + 1].as_str()), "--node-impl-version" => node_version = Some(args[i + 1].as_str()), + "--cache-path" => cache_path = Some(args[i + 1].as_str()), arg => panic!("Unexpected argument found: {}", arg), } } + let socket_path = socket_path.expect("the --socket-path argument is required"); + let cache_path = cache_path.expect("the --cache-path argument is required"); - entrypoint(&socket_path, node_version, None); + let cache_path = &std::path::Path::new(cache_path); + + entrypoint(&socket_path, node_version, None, cache_path); } }; } diff --git a/node/core/pvf/src/worker_intf.rs b/node/core/pvf/src/worker_intf.rs index ef5733ec0e6d..b1a448d17149 100644 --- a/node/core/pvf/src/worker_intf.rs +++ b/node/core/pvf/src/worker_intf.rs @@ -194,6 +194,8 @@ pub enum SpawnErr { AcceptTimeout, /// Failed to send handshake after successful spawning was signaled Handshake, + /// Cache path is not a valid UTF-8 str. + InvalidCachePath(PathBuf), } /// This is a representation of a potentially running worker. Drop it and the process will be killed. diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs index 439ac8538c95..2ead666bfa14 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -35,12 +35,30 @@ async fn spawn_timeout() { assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } +#[tokio::test] +async fn should_fail_without_cache_path() { + // --socket-path is handled by `spawn_with_program_path` so we don't pass it here. + let result = spawn_with_program_path( + "integration-test", + PUPPET_EXE, + &["prepare-worker"], + Duration::from_secs(2), + ) + .await; + assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); +} + #[tokio::test] async fn should_connect() { + let socket_path = tempfile::NamedTempFile::new().unwrap(); + let socket_path = socket_path.path().to_str().unwrap(); + let cache_path = tempfile::tempdir().unwrap(); + let cache_path = cache_path.path().to_str().unwrap(); + let _ = spawn_with_program_path( "integration-test", PUPPET_EXE, - &["prepare-worker"], + &["prepare-worker", "--socket-path", socket_path, "--cache-path", cache_path], Duration::from_secs(2), ) .await From d416b57c9359cec269c226c882e245dafb99ad39 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 4 Aug 2023 16:03:48 +0200 Subject: [PATCH 03/13] Finishing touches - [X] Fix some compile errors - [X] Update some docs - [X] Update landlock tests --- node/core/pvf/common/src/worker/security.rs | 112 ++++++++++++++++---- node/core/pvf/execute-worker/src/lib.rs | 22 ++-- node/core/pvf/prepare-worker/src/lib.rs | 22 ++-- 3 files changed, 121 insertions(+), 35 deletions(-) diff --git a/node/core/pvf/common/src/worker/security.rs b/node/core/pvf/common/src/worker/security.rs index cf96b9bbfaa5..b68be39d7eb9 100644 --- a/node/core/pvf/common/src/worker/security.rs +++ b/node/core/pvf/common/src/worker/security.rs @@ -52,14 +52,19 @@ impl LandlockStatus { /// [landlock]: https://docs.rs/landlock/latest/landlock/index.html #[cfg(target_os = "linux")] pub mod landlock { - use landlock::{Access, AccessFs, Ruleset, RulesetAttr, RulesetError, RulesetStatus, ABI}; + pub use landlock::{path_beneath_rules, Access, AccessFs}; + + use landlock::{ + PathBeneath, PathFd, Ruleset, RulesetAttr, RulesetCreatedAttr, RulesetError, RulesetStatus, + ABI, + }; /// Landlock ABI version. We use ABI V1 because: /// /// 1. It is supported by our reference kernel version. /// 2. Later versions do not (yet) provide additional security. /// - /// # Versions (June 2023) + /// # Versions (as of June 2023) /// /// - Polkadot reference kernel version: 5.16+ /// - ABI V1: 5.13 - introduces landlock, including full restrictions on file reads @@ -87,7 +92,7 @@ pub mod landlock { /// Returns to what degree landlock is enabled with the given ABI on the current Linux /// environment. pub fn get_status() -> Result> { - match std::thread::spawn(|| try_restrict_thread()).join() { + match std::thread::spawn(|| try_restrict_thread(std::iter::empty())).join() { Ok(Ok(status)) => Ok(status), Ok(Err(ruleset_err)) => Err(ruleset_err.into()), Err(_err) => Err("a panic occurred in try_restrict_thread".into()), @@ -135,7 +140,7 @@ pub mod landlock { use std::{fs, io::ErrorKind, thread}; #[test] - fn restricted_thread_cannot_access_fs() { + fn restricted_thread_cannot_read_file() { // TODO: This would be nice: . if !check_is_fully_enabled() { return @@ -143,21 +148,51 @@ pub mod landlock { // Restricted thread cannot read from FS. let handle = thread::spawn(|| { - // Write to a tmp file, this should succeed before landlock is applied. - let text = "foo"; - let tmpfile = tempfile::NamedTempFile::new().unwrap(); - let path = tmpfile.path(); - fs::write(path, text).unwrap(); - let s = fs::read_to_string(path).unwrap(); - assert_eq!(s, text); - - let status = try_restrict_thread().unwrap(); + // Create, write, and read two tmp files. This should succeed before any landlock + // restrictions are applied. + const TEXT: &str = "foo"; + let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); + let path1 = tmpfile1.path(); + let tmpfile2 = tempfile::NamedTempFile::new().unwrap(); + let path2 = tmpfile2.path(); + + fs::write(path1, TEXT).unwrap(); + let s = fs::read_to_string(path1).unwrap(); + assert_eq!(s, TEXT); + fs::write(path2, TEXT).unwrap(); + let s = fs::read_to_string(path2).unwrap(); + assert_eq!(s, TEXT); + + // Apply Landlock with a read exception for only one of the files. + let status = try_restrict_thread(path_beneath_rules( + &[path1], + AccessFs::from_read(LANDLOCK_ABI), + )) + .unwrap(); + if !matches!(status, RulesetStatus::FullyEnforced) { + panic!("Ruleset should be enforced since we checked if landlock is enabled"); + } + + // Try to read from both files, only tmpfile1 should succeed. + let result = fs::read_to_string(path1); + assert!(matches!( + result, + Ok(s) if s == TEXT + )); + let result = fs::read_to_string(path2); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); + + // Apply Landlock for all files. + let status = try_restrict_thread(std::iter::empty()).unwrap(); if !matches!(status, RulesetStatus::FullyEnforced) { panic!("Ruleset should be enforced since we checked if landlock is enabled"); } - // Try to read from the tmp file after landlock. - let result = fs::read_to_string(path); + // Try to read from tmpfile1 after landlock, it should fail. + let result = fs::read_to_string(path1); assert!(matches!( result, Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) @@ -165,20 +200,55 @@ pub mod landlock { }); assert!(handle.join().is_ok()); + } + + #[test] + fn restricted_thread_cannot_write_file() { + // TODO: This would be nice: . + if !check_is_fully_enabled() { + return + } // Restricted thread cannot write to FS. let handle = thread::spawn(|| { - let text = "foo"; - let tmpfile = tempfile::NamedTempFile::new().unwrap(); - let path = tmpfile.path(); + // Create and write two tmp files. This should succeed before any landlock + // restrictions are applied. + const TEXT: &str = "foo"; + let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); + let path1 = tmpfile1.path(); + let tmpfile2 = tempfile::NamedTempFile::new().unwrap(); + let path2 = tmpfile2.path(); + + fs::write(path1, TEXT).unwrap(); + fs::write(path2, TEXT).unwrap(); + + // Apply Landlock with a write exception for only one of the files. + let status = try_restrict_thread(path_beneath_rules( + &[path1], + AccessFs::from_write(LANDLOCK_ABI), + )) + .unwrap(); + if !matches!(status, RulesetStatus::FullyEnforced) { + panic!("Ruleset should be enforced since we checked if landlock is enabled"); + } + + // Try to write to both files, only tmpfile1 should succeed. + let result = fs::write(path1, TEXT); + assert!(matches!(result, Ok(_))); + let result = fs::write(path2, TEXT); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); - let status = try_restrict_thread().unwrap(); + // Apply Landlock for all files. + let status = try_restrict_thread(std::iter::empty()).unwrap(); if !matches!(status, RulesetStatus::FullyEnforced) { panic!("Ruleset should be enforced since we checked if landlock is enabled"); } - // Try to write to the tmp file after landlock. - let result = fs::write(path, text); + // Try to write to tmpfile1 after landlock, it should fail. + let result = fs::write(path1, TEXT); assert!(matches!( result, Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) diff --git a/node/core/pvf/execute-worker/src/lib.rs b/node/core/pvf/execute-worker/src/lib.rs index 2d90445d0955..f13a3a69a4cb 100644 --- a/node/core/pvf/execute-worker/src/lib.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -117,10 +117,16 @@ async fn send_response(stream: &mut UnixStream, response: Response) -> io::Resul /// /// # Parameters /// -/// The `socket_path` specifies the path to the socket used to communicate with the host. The -/// `node_version`, if `Some`, is checked against the worker version. A mismatch results in -/// immediate worker termination. `None` is used for tests and in other situations when version -/// check is not necessary. +/// - `socket_path` specifies the path to the socket used to communicate with the host. +/// +/// - `node_version`, if `Some`, is checked against the `worker_version`. A mismatch results in +/// immediate worker termination. `None` is used for tests and in other situations when version +/// check is not necessary. +/// +/// - `worker_version`: see above +/// +/// - `cache_path` contains the expected cache path for artifacts and is used to provide a sandbox +/// exception for landlock. pub fn worker_entrypoint( socket_path: &str, node_version: Option<&str>, @@ -139,11 +145,13 @@ pub fn worker_entrypoint( { #[cfg(target_os = "linux")] let landlock_status = { - use polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread; + use polkadot_node_core_pvf_common::worker::security::landlock::{ + path_beneath_rules, try_restrict_thread, Access, AccessFs, LANDLOCK_ABI, + }; - try_restrict_thread(landlock::path_beneath_rules( + try_restrict_thread(path_beneath_rules( &[cache_path], - landlock::AccessFs::from_read(abi), + AccessFs::from_read(LANDLOCK_ABI), )) .map(LandlockStatus::from_ruleset_status) .map_err(|e| e.to_string()) diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index 8fbf7d4a16bd..8adddb82c907 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -93,10 +93,16 @@ async fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Re /// /// # Parameters /// -/// The `socket_path` specifies the path to the socket used to communicate with the host. The -/// `node_version`, if `Some`, is checked against the worker version. A mismatch results in -/// immediate worker termination. `None` is used for tests and in other situations when version -/// check is not necessary. +/// - `socket_path` specifies the path to the socket used to communicate with the host. +/// +/// - `node_version`, if `Some`, is checked against the `worker_version`. A mismatch results in +/// immediate worker termination. `None` is used for tests and in other situations when version +/// check is not necessary. +/// +/// - `worker_version`: see above +/// +/// - `cache_path` contains the expected cache path for artifacts and is used to provide a sandbox +/// exception for landlock. /// /// # Flow /// @@ -134,11 +140,13 @@ pub fn worker_entrypoint( { #[cfg(target_os = "linux")] let landlock_status = { - use polkadot_node_core_pvf_common::worker::security::landlock::try_restrict_thread; + use polkadot_node_core_pvf_common::worker::security::landlock::{ + path_beneath_rules, try_restrict_thread, Access, AccessFs, LANDLOCK_ABI, + }; - try_restrict_thread(landlock::path_beneath_rules( + try_restrict_thread(path_beneath_rules( &[cache_path], - landlock::AccessFs::from_write(abi), + AccessFs::from_write(LANDLOCK_ABI), )) .map(LandlockStatus::from_ruleset_status) .map_err(|e| e.to_string()) From f1e4381dc9a23cd3c1521c32bffd040b529559cb Mon Sep 17 00:00:00 2001 From: Marcin S Date: Fri, 4 Aug 2023 16:19:53 +0200 Subject: [PATCH 04/13] Remove unneeded argument in test --- node/core/pvf/tests/it/worker_common.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs index 2ead666bfa14..c5934d8f06c6 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -50,15 +50,13 @@ async fn should_fail_without_cache_path() { #[tokio::test] async fn should_connect() { - let socket_path = tempfile::NamedTempFile::new().unwrap(); - let socket_path = socket_path.path().to_str().unwrap(); let cache_path = tempfile::tempdir().unwrap(); let cache_path = cache_path.path().to_str().unwrap(); let _ = spawn_with_program_path( "integration-test", PUPPET_EXE, - &["prepare-worker", "--socket-path", socket_path, "--cache-path", cache_path], + &["prepare-worker", "--cache-path", cache_path], Duration::from_secs(2), ) .await From 49f49822b877c362950b50a07e380d7398efe705 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Sun, 6 Aug 2023 11:13:21 +0200 Subject: [PATCH 05/13] Fix compile error! --- node/core/pvf/common/src/worker/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node/core/pvf/common/src/worker/mod.rs b/node/core/pvf/common/src/worker/mod.rs index 5cd829b98e2b..d5f91a16ef44 100644 --- a/node/core/pvf/common/src/worker/mod.rs +++ b/node/core/pvf/common/src/worker/mod.rs @@ -84,6 +84,8 @@ macro_rules! decl_worker_main { let socket_path = socket_path.expect("the --socket-path argument is required"); let cache_path = cache_path.expect("the --cache-path argument is required"); + let cache_path = &std::path::Path::new(cache_path); + $entrypoint(&socket_path, node_version, Some($worker_version), cache_path); } }; From 4fe842a08a641b81c1c52cdbdf8f1ec3139b6d73 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Mon, 7 Aug 2023 14:18:16 +0200 Subject: [PATCH 06/13] Remove some unnecessary unwraps --- node/core/pvf/common/src/worker/security.rs | 170 ++++++++++---------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/node/core/pvf/common/src/worker/security.rs b/node/core/pvf/common/src/worker/security.rs index b68be39d7eb9..bf5aeab33c35 100644 --- a/node/core/pvf/common/src/worker/security.rs +++ b/node/core/pvf/common/src/worker/security.rs @@ -147,57 +147,57 @@ pub mod landlock { } // Restricted thread cannot read from FS. - let handle = thread::spawn(|| { - // Create, write, and read two tmp files. This should succeed before any landlock - // restrictions are applied. - const TEXT: &str = "foo"; - let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); - let path1 = tmpfile1.path(); - let tmpfile2 = tempfile::NamedTempFile::new().unwrap(); - let path2 = tmpfile2.path(); + let handle = + thread::spawn(|| { + // Create, write, and read two tmp files. This should succeed before any landlock + // restrictions are applied. + const TEXT: &str = "foo"; + let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); + let path1 = tmpfile1.path(); + let tmpfile2 = tempfile::NamedTempFile::new().unwrap(); + let path2 = tmpfile2.path(); - fs::write(path1, TEXT).unwrap(); - let s = fs::read_to_string(path1).unwrap(); - assert_eq!(s, TEXT); - fs::write(path2, TEXT).unwrap(); - let s = fs::read_to_string(path2).unwrap(); - assert_eq!(s, TEXT); + fs::write(path1, TEXT).unwrap(); + let s = fs::read_to_string(path1).unwrap(); + assert_eq!(s, TEXT); + fs::write(path2, TEXT).unwrap(); + let s = fs::read_to_string(path2).unwrap(); + assert_eq!(s, TEXT); - // Apply Landlock with a read exception for only one of the files. - let status = try_restrict_thread(path_beneath_rules( - &[path1], - AccessFs::from_read(LANDLOCK_ABI), - )) - .unwrap(); - if !matches!(status, RulesetStatus::FullyEnforced) { - panic!("Ruleset should be enforced since we checked if landlock is enabled"); - } + // Apply Landlock with a read exception for only one of the files. + let status = try_restrict_thread(path_beneath_rules( + &[path1], + AccessFs::from_read(LANDLOCK_ABI), + )); + if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { + panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); + } - // Try to read from both files, only tmpfile1 should succeed. - let result = fs::read_to_string(path1); - assert!(matches!( - result, - Ok(s) if s == TEXT - )); - let result = fs::read_to_string(path2); - assert!(matches!( - result, - Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) - )); + // Try to read from both files, only tmpfile1 should succeed. + let result = fs::read_to_string(path1); + assert!(matches!( + result, + Ok(s) if s == TEXT + )); + let result = fs::read_to_string(path2); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); - // Apply Landlock for all files. - let status = try_restrict_thread(std::iter::empty()).unwrap(); - if !matches!(status, RulesetStatus::FullyEnforced) { - panic!("Ruleset should be enforced since we checked if landlock is enabled"); - } + // Apply Landlock for all files. + let status = try_restrict_thread(std::iter::empty()); + if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { + panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); + } - // Try to read from tmpfile1 after landlock, it should fail. - let result = fs::read_to_string(path1); - assert!(matches!( - result, - Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) - )); - }); + // Try to read from tmpfile1 after landlock, it should fail. + let result = fs::read_to_string(path1); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); + }); assert!(handle.join().is_ok()); } @@ -210,50 +210,50 @@ pub mod landlock { } // Restricted thread cannot write to FS. - let handle = thread::spawn(|| { - // Create and write two tmp files. This should succeed before any landlock - // restrictions are applied. - const TEXT: &str = "foo"; - let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); - let path1 = tmpfile1.path(); - let tmpfile2 = tempfile::NamedTempFile::new().unwrap(); - let path2 = tmpfile2.path(); + let handle = + thread::spawn(|| { + // Create and write two tmp files. This should succeed before any landlock + // restrictions are applied. + const TEXT: &str = "foo"; + let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); + let path1 = tmpfile1.path(); + let tmpfile2 = tempfile::NamedTempFile::new().unwrap(); + let path2 = tmpfile2.path(); - fs::write(path1, TEXT).unwrap(); - fs::write(path2, TEXT).unwrap(); + fs::write(path1, TEXT).unwrap(); + fs::write(path2, TEXT).unwrap(); - // Apply Landlock with a write exception for only one of the files. - let status = try_restrict_thread(path_beneath_rules( - &[path1], - AccessFs::from_write(LANDLOCK_ABI), - )) - .unwrap(); - if !matches!(status, RulesetStatus::FullyEnforced) { - panic!("Ruleset should be enforced since we checked if landlock is enabled"); - } + // Apply Landlock with a write exception for only one of the files. + let status = try_restrict_thread(path_beneath_rules( + &[path1], + AccessFs::from_write(LANDLOCK_ABI), + )); + if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { + panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); + } - // Try to write to both files, only tmpfile1 should succeed. - let result = fs::write(path1, TEXT); - assert!(matches!(result, Ok(_))); - let result = fs::write(path2, TEXT); - assert!(matches!( - result, - Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) - )); + // Try to write to both files, only tmpfile1 should succeed. + let result = fs::write(path1, TEXT); + assert!(matches!(result, Ok(_))); + let result = fs::write(path2, TEXT); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); - // Apply Landlock for all files. - let status = try_restrict_thread(std::iter::empty()).unwrap(); - if !matches!(status, RulesetStatus::FullyEnforced) { - panic!("Ruleset should be enforced since we checked if landlock is enabled"); - } + // Apply Landlock for all files. + let status = try_restrict_thread(std::iter::empty()); + if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { + panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); + } - // Try to write to tmpfile1 after landlock, it should fail. - let result = fs::write(path1, TEXT); - assert!(matches!( - result, - Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) - )); - }); + // Try to write to tmpfile1 after landlock, it should fail. + let result = fs::write(path1, TEXT); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); + }); assert!(handle.join().is_ok()); } From 6d9c276b557f4fb959c0f49c0a73fd7b10f01308 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Tue, 8 Aug 2023 11:50:03 +0200 Subject: [PATCH 07/13] Add note about `extra_args` --- node/core/pvf/src/worker_intf.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/node/core/pvf/src/worker_intf.rs b/node/core/pvf/src/worker_intf.rs index b1a448d17149..565f5cdfa42f 100644 --- a/node/core/pvf/src/worker_intf.rs +++ b/node/core/pvf/src/worker_intf.rs @@ -39,6 +39,18 @@ use tokio::{ pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; /// This is publicly exposed only for integration tests. +/// +/// # Parameters +/// +/// - `debug_id`: An identifier for the process (e.g. "execute" or "prepare"). +/// +/// - `program_path`: The path to the program. +/// +/// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data +/// required before the handshake, like node/worker versions for the version check. Other data +/// should go through the handshake. +/// +/// - `spawn_timeout`: The amount of time to wait for the child process to spawn. #[doc(hidden)] pub async fn spawn_with_program_path( debug_id: &'static str, From 69d15b454da1935ba637d11e6a017c5867df0fc0 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Sun, 20 Aug 2023 13:23:46 +0200 Subject: [PATCH 08/13] Landlock: disallow listing directories Allow an exception for reading from the artifact cache, but disallow listing the directory contents. Since we prepend artifact names with a random hash, this means attackers can't discover artifacts apart from the current job. --- node/core/pvf/common/src/worker/security.rs | 66 ++++++++++++++++++--- node/core/pvf/execute-worker/src/lib.rs | 9 ++- node/core/pvf/prepare-worker/src/lib.rs | 8 ++- 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/node/core/pvf/common/src/worker/security.rs b/node/core/pvf/common/src/worker/security.rs index bf5aeab33c35..8fe4ff093bff 100644 --- a/node/core/pvf/common/src/worker/security.rs +++ b/node/core/pvf/common/src/worker/security.rs @@ -92,10 +92,10 @@ pub mod landlock { /// Returns to what degree landlock is enabled with the given ABI on the current Linux /// environment. pub fn get_status() -> Result> { - match std::thread::spawn(|| try_restrict_thread(std::iter::empty())).join() { + match std::thread::spawn(|| try_restrict(std::iter::empty())).join() { Ok(Ok(status)) => Ok(status), Ok(Err(ruleset_err)) => Err(ruleset_err.into()), - Err(_err) => Err("a panic occurred in try_restrict_thread".into()), + Err(_err) => Err("a panic occurred in try_restrict".into()), } } @@ -113,7 +113,8 @@ pub mod landlock { status_is_fully_enabled(&get_status()) } - /// Tries to restrict the current thread with the following landlock access controls: + /// Tries to restrict the current thread (should only be called in a process' main thread) with + /// the following landlock access controls: /// /// 1. all global filesystem access restricted, with optional exceptions /// 2. ... more sandbox types (e.g. networking) may be supported in the future. @@ -123,7 +124,7 @@ pub mod landlock { /// # Returns /// /// The status of the restriction (whether it was fully, partially, or not-at-all enforced). - pub fn try_restrict_thread( + pub fn try_restrict( fs_exceptions: impl Iterator, RulesetError>>, ) -> Result { let status = Ruleset::new() @@ -165,7 +166,7 @@ pub mod landlock { assert_eq!(s, TEXT); // Apply Landlock with a read exception for only one of the files. - let status = try_restrict_thread(path_beneath_rules( + let status = try_restrict(path_beneath_rules( &[path1], AccessFs::from_read(LANDLOCK_ABI), )); @@ -186,7 +187,7 @@ pub mod landlock { )); // Apply Landlock for all files. - let status = try_restrict_thread(std::iter::empty()); + let status = try_restrict(std::iter::empty()); if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); } @@ -224,7 +225,7 @@ pub mod landlock { fs::write(path2, TEXT).unwrap(); // Apply Landlock with a write exception for only one of the files. - let status = try_restrict_thread(path_beneath_rules( + let status = try_restrict(path_beneath_rules( &[path1], AccessFs::from_write(LANDLOCK_ABI), )); @@ -242,7 +243,7 @@ pub mod landlock { )); // Apply Landlock for all files. - let status = try_restrict_thread(std::iter::empty()); + let status = try_restrict(std::iter::empty()); if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); } @@ -257,5 +258,54 @@ pub mod landlock { assert!(handle.join().is_ok()); } + + #[test] + fn restricted_thread_can_read_files_but_not_list_dir() { + // TODO: This would be nice: . + if !check_is_fully_enabled() { + return + } + + // Restricted thread can read files but not list directory contents. + let handle = + thread::spawn(|| { + // Create, write to and read a tmp file. This should succeed before any landlock + // restrictions are applied. + const TEXT: &str = "foo"; + let tmpfile = tempfile::NamedTempFile::new().unwrap(); + let filepath = tmpfile.path(); + let dirpath = filepath.parent().unwrap(); + + fs::write(filepath, TEXT).unwrap(); + let s = fs::read_to_string(filepath).unwrap(); + assert_eq!(s, TEXT); + + // Apply Landlock with a general read exception for the directory, *without* the + // `ReadDir` exception. + let status = try_restrict(path_beneath_rules( + &[dirpath], + AccessFs::from_read(LANDLOCK_ABI) ^ AccessFs::ReadDir, + )); + if !matches!(status, Ok(RulesetStatus::FullyEnforced)) { + panic!("Ruleset should be enforced since we checked if landlock is enabled: {:?}", status); + } + + // Try to read file, should still be able to. + let result = fs::read_to_string(filepath); + assert!(matches!( + result, + Ok(s) if s == TEXT + )); + + // Try to list dir contents, should fail. + let result = fs::read_dir(dirpath); + assert!(matches!( + result, + Err(err) if matches!(err.kind(), ErrorKind::PermissionDenied) + )); + }); + + assert!(handle.join().is_ok()); + } } } diff --git a/node/core/pvf/execute-worker/src/lib.rs b/node/core/pvf/execute-worker/src/lib.rs index f13a3a69a4cb..ba4901d0a4b0 100644 --- a/node/core/pvf/execute-worker/src/lib.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -146,12 +146,15 @@ pub fn worker_entrypoint( #[cfg(target_os = "linux")] let landlock_status = { use polkadot_node_core_pvf_common::worker::security::landlock::{ - path_beneath_rules, try_restrict_thread, Access, AccessFs, LANDLOCK_ABI, + path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI, }; - try_restrict_thread(path_beneath_rules( + // Allow an exception for reading from the artifact cache, but disallow listing + // the directory contents. Since we prepend artifact names with a random hash, + // this means attackers can't discover artifacts apart from the current job. + try_restrict(path_beneath_rules( &[cache_path], - AccessFs::from_read(LANDLOCK_ABI), + AccessFs::from_read(LANDLOCK_ABI) ^ AccessFs::ReadDir, )) .map(LandlockStatus::from_ruleset_status) .map_err(|e| e.to_string()) diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index 8adddb82c907..2080430296ce 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -141,10 +141,14 @@ pub fn worker_entrypoint( #[cfg(target_os = "linux")] let landlock_status = { use polkadot_node_core_pvf_common::worker::security::landlock::{ - path_beneath_rules, try_restrict_thread, Access, AccessFs, LANDLOCK_ABI, + path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI, }; - try_restrict_thread(path_beneath_rules( + // Allow an exception for writing to the artifact cache, with no allowance for + // listing the directory contents. Since we prepend artifact names with a random + // hash, this means attackers can't discover artifacts apart from the current + // job. + try_restrict(path_beneath_rules( &[cache_path], AccessFs::from_write(LANDLOCK_ABI), )) From c5879b254ac93c4b199f99a06e650e91822b36bf Mon Sep 17 00:00:00 2001 From: Marcin S Date: Sun, 20 Aug 2023 17:01:15 +0200 Subject: [PATCH 09/13] Only throw an error if landlock is enabled and expected to work We already checked whether landlock is enabled in the host. We can therefore only throw an error here if landlock is enabled and expected to work. Otherwise we shouldn't even log here, as errors are already logged in the host, and is just noise here. --- node/core/pvf/common/src/execute.rs | 2 ++ node/core/pvf/common/src/prepare.rs | 8 ++++++ node/core/pvf/common/src/worker/security.rs | 5 ++-- node/core/pvf/execute-worker/src/lib.rs | 26 +++++++++++------- node/core/pvf/prepare-worker/src/lib.rs | 30 ++++++++++++++++----- node/core/pvf/src/execute/queue.rs | 8 ++++++ node/core/pvf/src/execute/worker_intf.rs | 3 ++- node/core/pvf/src/host.rs | 23 +++++++++++----- node/core/pvf/src/prepare/pool.rs | 19 +++++++++++-- node/core/pvf/src/prepare/worker_intf.rs | 22 +++++++++++++-- 10 files changed, 116 insertions(+), 30 deletions(-) diff --git a/node/core/pvf/common/src/execute.rs b/node/core/pvf/common/src/execute.rs index de5ce39f7838..c988b2123a55 100644 --- a/node/core/pvf/common/src/execute.rs +++ b/node/core/pvf/common/src/execute.rs @@ -26,6 +26,8 @@ use std::time::Duration; pub struct Handshake { /// The executor parameters. pub executor_params: ExecutorParams, + /// Whether the host has determined that landlock is enabled. + pub landlock_enabled: bool, } /// The response from an execution job on the worker. diff --git a/node/core/pvf/common/src/prepare.rs b/node/core/pvf/common/src/prepare.rs index c205eddfb8b1..e3ae1d7f9a46 100644 --- a/node/core/pvf/common/src/prepare.rs +++ b/node/core/pvf/common/src/prepare.rs @@ -55,3 +55,11 @@ pub enum PrepareJobKind { /// A prechecking job. Prechecking, } + +/// The payload of the one-time handshake that is done when a worker process is created. Carries +/// data from the host to the worker. +#[derive(Encode, Decode)] +pub struct Handshake { + /// Whether the host has determined that landlock is enabled. + pub landlock_enabled: bool, +} diff --git a/node/core/pvf/common/src/worker/security.rs b/node/core/pvf/common/src/worker/security.rs index 8fe4ff093bff..36fe07d03aaf 100644 --- a/node/core/pvf/common/src/worker/security.rs +++ b/node/core/pvf/common/src/worker/security.rs @@ -20,6 +20,7 @@ /// To what degree landlock is enabled. It's a separate struct from `RulesetStatus` because that is /// only available on Linux, plus this has a nicer name. +#[derive(Debug)] pub enum LandlockStatus { FullyEnforced, PartiallyEnforced, @@ -150,8 +151,8 @@ pub mod landlock { // Restricted thread cannot read from FS. let handle = thread::spawn(|| { - // Create, write, and read two tmp files. This should succeed before any landlock - // restrictions are applied. + // Create, write, and read two tmp files. This should succeed before any + // landlock restrictions are applied. const TEXT: &str = "foo"; let tmpfile1 = tempfile::NamedTempFile::new().unwrap(); let path1 = tmpfile1.path(); diff --git a/node/core/pvf/execute-worker/src/lib.rs b/node/core/pvf/execute-worker/src/lib.rs index 639911d6b013..75a4f537a75c 100644 --- a/node/core/pvf/execute-worker/src/lib.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -143,6 +143,12 @@ pub fn worker_entrypoint( |mut stream| async move { let worker_pid = std::process::id(); + let Handshake { executor_params, landlock_enabled } = + recv_handshake(&mut stream).await?; + let executor = Executor::new(executor_params).map_err(|e| { + io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e)) + })?; + // Try to enable landlock. { #[cfg(target_os = "linux")] @@ -164,23 +170,23 @@ pub fn worker_entrypoint( #[cfg(not(target_os = "linux"))] let landlock_status: Result = Ok(LandlockStatus::NotEnforced); - // TODO: return an error? - // Log if landlock threw an error. - if let Err(err) = landlock_status { + // Error if the host determined that landlock is fully enabled and we couldn't fully + // enforce it here. + if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced)) + { gum::warn!( target: LOG_TARGET, %worker_pid, - "error enabling landlock: {}", - err + "could not fully enable landlock: {:?}", + landlock_status ); + return Err(io::Error::new( + io::ErrorKind::Other, + format!("could not fully enable landlock: {:?}", landlock_status), + )) } } - let handshake = recv_handshake(&mut stream).await?; - let executor = Executor::new(handshake.executor_params).map_err(|e| { - io::Error::new(io::ErrorKind::Other, format!("cannot create executor: {}", e)) - })?; - loop { let (artifact_path, params, execution_timeout) = recv_request(&mut stream).await?; gum::debug!( diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index bfaae87406c9..a7a707deaf31 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -34,7 +34,7 @@ use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult}, executor_intf::Executor, framed_recv, framed_send, - prepare::{MemoryStats, PrepareJobKind, PrepareStats}, + prepare::{Handshake, MemoryStats, PrepareJobKind, PrepareStats}, pvf::PvfPrepData, worker::{ bytes_to_path, cpu_time_monitor_loop, @@ -69,6 +69,17 @@ impl AsRef<[u8]> for CompiledArtifact { } } +async fn recv_handshake(stream: &mut UnixStream) -> io::Result { + let handshake_enc = framed_recv(stream).await?; + let handshake = Handshake::decode(&mut &handshake_enc[..]).map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + "prepare pvf recv_handshake: failed to decode Handshake".to_owned(), + ) + })?; + Ok(handshake) +} + async fn recv_request(stream: &mut UnixStream) -> io::Result<(PvfPrepData, PathBuf)> { let pvf = framed_recv(stream).await?; let pvf = PvfPrepData::decode(&mut &pvf[..]).map_err(|e| { @@ -138,6 +149,8 @@ pub fn worker_entrypoint( |mut stream| async move { let worker_pid = std::process::id(); + let Handshake { landlock_enabled } = recv_handshake(&mut stream).await?; + // Try to enable landlock. { #[cfg(target_os = "linux")] @@ -160,15 +173,20 @@ pub fn worker_entrypoint( #[cfg(not(target_os = "linux"))] let landlock_status: Result = Ok(LandlockStatus::NotEnforced); - // TODO: return an error? - // Log if landlock threw an error. - if let Err(err) = landlock_status { + // Error if the host determined that landlock is fully enabled and we couldn't fully + // enforce it here. + if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced)) + { gum::warn!( target: LOG_TARGET, %worker_pid, - "error enabling landlock: {}", - err + "could not fully enable landlock: {:?}", + landlock_status ); + return Err(io::Error::new( + io::ErrorKind::Other, + format!("could not fully enable landlock: {:?}", landlock_status), + )) } } diff --git a/node/core/pvf/src/execute/queue.rs b/node/core/pvf/src/execute/queue.rs index 8231dd03e8ed..b6dcac9abf91 100644 --- a/node/core/pvf/src/execute/queue.rs +++ b/node/core/pvf/src/execute/queue.rs @@ -142,6 +142,7 @@ struct Queue { spawn_timeout: Duration, node_version: Option, cache_path: PathBuf, + landlock_enabled: bool, /// The queue of jobs that are waiting for a worker to pick up. queue: VecDeque, @@ -157,6 +158,7 @@ impl Queue { spawn_timeout: Duration, node_version: Option, cache_path: PathBuf, + landlock_enabled: bool, to_queue_rx: mpsc::Receiver, ) -> Self { Self { @@ -165,6 +167,7 @@ impl Queue { spawn_timeout, node_version, cache_path, + landlock_enabled, to_queue_rx, queue: VecDeque::new(), mux: Mux::new(), @@ -412,6 +415,7 @@ fn spawn_extra_worker(queue: &mut Queue, job: ExecuteJob) { queue.spawn_timeout, queue.node_version.clone(), queue.cache_path.clone(), + queue.landlock_enabled, ) .boxed(), ); @@ -431,6 +435,7 @@ async fn spawn_worker_task( spawn_timeout: Duration, node_version: Option, cache_path: PathBuf, + landlock_enabled: bool, ) -> QueueEvent { use futures_timer::Delay; @@ -441,6 +446,7 @@ async fn spawn_worker_task( spawn_timeout, node_version.as_deref(), &cache_path, + landlock_enabled, ) .await { @@ -506,6 +512,7 @@ pub fn start( spawn_timeout: Duration, node_version: Option, cache_path: PathBuf, + landlock_enabled: bool, ) -> (mpsc::Sender, impl Future) { let (to_queue_tx, to_queue_rx) = mpsc::channel(20); let run = Queue::new( @@ -515,6 +522,7 @@ pub fn start( spawn_timeout, node_version, cache_path, + landlock_enabled, to_queue_rx, ) .run(); diff --git a/node/core/pvf/src/execute/worker_intf.rs b/node/core/pvf/src/execute/worker_intf.rs index f18e57033f22..77b7393fa71d 100644 --- a/node/core/pvf/src/execute/worker_intf.rs +++ b/node/core/pvf/src/execute/worker_intf.rs @@ -47,6 +47,7 @@ pub async fn spawn( spawn_timeout: Duration, node_version: Option<&str>, cache_path: &Path, + landlock_enabled: bool, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { let cache_path = match cache_path.to_str() { Some(a) => a, @@ -59,7 +60,7 @@ pub async fn spawn( let (mut idle_worker, worker_handle) = spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?; - send_handshake(&mut idle_worker.stream, Handshake { executor_params }) + send_handshake(&mut idle_worker.stream, Handshake { executor_params, landlock_enabled }) .await .map_err(|error| { gum::warn!( diff --git a/node/core/pvf/src/host.rs b/node/core/pvf/src/host.rs index 73d104ca0caa..0c15ecf14943 100644 --- a/node/core/pvf/src/host.rs +++ b/node/core/pvf/src/host.rs @@ -202,8 +202,8 @@ impl Config { pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future) { gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host"); - // Run checks for supported security features once per host startup. - warn_if_no_landlock(); + // Run checks for supported security features once per host startup. Warn here if not enabled. + let landlock_enabled = check_landlock(); let (to_host_tx, to_host_rx) = mpsc::channel(10); @@ -215,6 +215,7 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future (ValidationHost, impl Future impl futures::Stream } /// Check if landlock is supported and emit a warning if not. -fn warn_if_no_landlock() { +fn check_landlock() -> bool { #[cfg(target_os = "linux")] { use polkadot_node_core_pvf_common::worker::security::landlock; + let status = landlock::get_status(); if !landlock::status_is_fully_enabled(&status) { let abi = landlock::LANDLOCK_ABI as u8; @@ -888,14 +891,20 @@ fn warn_if_no_landlock() { %abi, "Cannot fully enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider upgrading the kernel version for maximum security." ); + false + } else { + true } } #[cfg(not(target_os = "linux"))] - gum::warn!( - target: LOG_TARGET, - "Cannot enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security." - ); + { + gum::warn!( + target: LOG_TARGET, + "Cannot enable landlock, a Linux kernel security feature. Running validation of malicious PVF code has a higher risk of compromising this machine. Consider running on Linux with landlock support for maximum security." + ); + false + } } #[cfg(test)] diff --git a/node/core/pvf/src/prepare/pool.rs b/node/core/pvf/src/prepare/pool.rs index 81a33de4e13c..a3b44642b0a0 100644 --- a/node/core/pvf/src/prepare/pool.rs +++ b/node/core/pvf/src/prepare/pool.rs @@ -110,10 +110,12 @@ enum PoolEvent { type Mux = FuturesUnordered>; struct Pool { + // Some variables related to the current session. program_path: PathBuf, cache_path: PathBuf, spawn_timeout: Duration, node_version: Option, + landlock_enabled: bool, to_pool: mpsc::Receiver, from_pool: mpsc::UnboundedSender, @@ -132,6 +134,7 @@ async fn run( cache_path, spawn_timeout, node_version, + landlock_enabled, to_pool, mut from_pool, mut spawned, @@ -160,6 +163,7 @@ async fn run( &cache_path, spawn_timeout, node_version.clone(), + landlock_enabled, &mut spawned, &mut mux, to_pool, @@ -207,6 +211,7 @@ fn handle_to_pool( cache_path: &Path, spawn_timeout: Duration, node_version: Option, + landlock_enabled: bool, spawned: &mut HopSlotMap, mux: &mut Mux, to_pool: ToPool, @@ -221,6 +226,7 @@ fn handle_to_pool( spawn_timeout, node_version, cache_path.to_owned(), + landlock_enabled, ) .boxed(), ); @@ -267,12 +273,19 @@ async fn spawn_worker_task( spawn_timeout: Duration, node_version: Option, cache_path: PathBuf, + landlock_enabled: bool, ) -> PoolEvent { use futures_timer::Delay; loop { - match worker_intf::spawn(&program_path, spawn_timeout, node_version.as_deref(), &cache_path) - .await + match worker_intf::spawn( + &program_path, + spawn_timeout, + node_version.as_deref(), + &cache_path, + landlock_enabled, + ) + .await { Ok((idle, handle)) => break PoolEvent::Spawn(idle, handle), Err(err) => { @@ -443,6 +456,7 @@ pub fn start( cache_path: PathBuf, spawn_timeout: Duration, node_version: Option, + landlock_enabled: bool, ) -> (mpsc::Sender, mpsc::UnboundedReceiver, impl Future) { let (to_pool_tx, to_pool_rx) = mpsc::channel(10); let (from_pool_tx, from_pool_rx) = mpsc::unbounded(); @@ -453,6 +467,7 @@ pub fn start( cache_path, spawn_timeout, node_version, + landlock_enabled, to_pool: to_pool_rx, from_pool: from_pool_tx, spawned: HopSlotMap::with_capacity_and_key(20), diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index f92934ef78e8..49ee145b6cbc 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -28,7 +28,7 @@ use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult}, framed_recv, framed_send, - prepare::PrepareStats, + prepare::{Handshake, PrepareStats}, pvf::PvfPrepData, }; @@ -47,6 +47,7 @@ pub async fn spawn( spawn_timeout: Duration, node_version: Option<&str>, cache_path: &Path, + landlock_enabled: bool, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { let cache_path = match cache_path.to_str() { Some(a) => a, @@ -57,7 +58,20 @@ pub async fn spawn( extra_args.extend_from_slice(&["--node-impl-version", node_version]); } - spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await + let (mut idle_worker, worker_handle) = + spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await?; + send_handshake(&mut idle_worker.stream, Handshake { landlock_enabled }) + .await + .map_err(|error| { + gum::warn!( + target: LOG_TARGET, + worker_pid = %idle_worker.pid, + ?error, + "failed to send a handshake to the spawned worker", + ); + SpawnErr::Handshake + })?; + Ok((idle_worker, worker_handle)) } pub enum Outcome { @@ -284,6 +298,10 @@ async fn send_request( Ok(()) } +async fn send_handshake(stream: &mut UnixStream, handshake: Handshake) -> io::Result<()> { + framed_send(stream, &handshake.encode()).await +} + async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result { let result = framed_recv(stream).await?; let result = PrepareResult::decode(&mut &result[..]).map_err(|e| { From 99761ebb7476d4151ef49211fc47f3bfe6419a78 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Mon, 21 Aug 2023 10:52:41 +0200 Subject: [PATCH 10/13] Update Cargo.lock --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index be03ec3daa41..ffe11a6db38c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6593,7 +6593,7 @@ dependencies = [ "frame-support", "frame-system", "log", - "parity-scale-codec", + "parity-scale-codec 3.6.4", "scale-info", "sp-arithmetic", "sp-core", From 75ff59483f322d5fffad57a0dd5e1c4651d2af2c Mon Sep 17 00:00:00 2001 From: Marcin S Date: Mon, 21 Aug 2023 15:11:35 +0200 Subject: [PATCH 11/13] Update a docstring --- node/core/pvf/common/src/worker/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/core/pvf/common/src/worker/mod.rs b/node/core/pvf/common/src/worker/mod.rs index dd08bf4f9305..22353c5a8b1a 100644 --- a/node/core/pvf/common/src/worker/mod.rs +++ b/node/core/pvf/common/src/worker/mod.rs @@ -257,7 +257,7 @@ pub mod thread { Arc::new((Mutex::new(WaitOutcome::Pending), Condvar::new())) } - /// Runs a worker thread. Will first enable security features, and afterwards notify the threads + /// Runs a worker thread. Will run the requested function, and afterwards notify the threads /// waiting on the condvar. Catches panics during execution and resumes the panics after /// triggering the condvar, so that the waiting thread is notified on panics. /// From e00d0b40dbc0879e9c644a93e47c8903b2b8ab5d Mon Sep 17 00:00:00 2001 From: Marcin S Date: Tue, 22 Aug 2023 16:34:34 +0200 Subject: [PATCH 12/13] Change root to be the artifact directory. --- node/core/pvf/common/src/worker/mod.rs | 44 ++++++++++++++++++++++++- node/core/pvf/execute-worker/src/lib.rs | 1 + node/core/pvf/prepare-worker/src/lib.rs | 1 + 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/node/core/pvf/common/src/worker/mod.rs b/node/core/pvf/common/src/worker/mod.rs index 22353c5a8b1a..a3f37387dfd1 100644 --- a/node/core/pvf/common/src/worker/mod.rs +++ b/node/core/pvf/common/src/worker/mod.rs @@ -23,7 +23,7 @@ use cpu_time::ProcessTime; use futures::never::Never; use std::{ any::Any, - path::PathBuf, + path::{Path, PathBuf}, sync::mpsc::{Receiver, RecvTimeoutError}, time::Duration, }; @@ -108,6 +108,7 @@ pub fn worker_event_loop( socket_path: &str, node_version: Option<&str>, worker_version: Option<&str>, + cache_path: &Path, mut event_loop: F, ) where F: FnMut(UnixStream) -> Fut, @@ -116,6 +117,47 @@ pub fn worker_event_loop( let worker_pid = std::process::id(); gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id); + // Change root to be the artifact directory. + // + // SAFETY: TODO + #[cfg(target_os = "linux")] + { + use std::ffi::CString; + + let cache_path_c = CString::new(cache_path).unwrap(); + let root_relative_c = CString::new(".").unwrap(); + let oldroot_relative_c = CString::new("./oldroot").unwrap(); + let root_absolute_c = CString::new("/").unwrap(); + let oldroot_absolute_c = CString::new("/oldroot").unwrap(); + + unsafe { + // 1. `unshare` the user and the mount namespaces. + libc::unshare(libc::CLONE_NEWUSER); + libc::unshare(libc::CLONE_NEWNS); + + // 2. `pivot_root` to the artifact directory. + libc::chdir(cache_path_c.as_ptr()); + libc::mount( + root_relative_c.as_ptr(), + root_relative_c.as_ptr(), + std::ptr::null(), // ignored when MS_BIND is used + libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC, + std::ptr::null(), // ignored when MS_BIND is used + ); + libc::mkdir(oldroot_relative_c.as_ptr(), 0755); + libc::syscall( + libc::SYS_pivot_root, + root_relative_c.as_ptr(), + oldroot_relative_c.as_ptr(), + ); + + // 3. Change to the new root, `unmount2` and remove the old root. + libc::chdir(root_absolute_c.as_ptr()); + libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH); + libc::rmdir(oldroot_absolute_c.as_ptr()); + } + } + // Check for a mismatch between the node and worker versions. if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) { if node_version != worker_version { diff --git a/node/core/pvf/execute-worker/src/lib.rs b/node/core/pvf/execute-worker/src/lib.rs index 75a4f537a75c..7c6e3bf5db70 100644 --- a/node/core/pvf/execute-worker/src/lib.rs +++ b/node/core/pvf/execute-worker/src/lib.rs @@ -140,6 +140,7 @@ pub fn worker_entrypoint( socket_path, node_version, worker_version, + cache_path, |mut stream| async move { let worker_pid = std::process::id(); diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index a7a707deaf31..b0780c5fe4bd 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -146,6 +146,7 @@ pub fn worker_entrypoint( socket_path, node_version, worker_version, + cache_path, |mut stream| async move { let worker_pid = std::process::id(); From cd416037f7e4ecdceb7d1f87919419854a975b95 Mon Sep 17 00:00:00 2001 From: Marcin S Date: Wed, 23 Aug 2023 16:47:25 +0200 Subject: [PATCH 13/13] Experiment with pivot_root in child workers This is an attempt at an improved chroot jail that doesn't require root, but still allows us to use sockets and artifacts from the host. --- Cargo.lock | 3 +- node/core/pvf/common/Cargo.toml | 1 + node/core/pvf/common/src/worker/mod.rs | 143 ++++++++++++++++------- node/core/pvf/prepare-worker/src/lib.rs | 84 ++++++------- node/core/pvf/src/execute/worker_intf.rs | 6 +- node/core/pvf/src/prepare/worker_intf.rs | 20 +++- node/core/pvf/src/worker_intf.rs | 37 +++++- node/core/pvf/tests/it/main.rs | 2 +- node/core/pvf/tests/it/worker_common.rs | 10 +- 9 files changed, 206 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 033ed8d58e9f..27c1abaf8935 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6601,7 +6601,7 @@ dependencies = [ "frame-support", "frame-system", "log", - "parity-scale-codec 3.6.4", + "parity-scale-codec", "scale-info", "sp-arithmetic", "sp-core", @@ -7934,6 +7934,7 @@ dependencies = [ "parity-scale-codec", "polkadot-parachain", "polkadot-primitives", + "rand 0.8.5", "sc-executor", "sc-executor-common", "sc-executor-wasmtime", diff --git a/node/core/pvf/common/Cargo.toml b/node/core/pvf/common/Cargo.toml index dfb490455b3d..8de983180851 100644 --- a/node/core/pvf/common/Cargo.toml +++ b/node/core/pvf/common/Cargo.toml @@ -29,6 +29,7 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master [target.'cfg(target_os = "linux")'.dependencies] landlock = "0.2.0" +rand = "0.8.5" [dev-dependencies] assert_matches = "1.4.0" diff --git a/node/core/pvf/common/src/worker/mod.rs b/node/core/pvf/common/src/worker/mod.rs index 08d5fcca4c1c..e40c03dcf7ae 100644 --- a/node/core/pvf/common/src/worker/mod.rs +++ b/node/core/pvf/common/src/worker/mod.rs @@ -117,52 +117,12 @@ pub fn worker_event_loop( let worker_pid = std::process::id(); gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id); - // Change root to be the artifact directory. - // - // SAFETY: TODO - #[cfg(target_os = "linux")] - { - use std::ffi::CString; - - let cache_path_c = CString::new(cache_path).unwrap(); - let root_relative_c = CString::new(".").unwrap(); - let oldroot_relative_c = CString::new("./oldroot").unwrap(); - let root_absolute_c = CString::new("/").unwrap(); - let oldroot_absolute_c = CString::new("/oldroot").unwrap(); - - unsafe { - // 1. `unshare` the user and the mount namespaces. - libc::unshare(libc::CLONE_NEWUSER); - libc::unshare(libc::CLONE_NEWNS); - - // 2. `pivot_root` to the artifact directory. - libc::chdir(cache_path_c.as_ptr()); - libc::mount( - root_relative_c.as_ptr(), - root_relative_c.as_ptr(), - std::ptr::null(), // ignored when MS_BIND is used - libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC, - std::ptr::null(), // ignored when MS_BIND is used - ); - libc::mkdir(oldroot_relative_c.as_ptr(), 0755); - libc::syscall( - libc::SYS_pivot_root, - root_relative_c.as_ptr(), - oldroot_relative_c.as_ptr(), - ); - - // 3. Change to the new root, `unmount2` and remove the old root. - libc::chdir(root_absolute_c.as_ptr()); - libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH); - libc::rmdir(oldroot_absolute_c.as_ptr()); - } - } - // Check for a mismatch between the node and worker versions. if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) { if node_version != worker_version { gum::error!( target: LOG_TARGET, + %debug_id, %worker_pid, %node_version, %worker_version, @@ -175,8 +135,28 @@ pub fn worker_event_loop( } } + #[cfg(target_os = "linux")] + { + if let Err(err_ctx) = change_root(cache_path) { + let err = io::Error::last_os_error(); + gum::error!( + target: LOG_TARGET, + %debug_id, + %worker_pid, + %err_ctx, + ?cache_path, + "Could not change root to be the cache path: {}", + err + ); + worker_shutdown_message(debug_id, worker_pid, err); + return + } + } + remove_env_vars(debug_id); + gum::info!(target: LOG_TARGET, "5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + // Run the main worker loop. let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it."); let err = rt @@ -199,6 +179,87 @@ pub fn worker_event_loop( rt.shutdown_background(); } +/// Change root to be the artifact directory. +#[cfg(target_os = "linux")] +fn change_root(cache_path: &Path) -> Result<(), &'static str> { + use rand::{distributions::Alphanumeric, Rng}; + use std::{ffi::CString, os::unix::ffi::OsStrExt, ptr}; + + const RANDOM_LEN: usize = 10; + let mut buf = Vec::with_capacity(RANDOM_LEN); + buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(RANDOM_LEN)); + let s = std::str::from_utf8(&buf) + .expect("the string is collected from a valid utf-8 sequence; qed"); + + let cache_path_str = match cache_path.to_str() { + Some(s) => s, + None => return Err("cache path is not valid UTF-8") + }; + let cache_path_c = CString::new(cache_path.as_os_str().as_bytes()).unwrap(); + let root_absolute_c = CString::new("/").unwrap(); + // Append a random string to prevent races and to avoid dealing with the dir already existing. + let oldroot_relative_c = CString::new(format!("{}/oldroot-{}", cache_path_str, s)).unwrap(); + let oldroot_absolute_c = CString::new(format!("/oldroot-{}", s)).unwrap(); + + // SAFETY: TODO + unsafe { + // 1. `unshare` the user and the mount namespaces. + if libc::unshare(libc::CLONE_NEWUSER) < 0 { + return Err("unshare user namespace") + } + if libc::unshare(libc::CLONE_NEWNS) < 0 { + return Err("unshare mount namespace") + } + + // 2. `pivot_root` to the artifact directory. + gum::info!(target: LOG_TARGET, "1. {:?}", std::env::current_dir()); + gum::info!(target: LOG_TARGET, "1.5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + // TODO: Ensure that 'new_root' and its parent mount don't have shared propagation. + if libc::mount(ptr::null(), root_absolute_c.as_ptr(), ptr::null(), libc::MS_REC | libc::MS_PRIVATE, ptr::null()) < 0 { + return Err("mount MS_PRIVATE") + } + if libc::mount( + cache_path_c.as_ptr(), + cache_path_c.as_ptr(), + ptr::null(), // ignored when MS_BIND is used + libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC, + ptr::null(), // ignored when MS_BIND is used + ) < 0 + { + return Err("mount MS_BIND") + } + if libc::mkdir(oldroot_relative_c.as_ptr(), 0755) < 0 { + return Err("mkdir oldroot") + } + if libc::syscall( + libc::SYS_pivot_root, + cache_path_c.as_ptr(), + oldroot_relative_c.as_ptr(), + ) < 0 + { + return Err("pivot_root") + } + + // 3. Change to the new root, `unmount2` and remove the old root. + if libc::chdir(root_absolute_c.as_ptr()) < 0 { + return Err("chdir to new root") + } + gum::info!(target: LOG_TARGET, "2. {:?}", std::env::current_dir()); + gum::info!(target: LOG_TARGET, "3. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + if libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH) < 0 { + return Err("umount2 the oldroot") + } + if libc::rmdir(oldroot_absolute_c.as_ptr()) < 0 { + return Err("rmdir the oldroot") + } + gum::info!(target: LOG_TARGET, "4. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + + // TODO: do some assertions + } + + Ok(()) +} + /// Delete all env vars to prevent malicious code from accessing them. fn remove_env_vars(debug_id: &'static str) { for (key, value) in std::env::vars_os() { diff --git a/node/core/pvf/prepare-worker/src/lib.rs b/node/core/pvf/prepare-worker/src/lib.rs index b0780c5fe4bd..8eea18edb449 100644 --- a/node/core/pvf/prepare-worker/src/lib.rs +++ b/node/core/pvf/prepare-worker/src/lib.rs @@ -150,46 +150,50 @@ pub fn worker_entrypoint( |mut stream| async move { let worker_pid = std::process::id(); + gum::info!(target: LOG_TARGET, "10. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + let Handshake { landlock_enabled } = recv_handshake(&mut stream).await?; + gum::info!(target: LOG_TARGET, "11. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::>()); + // Try to enable landlock. - { - #[cfg(target_os = "linux")] - let landlock_status = { - use polkadot_node_core_pvf_common::worker::security::landlock::{ - path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI, - }; - - // Allow an exception for writing to the artifact cache, with no allowance for - // listing the directory contents. Since we prepend artifact names with a random - // hash, this means attackers can't discover artifacts apart from the current - // job. - try_restrict(path_beneath_rules( - &[cache_path], - AccessFs::from_write(LANDLOCK_ABI), - )) - .map(LandlockStatus::from_ruleset_status) - .map_err(|e| e.to_string()) - }; - #[cfg(not(target_os = "linux"))] - let landlock_status: Result = Ok(LandlockStatus::NotEnforced); - - // Error if the host determined that landlock is fully enabled and we couldn't fully - // enforce it here. - if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced)) - { - gum::warn!( - target: LOG_TARGET, - %worker_pid, - "could not fully enable landlock: {:?}", - landlock_status - ); - return Err(io::Error::new( - io::ErrorKind::Other, - format!("could not fully enable landlock: {:?}", landlock_status), - )) - } - } + // { + // #[cfg(target_os = "linux")] + // let landlock_status = { + // use polkadot_node_core_pvf_common::worker::security::landlock::{ + // path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI, + // }; + + // // Allow an exception for writing to the artifact cache, with no allowance for + // // listing the directory contents. Since we prepend artifact names with a random + // // hash, this means attackers can't discover artifacts apart from the current + // // job. + // try_restrict(path_beneath_rules( + // &[cache_path], + // AccessFs::from_write(LANDLOCK_ABI), + // )) + // .map(LandlockStatus::from_ruleset_status) + // .map_err(|e| e.to_string()) + // }; + // #[cfg(not(target_os = "linux"))] + // let landlock_status: Result = Ok(LandlockStatus::NotEnforced); + + // // Error if the host determined that landlock is fully enabled and we couldn't fully + // // enforce it here. + // if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced)) + // { + // gum::warn!( + // target: LOG_TARGET, + // %worker_pid, + // "could not fully enable landlock: {:?}", + // landlock_status + // ); + // return Err(io::Error::new( + // io::ErrorKind::Other, + // format!("could not fully enable landlock: {:?}", landlock_status), + // )) + // } + // } loop { let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?; @@ -199,9 +203,9 @@ pub fn worker_entrypoint( "worker: preparing artifact", ); - if !temp_artifact_dest.starts_with(cache_path) { - return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}"))) - } + // if !temp_artifact_dest.starts_with(cache_path) { + // return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}"))) + // } let preparation_timeout = pvf.prep_timeout(); let prepare_job_kind = pvf.prep_kind(); diff --git a/node/core/pvf/src/execute/worker_intf.rs b/node/core/pvf/src/execute/worker_intf.rs index 77b7393fa71d..6e39c6a3eb9e 100644 --- a/node/core/pvf/src/execute/worker_intf.rs +++ b/node/core/pvf/src/execute/worker_intf.rs @@ -49,17 +49,17 @@ pub async fn spawn( cache_path: &Path, landlock_enabled: bool, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let cache_path = match cache_path.to_str() { + let cache_path_str = match cache_path.to_str() { Some(a) => a, None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())), }; - let mut extra_args = vec!["execute-worker", "--cache-path", cache_path]; + let mut extra_args = vec!["execute-worker", "--cache-path", cache_path_str]; if let Some(node_version) = node_version { extra_args.extend_from_slice(&["--node-impl-version", node_version]); } let (mut idle_worker, worker_handle) = - spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?; + spawn_with_program_path("execute", program_path, Some(cache_path), &extra_args, spawn_timeout).await?; send_handshake(&mut idle_worker.stream, Handshake { executor_params, landlock_enabled }) .await .map_err(|error| { diff --git a/node/core/pvf/src/prepare/worker_intf.rs b/node/core/pvf/src/prepare/worker_intf.rs index 49ee145b6cbc..52fb8b75a1f5 100644 --- a/node/core/pvf/src/prepare/worker_intf.rs +++ b/node/core/pvf/src/prepare/worker_intf.rs @@ -49,17 +49,23 @@ pub async fn spawn( cache_path: &Path, landlock_enabled: bool, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { - let cache_path = match cache_path.to_str() { + let cache_path_str = match cache_path.to_str() { Some(a) => a, None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())), }; - let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path]; + let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path_str]; if let Some(node_version) = node_version { extra_args.extend_from_slice(&["--node-impl-version", node_version]); } - let (mut idle_worker, worker_handle) = - spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await?; + let (mut idle_worker, worker_handle) = spawn_with_program_path( + "prepare", + program_path, + Some(cache_path), + &extra_args, + spawn_timeout, + ) + .await?; send_handshake(&mut idle_worker.stream, Handshake { landlock_enabled }) .await .map_err(|error| { @@ -117,6 +123,12 @@ pub async fn start_work( ); with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move { + // Linux: Pass the socket path relative to the cache_path (what the child thinks is root). + #[cfg(target_os = "linux")] + let tmp_file = Path::new(".").with_file_name( + tmp_file.file_name().expect("tmp files are created with a filename; qed"), + ); + let preparation_timeout = pvf.prep_timeout(); if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await { gum::warn!( diff --git a/node/core/pvf/src/worker_intf.rs b/node/core/pvf/src/worker_intf.rs index c5061b3e5251..81fe79ae4349 100644 --- a/node/core/pvf/src/worker_intf.rs +++ b/node/core/pvf/src/worker_intf.rs @@ -46,6 +46,9 @@ pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; /// /// - `program_path`: The path to the program. /// +/// - `socket_dir_path`: An optional path to the dir where the socket should be created, if `None` +/// use a temp dir. +/// /// - `extra_args`: Optional extra CLI arguments to the program. NOTE: Should only contain data /// required before the handshake, like node/worker versions for the version check. Other data /// should go through the handshake. @@ -55,11 +58,12 @@ pub const JOB_TIMEOUT_WALL_CLOCK_FACTOR: u32 = 4; pub async fn spawn_with_program_path( debug_id: &'static str, program_path: impl Into, + socket_dir_path: Option<&Path>, extra_args: &[&str], spawn_timeout: Duration, ) -> Result<(IdleWorker, WorkerHandle), SpawnErr> { let program_path = program_path.into(); - with_transient_socket_path(debug_id, |socket_path| { + with_transient_socket_path(debug_id, socket_dir_path, |socket_path| { let socket_path = socket_path.to_owned(); let extra_args: Vec = extra_args.iter().map(|arg| arg.to_string()).collect(); @@ -121,14 +125,23 @@ pub async fn spawn_with_program_path( .await } -async fn with_transient_socket_path(debug_id: &'static str, f: F) -> Result +async fn with_transient_socket_path( + debug_id: &'static str, + socket_dir_path: Option<&Path>, + f: F, +) -> Result where F: FnOnce(&Path) -> Fut, Fut: futures::Future> + 'static, { - let socket_path = tmpfile(&format!("pvf-host-{}", debug_id)) - .await - .map_err(|_| SpawnErr::TmpFile)?; + let socket_prefix = format!("pvf-host-{}-", debug_id); + let socket_path = if let Some(socket_dir_path) = socket_dir_path { + tmpfile_in(&socket_prefix, socket_dir_path).await + } else { + tmpfile(&socket_prefix).await + } + .map_err(|_| SpawnErr::TmpFile)?; + let result = f(&socket_path).await; // Best effort to remove the socket file. Under normal circumstances the socket will be removed @@ -235,10 +248,22 @@ impl WorkerHandle { extra_args: &[String], socket_path: impl AsRef, ) -> io::Result { + // Linux: Pass the socket path relative to the cache_path (what the child thinks is root). + #[cfg(target_os = "linux")] + let socket_path = Path::new(".").with_file_name( + socket_path + .as_ref() + .file_name() + .expect("socket paths are created with a filename; qed"), + ); + // Non-Linux: We are only able to pivot-root on Linux, so pass the socket path as-is. + #[cfg(not(target_os = "linux"))] + let socket_path = socket_path.as_ref().as_os_str(); + let mut child = process::Command::new(program.as_ref()) .args(extra_args) .arg("--socket-path") - .arg(socket_path.as_ref().as_os_str()) + .arg(socket_path) .stdout(std::process::Stdio::piped()) .kill_on_drop(true) .spawn()?; diff --git a/node/core/pvf/tests/it/main.rs b/node/core/pvf/tests/it/main.rs index 72c459c2f632..0f30efefc4cd 100644 --- a/node/core/pvf/tests/it/main.rs +++ b/node/core/pvf/tests/it/main.rs @@ -258,7 +258,7 @@ async fn execute_queue_doesnt_stall_with_varying_executor_params() { #[tokio::test] async fn deleting_prepared_artifact_does_not_dispute() { let host = TestHost::new(); - let cache_dir = host.cache_dir.path().clone(); + let cache_dir = host.cache_dir.path(); let result = host .validate_candidate( diff --git a/node/core/pvf/tests/it/worker_common.rs b/node/core/pvf/tests/it/worker_common.rs index 7aa67a853ee5..9fb8be3fc08a 100644 --- a/node/core/pvf/tests/it/worker_common.rs +++ b/node/core/pvf/tests/it/worker_common.rs @@ -24,7 +24,7 @@ use crate::PUPPET_EXE; #[tokio::test] async fn spawn_immediate_exit() { let result = - spawn_with_program_path("integration-test", PUPPET_EXE, &["exit"], Duration::from_secs(2)) + spawn_with_program_path("integration-test", PUPPET_EXE, None, &["exit"], Duration::from_secs(2)) .await; assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } @@ -32,7 +32,7 @@ async fn spawn_immediate_exit() { #[tokio::test] async fn spawn_timeout() { let result = - spawn_with_program_path("integration-test", PUPPET_EXE, &["sleep"], Duration::from_secs(2)) + spawn_with_program_path("integration-test", PUPPET_EXE, None, &["sleep"], Duration::from_secs(2)) .await; assert!(matches!(result, Err(SpawnErr::AcceptTimeout))); } @@ -43,6 +43,7 @@ async fn should_fail_without_cache_path() { let result = spawn_with_program_path( "integration-test", PUPPET_EXE, + None, &["prepare-worker"], Duration::from_secs(2), ) @@ -53,12 +54,13 @@ async fn should_fail_without_cache_path() { #[tokio::test] async fn should_connect() { let cache_path = tempfile::tempdir().unwrap(); - let cache_path = cache_path.path().to_str().unwrap(); + let cache_path_str = cache_path.path().to_str().unwrap(); let _ = spawn_with_program_path( "integration-test", PUPPET_EXE, - &["prepare-worker", "--cache-path", cache_path], + Some(cache_path.path()), + &["prepare-worker", "--cache-path", cache_path_str], Duration::from_secs(2), ) .await