Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

PVF: Move landlock out of thread into process; add landlock exceptions #7580

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master

[target.'cfg(target_os = "linux")'.dependencies]
landlock = "0.2.0"
rand = "0.8.5"

[dev-dependencies]
assert_matches = "1.4.0"
Expand Down
143 changes: 102 additions & 41 deletions node/core/pvf/common/src/worker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,52 +117,12 @@ pub fn worker_event_loop<F, Fut>(
let worker_pid = std::process::id();
gum::debug!(target: LOG_TARGET, %worker_pid, "starting pvf worker ({})", debug_id);

// Change root to be the artifact directory.
//
// SAFETY: TODO
#[cfg(target_os = "linux")]
{
use std::ffi::CString;

let cache_path_c = CString::new(cache_path).unwrap();
let root_relative_c = CString::new(".").unwrap();
let oldroot_relative_c = CString::new("./oldroot").unwrap();
let root_absolute_c = CString::new("/").unwrap();
let oldroot_absolute_c = CString::new("/oldroot").unwrap();

unsafe {
// 1. `unshare` the user and the mount namespaces.
libc::unshare(libc::CLONE_NEWUSER);
libc::unshare(libc::CLONE_NEWNS);

// 2. `pivot_root` to the artifact directory.
libc::chdir(cache_path_c.as_ptr());
libc::mount(
root_relative_c.as_ptr(),
root_relative_c.as_ptr(),
std::ptr::null(), // ignored when MS_BIND is used
libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC,
std::ptr::null(), // ignored when MS_BIND is used
);
libc::mkdir(oldroot_relative_c.as_ptr(), 0755);
libc::syscall(
libc::SYS_pivot_root,
root_relative_c.as_ptr(),
oldroot_relative_c.as_ptr(),
);

// 3. Change to the new root, `unmount2` and remove the old root.
libc::chdir(root_absolute_c.as_ptr());
libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH);
libc::rmdir(oldroot_absolute_c.as_ptr());
}
}

// Check for a mismatch between the node and worker versions.
if let (Some(node_version), Some(worker_version)) = (node_version, worker_version) {
if node_version != worker_version {
gum::error!(
target: LOG_TARGET,
%debug_id,
%worker_pid,
%node_version,
%worker_version,
Expand All @@ -175,8 +135,28 @@ pub fn worker_event_loop<F, Fut>(
}
}

#[cfg(target_os = "linux")]
{
if let Err(err_ctx) = change_root(cache_path) {
let err = io::Error::last_os_error();
gum::error!(
target: LOG_TARGET,
%debug_id,
%worker_pid,
%err_ctx,
?cache_path,
"Could not change root to be the cache path: {}",
err
);
worker_shutdown_message(debug_id, worker_pid, err);
return
}
}

remove_env_vars(debug_id);

gum::info!(target: LOG_TARGET, "5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

// Run the main worker loop.
let rt = Runtime::new().expect("Creates tokio runtime. If this panics the worker will die and the host will detect that and deal with it.");
let err = rt
Expand All @@ -199,6 +179,87 @@ pub fn worker_event_loop<F, Fut>(
rt.shutdown_background();
}

/// Change root to be the artifact directory.
#[cfg(target_os = "linux")]
fn change_root(cache_path: &Path) -> Result<(), &'static str> {
use rand::{distributions::Alphanumeric, Rng};
use std::{ffi::CString, os::unix::ffi::OsStrExt, ptr};

const RANDOM_LEN: usize = 10;
let mut buf = Vec::with_capacity(RANDOM_LEN);
buf.extend(rand::thread_rng().sample_iter(&Alphanumeric).take(RANDOM_LEN));
Comment on lines +189 to +190
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should be able to do a .collect here directly into the buffer.

let s = std::str::from_utf8(&buf)
.expect("the string is collected from a valid utf-8 sequence; qed");

let cache_path_str = match cache_path.to_str() {
Some(s) => s,
None => return Err("cache path is not valid UTF-8")
};
mrcnski marked this conversation as resolved.
Show resolved Hide resolved
let cache_path_c = CString::new(cache_path.as_os_str().as_bytes()).unwrap();
let root_absolute_c = CString::new("/").unwrap();
// Append a random string to prevent races and to avoid dealing with the dir already existing.
let oldroot_relative_c = CString::new(format!("{}/oldroot-{}", cache_path_str, s)).unwrap();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use join or push instead of manually merging these with /.

let oldroot_absolute_c = CString::new(format!("/oldroot-{}", s)).unwrap();

// SAFETY: TODO
unsafe {
// 1. `unshare` the user and the mount namespaces.
if libc::unshare(libc::CLONE_NEWUSER) < 0 {
return Err("unshare user namespace")
}
if libc::unshare(libc::CLONE_NEWNS) < 0 {
return Err("unshare mount namespace")
}
Comment on lines +207 to +212
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. These are flags so this can be done in a single unshare call.
  2. Since this is also switching to a new user namespace this function should be renamed as it's not strictly only changing the root.
  3. On some distros this could actually fail (due to user namespaces being disabled). In this case we'd probably want to abort execution unless a flag is passed on the command-line?


// 2. `pivot_root` to the artifact directory.
gum::info!(target: LOG_TARGET, "1. {:?}", std::env::current_dir());
gum::info!(target: LOG_TARGET, "1.5. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());
// TODO: Ensure that 'new_root' and its parent mount don't have shared propagation.
if libc::mount(ptr::null(), root_absolute_c.as_ptr(), ptr::null(), libc::MS_REC | libc::MS_PRIVATE, ptr::null()) < 0 {
return Err("mount MS_PRIVATE")
}
if libc::mount(
cache_path_c.as_ptr(),
cache_path_c.as_ptr(),
ptr::null(), // ignored when MS_BIND is used
libc::MS_BIND | libc::MS_REC | libc::MS_NOEXEC,
ptr::null(), // ignored when MS_BIND is used
) < 0
{
return Err("mount MS_BIND")
}
if libc::mkdir(oldroot_relative_c.as_ptr(), 0755) < 0 {
return Err("mkdir oldroot")
}
if libc::syscall(
libc::SYS_pivot_root,
cache_path_c.as_ptr(),
oldroot_relative_c.as_ptr(),
) < 0
{
return Err("pivot_root")
}
Comment on lines +231 to +241
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you call the pivot root syscall with both paths being "." then this mkdir should be unnecessary.


// 3. Change to the new root, `unmount2` and remove the old root.
if libc::chdir(root_absolute_c.as_ptr()) < 0 {
return Err("chdir to new root")
}
gum::info!(target: LOG_TARGET, "2. {:?}", std::env::current_dir());
gum::info!(target: LOG_TARGET, "3. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());
if libc::umount2(oldroot_absolute_c.as_ptr(), libc::MNT_DETACH) < 0 {
return Err("umount2 the oldroot")
}
if libc::rmdir(oldroot_absolute_c.as_ptr()) < 0 {
return Err("rmdir the oldroot")
}
gum::info!(target: LOG_TARGET, "4. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

// TODO: do some assertions
}

Ok(())
}

/// Delete all env vars to prevent malicious code from accessing them.
fn remove_env_vars(debug_id: &'static str) {
for (key, value) in std::env::vars_os() {
Expand Down
84 changes: 44 additions & 40 deletions node/core/pvf/prepare-worker/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,46 +150,50 @@ pub fn worker_entrypoint(
|mut stream| async move {
let worker_pid = std::process::id();

gum::info!(target: LOG_TARGET, "10. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

let Handshake { landlock_enabled } = recv_handshake(&mut stream).await?;

gum::info!(target: LOG_TARGET, "11. {:?}", std::fs::read_dir(".").unwrap().map(|entry| entry.unwrap().path()).collect::<Vec<PathBuf>>());

// Try to enable landlock.
{
#[cfg(target_os = "linux")]
let landlock_status = {
use polkadot_node_core_pvf_common::worker::security::landlock::{
path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI,
};

// Allow an exception for writing to the artifact cache, with no allowance for
// listing the directory contents. Since we prepend artifact names with a random
// hash, this means attackers can't discover artifacts apart from the current
// job.
try_restrict(path_beneath_rules(
&[cache_path],
AccessFs::from_write(LANDLOCK_ABI),
))
.map(LandlockStatus::from_ruleset_status)
.map_err(|e| e.to_string())
};
#[cfg(not(target_os = "linux"))]
let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);

// Error if the host determined that landlock is fully enabled and we couldn't fully
// enforce it here.
if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced))
{
gum::warn!(
target: LOG_TARGET,
%worker_pid,
"could not fully enable landlock: {:?}",
landlock_status
);
return Err(io::Error::new(
io::ErrorKind::Other,
format!("could not fully enable landlock: {:?}", landlock_status),
))
}
}
// {
// #[cfg(target_os = "linux")]
// let landlock_status = {
// use polkadot_node_core_pvf_common::worker::security::landlock::{
// path_beneath_rules, try_restrict, Access, AccessFs, LANDLOCK_ABI,
// };

// // Allow an exception for writing to the artifact cache, with no allowance for
// // listing the directory contents. Since we prepend artifact names with a random
// // hash, this means attackers can't discover artifacts apart from the current
// // job.
// try_restrict(path_beneath_rules(
// &[cache_path],
// AccessFs::from_write(LANDLOCK_ABI),
// ))
// .map(LandlockStatus::from_ruleset_status)
// .map_err(|e| e.to_string())
// };
// #[cfg(not(target_os = "linux"))]
// let landlock_status: Result<LandlockStatus, String> = Ok(LandlockStatus::NotEnforced);

// // Error if the host determined that landlock is fully enabled and we couldn't fully
// // enforce it here.
// if landlock_enabled && !matches!(landlock_status, Ok(LandlockStatus::FullyEnforced))
// {
// gum::warn!(
// target: LOG_TARGET,
// %worker_pid,
// "could not fully enable landlock: {:?}",
// landlock_status
// );
// return Err(io::Error::new(
// io::ErrorKind::Other,
// format!("could not fully enable landlock: {:?}", landlock_status),
// ))
// }
// }

loop {
let (pvf, temp_artifact_dest) = recv_request(&mut stream).await?;
Expand All @@ -199,9 +203,9 @@ pub fn worker_entrypoint(
"worker: preparing artifact",
);

if !temp_artifact_dest.starts_with(cache_path) {
return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}")))
}
// if !temp_artifact_dest.starts_with(cache_path) {
// return Err(io::Error::new(io::ErrorKind::Other, format!("received an artifact path {temp_artifact_dest:?} that does not belong to expected cache path {cache_path:?}")))
// }

let preparation_timeout = pvf.prep_timeout();
let prepare_job_kind = pvf.prep_kind();
Expand Down
6 changes: 3 additions & 3 deletions node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,17 @@ pub async fn spawn(
cache_path: &Path,
landlock_enabled: bool,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
let cache_path = match cache_path.to_str() {
let cache_path_str = match cache_path.to_str() {
Some(a) => a,
None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())),
};
let mut extra_args = vec!["execute-worker", "--cache-path", cache_path];
let mut extra_args = vec!["execute-worker", "--cache-path", cache_path_str];
if let Some(node_version) = node_version {
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
}

let (mut idle_worker, worker_handle) =
spawn_with_program_path("execute", program_path, &extra_args, spawn_timeout).await?;
spawn_with_program_path("execute", program_path, Some(cache_path), &extra_args, spawn_timeout).await?;
send_handshake(&mut idle_worker.stream, Handshake { executor_params, landlock_enabled })
.await
.map_err(|error| {
Expand Down
20 changes: 16 additions & 4 deletions node/core/pvf/src/prepare/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,23 @@ pub async fn spawn(
cache_path: &Path,
landlock_enabled: bool,
) -> Result<(IdleWorker, WorkerHandle), SpawnErr> {
let cache_path = match cache_path.to_str() {
let cache_path_str = match cache_path.to_str() {
Some(a) => a,
None => return Err(SpawnErr::InvalidCachePath(cache_path.to_owned())),
};
let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path];
let mut extra_args = vec!["prepare-worker", "--cache-path", cache_path_str];
if let Some(node_version) = node_version {
extra_args.extend_from_slice(&["--node-impl-version", node_version]);
}

let (mut idle_worker, worker_handle) =
spawn_with_program_path("prepare", program_path, &extra_args, spawn_timeout).await?;
let (mut idle_worker, worker_handle) = spawn_with_program_path(
"prepare",
program_path,
Some(cache_path),
&extra_args,
spawn_timeout,
)
.await?;
send_handshake(&mut idle_worker.stream, Handshake { landlock_enabled })
.await
.map_err(|error| {
Expand Down Expand Up @@ -117,6 +123,12 @@ pub async fn start_work(
);

with_tmp_file(stream, pid, cache_path, |tmp_file, mut stream| async move {
// Linux: Pass the socket path relative to the cache_path (what the child thinks is root).
#[cfg(target_os = "linux")]
let tmp_file = Path::new(".").with_file_name(
tmp_file.file_name().expect("tmp files are created with a filename; qed"),
);

let preparation_timeout = pvf.prep_timeout();
if let Err(err) = send_request(&mut stream, pvf, &tmp_file).await {
gum::warn!(
Expand Down
Loading