From 6181f3a566b62d8a16d1c8c7318b54f84c3f32d2 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 2 Feb 2024 13:26:18 -0800 Subject: [PATCH] wasm: Store rlib metadata in wasm object files The goal of this commit is to remove warnings using LLVM tip-of-tree `wasm-ld`. In llvm/llvm-project#78658 the `wasm-ld` LLD driver no longer looks at archive indices and instead looks at all the objects in archives. Previously `lib.rmeta` files were simply raw rustc metadata bytes, not wasm objects, meaning that `wasm-ld` would emit a warning indicating so. WebAssembly targets previously passed `--fatal-warnings` to `wasm-ld` by default which meant that if Rust were to update to LLVM 18 then all wasm targets would not work. This immediate blocker was resolved in rust-lang/rust#120278 which removed `--fatal-warnings` which enabled a theoretical update to LLVM 18 for wasm targets. This current state is ok-enough for now because rustc squashes all linker output by default if it doesn't fail. This means, for example, that rustc squashes all the linker warnings coming out of `wasm-ld` about `lib.rmeta` files with LLVM 18. This again isn't a pressing issue because the information is all hidden, but it runs the risk of being annoying if another linker error were to happen and then the output would have all these unrelated warnings that couldn't be fixed. Thus, this PR comes into the picture. The goal of this PR is to resolve these warnings by using the WebAssembly object file format on wasm targets instead of using raw rustc metadata. When I first implemented the rlib-in-objects scheme in #84449 I remember either concluding that `wasm-ld` would either include the metadata in the output or I thought we didn't have to do anything there at all. I think I was wrong on both counts as `wasm-ld` does not include the metadata in the final output unless the object is referenced and we do actually need to do something to resolve these warnings. 
This PR updates the object file format containing rustc metadata on WebAssembly targets to be an actual WebAssembly file. This enables the `wasm` feature of the `object` crate to be able to read the custom section in the same manner as other platforms, but currently `object` doesn't support writing wasm object files so a handwritten encoder is used instead. The only caveat I know of with this is that if `wasm-ld` does indeed look at the object file then the metadata will be included in the final output. I believe the only thing that could cause that at this time is `--whole-archive` which I don't think is passed for rlibs. I would clarify that I'm not 100% certain about this, however. --- Cargo.lock | 11 +++ compiler/rustc_codegen_ssa/Cargo.toml | 2 +- .../rustc_codegen_ssa/src/back/metadata.rs | 88 ++++++++++++++----- src/tools/tidy/src/deps.rs | 17 ++++ 4 files changed, 97 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61f9c130e38d7..848c826f9408d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2627,6 +2627,7 @@ dependencies = [ "rustc-std-workspace-alloc", "rustc-std-workspace-core", "ruzstd", + "wasmparser", ] [[package]] @@ -6128,6 +6129,16 @@ version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" +[[package]] +name = "wasmparser" +version = "0.118.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77f1154f1ab868e2a01d9834a805faca7bf8b50d041b4ca714d005d0dab1c50c" +dependencies = [ + "indexmap", + "semver", +] + [[package]] name = "web-sys" version = "0.3.68" diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index e144b1dc1bd88..781c54bdef876 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ b/compiler/rustc_codegen_ssa/Cargo.toml @@ -48,7 +48,7 @@ libc = "0.2.50" [dependencies.object] version = "0.32.1" default-features = false -features = ["read_core", 
"elf", "macho", "pe", "xcoff", "unaligned", "archive", "write"] +features = ["read_core", "elf", "macho", "pe", "xcoff", "unaligned", "archive", "write", "wasm"] [target.'cfg(windows)'.dependencies.windows] version = "0.52.0" diff --git a/compiler/rustc_codegen_ssa/src/back/metadata.rs b/compiler/rustc_codegen_ssa/src/back/metadata.rs index b683e1b45a8d1..8e76e47cfefca 100644 --- a/compiler/rustc_codegen_ssa/src/back/metadata.rs +++ b/compiler/rustc_codegen_ssa/src/back/metadata.rs @@ -15,6 +15,7 @@ use rustc_data_structures::owned_slice::{try_slice_owned, OwnedSlice}; use rustc_metadata::creader::MetadataLoader; use rustc_metadata::fs::METADATA_FILENAME; use rustc_metadata::EncodedMetadata; +use rustc_serialize::leb128; use rustc_session::Session; use rustc_span::sym; use rustc_target::abi::Endian; @@ -420,10 +421,9 @@ pub enum MetadataPosition { /// it's not in an allowlist of otherwise well known dwarf section names to /// go into the final artifact. /// -/// * WebAssembly - we actually don't have any container format for this -/// target. WebAssembly doesn't support the `dylib` crate type anyway so -/// there's no need for us to support this at this time. Consequently the -/// metadata bytes are simply stored as-is into an rlib. +/// * WebAssembly - this uses wasm files themselves as the object file format +/// so an empty file with no linking metadata but a single custom section is +/// created holding our metadata. /// /// * COFF - Windows-like targets create an object with a section that has /// the `IMAGE_SCN_LNK_REMOVE` flag set which ensures that if the linker @@ -438,22 +438,13 @@ pub fn create_wrapper_file( data: &[u8], ) -> (Vec<u8>, MetadataPosition) { let Some(mut file) = create_object_file(sess) else { - // This is used to handle all "other" targets. This includes targets - // in two categories: - // - // * Some targets don't have support in the `object` crate just yet - // to write an object file. 
These targets are likely to get filled - // out over time. - // - // * Targets like WebAssembly don't support dylibs, so the purpose - // of putting metadata in object files, to support linking rlibs - // into dylibs, is moot. - // - // In both of these cases it means that linking into dylibs will - // not be supported by rustc. This doesn't matter for targets like - // WebAssembly and for targets not supported by the `object` crate - // yet it means that work will need to be done in the `object` crate - // to add a case above. + if sess.target.is_like_wasm { + return (create_metadata_file_for_wasm(data, &section_name), MetadataPosition::First); + } + + // Targets using this branch don't have support implemented here yet or + // they're not yet implemented in the `object` crate and will likely + // fill out this module over time. return (data.to_vec(), MetadataPosition::Last); }; let section = if file.format() == BinaryFormat::Xcoff { @@ -532,6 +523,9 @@ pub fn create_compressed_metadata_file( packed_metadata.extend(metadata.raw_data()); let Some(mut file) = create_object_file(sess) else { + if sess.target.is_like_wasm { + return create_metadata_file_for_wasm(&packed_metadata, b".rustc"); + } return packed_metadata.to_vec(); }; if file.format() == BinaryFormat::Xcoff { @@ -624,3 +618,57 @@ pub fn create_compressed_metadata_file_for_xcoff( file.append_section_data(section, data, 1); file.write().unwrap() } + +/// Creates a simple WebAssembly object file, which is itself a wasm module, +/// that contains a custom section of the name `section_name` with contents +/// `data`. +/// +/// NB: the `object` crate does not yet have support for writing the wasm +/// object file format. The format is simple enough that for now an extra crate 
The file format is: +/// +/// * 4-byte header "\0asm" +/// * 4-byte version number - 1u32 in little-endian format +/// * concatenated sections, which for this object is always "custom sections" +/// +/// Custom sections are then defined by: +/// * 1-byte section identifier - 0 for a custom section +/// * leb-encoded section length (size of the contents beneath this bullet) +/// * leb-encoded custom section name length +/// * custom section name +/// * section contents +/// +/// One custom section, `linking`, is added here in accordance with +/// +/// which is required to inform LLD that this is an object file but it should +/// otherwise basically ignore it if it otherwise looks at it. The linking +/// section currently is defined by a single version byte (2) and then further +/// sections, but we have no more sections, so it's just the byte "2". +/// +/// The next custom section is the one we're interested in. +pub fn create_metadata_file_for_wasm(data: &[u8], section_name: &[u8]) -> Vec { + let mut bytes = b"\0asm\x01\0\0\0".to_vec(); + + let mut append_custom_section = |section_name: &[u8], data: &[u8]| { + let mut section_name_len = [0; leb128::max_leb128_len::()]; + let off = leb128::write_usize_leb128(&mut section_name_len, section_name.len()); + let section_name_len = §ion_name_len[..off]; + + let mut section_len = [0; leb128::max_leb128_len::()]; + let off = leb128::write_usize_leb128( + &mut section_len, + data.len() + section_name_len.len() + section_name.len(), + ); + let section_len = §ion_len[..off]; + + bytes.push(0u8); + bytes.extend_from_slice(section_len); + bytes.extend_from_slice(section_name_len); + bytes.extend_from_slice(section_name); + bytes.extend_from_slice(data); + }; + + append_custom_section(b"linking", &[2]); + append_custom_section(section_name, data); + bytes +} diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index b36b6da308ecd..cff219285dc7b 100644 --- a/src/tools/tidy/src/deps.rs +++ 
b/src/tools/tidy/src/deps.rs @@ -92,6 +92,7 @@ const EXCEPTIONS: ExceptionList = &[ ("ryu", "Apache-2.0 OR BSL-1.0"), // BSL is not acceptble, but we use it under Apache-2.0 // cargo/... (because of serde) ("self_cell", "Apache-2.0"), // rustc (fluent translations) ("snap", "BSD-3-Clause"), // rustc + ("wasmparser", "Apache-2.0 WITH LLVM-exception"), // rustc // tidy-alphabetical-end ]; @@ -379,6 +380,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "valuable", "version_check", "wasi", + "wasmparser", "winapi", "winapi-i686-pc-windows-gnu", "winapi-util", @@ -564,6 +566,21 @@ fn check_runtime_license_exceptions( if pkg.name == "fortanix-sgx-abi" && pkg.license.as_deref() == Some("MPL-2.0") { continue; } + + // This exception is due to the fact that the feature set of the + // `object` crate is different between rustc and libstd. In the + // standard library only a conservative set of features are enabled + // which notably does not include the `wasm` feature which pulls in + // this dependency. In the compiler, however, the `wasm` feature is + // enabled. This exception is intended to be here so long as the + // `EXCEPTIONS` above contains `wasmparser`, but once that goes away + // this can be removed. + if pkg.name == "wasmparser" + && pkg.license.as_deref() == Some("Apache-2.0 WITH LLVM-exception") + { + continue; + } + tidy_error!(bad, "invalid license `{}` in `{}`", license, pkg.id); } }