From 33aa373eecb9b6bb084885277d267ad3dfda4c0a Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Tue, 23 May 2023 19:37:29 +0200 Subject: [PATCH] src: use Blob{Des|S}erializer for SEA blobs PR-URL: https://github.com/nodejs/node/pull/47962 Reviewed-By: Darshan Sen --- src/blob_serializer_deserializer-inl.h | 52 ++++--- src/blob_serializer_deserializer.h | 18 ++- src/debug_utils.h | 1 + src/node_main_instance.cc | 10 +- src/node_sea.cc | 147 +++++++++++------- src/node_sea.h | 19 ++- src/node_snapshotable.cc | 8 +- ...est-single-executable-application-empty.js | 44 ++++++ 8 files changed, 206 insertions(+), 93 deletions(-) create mode 100644 test/sequential/test-single-executable-application-empty.js diff --git a/src/blob_serializer_deserializer-inl.h b/src/blob_serializer_deserializer-inl.h index 9383adee0b8d49..354d9267cf1f41 100644 --- a/src/blob_serializer_deserializer-inl.h +++ b/src/blob_serializer_deserializer-inl.h @@ -13,8 +13,8 @@ #include "debug_utils-inl.h" -// This is related to the blob that is used in snapshots and has nothing to do -// with `node_blob.h`. +// This is related to the blob that is used in snapshots and single executable +// applications and has nothing to do with `node_blob.h`. namespace node { @@ -130,22 +130,22 @@ std::vector BlobDeserializer::ReadVector() { template std::string BlobDeserializer::ReadString() { - size_t length = ReadArithmetic(); - - if (is_debug) { - Debug("ReadString(), length=%d: ", length); - } + std::string_view view = ReadStringView(StringLogMode::kAddressAndContent); + return std::string(view); +} - CHECK_GT(length, 0); // There should be no empty strings. - MallocedBuffer buf(length + 1); - memcpy(buf.data, sink.data() + read_total, length + 1); - std::string result(buf.data, length); // This creates a copy of buf.data. +template +std::string_view BlobDeserializer::ReadStringView(StringLogMode mode) { + size_t length = ReadArithmetic(); + Debug("ReadStringView(), length=%zu: ", length); - if (is_debug) { - Debug("\"%s\", read %zu bytes\n", result.c_str(), length + 1); + std::string_view result(sink.data() + read_total, length); + Debug("%p, read %zu bytes\n", result.data(), result.size()); + if (mode == StringLogMode::kAddressAndContent) { + Debug("%s", result); } - read_total += length + 1; + read_total += length; return result; } @@ -262,26 +262,28 @@ size_t BlobSerializer::WriteVector(const std::vector& data) { // [ 4/8 bytes ] length // [ |length| bytes ] contents template -size_t BlobSerializer::WriteString(const std::string& data) { - CHECK_GT(data.size(), 0); // No empty strings should be written. +size_t BlobSerializer::WriteStringView(std::string_view data, + StringLogMode mode) { + Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data()); size_t written_total = WriteArithmetic(data.size()); - if (is_debug) { - std::string str = ToStr(data); - Debug("WriteString(), length=%zu: \"%s\"\n", data.size(), data.c_str()); - } - // Write the null-terminated string. - size_t length = data.size() + 1; - sink.insert(sink.end(), data.c_str(), data.c_str() + length); + size_t length = data.size(); + sink.insert(sink.end(), data.data(), data.data() + length); written_total += length; - if (is_debug) { - Debug("WriteString() wrote %zu bytes\n", written_total); + Debug("WriteStringView() wrote %zu bytes\n", written_total); + if (mode == StringLogMode::kAddressAndContent) { + Debug("%s", data); } return written_total; } +template +size_t BlobSerializer::WriteString(const std::string& data) { + return WriteStringView(data, StringLogMode::kAddressAndContent); +} + // Helper for writing an array of numeric types. template template diff --git a/src/blob_serializer_deserializer.h b/src/blob_serializer_deserializer.h index 3715c5e7c5eaec..aa07bee54fd1bc 100644 --- a/src/blob_serializer_deserializer.h +++ b/src/blob_serializer_deserializer.h @@ -6,8 +6,8 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS -// This is related to the blob that is used in snapshots and has nothing to do -// with `node_blob.h`. +// This is related to the blob that is used in snapshots and single executable +// applications and has nothing to do with `node_blob.h`. namespace node { @@ -27,6 +27,11 @@ class BlobSerializerDeserializer { bool is_debug = false; }; +enum class StringLogMode { + kAddressOnly, // Can be used when the string contains binary content. + kAddressAndContent, +}; + // Child classes are expected to implement T Read() where // !std::is_arithmetic_v && !std::is_same_v template @@ -52,7 +57,9 @@ class BlobDeserializer : public BlobSerializerDeserializer { template std::vector ReadVector(); + // ReadString() creates a copy of the data. ReadStringView() doesn't. std::string ReadString(); + std::string_view ReadStringView(StringLogMode mode); // Helper for reading an array of numeric types. template @@ -77,11 +84,7 @@ template class BlobSerializer : public BlobSerializerDeserializer { public: explicit BlobSerializer(bool is_debug_v) - : BlobSerializerDeserializer(is_debug_v) { - // Currently the snapshot blob built with an empty script is around 4MB. - // So use that as the default sink size. - sink.reserve(4 * 1024 * 1024); - } + : BlobSerializerDeserializer(is_debug_v) {} ~BlobSerializer() {} Impl* impl() { return static_cast(this); } @@ -102,6 +105,7 @@ class BlobSerializer : public BlobSerializerDeserializer { // The layout of a written string: // [ 4/8 bytes ] length // [ |length| bytes ] contents + size_t WriteStringView(std::string_view data, StringLogMode mode); size_t WriteString(const std::string& data); // Helper for writing an array of numeric types. diff --git a/src/debug_utils.h b/src/debug_utils.h index e2e702f586e20f..c8371d392be896 100644 --- a/src/debug_utils.h +++ b/src/debug_utils.h @@ -48,6 +48,7 @@ void NODE_EXTERN_PRIVATE FWrite(FILE* file, const std::string& str); V(INSPECTOR_PROFILER) \ V(CODE_CACHE) \ V(NGTCP2_DEBUG) \ + V(SEA) \ V(WASI) \ V(MKSNAPSHOT) diff --git a/src/node_main_instance.cc b/src/node_main_instance.cc index f0dc6ca275b007..34ed9075e0fe16 100644 --- a/src/node_main_instance.cc +++ b/src/node_main_instance.cc @@ -87,14 +87,16 @@ ExitCode NodeMainInstance::Run() { void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) { if (*exit_code == ExitCode::kNoFailure) { - bool is_sea = false; + bool runs_sea_code = false; #ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION if (sea::IsSingleExecutable()) { - is_sea = true; - LoadEnvironment(env, sea::FindSingleExecutableCode()); + runs_sea_code = true; + sea::SeaResource sea = sea::FindSingleExecutableResource(); + std::string_view code = sea.code; + LoadEnvironment(env, code); } #endif - if (!is_sea) { + if (!runs_sea_code) { LoadEnvironment(env, StartExecutionCallback{}); } diff --git a/src/node_sea.cc b/src/node_sea.cc index 796123eae47bd7..88741a5fce9d48 100644 --- a/src/node_sea.cc +++ b/src/node_sea.cc @@ -1,5 +1,6 @@ #include "node_sea.h" +#include "blob_serializer_deserializer-inl.h" #include "debug_utils-inl.h" #include "env-inl.h" #include "json_parser.h" @@ -34,16 +35,6 @@ namespace node { namespace sea { namespace { -// A special number that will appear at the beginning of the single executable -// preparation blobs ready to be injected into the binary. We use this to check -// that the data given to us are intended for building single executable -// applications. -const uint32_t kMagic = 0x143da20; - -enum class SeaFlags : uint32_t { - kDefault = 0, - kDisableExperimentalSeaWarning = 1 << 0, -}; SeaFlags operator|(SeaFlags x, SeaFlags y) { return static_cast(static_cast(x) | @@ -59,47 +50,100 @@ SeaFlags operator|=(/* NOLINT (runtime/references) */ SeaFlags& x, SeaFlags y) { return x = x | y; } -struct SeaResource { - SeaFlags flags = SeaFlags::kDefault; - std::string_view code; - static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags); +class SeaSerializer : public BlobSerializer { + public: + SeaSerializer() + : BlobSerializer( + per_process::enabled_debug_list.enabled(DebugCategory::SEA)) {} + + template ::value>* = nullptr, + std::enable_if_t::value>* = nullptr> + size_t Write(const T& data); }; -SeaResource FindSingleExecutableResource() { +template <> +size_t SeaSerializer::Write(const SeaResource& sea) { + sink.reserve(SeaResource::kHeaderSize + sea.code.size()); + + Debug("Write SEA magic %x\n", kMagic); + size_t written_total = WriteArithmetic(kMagic); + + uint32_t flags = static_cast(sea.flags); + Debug("Write SEA flags %x\n", flags); + written_total += WriteArithmetic(flags); + DCHECK_EQ(written_total, SeaResource::kHeaderSize); + + Debug("Write SEA resource code %p, size=%zu\n", + sea.code.data(), + sea.code.size()); + written_total += WriteStringView(sea.code, StringLogMode::kAddressAndContent); + return written_total; +} + +class SeaDeserializer : public BlobDeserializer { + public: + explicit SeaDeserializer(std::string_view v) + : BlobDeserializer( + per_process::enabled_debug_list.enabled(DebugCategory::SEA), v) {} + + template ::value>* = nullptr, + std::enable_if_t::value>* = nullptr> + T Read(); +}; + +template <> +SeaResource SeaDeserializer::Read() { + uint32_t magic = ReadArithmetic(); + Debug("Read SEA magic %x\n", magic); + + CHECK_EQ(magic, kMagic); + SeaFlags flags(static_cast(ReadArithmetic())); + Debug("Read SEA flags %x\n", static_cast(flags)); + CHECK_EQ(read_total, SeaResource::kHeaderSize); + + std::string_view code = ReadStringView(StringLogMode::kAddressAndContent); + Debug("Read SEA resource code %p, size=%zu\n", code.data(), code.size()); + return {flags, code}; +} + +std::string_view FindSingleExecutableBlob() { CHECK(IsSingleExecutable()); - static const SeaResource sea_resource = []() -> SeaResource { + static const std::string_view result = []() -> std::string_view { size_t size; #ifdef __APPLE__ postject_options options; postject_options_init(&options); options.macho_segment_name = "NODE_SEA"; - const char* code = static_cast( + const char* blob = static_cast( postject_find_resource("NODE_SEA_BLOB", &size, &options)); #else - const char* code = static_cast( + const char* blob = static_cast( postject_find_resource("NODE_SEA_BLOB", &size, nullptr)); #endif - uint32_t first_word = reinterpret_cast(code)[0]; - CHECK_EQ(first_word, kMagic); - SeaFlags flags{ - reinterpret_cast(code + sizeof(first_word))[0]}; - // TODO(joyeecheung): do more checks here e.g. matching the versions. - return { - flags, - { - code + SeaResource::kHeaderSize, - size - SeaResource::kHeaderSize, - }, - }; + return {blob, size}; }(); - return sea_resource; + per_process::Debug(DebugCategory::SEA, + "Found SEA blob %p, size=%zu\n", + result.data(), + result.size()); + return result; } -} // namespace +} // anonymous namespace -std::string_view FindSingleExecutableCode() { - SeaResource sea_resource = FindSingleExecutableResource(); - return sea_resource.code; +SeaResource FindSingleExecutableResource() { + static const SeaResource sea_resource = []() -> SeaResource { + std::string_view blob = FindSingleExecutableBlob(); + per_process::Debug(DebugCategory::SEA, + "Found SEA resource %p, size=%zu\n", + blob.data(), + blob.size()); + SeaDeserializer deserializer(blob); + return deserializer.Read(); + }(); + return sea_resource; } bool IsSingleExecutable() { @@ -194,38 +238,33 @@ std::optional ParseSingleExecutableConfig( return result; } -bool GenerateSingleExecutableBlob(const SeaConfig& config) { +ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) { std::string main_script; // TODO(joyeecheung): unify the file utils. int r = ReadFileSync(&main_script, config.main_path.c_str()); if (r != 0) { const char* err = uv_strerror(r); FPrintF(stderr, "Cannot read main script %s:%s\n", config.main_path, err); - return false; + return ExitCode::kGenericUserError; } - std::vector sink; - // TODO(joyeecheung): reuse the SnapshotSerializerDeserializer for this. - sink.reserve(SeaResource::kHeaderSize + main_script.size()); - const char* pos = reinterpret_cast(&kMagic); - sink.insert(sink.end(), pos, pos + sizeof(kMagic)); - pos = reinterpret_cast(&(config.flags)); - sink.insert(sink.end(), pos, pos + sizeof(SeaFlags)); - sink.insert( - sink.end(), main_script.data(), main_script.data() + main_script.size()); - - uv_buf_t buf = uv_buf_init(sink.data(), sink.size()); + SeaResource sea{config.flags, main_script}; + + SeaSerializer serializer; + serializer.Write(sea); + + uv_buf_t buf = uv_buf_init(serializer.sink.data(), serializer.sink.size()); r = WriteFileSync(config.output_path.c_str(), buf); if (r != 0) { const char* err = uv_strerror(r); FPrintF(stderr, "Cannot write output to %s:%s\n", config.output_path, err); - return false; + return ExitCode::kGenericUserError; } FPrintF(stderr, "Wrote single executable preparation blob to %s\n", config.output_path); - return true; + return ExitCode::kNoFailure; } } // anonymous namespace @@ -233,12 +272,12 @@ bool GenerateSingleExecutableBlob(const SeaConfig& config) { ExitCode BuildSingleExecutableBlob(const std::string& config_path) { std::optional config_opt = ParseSingleExecutableConfig(config_path); - if (!config_opt.has_value() || - !GenerateSingleExecutableBlob(config_opt.value())) { - return ExitCode::kGenericUserError; + if (config_opt.has_value()) { + ExitCode code = GenerateSingleExecutableBlob(config_opt.value()); + return code; } - return ExitCode::kNoFailure; + return ExitCode::kGenericUserError; } void Initialize(Local target, diff --git a/src/node_sea.h b/src/node_sea.h index b171595dd7ed61..c3e102d841a2e9 100644 --- a/src/node_sea.h +++ b/src/node_sea.h @@ -11,9 +11,26 @@ namespace node { namespace sea { +// A special number that will appear at the beginning of the single executable +// preparation blobs ready to be injected into the binary. We use this to check +// that the data given to us are intended for building single executable +// applications. +const uint32_t kMagic = 0x143da20; + +enum class SeaFlags : uint32_t { + kDefault = 0, + kDisableExperimentalSeaWarning = 1 << 0, +}; + +struct SeaResource { + SeaFlags flags = SeaFlags::kDefault; + std::string_view code; + + static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags); +}; bool IsSingleExecutable(); -std::string_view FindSingleExecutableCode(); +SeaResource FindSingleExecutableResource(); std::tuple FixupArgsForSEA(int argc, char** argv); node::ExitCode BuildSingleExecutableBlob(const std::string& config_path); } // namespace sea diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 0db70f5ce08640..b2be437efff788 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -159,7 +159,11 @@ class SnapshotSerializer : public BlobSerializer { SnapshotSerializer() : BlobSerializer( per_process::enabled_debug_list.enabled( - DebugCategory::MKSNAPSHOT)) {} + DebugCategory::MKSNAPSHOT)) { + // Currently the snapshot blob built with an empty script is around 4MB. + // So use that as the default sink size. + sink.reserve(4 * 1024 * 1024); + } template ::value>* = nullptr, @@ -554,7 +558,7 @@ size_t SnapshotSerializer::Write(const SnapshotMetadata& data) { // We need the Node.js version, platform and arch to match because // Node.js may perform synchronizations that are platform-specific and they // can be changed in semver-patches. - Debug("Write snapshot type %" PRIu8 "\n", static_cast(data.type)); + Debug("Write snapshot type %d\n", static_cast(data.type)); written_total += WriteArithmetic(static_cast(data.type)); Debug("Write Node.js version %s\n", data.node_version.c_str()); written_total += WriteString(data.node_version); diff --git a/test/sequential/test-single-executable-application-empty.js b/test/sequential/test-single-executable-application-empty.js new file mode 100644 index 00000000000000..961ae0018368cf --- /dev/null +++ b/test/sequential/test-single-executable-application-empty.js @@ -0,0 +1,44 @@ +'use strict'; + +require('../common'); + +const { + injectAndCodeSign, + skipIfSingleExecutableIsNotSupported, +} = require('../common/sea'); + +skipIfSingleExecutableIsNotSupported(); + +// This tests the creation of a single executable application with an empty +// script. + +const tmpdir = require('../common/tmpdir'); +const { copyFileSync, writeFileSync, existsSync } = require('fs'); +const { execFileSync } = require('child_process'); +const { join } = require('path'); +const assert = require('assert'); + +const configFile = join(tmpdir.path, 'sea-config.json'); +const seaPrepBlob = join(tmpdir.path, 'sea-prep.blob'); +const outputFile = join(tmpdir.path, process.platform === 'win32' ? 'sea.exe' : 'sea'); + +tmpdir.refresh(); + +writeFileSync(join(tmpdir.path, 'empty.js'), '', 'utf-8'); +writeFileSync(configFile, ` +{ + "main": "empty.js", + "output": "sea-prep.blob" +} +`); + +execFileSync(process.execPath, ['--experimental-sea-config', 'sea-config.json'], { + cwd: tmpdir.path +}); + +assert(existsSync(seaPrepBlob)); + +copyFileSync(process.execPath, outputFile); +injectAndCodeSign(outputFile, seaPrepBlob); + +execFileSync(outputFile);