diff --git a/doc/contributing/maintaining-single-executable-application-support.md b/doc/contributing/maintaining-single-executable-application-support.md new file mode 100644 index 00000000000000..388198bf0c7952 --- /dev/null +++ b/doc/contributing/maintaining-single-executable-application-support.md @@ -0,0 +1,118 @@ +# Maintaining Single Executable Applications support + +Support for [single executable applications](https://github.com/nodejs/node/blob/master/doc/contributing/technical-priorities.md#single-executable-applications) +is one of the key technical priorities identified for the success of Node.js. + +## High level strategy + +From the [next-10 discussions](https://github.com/nodejs/next-10/blob/main/meetings/summit-nov-2021.md#single-executable-applications) +there are 2 approaches the project believes are important to support: + +* Compile with Node.js into executable (`boxnode` approach). +* Bundle into existing Node.js executable (`pkg` approach). + +### Compile with node into executable + +No additional code within the Node.js project is needed to support the +option of compiling a bundled application along with Node.js into a single +executable application. + +### Bundle into existing Node.js executable + +The project does not plan to provide the complete solution but instead the key +elements which are required in the Node.js executable in order to enable +bundling with the pre-built Node.js binaries. This includes: + +* Looking for a segment within the executable that holds bundled code. +* Running the bundled code when such a segment is found. + +It is left up to external tools/solutions to: + +* Bundle code into a single script that can be executed with `-e` on + the command line. +* Generate a command line with appropriate options, including `-e` to + run the bundled script. +* Add a segment to an existing Node.js executable which contains + the command line and appropriate headers. 
+* Re-generate or remove signatures on the resulting executable. +* Provide a virtual file system, and hook it in if needed to + support native modules or reading file contents. + +## Maintaining + +### Compile with node into executable + +The approach of compiling with node into an executable requires that we +maintain a stable [embedder API](https://nodejs.org/dist/latest/docs/api/embedding.html). + +### Bundle into existing Node.js executable + +The following header must be included in a segment in order to have it run +as a single executable application: + +NODEJSSEAVVVVVVVVFFFFFFFFAAAAAAAA + +where: + +* `VVVVVVVV` represents the version to be used to interpret the section, + for example `00000001`. +* `FFFFFFFF` represents the flags to be used in the process of starting + the bundled application. Currently this must be `00000000` to indicate that + no flags are set or `00000001` to indicate that a binary block + of data is included in addition to the arguments. +* `AAAAAAAA` is the number of arguments being provided. + +The characters in each of `VVVVVVVV`, `FFFFFFFF` and `AAAAAAAA` are +restricted to being hexadecimal characters (`0` through `9` and +`A` through `F`) that form a 32-bit, big-endian integer. + +Following the header are `AAAAAAAA` strings, each terminated by 0x00, +one for each of the parameters passed. These parameters are treated +as a set of command line options that are used as a prefix to any +additional command line options passed when the executable is started. +For example, a simple hello world application for version `00000001` could be: + +```text +NODEJSSEA000000010000000000000002-e\0console.log('Hello from single binary')\0 +``` + +If the flags are set to `00000001` then there must be at least one character +of binary data following the argument strings and a pointer to this +data will be exposed through `process.seaBinaryData`. + +Support for bundling into existing Node.js binaries is maintained +in `src/node_single_executable_application.*`. 
+ +Currently only POSIX-compliant platforms are supported. The goal +is to expand this to include Windows and macOS as well. + +If a breaking change to the content after the header is required, the version +`VVVVVVVV` should be incremented. Support for a new format +may be introduced as a semver-minor provided that older versions +are still supported. Removing support for a version is semver-major. + +The `FFFFFFFF` is a set of flags that is used to control the +process of starting the application. For example, they might indicate +that some set of arguments should be suppressed on the command line. +Currently the only flag in use is `00000001`, which indicates that +binary data follows the arguments. + +For test purposes [LIEF](https://github.com/lief-project/LIEF) can +be used to add a segment in the required format. The following is a +simple example for using LIEF on Linux. It can be improved as it +currently replaces an existing segment instead of adding a new +one: + +```text +#!/usr/bin/env python +import lief +binary = lief.parse('node') + +segment = lief.ELF.Segment() +segment.type = lief.ELF.SEGMENT_TYPES.LOAD +segment.flags = lief.ELF.SEGMENT_FLAGS.R +stringContent = "NODEJSSEA000000010000000000000002-e\0console.log('Hello from single binary')\0" +segment.content = bytearray(stringContent.encode()) +segment = binary.replace(segment, binary[lief.ELF.SEGMENT_TYPES.NOTE]) + +binary.write("hello") +``` diff --git a/lib/internal/main/single_executable_application.js b/lib/internal/main/single_executable_application.js new file mode 100644 index 00000000000000..f6f84f869e55ab --- /dev/null +++ b/lib/internal/main/single_executable_application.js @@ -0,0 +1,26 @@ +'use strict'; + +const { + prepareMainThreadExecution +} = require('internal/bootstrap/pre_execution'); + +const { getOptionValue } = require('internal/options'); + +const { + evalModule, + evalScript, + readStdin +} = require('internal/process/execution'); + +prepareMainThreadExecution(); +markBootstrapComplete(); + +const source = getOptionValue('--eval'); +const print = 
getOptionValue('--print'); +if (getOptionValue('--input-type') === 'module') + evalModule(source, print); +else + evalScript('[eval]', + source, + getOptionValue('--inspect-brk'), + print); diff --git a/node.gyp b/node.gyp index d063a6408072ac..62dff272045c94 100644 --- a/node.gyp +++ b/node.gyp @@ -523,6 +523,7 @@ 'src/node_report_utils.cc', 'src/node_serdes.cc', 'src/node_shadow_realm.cc', + 'src/node_single_executable_application.cc', 'src/node_snapshotable.cc', 'src/node_sockaddr.cc', 'src/node_stat_watcher.cc', diff --git a/src/node.cc b/src/node.cc index 78e93c74d3c3c4..3cdcecc1d1a36b 100644 --- a/src/node.cc +++ b/src/node.cc @@ -38,6 +38,7 @@ #include "node_process-inl.h" #include "node_report.h" #include "node_revert.h" +#include "node_single_executable_application.h" #include "node_snapshot_builder.h" #include "node_v8_platform-inl.h" #include "node_version.h" @@ -160,6 +161,9 @@ PVOID old_vectored_exception_handler; // node_v8_platform-inl.h struct V8Platform v8_platform; + +bool single_executable_application = false; +char* sea_binary_data = nullptr; } // namespace per_process // The section in the OpenSSL configuration file to be loaded. 
@@ -519,6 +523,21 @@ MaybeLocal StartExecution(Environment* env, StartExecutionCallback cb) { return StartExecution(env, "internal/main/prof_process"); } + if (env->options()->has_eval_string && + per_process::single_executable_application) { + if (per_process::sea_binary_data != nullptr) { + Isolate* isolate = env->isolate(); + Local context = env->context(); + READONLY_PROPERTY( + env->process_object(), + "seaBinaryData", + ToV8Value(context, + reinterpret_cast(per_process::sea_binary_data)) + .ToLocalChecked()); + } + return StartExecution(env, "internal/main/single_executable_application"); + } + // -e/--eval without -i/--interactive if (env->options()->has_eval_string && !env->options()->force_repl) { return StartExecution(env, "internal/main/eval_string"); @@ -1026,7 +1045,12 @@ InitializationResult InitializeOncePerProcess( argv = uv_setup_args(argc, argv); InitializationResult result; - result.args = std::vector(argv, argv + argc); + if (single_executable_application::CheckForSEA( + argc, argv, &(result.args), &per_process::sea_binary_data)) { + per_process::single_executable_application = true; + } else { + result.args = std::vector(argv, argv + argc); + } std::vector errors; // This needs to run *before* V8::Initialize(). 
diff --git a/src/node_single_executable_application.cc b/src/node_single_executable_application.cc
new file mode 100644
index 00000000000000..b9bb819487552c
--- /dev/null
+++ b/src/node_single_executable_application.cc
@@ -0,0 +1,248 @@
+#include "node_single_executable_application.h"
+
+#if defined(__POSIX__) && !defined(_AIX) && !defined(__APPLE__)
+#include <elf.h>
+#include <link.h>
+#endif  // defined(__POSIX__) && !defined(_AIX) && !defined(__APPLE__)
+
+#include <charconv>
+#include <climits>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <string>
+#include <string_view>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+#include "env-inl.h"
+
+namespace node {
+namespace single_executable_application {
+
+// Fuse string: a 33 character sentinel followed by a two character state.
+// CheckFuse() only reports true when the state reads "01". The literal below
+// carries "00", so it has to be changed in the built executable (presumably
+// by the bundling tool - TODO confirm) before SEA data is looked for.
+const char* sea_fuse = "AE249F4D38193B9BEFE654DF3AFD7065:00";
+constexpr int kFuseSentinelLength = 33;
+constexpr int kFuseStateLength = 2;
+
+// Fixed-size header layout: magic, version, flags, argument count.
+constexpr const char* magic_header = "NODEJSSEA";
+constexpr const char* version_chars = "00000001";
+constexpr int kMagicLength =
+    static_cast<int>(std::char_traits<char>::length(magic_header));
+constexpr int kVersionLength =
+    static_cast<int>(std::char_traits<char>::length(version_chars));
+constexpr int kFlagsLength = 8;
+constexpr int kArgcLength = 8;
+constexpr int version_offset = kMagicLength;
+constexpr int flags_offset = version_offset + kVersionLength;
+constexpr int argc_offset = flags_offset + kFlagsLength;
+constexpr int kHeaderLength = argc_offset + kArgcLength;
+constexpr uint32_t kBinaryDataIncludedFlag = 0x1;
+
+#if defined(__POSIX__) && !defined(_AIX) && !defined(__APPLE__)
+// dl_iterate_phdr() callback. |data| points at a char* that receives the
+// address of the first readable PT_LOAD segment starting with the magic
+// header. Returns non-zero on a match so that the iteration stops there.
+static int callback(struct dl_phdr_info* info, size_t size, void* data) {
+  for (int index = 0; index < info->dlpi_phnum; index++) {
+    const auto& phdr = info->dlpi_phdr[index];
+    // Skip anything that is not a readable, loaded segment large enough to
+    // hold the fixed-size header; comparing against it would be unsafe.
+    if (phdr.p_type != PT_LOAD || (phdr.p_flags & PF_R) == 0 ||
+        phdr.p_memsz < static_cast<uint64_t>(kHeaderLength)) {
+      continue;
+    }
+    char* content = reinterpret_cast<char*>(info->dlpi_addr + phdr.p_vaddr);
+    if (std::strncmp(magic_header, content, kMagicLength) == 0) {
+      *(static_cast<char**>(data)) = content;
+      return 1;
+    }
+  }
+  return 0;
+}
+#endif  // defined(__POSIX__) && !defined(_AIX) && !defined(__APPLE__)
+
+// Reports whether the fuse state that follows the sentinel reads "01".
+bool CheckFuse(void) {
+  return std::strncmp(
+             sea_fuse + kFuseSentinelLength, "01", kFuseStateLength) == 0;
+}
+
+// from Jesec's version
+// 4096 chars should be more than enough to deal with
+// header + node options + script size
+// but definitely not elegant to have this limit
+constexpr int kSeaBufSize = 4096;
+char sea_buf[kSeaBufSize];
+
+// Returns the path reported by uv_exepath(), or an empty string on failure.
+std::string GetExecutablePath() {
+  char exec_path_buf[2 * PATH_MAX];
+  size_t exec_path_len = sizeof(exec_path_buf);
+
+  if (uv_exepath(exec_path_buf, &exec_path_len) == 0) {
+    return std::string(exec_path_buf, exec_path_len);
+  }
+
+  return "";
+}
+
+// Scans the executable file for the magic header immediately followed by
+// version_chars. On a match, up to kSeaBufSize - 1 bytes starting at the
+// match are copied into sea_buf (which therefore always ends in a NUL byte)
+// and sea_buf is returned. Returns nullptr when nothing is found.
+char* SearchExecutableForSEAData() {
+  const std::string exec = GetExecutablePath();
+  if (exec.empty()) {
+    return nullptr;
+  }
+
+  // The file is searched as raw bytes, hence binary mode.
+  std::ifstream f(exec, std::ios::in | std::ios::binary);
+  if (!f.is_open() || !f.good()) {
+    return nullptr;
+  }
+
+  std::string needle(magic_header);
+  needle += version_chars;
+
+  constexpr size_t kWindowSize = 1 << 20;
+  std::vector<char> buf(kWindowSize);
+  size_t carried = 0;             // bytes kept from the previous window
+  std::streamoff window_pos = 0;  // file offset of buf[0]
+
+  for (;;) {
+    f.read(buf.data() + carried, buf.size() - carried);
+    // Only search bytes that were actually read, never stale buffer content.
+    const size_t valid = carried + static_cast<size_t>(f.gcount());
+    const size_t hit = std::string_view(buf.data(), valid).find(needle);
+    if (hit != std::string_view::npos) {
+      // A short read sets eofbit/failbit; clear them so seekg() works.
+      f.clear();
+      f.seekg(window_pos + static_cast<std::streamoff>(hit), std::ios::beg);
+      std::memset(sea_buf, 0, sizeof(sea_buf));
+      f.read(sea_buf, kSeaBufSize - 1);
+      return sea_buf;
+    }
+
+    if (!f) {
+      // End of file (or a read error): nothing left to scan.
+      return nullptr;
+    }
+
+    // Keep the tail so that a needle straddling two windows is still found.
+    carried = needle.size() - 1;
+    std::memmove(buf.data(), buf.data() + valid - carried, carried);
+    window_pos += static_cast<std::streamoff>(valid - carried);
+  }
+}
+
+// Looks for SEA data in the segments loaded into the process (where
+// dl_iterate_phdr() is available) and falls back to scanning the executable
+// file. Returns nullptr when neither finds any.
+char* GetSEAData() {
+  char* single_executable_data = nullptr;
+#if defined(__POSIX__) && !defined(_AIX) && !defined(__APPLE__)
+  dl_iterate_phdr(callback, static_cast<void*>(&single_executable_data));
+#endif  // defined(__POSIX__) && !defined(_AIX) && !defined(__APPLE__)
+
+  if (single_executable_data == nullptr) {
+    // no special segment so read binary instead
+    single_executable_data = SearchExecutableForSEAData();
+  }
+
+  return single_executable_data;
+}
+
+// Parses exactly |length| hexadecimal characters at |field| into |*value|.
+// Returns false, instead of throwing like std::stoi(), on malformed input.
+static bool ParseHexField(const char* field, int length, uint32_t* value) {
+  const char* end = field + length;
+  const std::from_chars_result result =
+      std::from_chars(field, end, *value, 16);
+  return result.ec == std::errc() && result.ptr == end;
+}
+
+// Returns true and appends argv[0], the embedded arguments and argv[1..argc)
+// to |new_argv| when the fuse is set and a well-formed version
+// |version_chars| header is found. |*sea_binary_data| is set when the header
+// flags announce binary data. On failure both outputs are left untouched.
+bool CheckForSEA(int argc,
+                 char** argv,
+                 std::vector<std::string>* new_argv,
+                 char** sea_binary_data) {
+  if (!CheckFuse()) {
+    return false;
+  }
+
+  char* single_executable_data = GetSEAData();
+  if (single_executable_data == nullptr) {
+    return false;
+  }
+
+  // Only the header layout identified by version_chars is understood.
+  if (std::strncmp(single_executable_data + version_offset,
+                   version_chars,
+                   kVersionLength) != 0) {
+    return false;
+  }
+
+  // get the flags and the new arguments info
+  uint32_t flags = 0;
+  uint32_t argument_count = 0;
+  if (!ParseHexField(
+          single_executable_data + flags_offset, kFlagsLength, &flags) ||
+      !ParseHexField(single_executable_data + argc_offset,
+                     kArgcLength,
+                     &argument_count)) {
+    return false;
+  }
+
+  // Data copied out of the executable file lives in sea_buf, whose extent is
+  // known and whose last byte is always NUL, so it can be bounds checked.
+  // TODO(mhdawson): add check that we don't overrun the segment
+  const char* limit = single_executable_data == sea_buf
+                          ? sea_buf + kSeaBufSize
+                          : nullptr;
+
+  // copy over the first argument which needs to stay in place
+  std::vector<std::string> args;
+  args.emplace_back(argv[0]);
+
+  // copy over the new arguments
+  char* arguments = single_executable_data + kHeaderLength;
+  for (uint32_t i = 0; i < argument_count; i++) {
+    if (limit != nullptr && arguments >= limit) {
+      return false;
+    }
+    args.emplace_back(arguments);
+    arguments += std::strlen(arguments) + 1;
+  }
+
+  // remaining data after arguments is binary data
+  // that can be used by the single executable application.
+  if (flags & kBinaryDataIncludedFlag) {
+    if (limit != nullptr && arguments >= limit) {
+      return false;
+    }
+    *sea_binary_data = arguments;
+  }
+
+  // copy over the arguments passed when the executable was started
+  for (int i = 1; i < argc; i++) {
+    args.emplace_back(argv[i]);
+  }
+
+  for (std::string& arg : args) {
+    new_argv->push_back(std::move(arg));
+  }
+  return true;
+}
+
+}  // namespace single_executable_application
+}  // namespace node
diff --git a/src/node_single_executable_application.h b/src/node_single_executable_application.h
new file mode 100644
index 00000000000000..9d7e8de7d84216
--- /dev/null
+++ b/src/node_single_executable_application.h
@@ -0,0 +1,26 @@
+#ifndef SRC_NODE_SINGLE_EXECUTABLE_APPLICATION_H_
+#define SRC_NODE_SINGLE_EXECUTABLE_APPLICATION_H_
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include <string>
+#include <vector>
+
+namespace node {
+namespace single_executable_application {
+
+// Checks whether this executable carries single executable application data.
+// If so, appends argv[0], the embedded arguments and then argv[1..argc) to
+// new_argv, points *sea_binary_data at the embedded binary data when the
+// header flags announce it, and returns true. Otherwise returns false.
+bool CheckForSEA(int argc,
+                 char** argv,
+                 std::vector<std::string>* new_argv,
+                 char** sea_binary_data);
+
+}  // namespace single_executable_application
+}  // namespace node
+
+#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif  // SRC_NODE_SINGLE_EXECUTABLE_APPLICATION_H_