From ff1644d4c41c7315b1105f76552859d85767159e Mon Sep 17 00:00:00 2001 From: Gilad S Date: Thu, 26 Oct 2023 01:45:51 +0300 Subject: [PATCH] fix: bugs (#80) * feat: adapt to the latest `llama.cpp` interface * feat: print helpful information to help resolve a clone issue when it happens * feat: print helpful information to help resolve build issues related to CUDA * feat: make portable cmake on Windows more stable * feat: update `CMakeLists.txt` to match `llama.cpp` better * fix: do not download redundant node headers * fix: improve cmake custom options handling * fix: do not set `CMAKE_GENERATOR_TOOLSET` for CUDA * fix: do not fetch information from GitHub when using a local git bundle * fix: GBNF JSON schema string const formatting * docs: document a solution to a compilation problem on macOS * docs: document more CUDA build error solutions * docs: explain about ES modules in the getting started guide * chore: update `.commitlintrc.json` * chore: remove redundant dependency --- .commitlintrc.json | 5 +- .github/workflows/build.yml | 3 + docs/guide/CUDA.md | 33 +++++++-- docs/guide/building-from-source.md | 2 + docs/guide/cli/build.md | 4 + docs/guide/cli/download.md | 4 + docs/guide/index.md | 5 ++ llama/CMakeLists.txt | 4 +- llama/addon.cpp | 29 +++----- llama/package.json | 5 ++ package.json | 1 - src/cli/commands/DownloadCommand.ts | 74 +++++++++++-------- src/config.ts | 5 ++ src/utils/cloneLlamaCppRepo.ts | 33 +++++++-- src/utils/cmake.ts | 6 ++ src/utils/compileLLamaCpp.ts | 56 ++++++++------ .../gbnfJson/terminals/GbnfStringValue.ts | 6 +- .../llamaEvaluator/LlamaGrammar.test.ts | 32 ++++---- vitest.config.ts | 7 ++ 19 files changed, 207 insertions(+), 107 deletions(-) create mode 100644 llama/package.json create mode 100644 vitest.config.ts diff --git a/.commitlintrc.json b/.commitlintrc.json index f4fbb7dd..6bb21b39 100644 --- a/.commitlintrc.json +++ b/.commitlintrc.json @@ -1,3 +1,6 @@ { - "extends": ["@commitlint/config-conventional"] + "extends": 
["@commitlint/config-conventional"], + "rules": { + "subject-case": [0, "never"] + } } diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1036e533..750e7166 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -231,6 +231,9 @@ jobs: - name: Install modules run: npm ci + - name: Build binary + run: node ./dist/cli/cli.js build + - name: Run standalone tests run: npm run test:standalone diff --git a/docs/guide/CUDA.md b/docs/guide/CUDA.md index 48589952..7cba62f8 100644 --- a/docs/guide/CUDA.md +++ b/docs/guide/CUDA.md @@ -23,18 +23,39 @@ To build `node-llama-cpp` with any of these options, set an environment variable ### Fix the `Failed to detect a default CUDA architecture` build error To fix this issue you have to set the `CUDACXX` environment variable to the path of the `nvcc` compiler. -For example, if you installed CUDA Toolkit 12.2 on Windows, you have to run the following command: -```bash -set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe +For example, if you have installed CUDA Toolkit 12.2, you have to run a command like this: +::: code-group +```bash [Linux] +export CUDACXX=/usr/local/cuda-12.2/bin/nvcc ``` -On Linux, it would be something like this: -```bash -export CUDACXX=/usr/local/cuda-12.2/bin/nvcc +```bash [Windows] +set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe ``` +::: Then run the build command again to check whether setting the `CUDACXX` environment variable fixed the issue. +### Fix the `The CUDA compiler identification is unknown` build error +The solution to this error is the same as [the solution to the `Failed to detect a default CUDA architecture` error](#fix-the-failed-to-detect-a-default-cuda-architecture-build-error). 
+ +### Fix the `A single input file is required for a non-link phase when an outputfile is specified` build error +To fix this issue you have to set the `CMAKE_GENERATOR_TOOLSET` cmake option to the CUDA home directory, usually already set as the `CUDA_PATH` environment variable. + +To do this, set the `NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET` environment variable to the path of your CUDA home directory: + +::: code-group +```bash [Linux] +export NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=$CUDA_PATH +``` + +```bash [Windows] +set NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=%CUDA_PATH% +``` +::: + +Then run the build command again to check whether setting the `CMAKE_GENERATOR_TOOLSET` cmake option fixed the issue. + ## Using `node-llama-cpp` with CUDA After you build `node-llama-cpp` with CUDA support, you can use it normally. diff --git a/docs/guide/building-from-source.md b/docs/guide/building-from-source.md index 81817d57..2f62f92e 100644 --- a/docs/guide/building-from-source.md +++ b/docs/guide/building-from-source.md @@ -27,6 +27,8 @@ If `cmake` is not installed on your machine, `node-llama-cpp` will automatically If the build fails, make sure you have the required dependencies of `cmake` installed on your machine. More info is available [here](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake) (you don't have to install `cmake` or `cmake-js`, just the dependencies). +If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools. 
+ ::: ## `download` and `build` commands diff --git a/docs/guide/cli/build.md b/docs/guide/cli/build.md index f31433bf..e64276a0 100644 --- a/docs/guide/cli/build.md +++ b/docs/guide/cli/build.md @@ -10,6 +10,10 @@ const commandDoc = docs.build; {{commandDoc.description}} +::: info +If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools. +::: + ## Usage ```shell-vue {{commandDoc.usage}} diff --git a/docs/guide/cli/download.md b/docs/guide/cli/download.md index 1f71afb2..2a37316e 100644 --- a/docs/guide/cli/download.md +++ b/docs/guide/cli/download.md @@ -20,6 +20,10 @@ This is useful for building from source on machines that aren't connected to the ::: +::: info +If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools. +::: + ## Usage ```shell-vue {{commandDoc.usage}} diff --git a/docs/guide/index.md b/docs/guide/index.md index 8cba284a..13e9151e 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -14,6 +14,11 @@ npm install --save node-llama-cpp > If binaries are not available for your platform, it'll fallback to download a release of `llama.cpp` and build it from source with `cmake`. > To disable this behavior, set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNLOAD` to `true`. +## ESM usage +`node-llama-cpp` is an [ES module](https://nodejs.org/api/esm.html#modules-ecmascript-modules), so you can only use `import` to load it and cannot use `require`. + +To use it in your project, make sure your `package.json` file has `"type": "module"` in it. + ## CUDA and Metal support **Metal:** Metal support is enabled by default on macOS. If you're using a Mac with an Intel chip, [you might want to disable it](./Metal.md). 
diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt index aec7785d..8991cbbe 100644 --- a/llama/CMakeLists.txt +++ b/llama/CMakeLists.txt @@ -1,6 +1,6 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.13) -project ("llama-addon") +project("llama-addon" C CXX) if (MSVC) # add_compile_options(/EHsc) diff --git a/llama/addon.cpp b/llama/addon.cpp index 6a192799..552d42cc 100644 --- a/llama/addon.cpp +++ b/llama/addon.cpp @@ -208,13 +208,13 @@ class LLAMAContext : public Napi::ObjectWrap { return Napi::String::New(info.Env(), ss.str()); } Napi::Value TokenBos(const Napi::CallbackInfo& info) { - return Napi::Number::From(info.Env(), llama_token_bos(ctx)); + return Napi::Number::From(info.Env(), llama_token_bos(model->model)); // TODO: move this to the model } Napi::Value TokenEos(const Napi::CallbackInfo& info) { - return Napi::Number::From(info.Env(), llama_token_eos(ctx)); + return Napi::Number::From(info.Env(), llama_token_eos(model->model)); // TODO: move this to the model } Napi::Value TokenNl(const Napi::CallbackInfo& info) { - return Napi::Number::From(info.Env(), llama_token_nl(ctx)); + return Napi::Number::From(info.Env(), llama_token_nl(model->model)); // TODO: move this to the model } Napi::Value GetContextSize(const Napi::CallbackInfo& info) { return Napi::Number::From(info.Env(), llama_n_ctx(ctx)); @@ -223,7 +223,7 @@ class LLAMAContext : public Napi::ObjectWrap { int token = info[0].As().Int32Value(); std::stringstream ss; - const char* str = llama_token_get_text(ctx, token); + const char* str = llama_token_get_text(model->model, token); // TODO: move this to the model if (str == nullptr) { return info.Env().Undefined(); } @@ -336,18 +336,14 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred { protected: void Execute() { - llama_batch batch = llama_batch_init(tokens.size(), 0); + llama_batch batch = llama_batch_init(tokens.size(), 0, 1); - batch.n_tokens = tokens.size(); - - for (int32_t i = 0; i < 
batch.n_tokens; i++) { - batch.token[i] = tokens[i]; - batch.pos[i] = ctx->n_cur; - batch.seq_id[i] = 0; - batch.logits[i] = false; + for (size_t i = 0; i < tokens.size(); i++) { + llama_batch_add(batch, tokens[i], ctx->n_cur, { 0 }, false); ctx->n_cur++; } + GGML_ASSERT(batch.n_tokens == (int) tokens.size()); batch.logits[batch.n_tokens - 1] = true; @@ -381,14 +377,11 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred { llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false }; - auto eos_token = llama_token_eos(ctx->ctx); + auto eos_token = llama_token_eos(ctx->model->model); if (use_repeat_penalty && !repeat_penalty_tokens.empty()) { - llama_sample_repetition_penalty( - ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty - ); - llama_sample_frequency_and_presence_penalties( - ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), + llama_sample_repetition_penalties( + ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty, repeat_penalty_frequency_penalty, repeat_penalty_presence_penalty ); } diff --git a/llama/package.json b/llama/package.json new file mode 100644 index 00000000..857c5087 --- /dev/null +++ b/llama/package.json @@ -0,0 +1,5 @@ +{ + "binary": { + "napi_versions": [7] + } +} diff --git a/package.json b/package.json index 7c920cfe..bd578aaa 100644 --- a/package.json +++ b/package.json @@ -100,7 +100,6 @@ "@commitlint/cli": "^17.7.1", "@commitlint/config-conventional": "^17.7.0", "@semantic-release/exec": "^6.0.3", - "@types/bytes": "^3.1.1", "@types/cli-progress": "^3.11.0", "@types/cross-spawn": "^6.0.2", "@types/fs-extra": "^11.0.1", diff --git a/src/cli/commands/DownloadCommand.ts b/src/cli/commands/DownloadCommand.ts index 831feb4f..0c785020 100644 --- a/src/cli/commands/DownloadCommand.ts +++ b/src/cli/commands/DownloadCommand.ts @@ -13,7 +13,10 @@ import 
{setBinariesGithubRelease} from "../../utils/binariesGithubRelease.js"; import {downloadCmakeIfNeeded} from "../../utils/cmake.js"; import withStatusLogs from "../../utils/withStatusLogs.js"; import {getIsInDocumentationMode} from "../../state.js"; -import {unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle} from "../../utils/gitReleaseBundles.js"; +import { + getGitBundlePathForRelease, + unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle +} from "../../utils/gitReleaseBundles.js"; import {cloneLlamaCppRepo} from "../../utils/cloneLlamaCppRepo.js"; type DownloadCommandArgs = { @@ -91,6 +94,7 @@ export const DownloadCommand: CommandModule = { export async function DownloadLlamaCppCommand({ repo, release, arch, nodeTarget, metal, cuda, skipBuild, noBundle, updateBinariesReleaseMetadataAndSaveGitBundle }: DownloadCommandArgs) { + const useBundle = noBundle != true; const octokit = new Octokit(); const [githubOwner, githubRepo] = repo.split("/"); @@ -110,37 +114,45 @@ export async function DownloadLlamaCppCommand({ type GithubReleaseType = Awaited> | Awaited>; - let githubRelease: GithubReleaseType | null = null; - await withOra({ - loading: chalk.blue("Fetching llama.cpp info"), - success: chalk.blue("Fetched llama.cpp info"), - fail: chalk.blue("Failed to fetch llama.cpp info") - }, async () => { - try { - if (release === "latest") { - githubRelease = await octokit.rest.repos.getLatestRelease({ - owner: githubOwner, - repo: githubRepo - }); - } else { - githubRelease = await octokit.rest.repos.getReleaseByTag({ - owner: githubOwner, - repo: githubRepo, - tag: release - }); + let githubReleaseTag: string | null = (useBundle && (await getGitBundlePathForRelease(githubOwner, githubRepo, release)) != null) + ? 
release + : null; + + if (githubReleaseTag == null) + await withOra({ + loading: chalk.blue("Fetching llama.cpp info"), + success: chalk.blue("Fetched llama.cpp info"), + fail: chalk.blue("Failed to fetch llama.cpp info") + }, async () => { + let githubRelease: GithubReleaseType | null = null; + + try { + if (release === "latest") { + githubRelease = await octokit.rest.repos.getLatestRelease({ + owner: githubOwner, + repo: githubRepo + }); + } else { + githubRelease = await octokit.rest.repos.getReleaseByTag({ + owner: githubOwner, + repo: githubRepo, + tag: release + }); + } + } catch (err) { + console.error("Failed to fetch llama.cpp release info", err); } - } catch (err) { - console.error("Failed to fetch llama.cpp release info", err); - } - if (githubRelease == null) { - throw new Error(`Failed to find release "${release}" of "${repo}"`); - } + if (githubRelease == null) { + throw new Error(`Failed to find release "${release}" of "${repo}"`); + } - if (githubRelease!.data.tag_name == null) { - throw new Error(`Failed to find tag of release "${release}" of "${repo}"`); - } - }); + if (githubRelease.data.tag_name == null) { + throw new Error(`Failed to find tag of release "${release}" of "${repo}"`); + } + + githubReleaseTag = githubRelease.data.tag_name; + }); await clearTempFolder(); @@ -153,7 +165,7 @@ export async function DownloadLlamaCppCommand({ }); console.log(chalk.blue("Cloning llama.cpp")); - await cloneLlamaCppRepo(githubOwner, githubRepo, githubRelease!.data.tag_name, noBundle != true); + await cloneLlamaCppRepo(githubOwner, githubRepo, githubReleaseTag!, useBundle); if (!skipBuild) { await downloadCmakeIfNeeded(true); @@ -174,7 +186,7 @@ export async function DownloadLlamaCppCommand({ } if (isCI && updateBinariesReleaseMetadataAndSaveGitBundle) { - await setBinariesGithubRelease(githubRelease!.data.tag_name); + await setBinariesGithubRelease(githubReleaseTag!); await unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle(); } diff --git 
a/src/config.ts b/src/config.ts index 4710a193..135db6bc 100644 --- a/src/config.ts +++ b/src/config.ts @@ -57,3 +57,8 @@ export const defaultChatSystemPrompt = "You are a helpful, respectful and honest "If you don't know the answer to a question, please don't share false information."; export const cliBinName = "node-llama-cpp"; export const npxRunPrefix = "npx --no "; + +const documentationUrl = "https://withcatai.github.io/node-llama-cpp"; +export const documentationPageUrls = { + CUDA: documentationUrl + "/guide/CUDA" +} as const; diff --git a/src/utils/cloneLlamaCppRepo.ts b/src/utils/cloneLlamaCppRepo.ts index 5ae0b54e..d881ee44 100644 --- a/src/utils/cloneLlamaCppRepo.ts +++ b/src/utils/cloneLlamaCppRepo.ts @@ -55,14 +55,35 @@ export async function cloneLlamaCppRepo(githubOwner: string, githubRepo: string, } catch (err) { await fs.remove(llamaCppDirectory); console.error("Failed to clone git bundle, cloning from GitHub instead", err); + + printCloneErrorHelp(String(err)); } } - await withGitCloneProgress("GitHub", async (gitWithCloneProgress) => { - await gitWithCloneProgress.clone(remoteGitUrl, llamaCppDirectory, { - "--depth": 1, - "--branch": tag, - "--quiet": null + try { + await withGitCloneProgress("GitHub", async (gitWithCloneProgress) => { + await gitWithCloneProgress.clone(remoteGitUrl, llamaCppDirectory, { + "--depth": 1, + "--branch": tag, + "--quiet": null + }); }); - }); + } catch (err) { + printCloneErrorHelp(String(err)); + + throw err; + } +} + +function printCloneErrorHelp(error: string) { + // This error happens with some docker images where the current user is different + // from the owner of the files due to mounting a volume. + // In such cases, print a helpful message to help the user resolve the issue. 
+ if (error.toLowerCase().includes("detected dubious ownership in repository")) + console.info("\n" + + chalk.grey("[node-llama-cpp]") + chalk.yellow(" To fix this issue, try running this command to fix it for the current module directory:") + "\n" + + 'git config --global --add safe.directory "' + llamaCppDirectory + '"\n\n' + + chalk.yellow("Or run this command to fix it everywhere:") + "\n" + + 'git config --global --add safe.directory "*"' + ); } diff --git a/src/utils/cmake.ts b/src/utils/cmake.ts index 92875e2d..08cfff51 100644 --- a/src/utils/cmake.ts +++ b/src/utils/cmake.ts @@ -35,6 +35,12 @@ export async function getCmakePath() { if (resolvedPath.toLowerCase().endsWith(".cmd")) resolvedPath = (await getBinFromWindowCmd(resolvedPath, "cmake.exe")) ?? ""; + else if (resolvedPath.toLowerCase().endsWith(".ps1")) { + const cmdFilePath = resolvedPath.slice(0, -".ps1".length) + ".cmd"; + + if (await fs.pathExists(cmdFilePath)) + resolvedPath = (await getBinFromWindowCmd(cmdFilePath, "cmake.exe")) ?? 
""; + } if (resolvedPath !== "") return resolvedPath; diff --git a/src/utils/compileLLamaCpp.ts b/src/utils/compileLLamaCpp.ts index b186e69f..90e53494 100644 --- a/src/utils/compileLLamaCpp.ts +++ b/src/utils/compileLLamaCpp.ts @@ -2,7 +2,10 @@ import path from "path"; import {fileURLToPath} from "url"; import process from "process"; import fs from "fs-extra"; -import {customCmakeOptionsEnvVarPrefix, llamaCppDirectory, llamaDirectory, llamaToolchainsDirectory} from "../config.js"; +import chalk from "chalk"; +import { + customCmakeOptionsEnvVarPrefix, documentationPageUrls, llamaCppDirectory, llamaDirectory, llamaToolchainsDirectory +} from "../config.js"; import {clearLlamaBuild} from "./clearLlamaBuild.js"; import {setUsedBinFlag} from "./usedBinFlag.js"; import {spawnCommand} from "./spawnCommand.js"; @@ -24,35 +27,33 @@ export async function compileLlamaCpp({ const cmakePathArgs = await getCmakePathArgs(); const toolchainFile = await getToolchainFileForArch(arch); const runtimeVersion = nodeTarget.startsWith("v") ? 
nodeTarget.slice("v".length) : nodeTarget; - const cmakeCustomOptions = []; - - if ((metal && process.platform === "darwin") || process.env.LLAMA_METAL === "1") cmakeCustomOptions.push("LLAMA_METAL=1"); - else cmakeCustomOptions.push("LLAMA_METAL=OFF"); - - if (cuda || process.env.LLAMA_CUBLAS === "1") cmakeCustomOptions.push("LLAMA_CUBLAS=1"); - if (cuda && process.env.CUDA_PATH != null && await fs.pathExists(process.env.CUDA_PATH)) - cmakeCustomOptions.push("CMAKE_GENERATOR_TOOLSET=" + process.env.CUDA_PATH); - - if (process.env.LLAMA_MPI === "1") cmakeCustomOptions.push("LLAMA_MPI=1"); - if (process.env.LLAMA_OPENBLAS === "1") cmakeCustomOptions.push("LLAMA_OPENBLAS=1"); - if (process.env.LLAMA_BLAS_VENDOR != null) cmakeCustomOptions.push("LLAMA_BLAS_VENDOR=" + process.env.LLAMA_BLAS_VENDOR); - if (process.env.LLAMA_CUDA_FORCE_DMMV != null) cmakeCustomOptions.push("LLAMA_CUDA_FORCE_DMMV=" + process.env.LLAMA_CUDA_FORCE_DMMV); - if (process.env.LLAMA_CUDA_DMMV_X != null) cmakeCustomOptions.push("LLAMA_CUDA_DMMV_X=" + process.env.LLAMA_CUDA_DMMV_X); - if (process.env.LLAMA_CUDA_MMV_Y != null) cmakeCustomOptions.push("LLAMA_CUDA_MMV_Y=" + process.env.LLAMA_CUDA_MMV_Y); - if (process.env.LLAMA_CUDA_F16 != null) cmakeCustomOptions.push("LLAMA_CUDA_F16=" + process.env.LLAMA_CUDA_F16); - if (process.env.LLAMA_CUDA_KQUANTS_ITER != null) cmakeCustomOptions.push("LLAMA_CUDA_KQUANTS_ITER=" + process.env.LLAMA_CUDA_KQUANTS_ITER); - if (process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE != null) cmakeCustomOptions.push("LLAMA_CUDA_PEER_MAX_BATCH_SIZE=" + process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE); - if (process.env.LLAMA_HIPBLAS === "1") cmakeCustomOptions.push("LLAMA_HIPBLAS=1"); - if (process.env.LLAMA_CLBLAST === "1") cmakeCustomOptions.push("LLAMA_CLBLAST=1"); + const cmakeCustomOptions = new Map(); + + if ((metal && process.platform === "darwin") || process.env.LLAMA_METAL === "1") cmakeCustomOptions.set("LLAMA_METAL", "1"); + else cmakeCustomOptions.set("LLAMA_METAL", "OFF"); 
+ + if (cuda || process.env.LLAMA_CUBLAS === "1") cmakeCustomOptions.set("LLAMA_CUBLAS", "1"); + + if (process.env.LLAMA_MPI === "1") cmakeCustomOptions.set("LLAMA_MPI", "1"); + if (process.env.LLAMA_OPENBLAS === "1") cmakeCustomOptions.set("LLAMA_OPENBLAS", "1"); + if (process.env.LLAMA_BLAS_VENDOR != null) cmakeCustomOptions.set("LLAMA_BLAS_VENDOR", process.env.LLAMA_BLAS_VENDOR); + if (process.env.LLAMA_CUDA_FORCE_DMMV != null) cmakeCustomOptions.set("LLAMA_CUDA_FORCE_DMMV", process.env.LLAMA_CUDA_FORCE_DMMV); + if (process.env.LLAMA_CUDA_DMMV_X != null) cmakeCustomOptions.set("LLAMA_CUDA_DMMV_X", process.env.LLAMA_CUDA_DMMV_X); + if (process.env.LLAMA_CUDA_MMV_Y != null) cmakeCustomOptions.set("LLAMA_CUDA_MMV_Y", process.env.LLAMA_CUDA_MMV_Y); + if (process.env.LLAMA_CUDA_F16 != null) cmakeCustomOptions.set("LLAMA_CUDA_F16", process.env.LLAMA_CUDA_F16); + if (process.env.LLAMA_CUDA_KQUANTS_ITER != null) cmakeCustomOptions.set("LLAMA_CUDA_KQUANTS_ITER", process.env.LLAMA_CUDA_KQUANTS_ITER); + if (process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE != null) cmakeCustomOptions.set("LLAMA_CUDA_PEER_MAX_BATCH_SIZE", process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE); + if (process.env.LLAMA_HIPBLAS === "1") cmakeCustomOptions.set("LLAMA_HIPBLAS", "1"); + if (process.env.LLAMA_CLBLAST === "1") cmakeCustomOptions.set("LLAMA_CLBLAST", "1"); if (toolchainFile != null) - cmakeCustomOptions.push("CMAKE_TOOLCHAIN_FILE=" + toolchainFile); + cmakeCustomOptions.set("CMAKE_TOOLCHAIN_FILE", toolchainFile); for (const key in process.env) { if (key.startsWith(customCmakeOptionsEnvVarPrefix)) { const option = key.slice(customCmakeOptionsEnvVarPrefix.length); const value = process.env[key]; - cmakeCustomOptions.push(`${option}=${value}`); + cmakeCustomOptions.set(option, value!); } } @@ -63,7 +64,7 @@ export async function compileLlamaCpp({ await spawnCommand( "npm", ["run", "-s", "cmake-js-llama", "--", "compile", "--log-level", "warn", "--arch=" + arch, "--runtime-version=" + runtimeVersion, 
...cmakePathArgs] - .concat(cmakeCustomOptions.map(option => "--CD" + option)), + .concat([...cmakeCustomOptions].map(([key, value]) => "--CD" + key + "=" + value)), __dirname ); @@ -89,6 +90,13 @@ export async function compileLlamaCpp({ if (setUsedBinFlagArg) await setUsedBinFlag("prebuiltBinaries"); + if (cuda) + console.info("\n" + + chalk.grey("[node-llama-cpp] ") + + chalk.yellow("To resolve errors related to CUDA compilation, see the CUDA guide: ") + + documentationPageUrls.CUDA + ); + throw err; } finally { await fixXpackPermissions(); diff --git a/src/utils/gbnfJson/terminals/GbnfStringValue.ts b/src/utils/gbnfJson/terminals/GbnfStringValue.ts index 8c674285..aeca0fdb 100644 --- a/src/utils/gbnfJson/terminals/GbnfStringValue.ts +++ b/src/utils/gbnfJson/terminals/GbnfStringValue.ts @@ -11,14 +11,16 @@ export class GbnfStringValue extends GbnfTerminal { override getGrammar(): string { return [ - "\"", + '"', + '\\"', this.value .replaceAll("\\", "\\\\") .replaceAll("\t", "\\t") .replaceAll("\r", "\\r") .replaceAll("\n", "\\n") .replaceAll('"', "\\\\" + '\\"'), - "\"" + '\\"', + '"' ].join(""); } } diff --git a/test/standalone/llamaEvaluator/LlamaGrammar.test.ts b/test/standalone/llamaEvaluator/LlamaGrammar.test.ts index 8cf9fdbc..dff715bc 100644 --- a/test/standalone/llamaEvaluator/LlamaGrammar.test.ts +++ b/test/standalone/llamaEvaluator/LlamaGrammar.test.ts @@ -104,7 +104,7 @@ describe("grammar for JSON schema", () => { }; expect(grammar.grammar).toMatchInlineSnapshot(` - "root ::= \\"{\\" whitespace-new-lines-rule \\"message\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"numberOfWordsInMessage\\" \\":\\" [ ]? integer-number-rule \\",\\" whitespace-new-lines-rule \\"feelingGoodPercentage\\" \\":\\" [ ]? fractional-number-rule \\",\\" whitespace-new-lines-rule \\"feelingGood\\" \\":\\" [ ]? boolean-rule \\",\\" whitespace-new-lines-rule \\"feelingOverall\\" \\":\\" [ ]? rule5 \\",\\" whitespace-new-lines-rule \\"verbsInMessage\\" \\":\\" [ ]? 
rule6 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* + "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"message\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"numberOfWordsInMessage\\\\\\"\\" \\":\\" [ ]? integer-number-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"feelingGoodPercentage\\\\\\"\\" \\":\\" [ ]? fractional-number-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"feelingGood\\\\\\"\\" \\":\\" [ ]? boolean-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"feelingOverall\\\\\\"\\" \\":\\" [ ]? rule5 \\",\\" whitespace-new-lines-rule \\"\\\\\\"verbsInMessage\\\\\\"\\" \\":\\" [ ]? rule6 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]? string-rule ::= \\"\\\\\\"\\" ( [^\\"\\\\\\\\] | \\"\\\\\\\\\\" ([\\"\\\\\\\\/bfnrt] | \\"u\\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \\"\\\\\\"\\" null-rule ::= \\"null\\" @@ -114,8 +114,8 @@ describe("grammar for JSON schema", () => { rule1 ::= \\"true\\" rule2 ::= \\"false\\" boolean-rule ::= ( rule1 | rule2 ) - rule3 ::= \\"good\\" - rule4 ::= \\"bad\\" + rule3 ::= \\"\\\\\\"good\\\\\\"\\" + rule4 ::= \\"\\\\\\"bad\\\\\\"\\" rule5 ::= ( rule3 | rule4 ) rule7 ::= ( string-rule ) ( \\",\\" whitespace-new-lines-rule string-rule )* rule8 ::= ( string-rule )? @@ -210,7 +210,7 @@ describe("grammar for JSON schema", () => { "root ::= \\"[\\" whitespace-new-lines-rule ( rule2 | rule3 ) whitespace-new-lines-rule \\"]\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]? string-rule ::= \\"\\\\\\"\\" ( [^\\"\\\\\\\\] | \\"\\\\\\\\\\" ([\\"\\\\\\\\/bfnrt] | \\"u\\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \\"\\\\\\"\\" - rule0 ::= \\"{\\" whitespace-new-lines-rule \\"message\\" \\":\\" [ ]? string-rule whitespace-new-lines-rule \\"}\\" + rule0 ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"message\\\\\\"\\" \\":\\" [ ]? 
string-rule whitespace-new-lines-rule \\"}\\" rule1 ::= ( rule0 | string-rule ) rule2 ::= ( rule1 ) ( \\",\\" whitespace-new-lines-rule rule1 )* rule3 ::= ( rule1 )?" @@ -297,12 +297,12 @@ describe("grammar for JSON schema", () => { }; expect(grammar.grammar).toMatchInlineSnapshot(` - "root ::= \\"{\\" whitespace-new-lines-rule \\"onlyPositiveText\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"onlyNegativeText\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"onlyVibe\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"onlyNumber\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"worstThing\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"withNewLine\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"withQuotes\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* + "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"onlyPositiveText\\\\\\"\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNegativeText\\\\\\"\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyVibe\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNumber\\\\\\"\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"worstThing\\\\\\"\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"withNewLine\\\\\\"\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"\\\\\\"withQuotes\\\\\\"\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]? 
- rule0 ::= \\"good\\" + rule0 ::= \\"\\\\\\"good\\\\\\"\\" null-rule ::= \\"null\\" - rule1 ::= \\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\" - rule2 ::= \\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\"" + rule1 ::= \\"\\\\\\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\\\\\"\\" + rule2 ::= \\"\\\\\\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\\\\\"\\"" `); const parsedValue = grammar.parse(JSON.stringify(exampleValidValue)); @@ -374,12 +374,12 @@ describe("grammar for JSON schema", () => { }; expect(grammar.grammar).toMatchInlineSnapshot(` - "root ::= \\"{\\" whitespace-new-lines-rule \\"onlyPositiveText\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"onlyNegativeText\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"onlyVibe\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"onlyNumber\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"worstThing\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"withNewLine\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"withQuotes\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* + "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"onlyPositiveText\\\\\\"\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNegativeText\\\\\\"\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyVibe\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNumber\\\\\\"\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"worstThing\\\\\\"\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"withNewLine\\\\\\"\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"\\\\\\"withQuotes\\\\\\"\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]? 
- rule0 ::= \\"good\\" + rule0 ::= \\"\\\\\\"good\\\\\\"\\" null-rule ::= \\"null\\" - rule1 ::= \\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\" - rule2 ::= \\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\"" + rule1 ::= \\"\\\\\\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\\\\\"\\" + rule2 ::= \\"\\\\\\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\\\\\"\\"" `); const parsedValue = grammar.parse(JSON.stringify(exampleValidValue)); @@ -452,12 +452,12 @@ describe("grammar for JSON schema", () => { }; expect(grammar.grammar).toMatchInlineSnapshot(` - "root ::= \\"{\\" whitespace-new-lines-rule \\"onlyPositiveText\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"onlyNegativeText\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"onlyVibe\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"onlyNumber\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"worstThing\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"withNewLine\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"withQuotes\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* + "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"onlyPositiveText\\\\\\"\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNegativeText\\\\\\"\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyVibe\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNumber\\\\\\"\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"worstThing\\\\\\"\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"withNewLine\\\\\\"\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"\\\\\\"withQuotes\\\\\\"\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]* whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]? 
- rule0 ::= \\"good\\" + rule0 ::= \\"\\\\\\"good\\\\\\"\\" null-rule ::= \\"null\\" - rule1 ::= \\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\" - rule2 ::= \\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\"" + rule1 ::= \\"\\\\\\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\\\\\"\\" + rule2 ::= \\"\\\\\\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\\\\\"\\"" `); const parsedValue = grammar.parse(JSON.stringify(exampleValidValue)); diff --git a/vitest.config.ts b/vitest.config.ts new file mode 100644 index 00000000..7925dbdf --- /dev/null +++ b/vitest.config.ts @@ -0,0 +1,7 @@ +import {defineConfig} from "vitest/config"; + +export default defineConfig({ + test: { + threads: false + } +});