From ff1644d4c41c7315b1105f76552859d85767159e Mon Sep 17 00:00:00 2001
From: Gilad S <giladgd@users.noreply.github.com>
Date: Thu, 26 Oct 2023 01:45:51 +0300
Subject: [PATCH] fix: bugs (#80)

* feat: adapt to the latest `llama.cpp` interface
* feat: print helpful information to help resolve a clone issue when it happens
* feat: print helpful information to help resolve build issues related to CUDA
* feat: make portable cmake on Windows more stable
* feat: update `CMakeLists.txt` to match `llama.cpp` better
* fix: do not download redundant node headers
* fix: improve cmake custom options handling
* fix: do not set `CMAKE_GENERATOR_TOOLSET` for CUDA
* fix: do not fetch information from GitHub when using a local git bundle
* fix: GBNF JSON schema string const formatting
* docs: document a solution to a compilation problem on macOS
* docs: document more CUDA build error solutions
* docs: explain about ES modules in the getting started guide
* chore: update `.commitlintrc.json`
* chore: remove redundant dependency
---
 .commitlintrc.json                            |  5 +-
 .github/workflows/build.yml                   |  3 +
 docs/guide/CUDA.md                            | 33 +++++++--
 docs/guide/building-from-source.md            |  2 +
 docs/guide/cli/build.md                       |  4 +
 docs/guide/cli/download.md                    |  4 +
 docs/guide/index.md                           |  5 ++
 llama/CMakeLists.txt                          |  4 +-
 llama/addon.cpp                               | 29 +++-----
 llama/package.json                            |  5 ++
 package.json                                  |  1 -
 src/cli/commands/DownloadCommand.ts           | 74 +++++++++++--------
 src/config.ts                                 |  5 ++
 src/utils/cloneLlamaCppRepo.ts                | 33 +++++++--
 src/utils/cmake.ts                            |  6 ++
 src/utils/compileLLamaCpp.ts                  | 56 ++++++++------
 .../gbnfJson/terminals/GbnfStringValue.ts     |  6 +-
 .../llamaEvaluator/LlamaGrammar.test.ts       | 32 ++++----
 vitest.config.ts                              |  7 ++
 19 files changed, 207 insertions(+), 107 deletions(-)
 create mode 100644 llama/package.json
 create mode 100644 vitest.config.ts

diff --git a/.commitlintrc.json b/.commitlintrc.json
index f4fbb7dd..6bb21b39 100644
--- a/.commitlintrc.json
+++ b/.commitlintrc.json
@@ -1,3 +1,6 @@
 {
-    "extends": ["@commitlint/config-conventional"]
+    "extends": ["@commitlint/config-conventional"],
+    "rules": {
+        "subject-case": [0, "never"]
+    }
 }
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1036e533..750e7166 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -231,6 +231,9 @@ jobs:
       - name: Install modules
         run: npm ci
 
+      - name: Build binary
+        run: node ./dist/cli/cli.js build
+
       - name: Run standalone tests
         run: npm run test:standalone
 
diff --git a/docs/guide/CUDA.md b/docs/guide/CUDA.md
index 48589952..7cba62f8 100644
--- a/docs/guide/CUDA.md
+++ b/docs/guide/CUDA.md
@@ -23,18 +23,39 @@ To build `node-llama-cpp` with any of these options, set an environment variable
 ### Fix the `Failed to detect a default CUDA architecture` build error
 To fix this issue you have to set the `CUDACXX` environment variable to the path of the `nvcc` compiler.
 
-For example, if you installed CUDA Toolkit 12.2 on Windows, you have to run the following command:
-```bash
-set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe
+For example, if you have installed CUDA Toolkit 12.2, you have to run a command like this:
+::: code-group
+```bash [Linux]
+export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
 ```
 
-On Linux, it would be something like this:
-```bash
-export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
+```bash [Windows]
+set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe
 ```
+:::
 
 Then run the build command again to check whether setting the `CUDACXX` environment variable fixed the issue.
 
+### Fix the `The CUDA compiler identification is unknown` build error
+The solution to this error is the same as [the solution to the `Failed to detect a default CUDA architecture` error](#fix-the-failed-to-detect-a-default-cuda-architecture-build-error).
+
+### Fix the `A single input file is required for a non-link phase when an outputfile is specified` build error
+To fix this issue you have to set the `CMAKE_GENERATOR_TOOLSET` cmake option to the CUDA home directory, usually already set as the `CUDA_PATH` environment variable.
+
+To do this, set the `NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET` environment variable to the path of your CUDA home directory:
+
+::: code-group
+```bash [Linux]
+export NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=$CUDA_PATH
+```
+
+```bash [Windows]
+set NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=%CUDA_PATH%
+```
+:::
+
+Then run the build command again to check whether setting the `CMAKE_GENERATOR_TOOLSET` cmake option fixed the issue.
+
 ## Using `node-llama-cpp` with CUDA
 After you build `node-llama-cpp` with CUDA support, you can use it normally.
 
diff --git a/docs/guide/building-from-source.md b/docs/guide/building-from-source.md
index 81817d57..2f62f92e 100644
--- a/docs/guide/building-from-source.md
+++ b/docs/guide/building-from-source.md
@@ -27,6 +27,8 @@ If `cmake` is not installed on your machine, `node-llama-cpp` will automatically
 
 If the build fails, make sure you have the required dependencies of `cmake` installed on your machine. More info is available [here](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake) (you don't have to install `cmake` or `cmake-js`, just the dependencies).
 
+If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools.
+
 :::
 
 ## `download` and `build` commands
diff --git a/docs/guide/cli/build.md b/docs/guide/cli/build.md
index f31433bf..e64276a0 100644
--- a/docs/guide/cli/build.md
+++ b/docs/guide/cli/build.md
@@ -10,6 +10,10 @@ const commandDoc = docs.build;
 
 {{commandDoc.description}}
 
+::: info
+If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools.
+:::
+
 ## Usage
 ```shell-vue
 {{commandDoc.usage}}
diff --git a/docs/guide/cli/download.md b/docs/guide/cli/download.md
index 1f71afb2..2a37316e 100644
--- a/docs/guide/cli/download.md
+++ b/docs/guide/cli/download.md
@@ -20,6 +20,10 @@ This is useful for building from source on machines that aren't connected to the
 
 :::
 
+::: info
+If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools.
+:::
+
 ## Usage
 ```shell-vue
 {{commandDoc.usage}}
diff --git a/docs/guide/index.md b/docs/guide/index.md
index 8cba284a..13e9151e 100644
--- a/docs/guide/index.md
+++ b/docs/guide/index.md
@@ -14,6 +14,11 @@ npm install --save node-llama-cpp
 > If binaries are not available for your platform, it'll fallback to download a release of `llama.cpp` and build it from source with `cmake`.
 > To disable this behavior, set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNLOAD` to `true`.
 
+## ESM usage
+`node-llama-cpp` is an [ES module](https://nodejs.org/api/esm.html#modules-ecmascript-modules), so you can only use `import` to load it and cannot use `require`.
+
+To use it in your project, make sure your `package.json` file has `"type": "module"` in it.
+
 ## CUDA and Metal support
 **Metal:** Metal support is enabled by default on macOS. If you're using a Mac with an Intel chip, [you might want to disable it](./Metal.md).
 
diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index aec7785d..8991cbbe 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.13)
 
-project ("llama-addon")
+project("llama-addon" C CXX)
 
 if (MSVC)
   # add_compile_options(/EHsc)
diff --git a/llama/addon.cpp b/llama/addon.cpp
index 6a192799..552d42cc 100644
--- a/llama/addon.cpp
+++ b/llama/addon.cpp
@@ -208,13 +208,13 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
     return Napi::String::New(info.Env(), ss.str());
   }
   Napi::Value TokenBos(const Napi::CallbackInfo& info) {
-    return Napi::Number::From(info.Env(), llama_token_bos(ctx));
+    return Napi::Number::From(info.Env(), llama_token_bos(model->model)); // TODO: move this to the model
   }
   Napi::Value TokenEos(const Napi::CallbackInfo& info) {
-    return Napi::Number::From(info.Env(), llama_token_eos(ctx));
+    return Napi::Number::From(info.Env(), llama_token_eos(model->model)); // TODO: move this to the model
   }
   Napi::Value TokenNl(const Napi::CallbackInfo& info) {
-    return Napi::Number::From(info.Env(), llama_token_nl(ctx));
+    return Napi::Number::From(info.Env(), llama_token_nl(model->model)); // TODO: move this to the model
   }
   Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
     return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
@@ -223,7 +223,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
     int token = info[0].As<Napi::Number>().Int32Value();
     std::stringstream ss;
 
-    const char* str = llama_token_get_text(ctx, token);
+    const char* str = llama_token_get_text(model->model, token); // TODO: move this to the model
     if (str == nullptr) {
       return info.Env().Undefined();
     }
@@ -336,18 +336,14 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
 
   protected:
   void Execute() {
-    llama_batch batch = llama_batch_init(tokens.size(), 0);
+    llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
 
-    batch.n_tokens = tokens.size();
-
-    for (int32_t i = 0; i < batch.n_tokens; i++) {
-        batch.token[i]  = tokens[i];
-        batch.pos[i]    = ctx->n_cur;
-        batch.seq_id[i] = 0;
-        batch.logits[i] = false;
+    for (size_t i = 0; i < tokens.size(); i++) {
+        llama_batch_add(batch, tokens[i], ctx->n_cur, { 0 }, false);
 
         ctx->n_cur++;
     }
+    GGML_ASSERT(batch.n_tokens == (int) tokens.size());
 
     batch.logits[batch.n_tokens - 1] = true;
 
@@ -381,14 +377,11 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
 
     llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
 
-    auto eos_token = llama_token_eos(ctx->ctx);
+    auto eos_token = llama_token_eos(ctx->model->model);
 
     if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
-      llama_sample_repetition_penalty(
-        ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty
-      );
-      llama_sample_frequency_and_presence_penalties(
-        ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(),
+      llama_sample_repetition_penalties(
+        ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty,
         repeat_penalty_frequency_penalty, repeat_penalty_presence_penalty
       );
     }
diff --git a/llama/package.json b/llama/package.json
new file mode 100644
index 00000000..857c5087
--- /dev/null
+++ b/llama/package.json
@@ -0,0 +1,5 @@
+{
+  "binary": {
+    "napi_versions": [7]
+  }
+}
diff --git a/package.json b/package.json
index 7c920cfe..bd578aaa 100644
--- a/package.json
+++ b/package.json
@@ -100,7 +100,6 @@
     "@commitlint/cli": "^17.7.1",
     "@commitlint/config-conventional": "^17.7.0",
     "@semantic-release/exec": "^6.0.3",
-    "@types/bytes": "^3.1.1",
     "@types/cli-progress": "^3.11.0",
     "@types/cross-spawn": "^6.0.2",
     "@types/fs-extra": "^11.0.1",
diff --git a/src/cli/commands/DownloadCommand.ts b/src/cli/commands/DownloadCommand.ts
index 831feb4f..0c785020 100644
--- a/src/cli/commands/DownloadCommand.ts
+++ b/src/cli/commands/DownloadCommand.ts
@@ -13,7 +13,10 @@ import {setBinariesGithubRelease} from "../../utils/binariesGithubRelease.js";
 import {downloadCmakeIfNeeded} from "../../utils/cmake.js";
 import withStatusLogs from "../../utils/withStatusLogs.js";
 import {getIsInDocumentationMode} from "../../state.js";
-import {unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle} from "../../utils/gitReleaseBundles.js";
+import {
+    getGitBundlePathForRelease,
+    unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle
+} from "../../utils/gitReleaseBundles.js";
 import {cloneLlamaCppRepo} from "../../utils/cloneLlamaCppRepo.js";
 
 type DownloadCommandArgs = {
@@ -91,6 +94,7 @@ export const DownloadCommand: CommandModule<object, DownloadCommandArgs> = {
 export async function DownloadLlamaCppCommand({
     repo, release, arch, nodeTarget, metal, cuda, skipBuild, noBundle, updateBinariesReleaseMetadataAndSaveGitBundle
 }: DownloadCommandArgs) {
+    const useBundle = noBundle != true;
     const octokit = new Octokit();
     const [githubOwner, githubRepo] = repo.split("/");
 
@@ -110,37 +114,45 @@ export async function DownloadLlamaCppCommand({
     type GithubReleaseType = Awaited<ReturnType<typeof octokit.rest.repos.getLatestRelease>> |
         Awaited<ReturnType<typeof octokit.rest.repos.getReleaseByTag>>;
 
-    let githubRelease: GithubReleaseType | null = null;
-    await withOra({
-        loading: chalk.blue("Fetching llama.cpp info"),
-        success: chalk.blue("Fetched llama.cpp info"),
-        fail: chalk.blue("Failed to fetch llama.cpp info")
-    }, async () => {
-        try {
-            if (release === "latest") {
-                githubRelease = await octokit.rest.repos.getLatestRelease({
-                    owner: githubOwner,
-                    repo: githubRepo
-                });
-            } else {
-                githubRelease = await octokit.rest.repos.getReleaseByTag({
-                    owner: githubOwner,
-                    repo: githubRepo,
-                    tag: release
-                });
+    let githubReleaseTag: string | null = (useBundle && (await getGitBundlePathForRelease(githubOwner, githubRepo, release)) != null)
+        ? release
+        : null;
+
+    if (githubReleaseTag == null)
+        await withOra({
+            loading: chalk.blue("Fetching llama.cpp info"),
+            success: chalk.blue("Fetched llama.cpp info"),
+            fail: chalk.blue("Failed to fetch llama.cpp info")
+        }, async () => {
+            let githubRelease: GithubReleaseType | null = null;
+
+            try {
+                if (release === "latest") {
+                    githubRelease = await octokit.rest.repos.getLatestRelease({
+                        owner: githubOwner,
+                        repo: githubRepo
+                    });
+                } else {
+                    githubRelease = await octokit.rest.repos.getReleaseByTag({
+                        owner: githubOwner,
+                        repo: githubRepo,
+                        tag: release
+                    });
+                }
+            } catch (err) {
+                console.error("Failed to fetch llama.cpp release info", err);
             }
-        } catch (err) {
-            console.error("Failed to fetch llama.cpp release info", err);
-        }
 
-        if (githubRelease == null) {
-            throw new Error(`Failed to find release "${release}" of "${repo}"`);
-        }
+            if (githubRelease == null) {
+                throw new Error(`Failed to find release "${release}" of "${repo}"`);
+            }
 
-        if (githubRelease!.data.tag_name == null) {
-            throw new Error(`Failed to find tag of release "${release}" of "${repo}"`);
-        }
-    });
+            if (githubRelease.data.tag_name == null) {
+                throw new Error(`Failed to find tag of release "${release}" of "${repo}"`);
+            }
+
+            githubReleaseTag = githubRelease.data.tag_name;
+        });
 
     await clearTempFolder();
 
@@ -153,7 +165,7 @@ export async function DownloadLlamaCppCommand({
     });
 
     console.log(chalk.blue("Cloning llama.cpp"));
-    await cloneLlamaCppRepo(githubOwner, githubRepo, githubRelease!.data.tag_name, noBundle != true);
+    await cloneLlamaCppRepo(githubOwner, githubRepo, githubReleaseTag!, useBundle);
 
     if (!skipBuild) {
         await downloadCmakeIfNeeded(true);
@@ -174,7 +186,7 @@ export async function DownloadLlamaCppCommand({
     }
 
     if (isCI && updateBinariesReleaseMetadataAndSaveGitBundle) {
-        await setBinariesGithubRelease(githubRelease!.data.tag_name);
+        await setBinariesGithubRelease(githubReleaseTag!);
         await unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle();
     }
 
diff --git a/src/config.ts b/src/config.ts
index 4710a193..135db6bc 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -57,3 +57,8 @@ export const defaultChatSystemPrompt = "You are a helpful, respectful and honest
     "If you don't know the answer to a question, please don't share false information.";
 export const cliBinName = "node-llama-cpp";
 export const npxRunPrefix = "npx --no ";
+
+const documentationUrl = "https://withcatai.github.io/node-llama-cpp";
+export const documentationPageUrls = {
+    CUDA: documentationUrl + "/guide/CUDA"
+} as const;
diff --git a/src/utils/cloneLlamaCppRepo.ts b/src/utils/cloneLlamaCppRepo.ts
index 5ae0b54e..d881ee44 100644
--- a/src/utils/cloneLlamaCppRepo.ts
+++ b/src/utils/cloneLlamaCppRepo.ts
@@ -55,14 +55,35 @@ export async function cloneLlamaCppRepo(githubOwner: string, githubRepo: string,
         } catch (err) {
             await fs.remove(llamaCppDirectory);
             console.error("Failed to clone git bundle, cloning from GitHub instead", err);
+
+            printCloneErrorHelp(String(err));
         }
     }
 
-    await withGitCloneProgress("GitHub", async (gitWithCloneProgress) => {
-        await gitWithCloneProgress.clone(remoteGitUrl, llamaCppDirectory, {
-            "--depth": 1,
-            "--branch": tag,
-            "--quiet": null
+    try {
+        await withGitCloneProgress("GitHub", async (gitWithCloneProgress) => {
+            await gitWithCloneProgress.clone(remoteGitUrl, llamaCppDirectory, {
+                "--depth": 1,
+                "--branch": tag,
+                "--quiet": null
+            });
         });
-    });
+    } catch (err) {
+        printCloneErrorHelp(String(err));
+
+        throw err;
+    }
+}
+
+function printCloneErrorHelp(error: string) {
+    // This error happens with some docker images where the current user is different
+    // from the owner of the files due to mounting a volume.
+    // In such cases, print a helpful message to help the user resolve the issue.
+    if (error.toLowerCase().includes("detected dubious ownership in repository"))
+        console.info("\n" +
+            chalk.grey("[node-llama-cpp]") + chalk.yellow(" To fix this issue, try running this command to fix it for the current module directory:") + "\n" +
+            'git config --global --add safe.directory "' + llamaCppDirectory + '"\n\n' +
+            chalk.yellow("Or run this command to fix it everywhere:") + "\n" +
+            'git config --global --add safe.directory "*"'
+        );
 }
diff --git a/src/utils/cmake.ts b/src/utils/cmake.ts
index 92875e2d..08cfff51 100644
--- a/src/utils/cmake.ts
+++ b/src/utils/cmake.ts
@@ -35,6 +35,12 @@ export async function getCmakePath() {
 
         if (resolvedPath.toLowerCase().endsWith(".cmd"))
             resolvedPath = (await getBinFromWindowCmd(resolvedPath, "cmake.exe")) ?? "";
+        else if (resolvedPath.toLowerCase().endsWith(".ps1")) {
+            const cmdFilePath = resolvedPath.slice(0, -".ps1".length) + ".cmd";
+
+            if (await fs.pathExists(cmdFilePath))
+                resolvedPath = (await getBinFromWindowCmd(cmdFilePath, "cmake.exe")) ?? "";
+        }
 
         if (resolvedPath !== "")
             return resolvedPath;
diff --git a/src/utils/compileLLamaCpp.ts b/src/utils/compileLLamaCpp.ts
index b186e69f..90e53494 100644
--- a/src/utils/compileLLamaCpp.ts
+++ b/src/utils/compileLLamaCpp.ts
@@ -2,7 +2,10 @@ import path from "path";
 import {fileURLToPath} from "url";
 import process from "process";
 import fs from "fs-extra";
-import {customCmakeOptionsEnvVarPrefix, llamaCppDirectory, llamaDirectory, llamaToolchainsDirectory} from "../config.js";
+import chalk from "chalk";
+import {
+    customCmakeOptionsEnvVarPrefix, documentationPageUrls, llamaCppDirectory, llamaDirectory, llamaToolchainsDirectory
+} from "../config.js";
 import {clearLlamaBuild} from "./clearLlamaBuild.js";
 import {setUsedBinFlag} from "./usedBinFlag.js";
 import {spawnCommand} from "./spawnCommand.js";
@@ -24,35 +27,33 @@ export async function compileLlamaCpp({
         const cmakePathArgs = await getCmakePathArgs();
         const toolchainFile = await getToolchainFileForArch(arch);
         const runtimeVersion = nodeTarget.startsWith("v") ? nodeTarget.slice("v".length) : nodeTarget;
-        const cmakeCustomOptions = [];
-
-        if ((metal && process.platform === "darwin") || process.env.LLAMA_METAL === "1") cmakeCustomOptions.push("LLAMA_METAL=1");
-        else cmakeCustomOptions.push("LLAMA_METAL=OFF");
-
-        if (cuda || process.env.LLAMA_CUBLAS === "1") cmakeCustomOptions.push("LLAMA_CUBLAS=1");
-        if (cuda && process.env.CUDA_PATH != null && await fs.pathExists(process.env.CUDA_PATH))
-            cmakeCustomOptions.push("CMAKE_GENERATOR_TOOLSET=" + process.env.CUDA_PATH);
-
-        if (process.env.LLAMA_MPI === "1") cmakeCustomOptions.push("LLAMA_MPI=1");
-        if (process.env.LLAMA_OPENBLAS === "1") cmakeCustomOptions.push("LLAMA_OPENBLAS=1");
-        if (process.env.LLAMA_BLAS_VENDOR != null) cmakeCustomOptions.push("LLAMA_BLAS_VENDOR=" + process.env.LLAMA_BLAS_VENDOR);
-        if (process.env.LLAMA_CUDA_FORCE_DMMV != null) cmakeCustomOptions.push("LLAMA_CUDA_FORCE_DMMV=" + process.env.LLAMA_CUDA_FORCE_DMMV);
-        if (process.env.LLAMA_CUDA_DMMV_X != null) cmakeCustomOptions.push("LLAMA_CUDA_DMMV_X=" + process.env.LLAMA_CUDA_DMMV_X);
-        if (process.env.LLAMA_CUDA_MMV_Y != null) cmakeCustomOptions.push("LLAMA_CUDA_MMV_Y=" + process.env.LLAMA_CUDA_MMV_Y);
-        if (process.env.LLAMA_CUDA_F16 != null) cmakeCustomOptions.push("LLAMA_CUDA_F16=" + process.env.LLAMA_CUDA_F16);
-        if (process.env.LLAMA_CUDA_KQUANTS_ITER != null) cmakeCustomOptions.push("LLAMA_CUDA_KQUANTS_ITER=" + process.env.LLAMA_CUDA_KQUANTS_ITER);
-        if (process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE != null) cmakeCustomOptions.push("LLAMA_CUDA_PEER_MAX_BATCH_SIZE=" + process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE);
-        if (process.env.LLAMA_HIPBLAS === "1") cmakeCustomOptions.push("LLAMA_HIPBLAS=1");
-        if (process.env.LLAMA_CLBLAST === "1") cmakeCustomOptions.push("LLAMA_CLBLAST=1");
+        const cmakeCustomOptions = new Map<string, string>();
+
+        if ((metal && process.platform === "darwin") || process.env.LLAMA_METAL === "1") cmakeCustomOptions.set("LLAMA_METAL", "1");
+        else cmakeCustomOptions.set("LLAMA_METAL", "OFF");
+
+        if (cuda || process.env.LLAMA_CUBLAS === "1") cmakeCustomOptions.set("LLAMA_CUBLAS", "1");
+
+        if (process.env.LLAMA_MPI === "1") cmakeCustomOptions.set("LLAMA_MPI", "1");
+        if (process.env.LLAMA_OPENBLAS === "1") cmakeCustomOptions.set("LLAMA_OPENBLAS", "1");
+        if (process.env.LLAMA_BLAS_VENDOR != null) cmakeCustomOptions.set("LLAMA_BLAS_VENDOR", process.env.LLAMA_BLAS_VENDOR);
+        if (process.env.LLAMA_CUDA_FORCE_DMMV != null) cmakeCustomOptions.set("LLAMA_CUDA_FORCE_DMMV", process.env.LLAMA_CUDA_FORCE_DMMV);
+        if (process.env.LLAMA_CUDA_DMMV_X != null) cmakeCustomOptions.set("LLAMA_CUDA_DMMV_X", process.env.LLAMA_CUDA_DMMV_X);
+        if (process.env.LLAMA_CUDA_MMV_Y != null) cmakeCustomOptions.set("LLAMA_CUDA_MMV_Y", process.env.LLAMA_CUDA_MMV_Y);
+        if (process.env.LLAMA_CUDA_F16 != null) cmakeCustomOptions.set("LLAMA_CUDA_F16", process.env.LLAMA_CUDA_F16);
+        if (process.env.LLAMA_CUDA_KQUANTS_ITER != null) cmakeCustomOptions.set("LLAMA_CUDA_KQUANTS_ITER", process.env.LLAMA_CUDA_KQUANTS_ITER);
+        if (process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE != null) cmakeCustomOptions.set("LLAMA_CUDA_PEER_MAX_BATCH_SIZE", process.env.LLAMA_CUDA_PEER_MAX_BATCH_SIZE);
+        if (process.env.LLAMA_HIPBLAS === "1") cmakeCustomOptions.set("LLAMA_HIPBLAS", "1");
+        if (process.env.LLAMA_CLBLAST === "1") cmakeCustomOptions.set("LLAMA_CLBLAST", "1");
 
         if (toolchainFile != null)
-            cmakeCustomOptions.push("CMAKE_TOOLCHAIN_FILE=" + toolchainFile);
+            cmakeCustomOptions.set("CMAKE_TOOLCHAIN_FILE", toolchainFile);
 
         for (const key in process.env) {
             if (key.startsWith(customCmakeOptionsEnvVarPrefix)) {
                 const option = key.slice(customCmakeOptionsEnvVarPrefix.length);
                 const value = process.env[key];
-                cmakeCustomOptions.push(`${option}=${value}`);
+                cmakeCustomOptions.set(option, value!);
             }
         }
 
@@ -63,7 +64,7 @@ export async function compileLlamaCpp({
         await spawnCommand(
             "npm",
             ["run", "-s", "cmake-js-llama", "--", "compile", "--log-level", "warn", "--arch=" + arch, "--runtime-version=" + runtimeVersion, ...cmakePathArgs]
-                .concat(cmakeCustomOptions.map(option => "--CD" + option)),
+                .concat([...cmakeCustomOptions].map(([key, value]) => "--CD" + key + "=" + value)),
             __dirname
         );
 
@@ -89,6 +90,13 @@ export async function compileLlamaCpp({
         if (setUsedBinFlagArg)
             await setUsedBinFlag("prebuiltBinaries");
 
+        if (cuda)
+            console.info("\n" +
+                chalk.grey("[node-llama-cpp] ") +
+                chalk.yellow("To resolve errors related to CUDA compilation, see the CUDA guide: ") +
+                documentationPageUrls.CUDA
+            );
+
         throw err;
     } finally {
         await fixXpackPermissions();
diff --git a/src/utils/gbnfJson/terminals/GbnfStringValue.ts b/src/utils/gbnfJson/terminals/GbnfStringValue.ts
index 8c674285..aeca0fdb 100644
--- a/src/utils/gbnfJson/terminals/GbnfStringValue.ts
+++ b/src/utils/gbnfJson/terminals/GbnfStringValue.ts
@@ -11,14 +11,16 @@ export class GbnfStringValue extends GbnfTerminal {
 
     override getGrammar(): string {
         return [
-            "\"",
+            '"',
+            '\\"',
             this.value
                 .replaceAll("\\", "\\\\")
                 .replaceAll("\t", "\\t")
                 .replaceAll("\r", "\\r")
                 .replaceAll("\n", "\\n")
                 .replaceAll('"', "\\\\" + '\\"'),
-            "\""
+            '\\"',
+            '"'
         ].join("");
     }
 }
diff --git a/test/standalone/llamaEvaluator/LlamaGrammar.test.ts b/test/standalone/llamaEvaluator/LlamaGrammar.test.ts
index 8cf9fdbc..dff715bc 100644
--- a/test/standalone/llamaEvaluator/LlamaGrammar.test.ts
+++ b/test/standalone/llamaEvaluator/LlamaGrammar.test.ts
@@ -104,7 +104,7 @@ describe("grammar for JSON schema", () => {
         };
 
         expect(grammar.grammar).toMatchInlineSnapshot(`
-          "root ::= \\"{\\" whitespace-new-lines-rule \\"message\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"numberOfWordsInMessage\\" \\":\\" [ ]? integer-number-rule \\",\\" whitespace-new-lines-rule \\"feelingGoodPercentage\\" \\":\\" [ ]? fractional-number-rule \\",\\" whitespace-new-lines-rule \\"feelingGood\\" \\":\\" [ ]? boolean-rule \\",\\" whitespace-new-lines-rule \\"feelingOverall\\" \\":\\" [ ]? rule5 \\",\\" whitespace-new-lines-rule \\"verbsInMessage\\" \\":\\" [ ]? rule6 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
+          "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"message\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"numberOfWordsInMessage\\\\\\"\\" \\":\\" [ ]? integer-number-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"feelingGoodPercentage\\\\\\"\\" \\":\\" [ ]? fractional-number-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"feelingGood\\\\\\"\\" \\":\\" [ ]? boolean-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"feelingOverall\\\\\\"\\" \\":\\" [ ]? rule5 \\",\\" whitespace-new-lines-rule \\"\\\\\\"verbsInMessage\\\\\\"\\" \\":\\" [ ]? rule6 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
           whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]?
           string-rule ::= \\"\\\\\\"\\" ( [^\\"\\\\\\\\] | \\"\\\\\\\\\\" ([\\"\\\\\\\\/bfnrt] | \\"u\\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \\"\\\\\\"\\"
           null-rule ::= \\"null\\"
@@ -114,8 +114,8 @@ describe("grammar for JSON schema", () => {
           rule1 ::= \\"true\\"
           rule2 ::= \\"false\\"
           boolean-rule ::= ( rule1 | rule2 )
-          rule3 ::= \\"good\\"
-          rule4 ::= \\"bad\\"
+          rule3 ::= \\"\\\\\\"good\\\\\\"\\"
+          rule4 ::= \\"\\\\\\"bad\\\\\\"\\"
           rule5 ::= ( rule3 | rule4 )
           rule7 ::= ( string-rule ) ( \\",\\" whitespace-new-lines-rule string-rule )*
           rule8 ::= ( string-rule )?
@@ -210,7 +210,7 @@ describe("grammar for JSON schema", () => {
           "root ::= \\"[\\" whitespace-new-lines-rule ( rule2 | rule3 ) whitespace-new-lines-rule \\"]\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
           whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]?
           string-rule ::= \\"\\\\\\"\\" ( [^\\"\\\\\\\\] | \\"\\\\\\\\\\" ([\\"\\\\\\\\/bfnrt] | \\"u\\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \\"\\\\\\"\\"
-          rule0 ::= \\"{\\" whitespace-new-lines-rule \\"message\\" \\":\\" [ ]? string-rule whitespace-new-lines-rule \\"}\\"
+          rule0 ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"message\\\\\\"\\" \\":\\" [ ]? string-rule whitespace-new-lines-rule \\"}\\"
           rule1 ::= ( rule0 | string-rule )
           rule2 ::= ( rule1 ) ( \\",\\" whitespace-new-lines-rule rule1 )*
           rule3 ::= ( rule1 )?"
@@ -297,12 +297,12 @@ describe("grammar for JSON schema", () => {
         };
 
         expect(grammar.grammar).toMatchInlineSnapshot(`
-          "root ::= \\"{\\" whitespace-new-lines-rule \\"onlyPositiveText\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"onlyNegativeText\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"onlyVibe\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"onlyNumber\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"worstThing\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"withNewLine\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"withQuotes\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
+          "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"onlyPositiveText\\\\\\"\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNegativeText\\\\\\"\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyVibe\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNumber\\\\\\"\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"worstThing\\\\\\"\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"withNewLine\\\\\\"\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"\\\\\\"withQuotes\\\\\\"\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
           whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]?
-          rule0 ::= \\"good\\"
+          rule0 ::= \\"\\\\\\"good\\\\\\"\\"
           null-rule ::= \\"null\\"
-          rule1 ::= \\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\"
-          rule2 ::= \\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\""
+          rule1 ::= \\"\\\\\\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\\\\\"\\"
+          rule2 ::= \\"\\\\\\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\\\\\"\\""
         `);
 
         const parsedValue = grammar.parse(JSON.stringify(exampleValidValue));
@@ -374,12 +374,12 @@ describe("grammar for JSON schema", () => {
         };
 
         expect(grammar.grammar).toMatchInlineSnapshot(`
-          "root ::= \\"{\\" whitespace-new-lines-rule \\"onlyPositiveText\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"onlyNegativeText\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"onlyVibe\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"onlyNumber\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"worstThing\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"withNewLine\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"withQuotes\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
+          "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"onlyPositiveText\\\\\\"\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNegativeText\\\\\\"\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyVibe\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNumber\\\\\\"\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"worstThing\\\\\\"\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"withNewLine\\\\\\"\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"\\\\\\"withQuotes\\\\\\"\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
           whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]?
-          rule0 ::= \\"good\\"
+          rule0 ::= \\"\\\\\\"good\\\\\\"\\"
           null-rule ::= \\"null\\"
-          rule1 ::= \\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\"
-          rule2 ::= \\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\""
+          rule1 ::= \\"\\\\\\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\\\\\"\\"
+          rule2 ::= \\"\\\\\\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\\\\\"\\""
         `);
 
         const parsedValue = grammar.parse(JSON.stringify(exampleValidValue));
@@ -452,12 +452,12 @@ describe("grammar for JSON schema", () => {
         };
 
         expect(grammar.grammar).toMatchInlineSnapshot(`
-          "root ::= \\"{\\" whitespace-new-lines-rule \\"onlyPositiveText\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"onlyNegativeText\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"onlyVibe\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"onlyNumber\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"worstThing\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"withNewLine\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"withQuotes\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
+          "root ::= \\"{\\" whitespace-new-lines-rule \\"\\\\\\"onlyPositiveText\\\\\\"\\" \\":\\" [ ]? \\"true\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNegativeText\\\\\\"\\" \\":\\" [ ]? \\"false\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyVibe\\\\\\"\\" \\":\\" [ ]? rule0 \\",\\" whitespace-new-lines-rule \\"\\\\\\"onlyNumber\\\\\\"\\" \\":\\" [ ]? \\"10\\" \\",\\" whitespace-new-lines-rule \\"\\\\\\"worstThing\\\\\\"\\" \\":\\" [ ]? null-rule \\",\\" whitespace-new-lines-rule \\"\\\\\\"withNewLine\\\\\\"\\" \\":\\" [ ]? rule1 \\",\\" whitespace-new-lines-rule \\"\\\\\\"withQuotes\\\\\\"\\" \\":\\" [ ]? rule2 whitespace-new-lines-rule \\"}\\" [\\\\n] [\\\\n] [\\\\n] [\\\\n] [\\\\n]*
           whitespace-new-lines-rule ::= [\\\\n]? [ \\\\t]* [\\\\n]?
-          rule0 ::= \\"good\\"
+          rule0 ::= \\"\\\\\\"good\\\\\\"\\"
           null-rule ::= \\"null\\"
-          rule1 ::= \\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\"
-          rule2 ::= \\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\""
+          rule1 ::= \\"\\\\\\"Hooray!\\\\nYes!\\\\t/\\\\\\\\\\\\\\"\\"
+          rule2 ::= \\"\\\\\\"The message is \\\\\\\\\\\\\\"Hi!\\\\\\\\\\\\\\".\\\\\\"\\""
         `);
 
         const parsedValue = grammar.parse(JSON.stringify(exampleValidValue));
diff --git a/vitest.config.ts b/vitest.config.ts
new file mode 100644
index 00000000..7925dbdf
--- /dev/null
+++ b/vitest.config.ts
@@ -0,0 +1,7 @@
+import {defineConfig} from "vitest/config";
+
+export default defineConfig({ // default export: the Vitest configuration object for this repository
+    test: {
+        threads: false // NOTE(review): turns off Vitest's `threads` option; presumably needed so tests touching the native addon do not run in worker threads — confirm
+    }
+});