feat(inspect gpu command): print device names #198

Merged (7 commits) on Apr 13, 2024
2 changes: 2 additions & 0 deletions docs/guide/vulkan.md
````diff
@@ -17,9 +17,11 @@ You should see an output like this:
 ```ansi
 Vulkan: available
 
+Vulkan device: Apple M1 Max
 Vulkan used VRAM: 0% (64KB/21.33GB)
 Vulkan free VRAM: 99.99% (21.33GB/21.33GB)
 
+CPU model: Apple M1 Max
 Used RAM: 97.37% (31.16GB/32GB)
 Free RAM: 2.62% (860.72MB/32GB)
 ```
````
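The lines added above mirror the new output of the `inspect gpu` command. The same information can be read programmatically; here is a minimal sketch, assuming `getLlama` is exported from `node-llama-cpp` and accepts a `gpu` option (the method names come from the CLI diff further down, but treat the exact `getLlama` options as an assumption):

```typescript
// Hedged sketch: llama.getGpuDeviceNames() and llama.getVramState() are
// taken from the InspectGpuCommand.ts diff below; the getLlama({gpu})
// option shape is an assumption, not a verified public API.
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"});

console.info("Devices:", llama.getGpuDeviceNames()); // e.g. ["Apple M1 Max"]

const vram = llama.getVramState();
console.info(`VRAM used: ${vram.used}/${vram.total} bytes (${vram.free} free)`);
```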
32 changes: 16 additions & 16 deletions package-lock.json

Some generated files are not rendered by default.

8 changes: 4 additions & 4 deletions package.json
```diff
@@ -137,9 +137,9 @@
         "semantic-release": "^22.0.8",
         "tslib": "^2.6.1",
         "typedoc": "^0.25.3",
-        "typedoc-plugin-markdown": "^4.0.0-next.53",
-        "typedoc-plugin-mdn-links": "^3.1.5",
-        "typedoc-vitepress-theme": "^1.0.0-next.9",
+        "typedoc-plugin-markdown": "^4.0.0-next.55",
+        "typedoc-plugin-mdn-links": "^3.1.19",
+        "typedoc-vitepress-theme": "^1.0.0-next.10",
         "typescript": "^5.2.2",
         "vite-node": "^1.4.0",
         "vitepress": "1.0.0-rc.22",
@@ -157,7 +157,7 @@
         "cross-spawn": "^7.0.3",
         "env-var": "^7.3.1",
         "fs-extra": "^11.2.0",
-        "ipull": "^3.0.8",
+        "ipull": "^3.0.11",
         "is-unicode-supported": "^2.0.0",
         "lifecycle-utils": "^1.4.1",
         "log-symbols": "^5.1.0",
```
14 changes: 14 additions & 0 deletions src/cli/commands/inspect/commands/InspectGpuCommand.ts
```diff
@@ -65,6 +65,10 @@ async function logGpuVramUsage(gpu: BuildGpu) {
     });
     const gpuName = getPrettyBuildGpuName(gpu);
     const vramStatus = llama.getVramState();
+    const gpuDeviceNames = llama.getGpuDeviceNames();
+
+    if (gpuDeviceNames.length > 0)
+        console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);
 
     console.info(`${chalk.yellow(`${gpuName} used VRAM:`)} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`);
     console.info(`${chalk.yellow(`${gpuName} free VRAM:`)} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`);
@@ -75,6 +79,16 @@ async function logRamUsage() {
     const totalMemory = os.totalmem();
     const freeMemory = os.freemem();
     const usedMemory = totalMemory - freeMemory;
+    const cpuDeviceNames = Array.from(
+        new Set(
+            os.cpus()
+                .map((cpu) => (cpu.model?.trim?.() ?? ""))
+                .filter((deviceName) => deviceName.length > 0)
+        )
+    );
+
+    if (cpuDeviceNames.length > 0)
+        console.info(`${chalk.yellow("CPU model" + (cpuDeviceNames.length > 1 ? "s" : "") + ":")} ${cpuDeviceNames.join(", ")}`);
 
     console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`);
     console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`);
```
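Note that `os.cpus()` returns one entry per logical core, each carrying a `model` string, so a single-CPU machine repeats the same model name once per core; the `Set` collapses those duplicates while still listing every distinct model on heterogeneous systems. A standalone sketch of the same technique, using only Node's built-in `os` module:

```typescript
// Standalone sketch of the deduplication used in logRamUsage() above.
// os.cpus() yields one entry per logical core, so an 8-core CPU would
// otherwise produce the same model string 8 times.
import os from "os";

const cpuModels = Array.from(
    new Set(
        os.cpus()
            .map((cpu) => cpu.model?.trim?.() ?? "")
            .filter((model) => model.length > 0)
    )
);

console.info(cpuModels.join(", ")); // e.g. "Apple M1 Max"
```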
10 changes: 8 additions & 2 deletions src/evaluator/LlamaContext/LlamaContext.ts
```diff
@@ -932,7 +932,7 @@ export class LlamaContextSequence {
         if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
             throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
 
-        const {tokenBiasKeys, tokenBiasValues} = getTokenBiasesForAddon(tokenBias);
+        const {tokenBiasKeys, tokenBiasValues} = getTokenBiasesForAddon(tokenBias, this.model);
 
         return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
             temperature,
@@ -1108,7 +1108,7 @@ type CurrentBatchItem = {
     processAmount: number
 };
 
-function getTokenBiasesForAddon(tokenBias?: TokenBias | (() => TokenBias)) {
+function getTokenBiasesForAddon(tokenBias: undefined | TokenBias | (() => TokenBias), currentModel: LlamaModel) {
     if (tokenBias == null)
        return {
            tokenBiasKeys: undefined,
@@ -1118,6 +1118,12 @@ function getTokenBiasesForAddon(tokenBias?: TokenBias | (() => TokenBias)) {
     if (tokenBias instanceof Function)
         tokenBias = tokenBias();
 
+    if (tokenBias._model !== currentModel)
+        throw new Error(
+            "This TokenBias instance was created with a different model than the one used by this context. " +
+            "Make sure you use the model instance of the context sequence for the TokenBias you use it with."
+        );
+
     const tokenBiasKeys: Token[] = [];
     const tokenBiasValues: number[] = [];
 
```
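The second part of this change turns a silent mismatch into an explicit error: token IDs are only meaningful relative to the tokenizer of the model a `TokenBias` was built for, so using it with a context from a different model would bias unrelated tokens. A hypothetical sketch of the failure mode (the paths are placeholders, and only `new TokenBias(model)` is taken from this PR; the surrounding API calls are assumptions):

```typescript
// Hypothetical sketch: modelPath values are placeholders, and
// llama.loadModel() is assumed; new TokenBias(model) matches the
// constructor shown in the TokenBias.ts diff below.
import {getLlama, TokenBias} from "node-llama-cpp";

const llama = await getLlama();
const modelA = await llama.loadModel({modelPath: "model-a.gguf"});
const modelB = await llama.loadModel({modelPath: "model-b.gguf"});

const bias = new TokenBias(modelA); // bound to modelA's tokenizer

// Sampling on a sequence of a context created from modelB while passing
// `bias` now throws:
//   "This TokenBias instance was created with a different model than the
//    one used by this context. ..."
```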
5 changes: 3 additions & 2 deletions src/evaluator/LlamaGrammar.ts
```diff
@@ -32,8 +32,9 @@ export class LlamaGrammar {
 
     /**
      * > GBNF files are supported.
-     * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
-     * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
+     * > More info here: [
+     * github:ggerganov/llama.cpp:grammars/README.md
+     * ](https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
      * @param options
      */
     public constructor({
```
2 changes: 1 addition & 1 deletion src/evaluator/TokenBias.ts
```diff
@@ -4,7 +4,7 @@ import {tokenizeInput} from "../utils/tokenizeInput.js";
 import {LlamaModel} from "./LlamaModel.js";
 
 export class TokenBias {
-    /** @internal */ private readonly _model: LlamaModel;
+    /** @internal */ public readonly _model: LlamaModel;
     /** @internal */ public readonly _biases = new Map<Token, number>();
 
     public constructor(model: LlamaModel) {
```
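Design note: `_model` switches from `private` to `public` so that `getTokenBiasesForAddon()` in LlamaContext.ts can compare it against the context's model; the retained `/** @internal */` tag still marks it as non-public API, so it should stay out of the generated typings and docs.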