feat(inspect gpu command): print device names #198

Merged (7 commits) on Apr 13, 2024
2 changes: 2 additions & 0 deletions docs/guide/vulkan.md
````diff
@@ -17,9 +17,11 @@ You should see an output like this:
 ```ansi
 Vulkan: available
 
+Vulkan device: Apple M1 Max
 Vulkan used VRAM: 0% (64KB/21.33GB)
 Vulkan free VRAM: 99.99% (21.33GB/21.33GB)
 
+CPU model: Apple M1 Max
 Used RAM: 97.37% (31.16GB/32GB)
 Free RAM: 2.62% (860.72MB/32GB)
 ```
````
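The lines added above mirror the new output of the `inspect gpu` command. The same information can be read programmatically; here is a minimal sketch, assuming `getLlama` is exported from `node-llama-cpp` and accepts a `gpu` option (the method names come from the CLI diff further down, but treat the exact `getLlama` options as an assumption):

```typescript
// Hedged sketch: llama.getGpuDeviceNames() and llama.getVramState() are
// taken from the InspectGpuCommand.ts diff below; the getLlama({gpu})
// option shape is an assumption, not a verified public API.
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"});

console.info("Devices:", llama.getGpuDeviceNames()); // e.g. ["Apple M1 Max"]

const vram = llama.getVramState();
console.info(`VRAM used: ${vram.used}/${vram.total} bytes (${vram.free} free)`);
```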
32 changes: 16 additions & 16 deletions package-lock.json

Some generated files are not rendered by default.

8 changes: 4 additions & 4 deletions package.json
```diff
@@ -137,9 +137,9 @@
         "semantic-release": "^22.0.8",
         "tslib": "^2.6.1",
         "typedoc": "^0.25.3",
-        "typedoc-plugin-markdown": "^4.0.0-next.53",
-        "typedoc-plugin-mdn-links": "^3.1.5",
-        "typedoc-vitepress-theme": "^1.0.0-next.9",
+        "typedoc-plugin-markdown": "^4.0.0-next.55",
+        "typedoc-plugin-mdn-links": "^3.1.19",
+        "typedoc-vitepress-theme": "^1.0.0-next.10",
         "typescript": "^5.2.2",
         "vite-node": "^1.4.0",
         "vitepress": "1.0.0-rc.22",
@@ -157,7 +157,7 @@
         "cross-spawn": "^7.0.3",
         "env-var": "^7.3.1",
         "fs-extra": "^11.2.0",
-        "ipull": "^3.0.8",
+        "ipull": "^3.0.11",
         "is-unicode-supported": "^2.0.0",
         "lifecycle-utils": "^1.4.1",
         "log-symbols": "^5.1.0",
```
14 changes: 14 additions & 0 deletions src/cli/commands/inspect/commands/InspectGpuCommand.ts
```diff
@@ -65,6 +65,10 @@ async function logGpuVramUsage(gpu: BuildGpu) {
     });
     const gpuName = getPrettyBuildGpuName(gpu);
     const vramStatus = llama.getVramState();
+    const gpuDeviceNames = llama.getGpuDeviceNames();
+
+    if (gpuDeviceNames.length > 0)
+        console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);
 
     console.info(`${chalk.yellow(`${gpuName} used VRAM:`)} ${getPercentageString(vramStatus.used, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.used) + "/" + bytes(vramStatus.total) + ")")}`);
     console.info(`${chalk.yellow(`${gpuName} free VRAM:`)} ${getPercentageString(vramStatus.free, vramStatus.total)}% ${chalk.gray("(" + bytes(vramStatus.free) + "/" + bytes(vramStatus.total) + ")")}`);
@@ -75,6 +79,16 @@ async function logRamUsage() {
     const totalMemory = os.totalmem();
     const freeMemory = os.freemem();
     const usedMemory = totalMemory - freeMemory;
+    const cpuDeviceNames = Array.from(
+        new Set(
+            os.cpus()
+                .map((cpu) => (cpu.model?.trim?.() ?? ""))
+                .filter((deviceName) => deviceName.length > 0)
+        )
+    );
+
+    if (cpuDeviceNames.length > 0)
+        console.info(`${chalk.yellow("CPU model" + (cpuDeviceNames.length > 1 ? "s" : "") + ":")} ${cpuDeviceNames.join(", ")}`);
 
     console.info(`${chalk.yellow("Used RAM:")} ${getPercentageString(usedMemory, totalMemory)}% ${chalk.gray("(" + bytes(usedMemory) + "/" + bytes(totalMemory) + ")")}`);
     console.info(`${chalk.yellow("Free RAM:")} ${getPercentageString(freeMemory, totalMemory)}% ${chalk.gray("(" + bytes(freeMemory) + "/" + bytes(totalMemory) + ")")}`);
```
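Note that `os.cpus()` returns one entry per logical core, each carrying a `model` string, so a single-CPU machine repeats the same model name once per core; the `Set` collapses those duplicates while still listing every distinct model on heterogeneous systems. A standalone sketch of the same technique, using only Node's built-in `os` module:

```typescript
// Standalone sketch of the deduplication used in logRamUsage() above.
// os.cpus() yields one entry per logical core, so an 8-core CPU would
// otherwise produce the same model string 8 times.
import os from "os";

const cpuModels = Array.from(
    new Set(
        os.cpus()
            .map((cpu) => cpu.model?.trim?.() ?? "")
            .filter((model) => model.length > 0)
    )
);

console.info(cpuModels.join(", ")); // e.g. "Apple M1 Max"
```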
10 changes: 8 additions & 2 deletions src/evaluator/LlamaContext/LlamaContext.ts
```diff
@@ -932,7 +932,7 @@ export class LlamaContextSequence {
         if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
             throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
 
-        const {tokenBiasKeys, tokenBiasValues} = getTokenBiasesForAddon(tokenBias);
+        const {tokenBiasKeys, tokenBiasValues} = getTokenBiasesForAddon(tokenBias, this.model);
 
         return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
             temperature,
@@ -1108,7 +1108,7 @@ type CurrentBatchItem = {
     processAmount: number
 };
 
-function getTokenBiasesForAddon(tokenBias?: TokenBias | (() => TokenBias)) {
+function getTokenBiasesForAddon(tokenBias: undefined | TokenBias | (() => TokenBias), currentModel: LlamaModel) {
     if (tokenBias == null)
        return {
            tokenBiasKeys: undefined,
@@ -1118,6 +1118,12 @@ function getTokenBiasesForAddon(tokenBias?: TokenBias | (() => TokenBias)) {
     if (tokenBias instanceof Function)
         tokenBias = tokenBias();
 
+    if (tokenBias._model !== currentModel)
+        throw new Error(
+            "This TokenBias instance was created with a different model than the one used by this context. " +
+            "Make sure you use the model instance of the context sequence for the TokenBias you use it with."
+        );
+
     const tokenBiasKeys: Token[] = [];
     const tokenBiasValues: number[] = [];
 
```
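The second part of this change turns a silent mismatch into an explicit error: token IDs are only meaningful relative to the tokenizer of the model a `TokenBias` was built for, so using it with a context from a different model would bias unrelated tokens. A hypothetical sketch of the failure mode (the paths are placeholders, and only `new TokenBias(model)` is taken from this PR; the surrounding API calls are assumptions):

```typescript
// Hypothetical sketch: modelPath values are placeholders, and
// llama.loadModel() is assumed; new TokenBias(model) matches the
// constructor shown in the TokenBias.ts diff below.
import {getLlama, TokenBias} from "node-llama-cpp";

const llama = await getLlama();
const modelA = await llama.loadModel({modelPath: "model-a.gguf"});
const modelB = await llama.loadModel({modelPath: "model-b.gguf"});

const bias = new TokenBias(modelA); // bound to modelA's tokenizer

// Sampling on a sequence of a context created from modelB while passing
// `bias` now throws:
//   "This TokenBias instance was created with a different model than the
//    one used by this context. ..."
```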
5 changes: 3 additions & 2 deletions src/evaluator/LlamaGrammar.ts
```diff
@@ -32,8 +32,9 @@ export class LlamaGrammar {
 
     /**
      * > GBNF files are supported.
-     * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
-     * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
+     * > More info here: [
+     * github:ggerganov/llama.cpp:grammars/README.md
+     * ](https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
      * @param options
      */
     public constructor({
```
2 changes: 1 addition & 1 deletion src/evaluator/TokenBias.ts
```diff
@@ -4,7 +4,7 @@ import {tokenizeInput} from "../utils/tokenizeInput.js";
 import {LlamaModel} from "./LlamaModel.js";
 
 export class TokenBias {
-    /** @internal */ private readonly _model: LlamaModel;
+    /** @internal */ public readonly _model: LlamaModel;
     /** @internal */ public readonly _biases = new Map<Token, number>();
 
     public constructor(model: LlamaModel) {
```
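Design note: `_model` switches from `private` to `public` so that `getTokenBiasesForAddon()` in LlamaContext.ts can compare it against the context's model; the retained `/** @internal */` tag still marks it as non-public API, so it should stay out of the generated typings and docs.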