Skip to content

Commit

Permalink
feat: automatically adapt to current free VRAM state (#182)
Browse files Browse the repository at this point in the history
* feat: read tensor info from `gguf` files
* feat: `inspect gguf` command
* feat: `inspect measure` command
* feat: `readGgufFileInfo` function
* feat: GGUF file info on `LlamaModel`
* feat: estimate VRAM usage of the model and context with certain options to adapt to the current VRAM state and set great defaults for `gpuLayers` and `contextSize`. No manual configuration of those options is needed anymore to maximize performance
* feat: `JinjaTemplateChatWrapper`
* feat: use the `tokenizer.chat_template` header from the `gguf` file when available - use it to find a better specialized chat wrapper or use `JinjaTemplateChatWrapper` with it as a fallback
* feat: improve `resolveChatWrapper`
* feat: simplify generation CLI commands: `chat`, `complete`, `infill`
* feat: read GPU device names
* feat: get token type
* refactor: gguf
* test: separate gguf tests into model-dependent and model-independent tests
* test: switch to new vitest test signature
* fix: use the new `llama.cpp` CUDA flag
* fix: improve chat wrappers tokenization
* fix: bugs
  • Loading branch information
giladgd authored Apr 4, 2024
1 parent f3b7f81 commit 35e6f50
Show file tree
Hide file tree
Showing 146 changed files with 10,767 additions and 2,632 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ node_modules
/.eslintcache
/.vitepress/.cache
/test/.models
/test/temp
/coverage

/llama/compile_commands.json
Expand All @@ -20,6 +21,8 @@ node_modules
/llama/lastBuild.json
/llama/gitRelease.bundle
/llama/.temp
/llama/.idea
/llama/cmake-build-debug
/llama/localBuilds
/llama/Release
/llama/Debug
Expand Down
31 changes: 28 additions & 3 deletions .vitepress/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,22 @@ export default defineConfig({
pageData.frontmatter.editLink = false;
pageData.frontmatter.lastUpdated = false;
}

let canonicalUrl = hostname + pageData.relativePath;
if (canonicalUrl.endsWith("/index.html"))
canonicalUrl = canonicalUrl.slice(0, -"index.html".length);
if (canonicalUrl.endsWith("/index.md"))
canonicalUrl = canonicalUrl.slice(0, -"index.md".length);
else if (canonicalUrl.endsWith(".html"))
canonicalUrl = canonicalUrl.slice(0, -".html".length);
else if (canonicalUrl.endsWith(".md"))
canonicalUrl = canonicalUrl.slice(0, -".md".length);

pageData.frontmatter.head ??= [];
pageData.frontmatter.head.push([
"link",
{rel: "canonical", href: canonicalUrl}
])
},
themeConfig: {
editLink: {
Expand Down Expand Up @@ -183,7 +199,16 @@ export default defineConfig({
{text: "Download", link: "/download"},
{text: "Complete", link: "/complete"},
{text: "Infill", link: "/infill"},
{text: "Inspect", link: "/inspect"},
{
text: "Inspect",
link: "/inspect",
collapsed: true,
items: [
{text: "GPU", link: "/inspect/gpu"},
{text: "GGUF", link: "/inspect/gguf"},
{text: "Measure", link: "/inspect/measure"},
]
},
{text: "Build", link: "/build"},
{text: "Clear", link: "/clear"}
]
Expand Down Expand Up @@ -302,7 +327,7 @@ function orderClasses(sidebar: typeof typedocSidebar) {
items: []
};
(classes.items as DefaultTheme.SidebarItem[]).push(LlamaTextGroup);
const LlamaTextGroupItemsOrder = ["SpecialToken", "BuiltinSpecialToken"];
const LlamaTextGroupItemsOrder = ["SpecialTokensText", "SpecialToken"];

groupItems(
classes.items,
Expand All @@ -327,7 +352,7 @@ function orderTypes(sidebar: typeof typedocSidebar) {
(item) => (
item.text === "BatchItem" ||
item.text === "CustomBatchingDispatchSchedule" ||
item.text === "CustomBatchingPrioritizeStrategy" ||
item.text === "CustomBatchingPrioritizationStrategy" ||
item.text === "PrioritizedBatchItem"
),
{collapsed: false}
Expand Down
92 changes: 81 additions & 11 deletions .vitepress/utils/getCommandHtmlDoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,54 @@ import {cliBinName, npxRunPrefix} from "../../src/config.js";
import {buildHtmlTable} from "./buildHtmlTable.js";
import {buildHtmlHeading} from "./buildHtmlHeading.js";

export async function getCommandHtmlDoc(command: CommandModule<any, any>, cliName: string = cliBinName) {
const title = cliName + " " + (command.command ?? "");
export async function getCommandHtmlDoc(command: CommandModule<any, any>, {
cliName = cliBinName,
parentCommand,
subCommandsParentPageLink
}: {
cliName?: string,
parentCommand?: CommandModule<any, any>,
subCommandsParentPageLink?: string
} = {}) {
const currentCommandCliCommand = resolveCommandCliCommand(command);
const resolvedParentCommandCliCommand = resolveCommandCliCommand(parentCommand);
const title = cliName + " " + (resolvedParentCommandCliCommand ?? "<command>").replace("<command>", currentCommandCliCommand ?? "");
const description = command.describe ?? "";
const optionGroups = await getOptionsGroupFromCommand(command);
const {subCommands, optionGroups} = await parseCommandDefinition(command);

let res = "";

if (subCommands.length > 0) {
res += buildHtmlHeading("h2", htmlEscape("Commands"), "commands");

res += buildHtmlTable(
[
"Command",
"Description"
].map(htmlEscape),
subCommands
.map((subCommand) => {
if (subCommand.command == null || subCommand.describe === false)
return null;

const resolvedCommandCliCommand = resolveCommandCliCommand(subCommand) ?? "";
const commandPageLink = resolveCommandPageLink(subCommand);

let cliCommand = resolvedCommandCliCommand;
cliCommand = (currentCommandCliCommand ?? "<command>").replace("<command>", cliCommand);

if (parentCommand != null)
cliCommand = (resolvedParentCommandCliCommand ?? "<command>").replace("<command>", cliCommand);

return [
`<a href="${subCommandsParentPageLink != null ? (subCommandsParentPageLink + "/") : ""}${commandPageLink}"><code>` + htmlEscape(cliName + " " + cliCommand) + "</code></a>",
htmlEscape(String(subCommand.describe ?? ""))
];
})
.filter((row): row is string[] => row != null)
);
}

if (optionGroups.length !== 0) {
res += buildHtmlHeading("h2", htmlEscape("Options"), "options");

Expand All @@ -37,7 +78,10 @@ export async function getCommandHtmlDoc(command: CommandModule<any, any>, cliNam
}


async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Promise<OptionsGroup[]> {
async function parseCommandDefinition(command: CommandModule<any, any>): Promise<{
subCommands: CommandModule<any, any>[],
optionGroups: OptionsGroup[]
}> {
const yargsStub = getYargsStub();
function getYargsStub() {
function option(name: string, option: Options) {
Expand All @@ -57,10 +101,16 @@ async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Pro
return yargsStub;
}

return {option};
function command(subCommand: CommandModule<any, any>) {
subCommands.push(subCommand);
return yargsStub;
}

return {option, command};
}

const options: Record<string, {name: string, option: Options}[]> = {};
const subCommands: CommandModule<any, any>[] = [];
const groups: string[] = [];

if (command.builder instanceof Function)
Expand Down Expand Up @@ -97,10 +147,13 @@ async function getOptionsGroupFromCommand(command: CommandModule<any, any>): Pro
return 0;
});

return groups.map((group) => ({
name: normalizeGroupName(group),
options: options[group]!
}));
return {
subCommands,
optionGroups: groups.map((group) => ({
name: normalizeGroupName(group),
options: options[group]!
}))
};
}

function normalizeGroupName(groupName: string): string {
Expand Down Expand Up @@ -156,8 +209,12 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}

let optionDescription: string[] = option.description != null ? [htmlEscape(option.description)] : [];

if (option.default != null) {
optionDescription.push(`(${htmlEscape("default: ")}<code>${htmlEscape(option.default)}</code>)`);
const hasDefaultDescription = option.defaultDescription != null && option.defaultDescription.trim().length > 0;
if (option.default != null || hasDefaultDescription) {
if (hasDefaultDescription && option.defaultDescription != null)
optionDescription.push(`<span style="opacity: 0.72">(${htmlEscape("default: ")}${htmlEscape(option.defaultDescription.trim())})</span>`);
else
optionDescription.push(`<span style="opacity: 0.72">(${htmlEscape("default: ")}<code>${htmlEscape(option.default)}</code>)</span>`);
}

if (option.type != null) {
Expand All @@ -184,6 +241,19 @@ function renderOptionsGroupOptionsTable(options: {name: string, option: Options}
return buildHtmlTable(tableHeaders, tableRows);
}

/**
 * Resolve the CLI command string of a command module.
 *
 * When `command.command` is an array, its first entry is returned;
 * otherwise `command.command` is returned as-is.
 *
 * @param command - the command module to read the `command` property from
 * @returns the resolved command string, or `undefined` when no command module
 * is given (or when the resolved value itself is `undefined`)
 */
function resolveCommandCliCommand(command?: CommandModule<any, any>) {
    if (command != null) {
        const cliCommand = command.command;

        // an array holds several entries - only the first one is used here
        if (cliCommand instanceof Array)
            return cliCommand[0];

        return cliCommand;
    }

    return undefined;
}

/**
 * Resolve the page link segment of a command: the part of its resolved
 * CLI command string that precedes the first space.
 *
 * @param command - the command module to resolve the page link for
 * @returns the first space-separated segment of the command string,
 * or `undefined` when the command has no resolvable command string
 */
function resolveCommandPageLink(command: CommandModule<any, any>) {
    const cliCommand = resolveCommandCliCommand(command);
    if (cliCommand == null)
        return undefined;

    // e.g. "gguf <path>" -> "gguf"
    const [pageName] = cliCommand.split(" ");
    return pageName;
}

type OptionsGroup = {
name: string,
options: Array<{
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
## Features
* Run a text generation model locally on your machine
* Metal, CUDA and Vulkan support
* Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python
* Pre-built binaries are provided, with a fallback to building from source _**without**_ `node-gyp` or Python
* Chat with a model using a chat wrapper
* Use the CLI to chat with a model without writing any code
* Up-to-date with the latest version of `llama.cpp`. Download and compile the latest release with a single CLI command.
Expand Down
20 changes: 18 additions & 2 deletions docs/guide/cli/cli.data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ import {BuildCommand} from "../../../src/cli/commands/BuildCommand.js";
import {ChatCommand} from "../../../src/cli/commands/ChatCommand.js";
import {CompleteCommand} from "../../../src/cli/commands/CompleteCommand.js";
import {InfillCommand} from "../../../src/cli/commands/InfillCommand.js";
import {InspectCommand} from "../../../src/cli/commands/InspectCommand.js";
import {InspectCommand} from "../../../src/cli/commands/inspect/InspectCommand.js";
import {InspectGpuCommand} from "../../../src/cli/commands/inspect/commands/InspectGpuCommand.js";
import {InspectGgufCommand} from "../../../src/cli/commands/inspect/commands/InspectGgufCommand.js";
import {DownloadCommand} from "../../../src/cli/commands/DownloadCommand.js";
import {ClearCommand} from "../../../src/cli/commands/ClearCommand.js";
import {htmlEscape} from "../../../.vitepress/utils/htmlEscape.js";
import {cliBinName, npxRunPrefix} from "../../../src/config.js";
import {buildHtmlHeading} from "../../../.vitepress/utils/buildHtmlHeading.js";
import {buildHtmlTable} from "../../../.vitepress/utils/buildHtmlTable.js";
import {setIsInDocumentationMode} from "../../../src/state.js";
import {InspectMeasureCommand} from "../../../src/cli/commands/inspect/commands/InspectMeasureCommand.js";

export default {
async load() {
Expand All @@ -31,7 +34,20 @@ export default {
chat: await getCommandHtmlDoc(ChatCommand),
complete: await getCommandHtmlDoc(CompleteCommand),
infill: await getCommandHtmlDoc(InfillCommand),
inspect: await getCommandHtmlDoc(InspectCommand),
inspect: {
index: await getCommandHtmlDoc(InspectCommand, {
subCommandsParentPageLink: "inspect"
}),
gpu: await getCommandHtmlDoc(InspectGpuCommand, {
parentCommand: InspectCommand
}),
gguf: await getCommandHtmlDoc(InspectGgufCommand, {
parentCommand: InspectCommand
}),
measure: await getCommandHtmlDoc(InspectMeasureCommand, {
parentCommand: InspectCommand
})
},
download: await getCommandHtmlDoc(DownloadCommand),
build: await getCommandHtmlDoc(BuildCommand),
clear: await getCommandHtmlDoc(ClearCommand)
Expand Down
2 changes: 1 addition & 1 deletion docs/guide/cli/inspect.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ outline: deep

<script setup lang="ts">
import {data as docs} from "./cli.data.js";
const commandDoc = docs.inspect;
const commandDoc = docs.inspect.index;
</script>

{{commandDoc.description}}
Expand Down
17 changes: 17 additions & 0 deletions docs/guide/cli/inspect/gguf.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
outline: deep
---
# `inspect gguf` command

<script setup lang="ts">
import {data as docs} from "../cli.data.js";
const commandDoc = docs.inspect.gguf;
</script>

{{commandDoc.description}}

## Usage
```shell-vue
{{commandDoc.usage}}
```
<div v-html="commandDoc.options"></div>
17 changes: 17 additions & 0 deletions docs/guide/cli/inspect/gpu.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
outline: deep
---
# `inspect gpu` command

<script setup lang="ts">
import {data as docs} from "../cli.data.js";
const commandDoc = docs.inspect.gpu;
</script>

{{commandDoc.description}}

## Usage
```shell-vue
{{commandDoc.usage}}
```
<div v-html="commandDoc.options"></div>
17 changes: 17 additions & 0 deletions docs/guide/cli/inspect/measure.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
outline: deep
---
# `inspect measure` command

<script setup lang="ts">
import {data as docs} from "../cli.data.js";
const commandDoc = docs.inspect.measure;
</script>

{{commandDoc.description}}

## Usage
```shell-vue
{{commandDoc.usage}}
```
<div v-html="commandDoc.options"></div>
10 changes: 5 additions & 5 deletions llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ include_directories("gpuInfo")
include_directories("llama.cpp")
include_directories("./llama.cpp/common")

if (LLAMA_CUBLAS)
if (LLAMA_CUDA)
cmake_minimum_required(VERSION 3.17)

find_package(CUDAToolkit)
if (CUDAToolkit_FOUND)
message(STATUS "Using cuBLAS for GPU info")
message(STATUS "Using CUDA for GPU info")

enable_language(CUDA)

set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)

add_compile_definitions(GPU_INFO_USE_CUBLAS)
add_compile_definitions(GPU_INFO_USE_CUDA)

if (LLAMA_STATIC)
set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
Expand All @@ -60,7 +60,7 @@ if (LLAMA_CUBLAS)
endif()
endif()
else()
message(FATAL_ERROR "cuBLAS was not found")
message(FATAL_ERROR "CUDA was not found")
endif()
endif()

Expand Down Expand Up @@ -100,7 +100,7 @@ if (LLAMA_HIPBLAS)

if (${hipblas_FOUND} AND ${hip_FOUND})
message(STATUS "Using HIP and hipBLAS for GPU info")
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUBLAS)
add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUDA)
add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
Expand Down
Loading

0 comments on commit 35e6f50

Please sign in to comment.