Skip to content

Commit

Permalink
local-apps: update llama.cpp snippet (huggingface#1103)
Browse files Browse the repository at this point in the history
This change is related to these upstream PRs:
- ggerganov/llama.cpp#11195 allows using
tag-based repo names, like on Ollama
- ggerganov/llama.cpp#11214 automatically turns
on `--conversation` mode for models that have a chat template

Example:

```sh
# for "instruct" model, conversation mode is enabled automatically
llama-cli -hf bartowski/Llama-3.2-1B-Instruct-GGUF

# for a non-instruct model, it runs in completion mode
llama-cli -hf TheBloke/Llama-2-7B-GGUF -p "Once upon a time,"
```
  • Loading branch information
ngxson authored and aykutkardas committed Jan 20, 2025
1 parent dc0f69d commit 9da26a4
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 14 deletions.
10 changes: 2 additions & 8 deletions packages/tasks/src/local-apps.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@ describe("local-apps", () => {
const snippet = snippetFunc(model);

expect(snippet[0].content).toEqual(`# Load and run the model:
llama-cli \\
--hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\
--hf-file {{GGUF_FILE}} \\
-p "You are a helpful assistant" \\
--conversation`);
llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF`);
});

it("llama.cpp non-conversational", async () => {
Expand All @@ -30,9 +26,7 @@ llama-cli \\
const snippet = snippetFunc(model);

expect(snippet[0].content).toEqual(`# Load and run the model:
llama-cli \\
--hf-repo "mlabonne/gemma-2b-GGUF" \\
--hf-file {{GGUF_FILE}} \\
llama-cli -hf mlabonne/gemma-2b-GGUF \\
-p "Once upon a time,"`);
});

Expand Down
15 changes: 9 additions & 6 deletions packages/tasks/src/local-apps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,20 @@ function isMlxModel(model: ModelData) {
}

const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
let tagName = "";
if (filepath) {
const quantLabel = parseGGUFQuantLabel(filepath);
tagName = quantLabel ? `:${quantLabel}` : "";
}
const command = (binary: string) => {
const snippet = [
"# Load and run the model:",
`${binary} \\`,
` --hf-repo "${model.id}" \\`,
` --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
` -p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`,
`${binary} -hf ${model.id}${tagName}`,
];
if (model.tags.includes("conversational")) {
if (!model.tags.includes("conversational")) {
// for non-conversational models, add a prompt
snippet[snippet.length - 1] += " \\";
snippet.push(" --conversation");
snippet.push(" -p \"Once upon a time,\"");
}
return snippet.join("\n");
};
Expand Down

0 comments on commit 9da26a4

Please sign in to comment.