Skip to content

Commit bdc211a

Browse files
authored
feat(llm): support model import for vllm (#24499)
1 parent dfaf5bc commit bdc211a

File tree

3 files changed

+98
-26
lines changed

3 files changed

+98
-26
lines changed

pkg/llm/drivers/llm_container/vllm.go

Lines changed: 90 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@ package llm_container
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"net/http"
8+
"net/url"
9+
"os"
710
"path"
11+
"path/filepath"
812
"strconv"
913
"strings"
1014
"time"
@@ -474,38 +478,102 @@ func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCreden
474478
return nil
475479
}
476480

477-
func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
478-
lc, err := llm.GetLLMContainer()
479-
if err != nil {
480-
return "", nil, errors.Wrap(err, "get llm container")
481+
// resolveHfdRevision maps a user-supplied model tag to the revision string
// used when building Hugging Face URLs.
//
// Leading and trailing whitespace is ignored. A tag that is empty after
// trimming resolves to "main"; otherwise the trimmed tag is returned as is.
func resolveHfdRevision(modelTag string) string {
	// Trim once and reuse the result instead of trimming the same input twice.
	if rev := strings.TrimSpace(modelTag); rev != "" {
		return rev
	}
	return "main"
}
482487

483-
// Logic to download model inside the container
484-
// modelName is expected to be like "facebook/opt-125m"
485-
targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelName)
488+
// hfModelAPIResponse is the subset of the model metadata JSON document that
// DownloadModel decodes. Only the "siblings" array is read; any other keys in
// the payload are ignored by encoding/json.
type hfModelAPIResponse struct {
	// Siblings holds one entry per element of the "siblings" array.
	Siblings []struct {
		// RFilename is the entry's "rfilename" value. DownloadModel treats it
		// as a slash-separated path relative to the model directory.
		RFilename string `json:"rfilename"`
	} `json:"siblings"`
}
486493

487-
// Check if already exists
488-
checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST'", targetDir)
489-
out, _ := exec(ctx, lc.CmpId, checkCmd, 10)
490-
if strings.Contains(out, "EXIST") {
491-
log.Infof("Model %s already exists at %s", modelName, targetDir)
492-
return modelName, []string{targetDir}, nil
494+
// escapeURLPathPreserveSlash percent-encodes p for use inside a URL path while
// keeping its "/" separators intact: the string is split on "/", every segment
// is passed through url.PathEscape, and the segments are re-joined with "/".
// An empty input yields an empty string.
func escapeURLPathPreserveSlash(p string) string {
	if len(p) == 0 {
		return ""
	}
	segments := strings.Split(p, "/")
	escaped := make([]string, 0, len(segments))
	for _, segment := range segments {
		escaped = append(escaped, url.PathEscape(segment))
	}
	return strings.Join(escaped, "/")
}
494504

495-
// Try to use huggingface-cli
496-
// Assuming container has internet access and tools
497-
downloadCmd := fmt.Sprintf("mkdir -p %s && huggingface-cli download %s --local-dir %s --local-dir-use-symlinks False", targetDir, modelName, targetDir)
505+
// isNonEmptyFile reports whether p can be stat'ed, is not a directory, and has
// a size greater than zero. Any os.Stat error (including "not found") yields
// false.
func isNonEmptyFile(p string) bool {
	info, statErr := os.Stat(p)
	switch {
	case statErr != nil:
		return false
	case info.IsDir():
		return false
	default:
		return info.Size() > 0
	}
}
512+
513+
func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
514+
// Download HF model on host into tmpDir for instant-model import.
515+
// We place files under tmpDir/huggingface/<org>/<repo> so that the archive contains relative paths.
516+
if strings.TrimSpace(tmpDir) == "" {
517+
return "", nil, errors.Error("tmpDir is empty")
518+
}
519+
if strings.TrimSpace(modelName) == "" {
520+
return "", nil, errors.Error("modelName is empty")
521+
}
498522

499-
// If huggingface-cli is missing, try installing it (if pip available)
500-
// fallback to pip install
501-
fullCmd := fmt.Sprintf("if ! command -v huggingface-cli &> /dev/null; then pip install -U huggingface_hub; fi; %s", downloadCmd)
523+
localDir := filepath.Join(tmpDir, "huggingface", filepath.FromSlash(modelName))
524+
if err := os.MkdirAll(localDir, 0755); err != nil {
525+
return "", nil, errors.Wrap(err, "mkdir local model dir")
526+
}
527+
// If already downloaded, short-circuit (directory exists and non-empty).
528+
if entries, err := os.ReadDir(localDir); err == nil && len(entries) > 0 {
529+
targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelName)
530+
log.Infof("Model %s already exists in import dir %s", modelName, localDir)
531+
return modelName, []string{targetDir}, nil
532+
}
502533

503-
log.Infof("Downloading model %s with cmd: %s", modelName, fullCmd)
504-
_, err = exec(ctx, lc.CmpId, fullCmd, 3600) // 1 hour timeout for large models
534+
rev := resolveHfdRevision(modelTag)
535+
apiURL := fmt.Sprintf("%s/api/models/%s?revision=%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.QueryEscape(rev))
536+
log.Infof("Downloading HF model via HF Mirror API: %s", func() string {
537+
b, _ := json.Marshal(map[string]string{
538+
"model": modelName,
539+
"revision": rev,
540+
"dir": localDir,
541+
"endpoint": api.LLM_VLLM_HF_ENDPOINT,
542+
"api": apiURL,
543+
})
544+
return string(b)
545+
}())
546+
metaBody, err := llm.HttpGet(ctx, apiURL)
505547
if err != nil {
506-
return "", nil, errors.Wrapf(err, "failed to download model %s", modelName)
548+
return "", nil, errors.Wrapf(err, "fetch hf model metadata failed: %s", apiURL)
549+
}
550+
meta := hfModelAPIResponse{}
551+
if err := json.Unmarshal(metaBody, &meta); err != nil {
552+
return "", nil, errors.Wrap(err, "unmarshal hf model metadata")
553+
}
554+
if len(meta.Siblings) == 0 {
555+
return "", nil, errors.Errorf("hf model metadata has no siblings: %s", apiURL)
507556
}
508557

558+
for _, s := range meta.Siblings {
559+
rf := strings.TrimSpace(s.RFilename)
560+
if rf == "" {
561+
continue
562+
}
563+
dst := filepath.Join(localDir, filepath.FromSlash(rf))
564+
if isNonEmptyFile(dst) {
565+
continue
566+
}
567+
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
568+
return "", nil, errors.Wrapf(err, "mkdir for %s", dst)
569+
}
570+
fileURL := fmt.Sprintf("%s/%s/resolve/%s/%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.PathEscape(rev), escapeURLPathPreserveSlash(rf))
571+
if err := llm.HttpDownloadFile(ctx, fileURL, dst); err != nil {
572+
return "", nil, errors.Wrapf(err, "download file failed: %s -> %s", fileURL, dst)
573+
}
574+
}
575+
576+
targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelName)
509577
return modelName, []string{targetDir}, nil
510578
}
511579

pkg/llm/models/instantmodel.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -846,7 +846,11 @@ func (model *SInstantModel) DoImport(ctx context.Context, userCred mcclient.Toke
846846
imgFileSize := imgFileStat.Size()
847847

848848
imgParams := imageapi.ImageCreateInput{}
849-
imgParams.GenerateName = fmt.Sprintf("%s-%s", input.ModelName, input.ModelTag)
849+
safeModelName := strings.ReplaceAll(strings.TrimSpace(input.ModelName), "/", "_")
850+
if safeModelName == "" {
851+
safeModelName = "instant-model"
852+
}
853+
imgParams.GenerateName = fmt.Sprintf("%s-%s", safeModelName, strings.TrimSpace(input.ModelTag))
850854
imgParams.DiskFormat = "tgz"
851855
imgParams.Size = &imgFileSize
852856
imgParams.Properties = map[string]string{

pkg/mcclient/options/llm/instantmodel.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func (o *LLMInstantModelShowOptions) Params() (jsonutils.JSONObject, error) {
2929
type LLMInstantModelCreateOptions struct {
3030
options.BaseCreateOptions
3131

32-
LLM_TYPE string `help:"llm container type" choices:"ollama" json:"llm_type"`
32+
LLM_TYPE string `help:"llm container type" choices:"ollama|vllm" json:"llm_type"`
3333
MODEL_NAME string `json:"model_name"`
3434
MODEL_TAG string `json:"model_tag"`
3535

@@ -63,8 +63,8 @@ func (o *LLMInstantModelDeleteOptions) Params() (jsonutils.JSONObject, error) {
6363
}
6464

6565
// LLMInstantModelImportOptions carries the arguments of the instant-model
// import command. Each field is serialized under the key given in its json tag.
type LLMInstantModelImportOptions struct {
	// LLM_TYPE selects the llm container type; the accepted choices are
	// "ollama" and "vllm".
	LLM_TYPE string `help:"llm container type" choices:"ollama|vllm" json:"llm_type"`
	// MODEL_NAME is the model to import, either a short name or an
	// "<org>/<repo>" style name.
	MODEL_NAME string `help:"model name to import, e.g. qwen3 or Qwen/Qwen3-VL-8B-Instruct" json:"model_name"`
	// MODEL_TAG is the tag of the model to import.
	MODEL_TAG string `help:"model tag to import, e.g. 8b" json:"model_tag"`
}
7070

0 commit comments

Comments
 (0)