diff --git a/Dockerfile b/Dockerfile index 3f2eb3261e45d..f2129f2c1e213 100644 --- a/Dockerfile +++ b/Dockerfile @@ -191,6 +191,8 @@ ENV \ MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \ # Novita NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \ + # Nvidia NIM + NVIDIA_API_KEY="" NVIDIA_MODEL_LIST="" NVIDIA_PROXY_URL="" \ # Ollama ENABLED_OLLAMA="" OLLAMA_MODEL_LIST="" OLLAMA_PROXY_URL="" \ # OpenAI diff --git a/Dockerfile.database b/Dockerfile.database index 307caf95443f4..88fdcf539ec4f 100644 --- a/Dockerfile.database +++ b/Dockerfile.database @@ -228,6 +228,8 @@ ENV \ MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \ # Novita NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \ + # Nvidia NIM + NVIDIA_API_KEY="" NVIDIA_MODEL_LIST="" NVIDIA_PROXY_URL="" \ # Ollama ENABLED_OLLAMA="" OLLAMA_MODEL_LIST="" OLLAMA_PROXY_URL="" \ # OpenAI diff --git a/src/app/[variants]/(main)/settings/llm/ProviderList/providers.tsx b/src/app/[variants]/(main)/settings/llm/ProviderList/providers.tsx index 333864ec7fc37..9313269567551 100644 --- a/src/app/[variants]/(main)/settings/llm/ProviderList/providers.tsx +++ b/src/app/[variants]/(main)/settings/llm/ProviderList/providers.tsx @@ -17,6 +17,7 @@ import { MistralProviderCard, MoonshotProviderCard, NovitaProviderCard, + NvidiaProviderCard, OpenRouterProviderCard, PerplexityProviderCard, QwenProviderCard, @@ -68,6 +69,7 @@ export const useProviderList = (): ProviderItem[] => { TogetherAIProviderCard, FireworksAIProviderCard, GroqProviderCard, + NvidiaProviderCard, PerplexityProviderCard, MistralProviderCard, Ai21ProviderCard, diff --git a/src/config/aiModels/index.ts b/src/config/aiModels/index.ts index e0d9b9654daf0..d2a255dae92c2 100644 --- a/src/config/aiModels/index.ts +++ b/src/config/aiModels/index.ts @@ -23,6 +23,7 @@ import { default as minimax } from './minimax'; import { default as mistral } from './mistral'; import { default as moonshot } from './moonshot'; import { default as novita } from './novita'; 
+import { default as nvidia } from './nvidia'; import { default as ollama } from './ollama'; import { default as openai } from './openai'; import { default as openrouter } from './openrouter'; @@ -84,6 +85,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({ mistral, moonshot, novita, + nvidia, ollama, openai, openrouter, @@ -126,6 +128,7 @@ export { default as minimax } from './minimax'; export { default as mistral } from './mistral'; export { default as moonshot } from './moonshot'; export { default as novita } from './novita'; +export { default as nvidia } from './nvidia'; export { default as ollama } from './ollama'; export { default as openai } from './openai'; export { default as openrouter } from './openrouter'; diff --git a/src/config/aiModels/nvidia.ts b/src/config/aiModels/nvidia.ts new file mode 100644 index 0000000000000..60469debda45c --- /dev/null +++ b/src/config/aiModels/nvidia.ts @@ -0,0 +1,155 @@ +import { AIChatModelCard } from '@/types/aiModel'; + +const nvidiaChatModels: AIChatModelCard[] = [ + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 128_000, + description: '先进的 LLM,擅长推理、数学、常识和函数调用。', + displayName: 'Llama 3.3 70B Instruct', + enabled: true, + id: 'meta/llama-3.3-70b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 128_000, + description: '先进的最尖端小型语言模型,具备语言理解、卓越的推理能力和文本生成能力。', + displayName: 'Llama 3.2 1B Instruct', + id: 'meta/llama-3.2-1b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 128_000, + description: '先进的最尖端小型语言模型,具备语言理解、卓越的推理能力和文本生成能力。', + displayName: 'Llama 3.2 3B Instruct', + id: 'meta/llama-3.2-3b-instruct', + type: 'chat' + }, + { + abilities: { + vision: true, + }, + contextWindowTokens: 128_000, + description: '尖端的视觉-语言模型,擅长从图像中进行高质量推理。', + displayName: 'Llama 3.2 11B Vision Instruct', + enabled: true, + id: 'meta/llama-3.2-11b-vision-instruct', + type: 'chat' + }, + { + abilities: { + vision: true, + }, + contextWindowTokens: 128_000, + description: 
'尖端的视觉-语言模型,擅长从图像中进行高质量推理。', + displayName: 'Llama 3.2 90B Vision Instruct', + id: 'meta/llama-3.2-90b-vision-instruct', + type: 'chat' + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 128_000, + description: '先进的最尖端模型,具备语言理解、卓越的推理能力和文本生成能力。', + displayName: 'Llama 3.1 8B Instruct', + id: 'meta/llama-3.1-8b-instruct', + type: 'chat' + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 128_000, + description: '赋能复杂对话,具备卓越的上下文理解、推理能力和文本生成能力。', + displayName: 'Llama 3.1 70B Instruct', + id: 'meta/llama-3.1-70b-instruct', + type: 'chat' + }, + { + abilities: { + functionCall: true, + }, + contextWindowTokens: 128_000, + description: '高级 LLM,支持合成数据生成、知识蒸馏和推理,适用于聊天机器人、编程和特定领域任务。', + displayName: 'Llama 3.1 405B Instruct', + id: 'meta/llama-3.1-405b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 32_768, + description: '独特的语言模型,提供无与伦比的准确性和效率表现。', + displayName: 'Llama 3.1 Nemotron 51B Instruct', + id: 'nvidia/llama-3.1-nemotron-51b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 32_768, + description: 'Llama-3.1-Nemotron-70B-Instruct 是 NVIDIA 定制的大型语言模型,旨在提高 LLM 生成的响应的帮助性。', + displayName: 'Llama 3.1 Nemotron 70B Instruct', + id: 'nvidia/llama-3.1-nemotron-70b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 8192, + description: '面向边缘应用的高级小型语言生成 AI 模型。', + displayName: 'Gemma 2 2B Instruct', + id: 'google/gemma-2-2b-it', + type: 'chat' + }, + { + contextWindowTokens: 8192, + description: '尖端文本生成模型,擅长文本理解、转换和代码生成。', + displayName: 'Gemma 2 9B Instruct', + id: 'google/gemma-2-9b-it', + type: 'chat' + }, + { + contextWindowTokens: 8192, + description: '尖端文本生成模型,擅长文本理解、转换和代码生成。', + displayName: 'Gemma 2 27B Instruct', + id: 'google/gemma-2-27b-it', + type: 'chat' + }, + { + abilities: { + reasoning: true, + }, + contextWindowTokens: 128_000, + description: '最先进的高效 LLM,擅长推理、数学和编程。', + displayName: 'DeepSeek R1', + enabled: true, + id: 'deepseek-ai/deepseek-r1', + type: 'chat' + }, + { + abilities: { + 
functionCall: true, + }, + contextWindowTokens: 32_768, + description: '面向中文和英文的 LLM,针对语言、编程、数学、推理等领域。', + displayName: 'Qwen2.5 7B Instruct', + enabled: true, + id: 'qwen/qwen2.5-7b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 32_768, + description: '强大的中型代码模型,支持 32K 上下文长度,擅长多语言编程。', + displayName: 'Qwen2.5 Coder 7B Instruct', + id: 'qwen/qwen2.5-coder-7b-instruct', + type: 'chat' + }, + { + contextWindowTokens: 32_768, + description: '高级 LLM,支持代码生成、推理和修复,涵盖主流编程语言。', + displayName: 'Qwen2.5 Coder 32B Instruct', + id: 'qwen/qwen2.5-coder-32b-instruct', + type: 'chat' + }, +] + +export const allModels = [...nvidiaChatModels]; + +export default allModels; diff --git a/src/config/llm.ts b/src/config/llm.ts index f5513ff1bf300..7cc577273ae0d 100644 --- a/src/config/llm.ts +++ b/src/config/llm.ts @@ -77,6 +77,9 @@ export const getLLMConfig = () => { ENABLED_NOVITA: z.boolean(), NOVITA_API_KEY: z.string().optional(), + ENABLED_NVIDIA: z.boolean(), + NVIDIA_API_KEY: z.string().optional(), + ENABLED_BAICHUAN: z.boolean(), BAICHUAN_API_KEY: z.string().optional(), @@ -202,6 +205,9 @@ export const getLLMConfig = () => { ENABLED_NOVITA: !!process.env.NOVITA_API_KEY, NOVITA_API_KEY: process.env.NOVITA_API_KEY, + ENABLED_NVIDIA: !!process.env.NVIDIA_API_KEY, + NVIDIA_API_KEY: process.env.NVIDIA_API_KEY, + ENABLED_BAICHUAN: !!process.env.BAICHUAN_API_KEY, BAICHUAN_API_KEY: process.env.BAICHUAN_API_KEY, diff --git a/src/config/modelProviders/index.ts b/src/config/modelProviders/index.ts index 8f869170fb9e0..6805b2db65e3b 100644 --- a/src/config/modelProviders/index.ts +++ b/src/config/modelProviders/index.ts @@ -23,6 +23,7 @@ import MinimaxProvider from './minimax'; import MistralProvider from './mistral'; import MoonshotProvider from './moonshot'; import NovitaProvider from './novita'; +import NvidiaProvider from './nvidia'; import OllamaProvider from './ollama'; import OpenAIProvider from './openai'; import OpenRouterProvider from './openrouter'; @@ -67,6 +68,7 @@ 
export const LOBE_DEFAULT_MODEL_LIST: ChatModelCard[] = [ ZeroOneProvider.chatModels, StepfunProvider.chatModels, NovitaProvider.chatModels, + NvidiaProvider.chatModels, BaichuanProvider.chatModels, TaichuProvider.chatModels, CloudflareProvider.chatModels, @@ -96,6 +98,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [ CloudflareProvider, GithubProvider, NovitaProvider, + NvidiaProvider, TogetherAIProvider, FireworksAIProvider, GroqProvider, @@ -158,6 +161,7 @@ export { default as MinimaxProviderCard } from './minimax'; export { default as MistralProviderCard } from './mistral'; export { default as MoonshotProviderCard } from './moonshot'; export { default as NovitaProviderCard } from './novita'; +export { default as NvidiaProviderCard } from './nvidia'; export { default as OllamaProviderCard } from './ollama'; export { default as OpenAIProviderCard } from './openai'; export { default as OpenRouterProviderCard } from './openrouter'; diff --git a/src/config/modelProviders/nvidia.ts b/src/config/modelProviders/nvidia.ts new file mode 100644 index 0000000000000..4770402c7aa0e --- /dev/null +++ b/src/config/modelProviders/nvidia.ts @@ -0,0 +1,21 @@ +import { ModelProviderCard } from '@/types/llm'; + +const Nvidia: ModelProviderCard = { + chatModels: [], + checkModel: 'meta/llama-3.2-1b-instruct', + description: 'NVIDIA NIM™ 提供容器,可用于自托管 GPU 加速推理微服务,支持在云端、数据中心、RTX™ AI 个人电脑和工作站上部署预训练和自定义 AI 模型。', + id: 'nvidia', + modelList: { showModelFetcher: true }, + modelsUrl: 'https://build.nvidia.com/models', + name: 'Nvidia', + settings: { + proxyUrl: { + placeholder: 'https://integrate.api.nvidia.com/v1', + }, + sdkType: 'openai', + showModelFetcher: true, + }, + url: 'https://build.nvidia.com', +}; + +export default Nvidia; diff --git a/src/libs/agent-runtime/AgentRuntime.ts b/src/libs/agent-runtime/AgentRuntime.ts index 7bef8a26f5f1c..8c8bc55e3065d 100644 --- a/src/libs/agent-runtime/AgentRuntime.ts +++ b/src/libs/agent-runtime/AgentRuntime.ts @@ -26,6 +26,7 @@ import { 
LobeMinimaxAI } from './minimax'; import { LobeMistralAI } from './mistral'; import { LobeMoonshotAI } from './moonshot'; import { LobeNovitaAI } from './novita'; +import { LobeNvidiaAI } from './nvidia'; import { LobeOllamaAI } from './ollama'; import { LobeOpenAI } from './openai'; import { LobeOpenRouterAI } from './openrouter'; @@ -157,6 +158,7 @@ class AgentRuntime { mistral: Partial<ClientOptions>; moonshot: Partial<ClientOptions>; novita: Partial<ClientOptions>; + nvidia: Partial<ClientOptions>; ollama: Partial<ClientOptions>; openai: Partial<ClientOptions>; openrouter: Partial<ClientOptions>; @@ -300,6 +302,11 @@ class AgentRuntime { break; } + case ModelProvider.Nvidia: { + runtimeModel = new LobeNvidiaAI(params.nvidia); + break; + } + case ModelProvider.Baichuan: { runtimeModel = new LobeBaichuanAI(params.baichuan ?? {}); break; diff --git a/src/libs/agent-runtime/nvidia/index.ts b/src/libs/agent-runtime/nvidia/index.ts new file mode 100644 index 0000000000000..6b505a86580cd --- /dev/null +++ b/src/libs/agent-runtime/nvidia/index.ts @@ -0,0 +1,44 @@ +import { ModelProvider } from '../types'; +import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory'; + +import type { ChatModelCard } from '@/types/llm'; + +export interface NvidiaModelCard { + id: string; +} + +export const LobeNvidiaAI = LobeOpenAICompatibleFactory({ + baseURL: 'https://integrate.api.nvidia.com/v1', + debug: { + chatCompletion: () => process.env.DEBUG_NVIDIA_CHAT_COMPLETION === '1', + }, + models: async ({ client }) => { + const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels'); + + const modelsPage = await client.models.list() as any; + const modelList: NvidiaModelCard[] = modelsPage.data; + + return modelList + .map((model) => { + const knownModel = LOBE_DEFAULT_MODEL_LIST.find((m) => model.id.toLowerCase() === m.id.toLowerCase()); + + return { + contextWindowTokens: knownModel?.contextWindowTokens ?? undefined, + displayName: knownModel?.displayName ??
undefined, + enabled: knownModel?.enabled || false, + functionCall: + knownModel?.abilities?.functionCall + || false, + id: model.id, + reasoning: + knownModel?.abilities?.reasoning + || false, + vision: + knownModel?.abilities?.vision + || false, + }; + }) + .filter(Boolean) as ChatModelCard[]; + }, + provider: ModelProvider.Nvidia, +}); diff --git a/src/libs/agent-runtime/types/type.ts b/src/libs/agent-runtime/types/type.ts index e772d2e16289a..1015456f5895f 100644 --- a/src/libs/agent-runtime/types/type.ts +++ b/src/libs/agent-runtime/types/type.ts @@ -45,6 +45,7 @@ export enum ModelProvider { Mistral = 'mistral', Moonshot = 'moonshot', Novita = 'novita', + Nvidia = 'nvidia', Ollama = 'ollama', OpenAI = 'openai', OpenRouter = 'openrouter', diff --git a/src/types/user/settings/keyVaults.ts b/src/types/user/settings/keyVaults.ts index 3fb2a200f260b..cc9e6d551c1c5 100644 --- a/src/types/user/settings/keyVaults.ts +++ b/src/types/user/settings/keyVaults.ts @@ -50,6 +50,7 @@ export interface UserKeyVaults { mistral?: OpenAICompatibleKeyVault; moonshot?: OpenAICompatibleKeyVault; novita?: OpenAICompatibleKeyVault; + nvidia?: OpenAICompatibleKeyVault; ollama?: OpenAICompatibleKeyVault; openai?: OpenAICompatibleKeyVault; openrouter?: OpenAICompatibleKeyVault;