Skip to content

Commit

Permalink
✨ feat: add Nvidia NIM provider support (#6142)
Browse files Browse the repository at this point in the history
* ✨ feat: add Nvidia NIM provider support

* 🐛 fix: fix build error

* 🔨 chore: sort code

* 💄 style: update model list

* 🐛 fix: fix ci error
  • Loading branch information
hezhijie0327 authored Feb 14, 2025
1 parent bf6699c commit ab796a7
Show file tree
Hide file tree
Showing 12 changed files with 248 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ ENV \
MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \
# Novita
NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
# Nvidia NIM
NVIDIA_API_KEY="" NVIDIA_MODEL_LIST="" NVIDIA_PROXY_URL="" \
# Ollama
ENABLED_OLLAMA="" OLLAMA_MODEL_LIST="" OLLAMA_PROXY_URL="" \
# OpenAI
Expand Down
2 changes: 2 additions & 0 deletions Dockerfile.database
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ ENV \
MOONSHOT_API_KEY="" MOONSHOT_MODEL_LIST="" MOONSHOT_PROXY_URL="" \
# Novita
NOVITA_API_KEY="" NOVITA_MODEL_LIST="" \
# Nvidia NIM
NVIDIA_API_KEY="" NVIDIA_MODEL_LIST="" NVIDIA_PROXY_URL="" \
# Ollama
ENABLED_OLLAMA="" OLLAMA_MODEL_LIST="" OLLAMA_PROXY_URL="" \
# OpenAI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
MistralProviderCard,
MoonshotProviderCard,
NovitaProviderCard,
NvidiaProviderCard,
OpenRouterProviderCard,
PerplexityProviderCard,
QwenProviderCard,
Expand Down Expand Up @@ -68,6 +69,7 @@ export const useProviderList = (): ProviderItem[] => {
TogetherAIProviderCard,
FireworksAIProviderCard,
GroqProviderCard,
NvidiaProviderCard,
PerplexityProviderCard,
MistralProviderCard,
Ai21ProviderCard,
Expand Down
3 changes: 3 additions & 0 deletions src/config/aiModels/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { default as minimax } from './minimax';
import { default as mistral } from './mistral';
import { default as moonshot } from './moonshot';
import { default as novita } from './novita';
import { default as nvidia } from './nvidia';
import { default as ollama } from './ollama';
import { default as openai } from './openai';
import { default as openrouter } from './openrouter';
Expand Down Expand Up @@ -84,6 +85,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
mistral,
moonshot,
novita,
nvidia,
ollama,
openai,
openrouter,
Expand Down Expand Up @@ -126,6 +128,7 @@ export { default as minimax } from './minimax';
export { default as mistral } from './mistral';
export { default as moonshot } from './moonshot';
export { default as novita } from './novita';
export { default as nvidia } from './nvidia';
export { default as ollama } from './ollama';
export { default as openai } from './openai';
export { default as openrouter } from './openrouter';
Expand Down
155 changes: 155 additions & 0 deletions src/config/aiModels/nvidia.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import { AIChatModelCard } from '@/types/aiModel';

/**
 * Chat models exposed by the Nvidia NIM provider
 * (OpenAI-compatible endpoint at https://integrate.api.nvidia.com/v1).
 *
 * Entries marked `enabled: true` are shown/active by default; the rest are
 * available but opt-in. Model ids must match the ids returned by the NIM
 * `/models` endpoint, since the runtime matches fetched models against this
 * list case-insensitively (see src/libs/agent-runtime/nvidia/index.ts).
 */
const nvidiaChatModels: AIChatModelCard[] = [
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 128_000,
    description: '先进的 LLM,擅长推理、数学、常识和函数调用。',
    displayName: 'Llama 3.3 70B Instruct',
    enabled: true,
    id: 'meta/llama-3.3-70b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 128_000,
    description: '先进的最尖端小型语言模型,具备语言理解、卓越的推理能力和文本生成能力。',
    displayName: 'Llama 3.2 1B Instruct',
    id: 'meta/llama-3.2-1b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 128_000,
    description: '先进的最尖端小型语言模型,具备语言理解、卓越的推理能力和文本生成能力。',
    displayName: 'Llama 3.2 3B Instruct',
    id: 'meta/llama-3.2-3b-instruct',
    type: 'chat'
  },
  {
    abilities: {
      vision: true,
    },
    contextWindowTokens: 128_000,
    description: '尖端的视觉-语言模型,擅长从图像中进行高质量推理。',
    displayName: 'Llama 3.2 11B Vision Instruct',
    enabled: true,
    id: 'meta/llama-3.2-11b-vision-instruct',
    type: 'chat'
  },
  {
    abilities: {
      vision: true,
    },
    contextWindowTokens: 128_000,
    description: '尖端的视觉-语言模型,擅长从图像中进行高质量推理。',
    displayName: 'Llama 3.2 90B Vision Instruct',
    id: 'meta/llama-3.2-90b-vision-instruct',
    type: 'chat'
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 128_000,
    description: '先进的最尖端模型,具备语言理解、卓越的推理能力和文本生成能力。',
    displayName: 'Llama 3.1 8B Instruct',
    id: 'meta/llama-3.1-8b-instruct',
    type: 'chat'
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 128_000,
    description: '赋能复杂对话,具备卓越的上下文理解、推理能力和文本生成能力。',
    displayName: 'Llama 3.1 70B Instruct',
    id: 'meta/llama-3.1-70b-instruct',
    type: 'chat'
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 128_000,
    description: '高级 LLM,支持合成数据生成、知识蒸馏和推理,适用于聊天机器人、编程和特定领域任务。',
    displayName: 'Llama 3.1 405B Instruct',
    id: 'meta/llama-3.1-405b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 32_768,
    description: '独特的语言模型,提供无与伦比的准确性和效率表现。',
    displayName: 'Llama 3.1 Nemotron 51B Instruct',
    id: 'nvidia/llama-3.1-nemotron-51b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 32_768,
    description: 'Llama-3.1-Nemotron-70B-Instruct 是 NVIDIA 定制的大型语言模型,旨在提高 LLM 生成的响应的帮助性。',
    displayName: 'Llama 3.1 Nemotron 70B Instruct',
    id: 'nvidia/llama-3.1-nemotron-70b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 8192,
    description: '面向边缘应用的高级小型语言生成 AI 模型。',
    displayName: 'Gemma 2 2B Instruct',
    id: 'google/gemma-2-2b-it',
    type: 'chat'
  },
  {
    contextWindowTokens: 8192,
    description: '尖端文本生成模型,擅长文本理解、转换和代码生成。',
    displayName: 'Gemma 2 9B Instruct',
    id: 'google/gemma-2-9b-it',
    type: 'chat'
  },
  {
    contextWindowTokens: 8192,
    description: '尖端文本生成模型,擅长文本理解、转换和代码生成。',
    displayName: 'Gemma 2 27B Instruct',
    id: 'google/gemma-2-27b-it',
    type: 'chat'
  },
  {
    abilities: {
      reasoning: true,
    },
    contextWindowTokens: 128_000,
    description: '最先进的高效 LLM,擅长推理、数学和编程。',
    displayName: 'DeepSeek R1',
    enabled: true,
    id: 'deepseek-ai/deepseek-r1',
    type: 'chat'
  },
  {
    abilities: {
      functionCall: true,
    },
    contextWindowTokens: 32_768,
    description: '面向中文和英文的 LLM,针对语言、编程、数学、推理等领域。',
    displayName: 'Qwen2.5 7B Instruct',
    enabled: true,
    id: 'qwen/qwen2.5-7b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 32_768,
    description: '强大的中型代码模型,支持 32K 上下文长度,擅长多语言编程。',
    displayName: 'Qwen2.5 Coder 7B Instruct',
    id: 'qwen/qwen2.5-coder-7b-instruct',
    type: 'chat'
  },
  {
    contextWindowTokens: 32_768,
    description: '高级 LLM,支持代码生成、推理和修复,涵盖主流编程语言。',
    displayName: 'Qwen2.5 Coder 32B Instruct',
    id: 'qwen/qwen2.5-coder-32b-instruct',
    type: 'chat'
  },
]; // fixed: statement terminator was missing, inconsistent with the rest of the file

// Aggregate export; other model categories (image, embedding, …) can be
// spread in here later without changing the module's public shape.
export const allModels = [...nvidiaChatModels];

export default allModels;
6 changes: 6 additions & 0 deletions src/config/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ export const getLLMConfig = () => {
ENABLED_NOVITA: z.boolean(),
NOVITA_API_KEY: z.string().optional(),

ENABLED_NVIDIA: z.boolean(),
NVIDIA_API_KEY: z.string().optional(),

ENABLED_BAICHUAN: z.boolean(),
BAICHUAN_API_KEY: z.string().optional(),

Expand Down Expand Up @@ -202,6 +205,9 @@ export const getLLMConfig = () => {
ENABLED_NOVITA: !!process.env.NOVITA_API_KEY,
NOVITA_API_KEY: process.env.NOVITA_API_KEY,

ENABLED_NVIDIA: !!process.env.NVIDIA_API_KEY,
NVIDIA_API_KEY: process.env.NVIDIA_API_KEY,

ENABLED_BAICHUAN: !!process.env.BAICHUAN_API_KEY,
BAICHUAN_API_KEY: process.env.BAICHUAN_API_KEY,

Expand Down
4 changes: 4 additions & 0 deletions src/config/modelProviders/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import MinimaxProvider from './minimax';
import MistralProvider from './mistral';
import MoonshotProvider from './moonshot';
import NovitaProvider from './novita';
import NvidiaProvider from './nvidia';
import OllamaProvider from './ollama';
import OpenAIProvider from './openai';
import OpenRouterProvider from './openrouter';
Expand Down Expand Up @@ -67,6 +68,7 @@ export const LOBE_DEFAULT_MODEL_LIST: ChatModelCard[] = [
ZeroOneProvider.chatModels,
StepfunProvider.chatModels,
NovitaProvider.chatModels,
NvidiaProvider.chatModels,
BaichuanProvider.chatModels,
TaichuProvider.chatModels,
CloudflareProvider.chatModels,
Expand Down Expand Up @@ -96,6 +98,7 @@ export const DEFAULT_MODEL_PROVIDER_LIST = [
CloudflareProvider,
GithubProvider,
NovitaProvider,
NvidiaProvider,
TogetherAIProvider,
FireworksAIProvider,
GroqProvider,
Expand Down Expand Up @@ -158,6 +161,7 @@ export { default as MinimaxProviderCard } from './minimax';
export { default as MistralProviderCard } from './mistral';
export { default as MoonshotProviderCard } from './moonshot';
export { default as NovitaProviderCard } from './novita';
export { default as NvidiaProviderCard } from './nvidia';
export { default as OllamaProviderCard } from './ollama';
export { default as OpenAIProviderCard } from './openai';
export { default as OpenRouterProviderCard } from './openrouter';
Expand Down
21 changes: 21 additions & 0 deletions src/config/modelProviders/nvidia.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { ModelProviderCard } from '@/types/llm';

// Provider card registering Nvidia NIM in the model-provider catalog.
// NIM exposes an OpenAI-compatible REST API, so the generic openai SDK type is reused.
const Nvidia: ModelProviderCard = {
  // Intentionally empty: the model list is fetched at runtime via the
  // provider's /models endpoint (showModelFetcher below) — presumably merged
  // with the defaults in src/config/aiModels/nvidia.ts; TODO confirm.
  chatModels: [],
  // Cheap, small model used to validate the user's API key / connectivity.
  checkModel: 'meta/llama-3.2-1b-instruct',
  description: 'NVIDIA NIM™ 提供容器,可用于自托管 GPU 加速推理微服务,支持在云端、数据中心、RTX™ AI 个人电脑和工作站上部署预训练和自定义 AI 模型。',
  id: 'nvidia',
  modelList: { showModelFetcher: true },
  // Linked from the provider's settings UI so users can browse available models.
  modelsUrl: 'https://build.nvidia.com/models',
  name: 'Nvidia',
  settings: {
    proxyUrl: {
      // Default public NIM endpoint; users self-hosting NIM override this.
      placeholder: 'https://integrate.api.nvidia.com/v1',
    },
    // NIM speaks the OpenAI wire protocol, so the OpenAI-compatible runtime is used.
    sdkType: 'openai',
    showModelFetcher: true,
  },
  url: 'https://build.nvidia.com',
};

export default Nvidia;
7 changes: 7 additions & 0 deletions src/libs/agent-runtime/AgentRuntime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import { LobeMinimaxAI } from './minimax';
import { LobeMistralAI } from './mistral';
import { LobeMoonshotAI } from './moonshot';
import { LobeNovitaAI } from './novita';
import { LobeNvidiaAI } from './nvidia';
import { LobeOllamaAI } from './ollama';
import { LobeOpenAI } from './openai';
import { LobeOpenRouterAI } from './openrouter';
Expand Down Expand Up @@ -157,6 +158,7 @@ class AgentRuntime {
mistral: Partial<ClientOptions>;
moonshot: Partial<ClientOptions>;
novita: Partial<ClientOptions>;
nvidia: Partial<ClientOptions>;
ollama: Partial<ClientOptions>;
openai: Partial<ClientOptions>;
openrouter: Partial<ClientOptions>;
Expand Down Expand Up @@ -300,6 +302,11 @@ class AgentRuntime {
break;
}

case ModelProvider.Nvidia: {
runtimeModel = new LobeNvidiaAI(params.nvidia);
break;
}

case ModelProvider.Baichuan: {
runtimeModel = new LobeBaichuanAI(params.baichuan ?? {});
break;
Expand Down
44 changes: 44 additions & 0 deletions src/libs/agent-runtime/nvidia/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { ModelProvider } from '../types';
import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';

import type { ChatModelCard } from '@/types/llm';

/** Minimal shape of a model entry returned by NIM's /models endpoint. */
export interface NvidiaModelCard {
  id: string;
}

/**
 * Agent runtime for Nvidia NIM, built on the OpenAI-compatible factory
 * (NIM speaks the OpenAI wire protocol).
 *
 * `models` fetches the live model list from the endpoint and enriches each
 * entry with metadata (context window, display name, abilities) from
 * LOBE_DEFAULT_MODEL_LIST, matching ids case-insensitively. Models not in the
 * default list fall through with undefined metadata and all abilities false.
 */
export const LobeNvidiaAI = LobeOpenAICompatibleFactory({
  baseURL: 'https://integrate.api.nvidia.com/v1',
  debug: {
    chatCompletion: () => process.env.DEBUG_NVIDIA_CHAT_COMPLETION === '1',
  },
  models: async ({ client }) => {
    // Dynamic import avoids pulling the full default model list into the
    // module graph unless a fetch actually happens.
    const { LOBE_DEFAULT_MODEL_LIST } = await import('@/config/aiModels');

    // The SDK's paginated return type is looser than what we need; we only
    // read the `data` array of { id } entries.
    const modelsPage = (await client.models.list()) as any;
    const modelList: NvidiaModelCard[] = modelsPage.data;

    return modelList.map((model): ChatModelCard => {
      const knownModel = LOBE_DEFAULT_MODEL_LIST.find(
        (m) => model.id.toLowerCase() === m.id.toLowerCase(),
      );

      return {
        contextWindowTokens: knownModel?.contextWindowTokens,
        displayName: knownModel?.displayName,
        enabled: knownModel?.enabled ?? false,
        functionCall: knownModel?.abilities?.functionCall ?? false,
        id: model.id,
        reasoning: knownModel?.abilities?.reasoning ?? false,
        vision: knownModel?.abilities?.vision ?? false,
      };
    });
  },
  provider: ModelProvider.Nvidia,
});
1 change: 1 addition & 0 deletions src/libs/agent-runtime/types/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export enum ModelProvider {
Mistral = 'mistral',
Moonshot = 'moonshot',
Novita = 'novita',
Nvidia = 'nvidia',
Ollama = 'ollama',
OpenAI = 'openai',
OpenRouter = 'openrouter',
Expand Down
1 change: 1 addition & 0 deletions src/types/user/settings/keyVaults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export interface UserKeyVaults {
mistral?: OpenAICompatibleKeyVault;
moonshot?: OpenAICompatibleKeyVault;
novita?: OpenAICompatibleKeyVault;
nvidia?: OpenAICompatibleKeyVault;
ollama?: OpenAICompatibleKeyVault;
openai?: OpenAICompatibleKeyVault;
openrouter?: OpenAICompatibleKeyVault;
Expand Down

0 comments on commit ab796a7

Please sign in to comment.