diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
index 41b5c3e754..d04f7cb622 100644
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
             className="tooltip !text-xs max-w-xs"
           >

-            Note: Only change this setting if you
-            understand its implications on performance and resource usage.
+            Note: Be careful with the Maximum mode. It may
+            increase resource usage significantly.


             Base: Ollama automatically limits the context
-            to 2048 tokens, reducing VRAM usage. Suitable for most users.
+            to 2048 tokens, keeping resource usage low while maintaining
+            good performance. Suitable for most users and models.


             Maximum: Uses the full context window (up to
-            Max Tokens). May increase VRAM usage significantly.
+            Max Tokens). Will result in increased resource usage but allows
+            for larger context conversations.
+
+            This is not recommended for most users.
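
For reference, the two modes described in this tooltip map directly onto the request options the server sends to Ollama; see the server diff below. A minimal sketch of that mapping, assuming the server-side performanceMode string and promptWindowLimit() helper; the contextOptions name itself is hypothetical:

    // Sketch: how "base" vs. maximum mode becomes Ollama request options.
    // "base" omits num_ctx, so Ollama falls back to its 2048-token default;
    // any other mode pins num_ctx to the model's full prompt window (Max Tokens).
    function contextOptions(performanceMode, promptWindowLimit) {
      if (performanceMode === "base") return {};
      return { num_ctx: promptWindowLimit };
    }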

diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index b62c80929d..3ba8ad1d2d 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -29,6 +29,13 @@ class OllamaAILLM {
     this.client = new Ollama({ host: this.basePath });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
+    this.#log(
+      `OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
+    );
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
   }

   #appendContext(contextTexts = []) {
@@ -131,11 +138,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: true,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       })
       .then((res) => {
@@ -179,11 +186,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: true,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       }),
       messages,
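
The key renames above (useMLock to use_mlock, numCtx to num_ctx) matter because the ollama JS client forwards the options object to the Ollama REST API as-is, and that API expects snake_case keys; the camelCase spellings were most likely ignored, meaning num_ctx was never actually raised in maximum mode. A minimal standalone check, assuming the ollama npm package and a local Ollama server; the model name and host are placeholders:

    // Run as an ES module (e.g. node check.mjs) against a local Ollama server.
    import { Ollama } from "ollama";

    const client = new Ollama({ host: "http://127.0.0.1:11434" });
    const res = await client.chat({
      model: "llama3", // placeholder; any locally pulled model works
      messages: [{ role: "user", content: "Hello" }],
      options: {
        temperature: 0.7,
        use_mlock: true, // snake_case, matching the Ollama API
        num_ctx: 8192,   // camelCase numCtx here would silently do nothing
      },
    });
    console.log(res.message.content);

With the renamed keys, the n_ctx value printed by the new #log call should line up with the context size Ollama itself reports in its server logs when maximum mode is active.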