diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
index 41b5c3e754..d04f7cb622 100644
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
className="tooltip !text-xs max-w-xs"
>
- Note: Only change this setting if you
- understand its implications on performance and resource usage.
+ Note: Be careful with Maximum mode. It may
+ increase resource usage significantly.
Base: Ollama automatically limits the context
- to 2048 tokens, reducing VRAM usage. Suitable for most users.
+ to 2048 tokens, keeping resource usage low while maintaining
+ good performance. Suitable for most users and models.
Maximum: Uses the full context window (up to
- Max Tokens). May increase VRAM usage significantly.
+ Max Tokens). Will result in increased resource usage but allows
+ for larger-context conversations.
+
+ This is not recommended for most users.
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index b62c80929d..3ba8ad1d2d 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -29,6 +29,14 @@ class OllamaAILLM {
this.client = new Ollama({ host: this.basePath });
this.embedder = embedder ?? new NativeEmbedder();
this.defaultTemp = 0.7;
+ this.#log(
+ `OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
+ );
+ }
+
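+ // Minimal logger: prefixes console output with a green "[Ollama]" tag via ANSI escape codes.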
+ #log(text, ...args) {
+ console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
}

#appendContext(contextTexts = []) {
@@ -131,11 +139,12 @@ class OllamaAILLM {
keep_alive: this.keepAlive,
options: {
temperature,
- useMLock: true,
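+ // Option keys are passed straight through to the Ollama API, which expects snake_case (use_mlock, num_ctx).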
+ use_mlock: true,
// There are currently only two performance settings, so if it's not "base" it's max context.
...(this.performanceMode === "base"
? {}
- : { numCtx: this.promptWindowLimit() }),
+ : { num_ctx: this.promptWindowLimit() }),
},
})
.then((res) => {
@@ -179,11 +188,11 @@ class OllamaAILLM {
keep_alive: this.keepAlive,
options: {
temperature,
- useMLock: true,
+ use_mlock: true,
// There are currently only two performance settings, so if it's not "base" it's max context.
...(this.performanceMode === "base"
? {}
- : { numCtx: this.promptWindowLimit() }),
+ : { num_ctx: this.promptWindowLimit() }),
},
}),
messages,