diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts
index 51532a108f2e5..b76625fe37d91 100644
--- a/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts
+++ b/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts
@@ -77,6 +77,7 @@ export {
   type ToolNamesOf,
   type ToolsOfChoice,
   type ToolCallArguments,
+  type ToolCallbackResult,
 } from './src/chat_complete';
 
 export type { BoundInferenceClient, InferenceClient } from './src/inference_client';
diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts
index 2de72ee121654..566c7faae5003 100644
--- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts
+++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts
@@ -49,6 +49,7 @@ export {
   type ToolChoice,
   type CustomToolChoice,
   type ToolCallArguments,
+  type ToolCallbackResult,
 } from './tools';
 
 export type {
@@ -58,6 +59,7 @@ export type {
   ToolCallbacksOfToolOptions,
   ToolNamesOf,
   ToolsOfChoice,
+  ToolCallsOfToolOptions,
 } from './tools_of';
 
 export type { ChatCompleteMetadata, ConnectorTelemetryMetadata } from './metadata';
diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/create_prompt.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/create_prompt.ts
index bbc4365d3892e..15e4bec51f2ea 100644
--- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/create_prompt.ts
+++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/create_prompt.ts
@@ -10,7 +10,7 @@ import type { Prompt, PromptFactory, PromptVersion } from './types';
 export function createPrompt(init: {
   name: string;
-  description: string;
+  description?: string;
   input: z.Schema;
 }): PromptFactory {
   function inner(
diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/types.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/types.ts
index 9e55562ed3544..c9a348bc8fd1c 100644
--- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/types.ts
+++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/types.ts
@@ -98,7 +98,7 @@ export interface Prompt<
 export type ToolOptionsOfPrompt<TPrompt extends Prompt> = TPrompt['versions'] extends Array<
   infer TPromptVersion
 >
   ? TPromptVersion extends PromptVersion
-    ? TPromptVersion extends { tools?: ToolDefinitions }
-      ? Pick<TPromptVersion, 'tools' | 'toolChoice'>
-      : {}
+    ? Pick<TPromptVersion, 'tools' | 'toolChoice'>
     : {}
   : {};
diff --git a/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/index.ts b/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/index.ts
index e7b7350f7a26d..16266f1f94c11 100644
--- a/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/index.ts
+++ b/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/index.ts
@@ -20,6 +20,7 @@ import { EsqlDocumentBase, runAndValidateEsqlQuery } from '@kbn/inference-plugin
 import { executeAsReasoningAgent } from '@kbn/inference-prompt-utils';
 import { omit, once } from 'lodash';
 import moment from 'moment';
+import { indexPatternToCcs } from '@kbn/es-query';
 import { describeDataset, sortAndTruncateAnalyzedFields } from '../../..';
 import { EsqlPrompt } from './prompt';
 
@@ -91,7 +92,11 @@ export async function executeAsEsqlAgent({
         return {
           response: await esClient.indices
             .resolveIndex({
-              name: toolCall.function.arguments.name.flatMap((index) => index.split(',')),
+              name: indexPatternToCcs(
+                toolCall.function.arguments.name.length
+                  ? toolCall.function.arguments.name.flatMap((index) => index.split(','))
+                  : '*'
+              ),
               allow_no_indices: true,
             })
             .then((response) => {
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/index.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/index.ts
index 19059a861abfb..8aa6c8650fa8f 100644
--- a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/index.ts
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/index.ts
@@ -6,3 +6,7 @@
  */
 
 export { executeAsReasoningAgent } from './src/flows/reasoning/execute_as_reasoning_agent';
+export type {
+  ReasoningPromptResponse,
+  ReasoningPromptResponseOf,
+} from './src/flows/reasoning/types';
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_meta_prompt.text b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_meta_prompt.text
index 544119dd3be64..48a19df440856 100644
--- a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_meta_prompt.text
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_meta_prompt.text
@@ -1,4 +1,4 @@
-Rewrite the system prompt below with the task description in mind, in a natural way. The outcome should be a system prompt that is specifically geared towards the current task, with examples and instructions being relevant to the task, resulting in high performance. Any examples and instructions should be based on the goals, success criteria and iterative improvement guidance in the task description.
+Rewrite the system prompt below with the task description in mind, in a natural way. The outcome should be a system prompt that is specifically geared towards the current task, with examples and instructions being relevant to the task, resulting in high performance. Any examples and instructions should be based on the goals, success criteria and iterative improvement guidance in the task description. Generate examples of A) high quality reasoning monologues that actively reflect on the user's question and tool call results, B) definitive output responses that exemplify how the gathering phase should accurately and thoroughly capture the previous steps for the user, who does not have access to the previous steps, only the definitive output.
 
 When integrating the task-specific things into the workflow description, add the following sections:
 
@@ -7,6 +7,7 @@ When integrating the task-specific things into the workflow description, add the
 - Tool call examples
 - Iterative refinement strategies
 - Error => repair examples
+- 5 Q/A sets that cover various types of questions. Only output the Q/A pairs, not the internal reasoning.
 - Tips & hints
 
 You must include ALL task instructions, either via examples (preferred) or in other places.
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_system_prompt.text b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_system_prompt.text
index 3fa98f07f1746..ceb9815501b75 100644
--- a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_system_prompt.text
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_system_prompt.text
@@ -1,10 +1,16 @@
 ## 1 Purpose
 
-You are an **expert reasoning agent**. Your task is to answer the user’s question **accurately and safely** by
+You are an **expert reasoning agent**. Your task is to answer the user’s question **accurately and safely** by:
 
 1. **Gathering context** with task‑specific tools.
 2. **Thinking in the clear** via a structured **Reasoning Monologue** wrapped in sentinel tags after *every* tool response.
-3. Repeating Steps 1‑2 until reflection says you have enough to answer, then producing one final answer.
+3. Repeating Steps 1–2 until reflection says you have enough to answer, then calling `complete()` and producing one final answer.
+
+> **Visibility & user experience**
+>
+> - The **user only sees the Definitive Output** that follows a successful `complete()` call. All tool calls and Reasoning Monologues are invisible to the user.
+>
+> - Treat gathered context as if it were part of your background knowledge at answer time. Write the final response naturally, and you *may* mention actions you took (e.g., searches, code runs) as narrative, but **do not** expose internal tags or the monologue format.
 
 ---
 
@@ -18,95 +24,67 @@ You are an **expert reasoning agent**. Your task is to answer the user’s quest
 
 ---
 
-## 3 Core Loop  Gather ➜ Reason ➜ Act/Complete
+## 3 Core Loop — Act/Gather ➜ **Reason** ➜ Decide (continue or complete)
 
 ```
        ↓ (must call reason())
-reason() → Monologue (inside sentinels)
+Reasoning Monologue (inside sentinels)
        ↓ (control returns to orchestrator)
- → (Task tool **or** complete())
+ → (Action Call **or** COMPLETE)
 ```
 
-### Monologue Format — **Simple Tag Pair**
+### Monologue Format — **Simple Tag Pair**
 
-```text
-{"tool":"reason","arguments":{}}
-# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
+```
 <<<BEGIN_INTERNAL_REASONING>>>
 [stepsLeft = N]
-PLAN>     (optional high‑level roadmap – only on first reasoning turn or when re‑planning)
-GATHER>   (which tool you will call next and why)
-REFLECT>  (what the last tool taught you; did it solve the sub‑goal?)
-continue = yes/no
+GOAL> (rephrase the user’s question and state success criteria, **scoped by your current capabilities/tools/constraints**)
+REFLECT> (what the last action/result taught you; are assumptions still valid? what changed?)
+PLAN> (describe in natural language what you will do next and why—do not specify the exact payload of any tool call. If you are ready to answer, state that explicitly, and end your monologue.)
 <<<END_INTERNAL_REASONING>>>
 ```
 
-* If `continue = yes` → the very next assistant turn **must** be a single JSON task‑tool call.
-* If `continue = no` → the very next assistant turn **must** be `{"tool":"complete","arguments":{}}`.
-
----
-
-## 4 Rules
-
-1. **Strict alternation** – Two task‑tool calls may never occur back‑to‑back; a `reason()` turn must sit in between.
-2. **Mandatory monologue** – After *every* task‑tool response, you must author a monologue wrapped in `<<<BEGIN_INTERNAL_REASONING>>> … <<<END_INTERNAL_REASONING>>>`.
-3. **No leaks before complete()** – Do *not* reveal any part of the answer until the orchestrator has acknowledged `complete()` and invited Definitive Output.
-4. **Structured tool calls only** – When calling a tool, the assistant message must contain **only** the JSON invocation.
-5. **Budget awareness** – Echo `[stepsLeft = N]` at the top of every monologue.
-6. **After complete()** – Immediately produce the **Definitive Output**: a single, comprehensive answer for the user, omitting all internal tags and jargon.
+**Decision — example lines:**
 
----
-
-## 5 Orchestrator Enforcement (reference)
+```
+continue = yes   # proceed with another step using the tool API (no call details here)
+```
 
-* Reject any tool call that follows another tool call without an intervening `reason()`.
-* Reject `complete()` unless the latest monologue ends with `continue = no`.
-* If `stepsLeft` reaches 0, the orchestrator auto‑inserts `complete()`.
-* The orchestrator strips everything between `<<<BEGIN_INTERNAL_REASONING>>>` and `<<<END_INTERNAL_REASONING>>>` before exposing messages to the user.
+**Completion — example line:**
 
----
+```
+continue = no    # stop gathering; produce Definitive Output next
+```
 
-## 6 Quick Reference Templates
+#### Initial reflection & planning
 
-\### After a tool result
+For the **first** monologue, perform a **comprehensive intent assessment**:
 
-```text
-{"tool":"reason","arguments":{}}
-# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
-<<<BEGIN_INTERNAL_REASONING>>>
-[stepsLeft = 7]
-PLAN> verify GDP stats
-GATHER> call web.search for “World Bank GDP 2025”
-REFLECT> last search outdated; need newer data
-continue = yes
-<<<END_INTERNAL_REASONING>>>
-```
+* **GOAL (capability‑aware):** Rephrase the user’s question and define success criteria, explicitly noting which of your capabilities/tools/constraints apply.
+* **Capability scope:** List relevant capabilities and limitations (available tools, connectivity, data access, sandbox, safety constraints, latency/budget). Note how these shape feasible outcomes.
+* **Assumptions:** List key assumptions and how you will validate them.
+* **Outcomes:** Enumerate plausible user intents/outcomes and decide which to pursue first and why, pruning options that exceed capabilities or constraints.
+* **Clarifications:** Note any clarifying questions and how you’ll resolve them (via tools or follow‑ups).
 
-\### Gathering again
+Conclude with a concrete plan for the next step.
 
-```text
-{"tool":"web.search","arguments":{"q":"World Bank GDP 2025","recency":365}}
-```
+---
 
-\### Finishing
+## 4 Rules
 
-```text
-{"tool":"reason","arguments":{}}
-# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
-<<<BEGIN_INTERNAL_REASONING>>>
-[stepsLeft = 2]
-REFLECT> data sufficient; no further tools needed.
-continue = no
-<<<END_INTERNAL_REASONING>>>
-```
+1. **Strict alternation** – Never follow an Action Call with another Action Call; a Reasoning Monologue must sit in between.
+2. **Mandatory monologue** – After *every* tool result, you must write a Reasoning Monologue inside `<<<BEGIN_INTERNAL_REASONING>>> … <<<END_INTERNAL_REASONING>>>`.
+3. **Goal anchoring at every step** – Begin each monologue with a **GOAL>** line that restates the question and success criteria **in the context of your capabilities**. If your plan diverges, note the drift and realign.
 
-```text
-{"tool":"complete","arguments":{}}
-```
+   * Re‑evaluate assumptions, intent, and **capability fit** each step; update them as evidence arrives.
+   * Prefer actions that move directly toward the stated goal; defer tangents.
+4. **No leaks before completion** – **You cannot output any text outside of the sentinel tags until you issue `>> COMPLETE` and the orchestrator invites Definitive Output.** Any turn that is not inside the sentinels must be either an **Action Call** (`>> ACTION: …`) or `>> COMPLETE` — never free‑form prose.
+5. **Budget awareness** – Echo `[stepsLeft = N]` at the top of every monologue.
+6. **After completion** – Immediately produce the **Definitive Output**: a single, comprehensive answer for the user, omitting all internal tags and jargon. This is the **only** content the user will see.
 
 ---
 
-## 7 Definitive Output Stage
+## 5 Definitive Output Stage
 
-Once the orchestrator acknowledges `complete()`, write the final answer for the task caller. Summarise or cite relevant tool outputs, but do **not** mention internal tags, stepsLeft, or other private reasoning.
+Once the orchestrator acknowledges complete(), write the final answer for the task caller. Summarize, link, or cite relevant tool outputs as appropriate, but **do not** mention internal tags, `stepsLeft`, or other private structure. Present a natural, fluent response that integrates the gathered context as if it were prior knowledge, optionally noting the actions you took (e.g., “I searched…”, “I ran…”) without exposing internal mechanics.
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/create_reason_tool_call.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/create_reason_tool_call.ts
index 501601a6bcf81..b3138a0241926 100644
--- a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/create_reason_tool_call.ts
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/create_reason_tool_call.ts
@@ -26,14 +26,18 @@ export function createReasonToolCall(): [AssistantMessage, ToolMessage] {
         },
       ],
     },
-    {
-      role: MessageRole.Tool,
-      toolCallId,
-      name: 'reason',
-      response: {
-        acknowledged: true,
-        instructions: REASON_INSTRUCTIONS,
-      },
-    },
+    createReasonToolCallResponse(toolCallId),
   ];
 }
+
+export function createReasonToolCallResponse(toolCallId: string): ToolMessage {
+  return {
+    role: MessageRole.Tool,
+    toolCallId,
+    name: 'reason',
+    response: {
+      acknowledged: true,
+      instructions: REASON_INSTRUCTIONS,
+    },
+  };
+}
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.test.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.test.ts
new file mode 100644
index 0000000000000..d7b8c131f0cc1
--- /dev/null
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.test.ts
@@ -0,0 +1,340 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { BoundInferenceClient, Prompt, ToolMessage } from '@kbn/inference-common';
+import { MessageRole } from '@kbn/inference-common';
+import { executeAsReasoningAgent } from './execute_as_reasoning_agent';
+import { z } from '@kbn/zod';
+import { END_INTERNAL_REASONING_MARKER } from './markers';
+
+function makePrompt() {
+  return {
+    name: 'test_prompt',
+    input: z.object({
+      foo: z.string(),
+    }),
+    versions: [
+      {
+        template: {
+          static: {
+            content: 'do it',
+          },
+        },
+        tools: {
+          complete: {
+            description: 'complete task',
+            schema: { type: 'object', properties: {} },
+          },
+          fetch_data: {
+            description: 'fetch data',
+            schema: { type: 'object', properties: {} },
+          },
+        },
+      },
+    ],
+  } satisfies Prompt;
+}
+
+describe('executeAsReasoningAgent', () => {
+  test('returns final tool call when finalToolChoice is provided', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest.fn().mockResolvedValue({
+        content: 'done',
+        toolCalls: [
+          {
+            type: 'function',
+            function: { name: 'complete', arguments: {} },
+            toolCallId: '1',
+          },
+        ],
+        tokens: 1,
+      }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    const result = await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 1,
+      toolCallbacks: {
+        fetch_data: jest.fn(),
+        complete: jest.fn(),
+      },
+      input: {
+        foo: '',
+      },
+      finalToolChoice: { type: 'function', function: 'complete' },
+    });
+
+    expect(inferenceClient.prompt).toHaveBeenCalled();
+    expect(result.toolCalls).toHaveLength(1);
+    expect(result.toolCalls?.[0].function.name).toBe('complete');
+  });
+
+  test('throws when planning tool response includes additional tool calls', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest.fn().mockResolvedValue({
+        content: 'thinking',
+        toolCalls: [
+          { type: 'function', function: { name: 'reason', arguments: {} }, toolCallId: 'a' },
+          { type: 'function', function: { name: 'fetch_data', arguments: {} }, toolCallId: 'b' },
+        ],
+        tokens: 1,
+      }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    await expect(
+      executeAsReasoningAgent({
+        inferenceClient,
+        prompt,
+        maxSteps: 1,
+        toolCallbacks: { fetch_data: jest.fn(), complete: jest.fn() },
+        input: {
+          foo: '',
+        },
+      })
+    ).rejects.toThrow('only a single tool call is allowed');
+  });
+
+  test('executes task tool callbacks and injects stepsLeft', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest
+        .fn()
+        .mockResolvedValueOnce({
+          content: 'call tool',
+          toolCalls: [
+            { type: 'function', function: { name: 'fetch_data', arguments: {} }, toolCallId: 'x' },
+          ],
+          tokens: 1,
+        })
+        .mockResolvedValueOnce({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    const fetchData = jest.fn().mockResolvedValue({ response: { result: 'ok' } });
+
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 1,
+      toolCallbacks: { fetch_data: fetchData, complete: jest.fn() },
+      input: { foo: '' },
+    });
+
+    expect(fetchData).toHaveBeenCalledTimes(1);
+    expect(inferenceClient.prompt).toHaveBeenCalledTimes(2);
+    const secondArgs = inferenceClient.prompt.mock.calls[1][0];
+    const prevMessages = secondArgs.prevMessages!;
+    const toolMsg = prevMessages.find((m): m is ToolMessage => m.role === MessageRole.Tool);
+
+    expect(toolMsg?.response).toEqual({ result: 'ok', stepsLeft: 1 });
+  });
+
+  test('completes next turn when content includes external part after END_INTERNAL marker', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest
+        .fn()
+        .mockResolvedValueOnce({
+          content: `internal${END_INTERNAL_REASONING_MARKER}this should trigger completion on next turn because it is long enough to pass threshold of buffer characters`,
+          toolCalls: [],
+          tokens: 1,
+        })
+        .mockResolvedValueOnce({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 2,
+      toolCallbacks: { fetch_data: jest.fn(), complete: jest.fn() },
+      input: { foo: '' },
+    });
+
+    expect(inferenceClient.prompt).toHaveBeenCalledTimes(2);
+  });
+
+  test('tool callback error is captured and injected into tool response', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest
+        .fn()
+        .mockResolvedValueOnce({
+          content: 'call tool',
+          toolCalls: [
+            { type: 'function', function: { name: 'fetch_data', arguments: {} }, toolCallId: 'x' },
+          ],
+          tokens: 1,
+        })
+        .mockResolvedValueOnce({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    const fetchData = jest.fn().mockRejectedValue(new Error('nope'));
+
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 1,
+      toolCallbacks: { fetch_data: fetchData, complete: jest.fn() },
+      input: { foo: '' },
+    });
+
+    expect(fetchData).toHaveBeenCalledTimes(1);
+    const secondArgs = inferenceClient.prompt.mock.calls[1][0];
+    const prevMessages = secondArgs.prevMessages!;
+    const toolMsg = prevMessages.find((m) => m.role === MessageRole.Tool);
+    expect(toolMsg).toEqual({
+      name: 'fetch_data',
+      response: {
+        error: expect.any(Object),
+        stepsLeft: 1,
+      },
+      role: 'tool',
+      toolCallId: 'x',
+    });
+  });
+
+  test('string tool callback result is mapped to response.result', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest
+        .fn()
+        .mockResolvedValueOnce({
+          content: 'call tool',
+          toolCalls: [
+            { type: 'function', function: { name: 'fetch_data', arguments: {} }, toolCallId: 'x' },
+          ],
+          tokens: 1,
+        })
+        .mockResolvedValueOnce({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    const fetchData = jest.fn().mockResolvedValue({ response: 'ok' });
+
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 1,
+      toolCallbacks: { fetch_data: fetchData, complete: jest.fn() },
+      input: { foo: '' },
+    });
+
+    const secondArgs = inferenceClient.prompt.mock.calls[1][0];
+    const prevMessages = secondArgs.prevMessages!;
+
+    const toolMsg = prevMessages.find((m): m is ToolMessage => m.role === MessageRole.Tool);
+
+    expect(toolMsg?.response).toEqual({ content: 'ok', stepsLeft: 1 });
+  });
+
+  test('planning tools merged when not completing, omitted when completing; toolChoice set on completing', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest.fn().mockResolvedValue({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    // First call: not completing, planning tools should be merged
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 1,
+      toolCallbacks: { fetch_data: jest.fn(), complete: jest.fn() },
+      input: { foo: '' },
+    });
+    const firstCall = inferenceClient.prompt.mock.calls[0][0];
+    const toolsNonCompleting = Object.keys(firstCall.prompt.versions[0].tools ?? {});
+    expect(toolsNonCompleting).toContain('reason');
+
+    // Second run: completing turn from the start
+    (inferenceClient.prompt as jest.Mock).mockClear();
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 0,
+      toolCallbacks: { fetch_data: jest.fn(), complete: jest.fn() },
+      input: { foo: '' },
+      finalToolChoice: { type: 'function', function: 'complete' },
+    });
+    const completingCall = (inferenceClient.prompt as jest.Mock).mock.calls[0][0];
+    const toolsCompleting = Object.keys(completingCall.prompt.versions[0].tools);
+    expect(toolsCompleting).not.toContain('reason');
+    expect(completingCall.toolChoice).toEqual({ type: 'function', function: 'complete' });
+  });
+
+  test('input is sanitized on completion (system tool calls removed)', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest.fn().mockResolvedValue({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    const res = await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 0,
+      toolCallbacks: { fetch_data: jest.fn(), complete: jest.fn() },
+      input: { foo: '' },
+      finalToolChoice: { type: 'function', function: 'complete' },
+    });
+
+    // No tool messages with planning tool names in sanitized input
+    const hasPlanningTool = res.input.some(
+      (m) => m.role === MessageRole.Tool && m.name && ['reason', 'complete'].includes(m.name)
+    );
+    expect(hasPlanningTool).toBe(false);
+  });
+
+  test('earlier reason tool calls are pruned', async () => {
+    const prompt = makePrompt();
+    const inferenceClient = {
+      prompt: jest
+        .fn()
+        .mockResolvedValueOnce({
+          content: 'gathering-1',
+          toolCalls: [
+            { type: 'function', function: { name: 'fetch_data', arguments: {} }, toolCallId: 'f1' },
+          ],
+          tokens: 1,
+        })
+        .mockResolvedValueOnce({
+          content: 'gathering-2',
+          toolCalls: [
+            { type: 'function', function: { name: 'fetch_data', arguments: {} }, toolCallId: 'f2' },
+          ],
+          tokens: 1,
+        })
+        .mockResolvedValueOnce({ content: 'final', toolCalls: [], tokens: 1 }),
+    } as Partial<BoundInferenceClient> as jest.Mocked<BoundInferenceClient>;
+
+    await executeAsReasoningAgent({
+      inferenceClient,
+      prompt,
+      maxSteps: 2,
+      toolCallbacks: {
+        fetch_data: jest.fn().mockResolvedValue({ response: 'ok' }),
+        complete: jest.fn(),
+      },
+      input: { foo: '' },
+    });
+
+    const thirdArgs = inferenceClient.prompt.mock.calls[2][0];
+    const prevMessages = thirdArgs.prevMessages!;
+
+    const fetchDataToolIds = prevMessages
+      .filter((m): m is ToolMessage => m.role === MessageRole.Tool && m.name === 'fetch_data')
+      .map((m) => m.toolCallId);
+
+    const reasonToolCalls = prevMessages.filter(
+      (message): message is ToolMessage =>
+        message.role === MessageRole.Tool && message.name === 'reason'
+    );
+
+    expect(fetchDataToolIds).toEqual(['f1', 'f2']);
+
+    expect(reasonToolCalls.length).toEqual(0);
+  });
+});
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.ts
index 15afb412c5f7e..558abe92a894a 100644
--- a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.ts
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/execute_as_reasoning_agent.ts
@@ -6,64 +6,40 @@
  */
 
 import type {
   AssistantMessage,
-  BoundInferenceClient,
   Message,
   PromptOptions,
-  PromptResponse,
   ToolCall,
-  ToolCallOfToolDefinitions,
   ToolCallback,
+  ToolCallbackResult,
   ToolCallbacksOfToolOptions,
+  ToolChoice,
   ToolMessage,
+  ToolNamesOf,
   ToolOptionsOfPrompt,
   UnboundPromptOptions,
 } from '@kbn/inference-common';
 import { MessageRole, type Prompt } from '@kbn/inference-common';
-import { withExecuteToolSpan } from '@kbn/inference-tracing';
-import { last, partition, takeRightWhile } from 'lodash';
+import { withActiveInferenceSpan, withExecuteToolSpan } from '@kbn/inference-tracing';
+import { trace } from '@opentelemetry/api';
+import { omit, partition, takeRightWhile } from 'lodash';
 import {
   createCompleteToolCall,
   createCompleteToolCallResponse,
 } from './create_complete_tool_call';
-import { createReasonToolCall } from './create_reason_tool_call';
-
-const planningTools = {
-  reason: {
-    description: 'reason or reflect about the task ahead or the results',
-    schema: {
-      type: 'object',
-      properties: {},
-    },
-  },
-  complete: {
-    description: 'complete the task based on the last output',
-    schema: {
-      type: 'object',
-      properties: {},
-    },
-  },
-} as const;
-
-type PlanningTools = typeof planningTools;
-
-type PlanningToolCallName = keyof PlanningTools;
-
-type PlanningToolCall = ToolCallOfToolDefinitions<PlanningTools>;
-
-function isPlanningToolName(name: string) {
-  return Object.keys(planningTools).includes(name);
-}
-
-function removeReasonToolCalls(messages: Message[]) {
-  return messages.filter((message) => {
-    const isInternalMessage =
-      (message.role === MessageRole.Tool && message.name === 'reason') ||
-      (message.role === MessageRole.Assistant &&
-        message.toolCalls?.some((toolCall) => toolCall.function.name === 'reason'));
-
-    return !isInternalMessage;
-  });
-}
+import { createReasonToolCall, createReasonToolCallResponse } from './create_reason_tool_call';
+import { BEGIN_INTERNAL_REASONING_MARKER, END_INTERNAL_REASONING_MARKER } from './markers';
+import type { PlanningToolCall, PlanningToolCallName } from './planning_tools';
+import {
+  PLANNING_TOOLS,
+  isPlanningToolName,
+  removeReasonToolCalls,
+  removeSystemToolCalls,
+} from './planning_tools';
+import type {
+  ReasoningPromptOptions,
+  ReasoningPromptResponse,
+  ReasoningPromptResponseOf,
+} from './types';
 
 function prepareMessagesForLLM({
   stepsLeft,
@@ -76,12 +52,23 @@ function prepareMessagesForLLM({
   canCallTaskTools: boolean;
   canCallPlanningTools: boolean;
 }) {
-  const lastMessage = last(messages);
+  /**
+   * This removes all system tool calls except if it is the last, to compact the
+   * conversation and not distract the LLM with tool calls that don't impact the
+   * conversation.
+   */
+  const lastToolMessage = messages.findLast(
+    (message): message is ToolMessage => message.role === MessageRole.Tool
+  );
 
-  const next =
-    lastMessage?.role === MessageRole.Tool && isPlanningToolName(lastMessage.name)
-      ? removeReasonToolCalls(messages.slice(0, -2)).concat(messages.slice(-2))
-      : removeReasonToolCalls(messages);
+  let next = messages;
+
+  if (lastToolMessage && isPlanningToolName(lastToolMessage.name)) {
+    const idx = messages.indexOf(lastToolMessage) - 1;
+    next = removeReasonToolCalls(messages.slice(0, idx)).concat(messages.slice(idx));
+  } else {
+    next = removeReasonToolCalls(messages);
+  }
 
   const lastToolResponse = next.findLast(
     (message): message is ToolMessage => message.role === MessageRole.Tool
@@ -92,7 +79,9 @@ function prepareMessagesForLLM({
     return {
       ...lastToolResponse,
       response: {
-        ...(lastToolResponse.response as Record<string, unknown>),
+        ...(typeof lastToolResponse.response === 'string'
+          ? { content: lastToolResponse.response }
+          : lastToolResponse.response),
         stepsLeft,
       },
     };
   });
 }
 
@@ -101,29 +90,57 @@ function prepareMessagesForLLM({
-interface PromptReasoningAgentOptions {
-  inferenceClient: BoundInferenceClient;
-  maxSteps?: number;
-  prevMessages?: undefined;
-}
-
 export function executeAsReasoningAgent<
   TPrompt extends Prompt,
-  TPromptOptions extends PromptOptions
+  TPromptOptions extends PromptOptions,
+  TToolCallbacks extends ToolCallbacksOfToolOptions<ToolOptionsOfPrompt<TPrompt>>,
+  TFinalToolChoice extends ToolChoice<ToolNamesOf<ToolOptionsOfPrompt<TPrompt>>> | undefined =
+    | ToolChoice<ToolNamesOf<ToolOptionsOfPrompt<TPrompt>>>
+    | undefined
 >(
-  options: UnboundPromptOptions &
-    PromptReasoningAgentOptions & { prompt: TPrompt } & {
-      toolCallbacks: ToolCallbacksOfToolOptions<ToolOptionsOfPrompt<TPrompt>>;
+  options: UnboundPromptOptions &
+    ReasoningPromptOptions & { prompt: TPrompt } & {
+      toolCallbacks: TToolCallbacks;
+      finalToolChoice?: TFinalToolChoice;
     }
-): Promise<PromptResponse<TPromptOptions>>;
-
-export function executeAsReasoningAgent(
+): Promise<
+  ReasoningPromptResponseOf<
+    TPrompt,
+    TPromptOptions & { toolChoice: TFinalToolChoice },
+    TToolCallbacks
+  >
+>;
+
+/**
+ * Executes a prompt in a loop in a way that the LLM will use the specified tools
+ * to gather context, and then produce a final output, which may or may not include
+ * a final tool call.
+ *
+ * The rules are as follows:
+ * - when `reason()` is called, the LLM SHOULD reason about the task or the tool call
+ *   results
+ * - when `reason()` is called, the LLM CAN call another tool
+ * - when `complete()` is called, and `finalToolChoice` is NOT specified, the LLM
+ *   MUST produce a summarization text
+ * - when `complete()` is called, and `finalToolChoice` is specified, the LLM MUST
+ *   call a tool to complete the task, and the LLM SHOULD produce a summarization text
+ * - when `finalToolChoice` is specified, and the LLM calls this tool, the task MUST
+ *   be completed by the orchestrator
+ * - when the available number of steps have been exhausted, the LLM MUST produce
+ *   its final output
+ * - if the LLM fails to produce its final output (e.g. by calling an unavailable tool),
+ *   the orchestrator MUST complete the task
+ * - if `finalToolChoice` is specified and is not included as part of the definitive output,
+ *   the orchestrator MUST fail the task
+ */
+export async function executeAsReasoningAgent(
   options: UnboundPromptOptions &
-    PromptReasoningAgentOptions & {
+    ReasoningPromptOptions & {
       toolCallbacks: Record<string, ToolCallback>;
+      finalToolChoice?: ToolChoice;
     }
-): Promise<PromptResponse> {
-  const { inferenceClient, maxSteps = 10, toolCallbacks, toolChoice } = options;
+): Promise<ReasoningPromptResponse> {
+  const { inferenceClient, maxSteps = 10, toolCallbacks } = options;
 
   async function callTools(toolCalls: ToolCall[]): Promise<ToolMessage[]> {
     return await Promise.all(
@@ -143,7 +160,12 @@ export function executeAsReasoningAgent(
           },
         },
         () => callback(toolCall)
-      );
+      ).catch((error): ToolCallbackResult => {
+        trace.getActiveSpan()?.recordException(error);
+        return {
+          response: { error, data: undefined },
+        };
+      });
 
       return {
         response: response.response,
@@ -164,12 +186,12 @@ export function executeAsReasoningAgent(
     messages: Message[];
     stepsLeft: number;
     temperature?: number;
-  }): Promise<PromptResponse> {
+  }): Promise<ReasoningPromptResponse> {
+    // Append a complete() tool call to force the LLM to generate the final response
     const prevMessages =
      stepsLeft <= 0 ? givenMessages.concat(createCompleteToolCall()) : givenMessages;
 
     const withoutSystemToolCalls = removeReasonToolCalls(prevMessages);
-
     const consecutiveReasoningSteps = takeRightWhile(withoutSystemToolCalls, (msg) => {
       return msg.role === MessageRole.Assistant && !msg.toolCalls?.length;
     }).length;
@@ -183,6 +205,7 @@ export function executeAsReasoningAgent(
 
     const isCompleting = lastSystemToolCallName === 'complete';
 
+    // Nudge the LLM to reason if it has not done after the last tool call
     const mustReason =
       !isCompleting && lastSystemToolCallName === 'reason' && consecutiveReasoningSteps === 0;
 
@@ -197,16 +220,14 @@ export function executeAsReasoningAgent(
 
     const mergedToolOptions = {
       tools: promptTools,
-      toolChoice,
     };
 
     const nextTools = isCompleting
       ? mergedToolOptions
       : {
-          toolChoice: undefined,
           tools: {
             ...mergedToolOptions.tools,
-            ...planningTools,
+            ...PLANNING_TOOLS,
          },
         };
 
@@ -218,7 +239,7 @@ export function executeAsReasoningAgent(
     };
 
     const promptOptions = {
-      ...options,
+      ...omit(options, 'finalToolChoice'),
       prompt: nextPrompt,
     };
 
@@ -226,17 +247,47 @@ export function executeAsReasoningAgent(
       ...promptOptions,
       stream: false,
       temperature,
+      toolChoice: isCompleting ? options.finalToolChoice : undefined,
       prevMessages: prepareMessagesForLLM({
         stepsLeft,
         messages: prevMessages,
         canCallTaskTools,
         canCallPlanningTools,
       }),
+      stopSequences: [END_INTERNAL_REASONING_MARKER],
     });
 
+    let content = response.content;
+
+    /**
+     * If the LLM hasn't used these markers, we assume it wants to complete its
+     * output.
+     */
+
+    let completeNextTurn =
+      content &&
+      !content.includes(BEGIN_INTERNAL_REASONING_MARKER) &&
+      !content.includes(END_INTERNAL_REASONING_MARKER) &&
+      !response.toolCalls.length;
+
+    /**
+     * Remove content after <<<END_INTERNAL_REASONING>>>. This means that the LLM has combined final output
+     * with internal reasoning, and it usually leads the LLM into a loop where it repeats itself.
+     */
+
+    const [internalContent, ...externalContentParts] = content.split(END_INTERNAL_REASONING_MARKER);
+
+    const externalContent = externalContentParts.join(END_INTERNAL_REASONING_MARKER).trim();
+
+    // use some kind of buffer to allow small artifacts around the markers, like markdown.
+    if (externalContent.length && externalContent.length > 25) {
+      content = internalContent + END_INTERNAL_REASONING_MARKER;
+      completeNextTurn = true;
+    }
+
     const assistantMessage: AssistantMessage = {
       role: MessageRole.Assistant,
-      content: response.content,
+      content,
       toolCalls: response.toolCalls,
     };
 
@@ -249,46 +300,73 @@ export function executeAsReasoningAgent(
       throw new Error(`When using system tools, only a single tool call is allowed`);
     }
 
-    if (isCompleting) {
-      return response;
-    }
+    const finalToolCallName =
+      options.finalToolChoice && typeof options.finalToolChoice === 'object'
+        ? options.finalToolChoice.function
+        : undefined;
 
-    if (response.toolCalls.length === 0 || nonSystemToolCalls.length > 0) {
-      const toolMessages = (await callTools(nonSystemToolCalls)).map((toolMessage) => {
-        return {
-          ...toolMessage,
-          response: {
-            ...(toolMessage.response as Record<string, unknown>),
-            stepsLeft,
-          },
-        };
-      });
+    const hasCalledFinalTool = response.toolCalls.some(
+      (toolCall) => toolCall.function.name === finalToolCallName
+    );
 
-      return innerCallPromptUntil({
-        messages: prevMessages.concat(
-          assistantMessage,
-          ...(toolMessages.length > 0 ? [...toolMessages, ...createReasonToolCall()] : [])
+    if (isCompleting || hasCalledFinalTool) {
+      // We don't want to send these results back to the LLM, if we are already
+      // completing
+      return {
+        content: response.content,
+        tokens: response.tokens,
+        toolCalls: response.toolCalls.filter(
+          (toolCall) => toolCall.function.name === finalToolCallName
        ),
-        stepsLeft: stepsLeft - 1,
-      });
+        input: removeSystemToolCalls(prevMessages),
+      };
     }
 
-    const systemToolCall = systemToolCalls[0];
+    const toolMessagesForNonSystemToolCalls = nonSystemToolCalls.length
+      ? (await callTools(nonSystemToolCalls)).map((toolMessage) => {
+          return {
+            ...toolMessage,
+            response: {
+              ...(typeof toolMessage.response === 'string'
+                ? { content: toolMessage.response }
+                : toolMessage.response),
+              stepsLeft,
+            },
+          };
+        })
+      : [];
+
+    const systemToolMessages = systemToolCalls.map((systemToolCall) => {
+      if (systemToolCall.function.name === 'reason') {
+        return createReasonToolCallResponse(systemToolCall.toolCallId);
+      }
+      return createCompleteToolCallResponse(systemToolCall.toolCallId);
+    });
 
-    const systemToolCallName: PlanningToolCallName = systemToolCall.function.name;
+    const allToolMessages = [...toolMessagesForNonSystemToolCalls, ...systemToolMessages];
+
+    if (completeNextTurn) {
+      return innerCallPromptUntil({
+        messages: prevMessages.concat(assistantMessage, ...allToolMessages),
+        stepsLeft: 0,
+      });
+    }
 
     return innerCallPromptUntil({
-      stepsLeft: stepsLeft - 1,
       messages: prevMessages.concat(
-        systemToolCallName === 'complete'
-          ? [assistantMessage, createCompleteToolCallResponse(systemToolCall.toolCallId)]
-          : createReasonToolCall()
+        assistantMessage,
+        ...allToolMessages,
+        ...(nonSystemToolCalls.length ? createReasonToolCall() : [])
       ),
+      stepsLeft: stepsLeft - 1,
     });
   }
 
-  return innerCallPromptUntil({
-    messages: createReasonToolCall(),
-    stepsLeft: maxSteps,
-  });
+  return await withActiveInferenceSpan('reason', () =>
+    innerCallPromptUntil({
+      // nudge the LLM to go into reasoning mode
+      messages: createReasonToolCall(),
+      stepsLeft: maxSteps,
+    })
+  );
 }
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/markers.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/markers.ts
new file mode 100644
index 0000000000000..e2dbf2520a5d4
--- /dev/null
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/markers.ts
@@ -0,0 +1,9 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+export const BEGIN_INTERNAL_REASONING_MARKER = `<<<BEGIN_INTERNAL_REASONING>>>`;
+export const END_INTERNAL_REASONING_MARKER = `<<<END_INTERNAL_REASONING>>>`;
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/planning_tools.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/planning_tools.ts
new file mode 100644
index 0000000000000..fd1a7b486554a
--- /dev/null
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/planning_tools.ts
@@ -0,0 +1,58 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { MessageRole } from '@kbn/inference-common';
+import type { Message, ToolCallOfToolDefinitions } from '@kbn/inference-common';
+
+export const PLANNING_TOOLS = {
+  reason: {
+    description: 'reason or reflect about the task ahead or the results',
+    schema: {
+      type: 'object',
+      properties: {},
+    },
+  },
+  complete: {
+    description: 'complete the task based on the last output',
+    schema: {
+      type: 'object',
+      properties: {},
+    },
+  },
+} as const;
+
+export type PlanningTools = typeof PLANNING_TOOLS;
+
+export type PlanningToolCallName = keyof PlanningTools;
+
+export type PlanningToolCall = ToolCallOfToolDefinitions<PlanningTools>;
+
+export function isPlanningToolName(name: string) {
+  return Object.keys(PLANNING_TOOLS).includes(name);
+}
+
+export function removeReasonToolCalls(messages: Message[]) {
+  return messages.filter((message) => {
+    const isInternalMessage =
+      (message.role === MessageRole.Tool && message.name === 'reason') ||
+      (message.role === MessageRole.Assistant &&
+        message.toolCalls?.some((toolCall) => toolCall.function.name === 'reason'));
+
+    return !isInternalMessage;
+  });
+}
+
+export function removeSystemToolCalls(messages: Message[]) {
+  return messages.filter((message) => {
+    const isInternalMessage =
+      (message.role === MessageRole.Tool && isPlanningToolName(message.name)) ||
+      (message.role === MessageRole.Assistant &&
+        message.toolCalls?.some((toolCall) => isPlanningToolName(toolCall.function.name)));
+
+    return !isInternalMessage;
+  });
+}
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/types.ts b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/types.ts
new file mode 100644
index 0000000000000..99e704229768d
--- /dev/null
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/types.ts
@@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type {
+  BoundInferenceClient,
+  Message,
+  MessageOf,
+  Prompt,
+  PromptOptions,
+  PromptResponse,
+  ToolCallbacksOfToolOptions,
+  ToolOptionsOfPrompt,
+} from '@kbn/inference-common';
+
+export interface ReasoningPromptOptions {
+  inferenceClient: BoundInferenceClient;
+  maxSteps?: number;
+  prevMessages?: undefined;
+}
+
+export type ReasoningPromptResponseOf<
+  TPrompt extends Prompt = Prompt,
+  TPromptOptions extends PromptOptions = PromptOptions,
+  TToolCallbacks extends ToolCallbacksOfToolOptions<
+    ToolOptionsOfPrompt<TPrompt>
+  > = ToolCallbacksOfToolOptions<ToolOptionsOfPrompt<TPrompt>>
+> = PromptResponse & {
+  input: Array<
+    MessageOf<
+      ToolOptionsOfPrompt<TPrompt>,
+      {
+        [key in keyof TToolCallbacks]: Awaited<ReturnType<TToolCallbacks[key]>>;
+      }
+    >
+  >;
+};
+
+export type ReasoningPromptResponse = PromptResponse & { input: Message[] };
diff --git a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/tsconfig.json b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/tsconfig.json
index d82d0a2693841..a225a58f03fe5 100644
--- a/x-pack/platform/packages/shared/kbn-inference-prompt-utils/tsconfig.json
+++ b/x-pack/platform/packages/shared/kbn-inference-prompt-utils/tsconfig.json
@@ -19,5 +19,6 @@
     "@kbn/dev-cli-errors",
     "@kbn/inference-common",
     "@kbn/inference-tracing",
+    "@kbn/zod",
   ]
 }
diff --git a/x-pack/platform/plugins/shared/inference/common/http_apis.ts b/x-pack/platform/plugins/shared/inference/common/http_apis.ts
index a2365b1534090..e35eb228e46a7 100644
--- a/x-pack/platform/plugins/shared/inference/common/http_apis.ts
+++ b/x-pack/platform/plugins/shared/inference/common/http_apis.ts
@@ -12,6 +12,7 @@ import type {
   InferenceConnector,
   Prompt,
   ChatCompleteMetadata,
+  ToolChoice,
 } from '@kbn/inference-common';
 
 export interface ChatCompleteRequestBodyBase {
@@ -24,6 +25,7 @@ export interface ChatCompleteRequestBodyBase {
     retryOn?: 'all' | 'auto';
   };
   metadata?: ChatCompleteMetadata;
+  toolChoice?: ToolChoice;
 }
 
 export type ChatCompleteRequestBody = ChatCompleteRequestBodyBase & {
diff --git a/x-pack/platform/plugins/shared/inference/common/rest/prompt.ts b/x-pack/platform/plugins/shared/inference/common/rest/prompt.ts
index 391724261430a..89c81d8a24856 100644
--- a/x-pack/platform/plugins/shared/inference/common/rest/prompt.ts
+++ b/x-pack/platform/plugins/shared/inference/common/rest/prompt.ts
@@ -41,6 +41,7 @@ export function createPromptRestApi({ fetch, signal }: PublicInferenceClientCrea
     connectorId,
     functionCalling,
     prevMessages,
+    toolChoice,
   } = options;
 
   const body: PromptRequestBody = {
@@ -54,6 +55,7 @@ export function createPromptRestApi({ fetch, signal }: PublicInferenceClientCrea
     input,
     prevMessages,
     metadata,
+    toolChoice,
   };
 
   const validationResult = inputSchema.safeParse(input);
diff --git a/x-pack/platform/plugins/shared/inference/server/routes/prompt.ts b/x-pack/platform/plugins/shared/inference/server/routes/prompt.ts
index 825c7fac6fbc5..2bddbeac32b4a 100644
--- a/x-pack/platform/plugins/shared/inference/server/routes/prompt.ts
+++ b/x-pack/platform/plugins/shared/inference/server/routes/prompt.ts
@@ -54,6 +54,7 @@ export function registerPromptRoute({
       temperature = 0.25,
       metadata,
       prevMessages,
+      toolChoice,
     } = request.body;
 
     return client.prompt({
@@ -73,6 +74,7 @@ export function registerPromptRoute({
       temperature,
       metadata,
       prevMessages,
+      toolChoice,
     });
   }
diff --git a/x-pack/platform/plugins/shared/inference/server/routes/schemas.ts b/x-pack/platform/plugins/shared/inference/server/routes/schemas.ts
index ecca9b940df1f..3204ef439de2e 100644
--- a/x-pack/platform/plugins/shared/inference/server/routes/schemas.ts
+++ b/x-pack/platform/plugins/shared/inference/server/routes/schemas.ts
@@ -22,32 +22,36 @@ export const toolCallSchema: Type<ToolCall[]> = schema.arrayOf(
   })
 );
 
+export const toolsSchema = schema.maybe(
+  schema.recordOf(
+    schema.string(),
+    schema.object({
+      description: schema.string(),
+      schema: schema.maybe(
+        schema.object({
+          type: schema.literal('object'),
+          properties: schema.recordOf(schema.string(), schema.any()),
+          required: schema.maybe(schema.arrayOf(schema.string())),
+        })
+      ),
+    })
+  )
+);
+
+export const toolChoiceSchema = schema.maybe(
+  schema.oneOf([
+    schema.literal(ToolChoiceType.auto),
+    schema.literal(ToolChoiceType.none),
+    schema.literal(ToolChoiceType.required),
+    schema.object({
+      function: schema.string(),
+    }),
+  ])
+);
+
 export const messageOptionsSchema = schema.object({
-  tools: schema.maybe(
-    schema.recordOf(
-      schema.string(),
-      schema.object({
-        description: schema.string(),
-        schema: schema.maybe(
-          schema.object({
-            type: schema.literal('object'),
-            properties: schema.recordOf(schema.string(), schema.any()),
-            required: schema.maybe(schema.arrayOf(schema.string())),
-          })
-        ),
-      })
-    )
-  ),
-  toolChoice: schema.maybe(
-    schema.oneOf([
-      schema.literal(ToolChoiceType.auto),
-      schema.literal(ToolChoiceType.none),
-      schema.literal(ToolChoiceType.required),
-      schema.object({
-        function: schema.string(),
-      }),
-    ])
-  ),
+  tools: toolsSchema,
+  toolChoice: toolChoiceSchema,
 });
 
 export const chatCompleteBaseSchema = schema.object({
@@ -108,7 +112,7 @@ export const chatCompleteBodySchema: Type<ChatCompleteRequestBody> = schema.allO
 const promptSchema = schema.object({
   prompt: schema.object({
     name: schema.string(),
-    description: schema.string(),
+    description: schema.maybe(schema.string()),
     versions: schema.arrayOf(
       schema.allOf([
         messageOptionsSchema,
@@ -174,6 +178,7 @@ const promptSchema = schema.object({
   }),
   input: schema.any(),
   prevMessages: schema.maybe(schema.arrayOf(messageSchema)),
+  toolChoice: toolChoiceSchema,
 });
 
 export const promptBodySchema: Type<PromptRequestBody> = schema.allOf([
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts
index 1c3f04970b2fc..a6bd7e5ab9877 100644
--- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts
@@ -37,11 +37,10 @@ export function naturalLanguageToEsql({
     switchMap((docBase) => {
       const systemMessage = `You are a helpful assistant for generating and executing ES|QL queries.
       Your goal is to help the user construct an ES|QL query for their data.
-
       VERY IMPORTANT: When writing ES|QL queries, make sure to ONLY use commands, functions and operators listed in the current documentation.
-
       ${docBase.getSystemMessage()}`;
+
       const messages: Message[] =
         'input' in rest ? [{ role: MessageRole.User, content: rest.input }] : rest.messages;
diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/public/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/public/index.ts
index 158063e936554..ed899d8569b16 100644
--- a/x-pack/platform/plugins/shared/observability_ai_assistant/public/index.ts
+++ b/x-pack/platform/plugins/shared/observability_ai_assistant/public/index.ts
@@ -34,7 +34,10 @@ export type {
 export { aiAssistantCapabilities } from '../common/capabilities';
 export { ConnectorSelectorBase } from './components/connector_selector/connector_selector_base';
 export { useAbortableAsync, type AbortableAsyncState } from './hooks/use_abortable_async';
-export { useGenAIConnectorsWithoutContext } from './hooks/use_genai_connectors';
+export {
+  useGenAIConnectorsWithoutContext,
+  type UseGenAIConnectorsResult,
+} from './hooks/use_genai_connectors';
 export { createStorybookChatService, createStorybookService } from './storybook_mock';
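
For reviewers: a minimal usage sketch of the new `finalToolChoice` flow, modeled on the call shapes in `execute_as_reasoning_agent.test.ts` above. The `answer_question` prompt, the `search` tool, and the callback bodies are illustrative assumptions, not code from this PR; only `executeAsReasoningAgent`, `finalToolChoice`, and the `{ response: ... }` callback result shape come from the changes themselves.

```ts
import { z } from '@kbn/zod';
import type { BoundInferenceClient, Prompt } from '@kbn/inference-common';
import { executeAsReasoningAgent } from '@kbn/inference-prompt-utils';

// Hypothetical prompt: one gathering tool plus the `complete` tool that the
// orchestrator can force via `finalToolChoice`.
const prompt = {
  name: 'answer_question',
  input: z.object({ question: z.string() }),
  versions: [
    {
      template: { static: { content: 'Answer the question using the available tools.' } },
      tools: {
        search: {
          description: 'search for matching documents',
          schema: { type: 'object', properties: {} },
        },
        complete: {
          description: 'complete the task',
          schema: { type: 'object', properties: {} },
        },
      },
    },
  ],
} satisfies Prompt;

export async function run(inferenceClient: BoundInferenceClient) {
  const result = await executeAsReasoningAgent({
    inferenceClient,
    prompt,
    input: { question: 'Which hosts reported errors today?' },
    maxSteps: 5,
    toolCallbacks: {
      // Callbacks return `{ response }`; string responses are mapped to
      // `{ content }`, and `stepsLeft` is injected by the flow.
      search: async () => ({ response: { hits: [] } }),
      complete: async () => ({ response: {} }),
    },
    // On the completing turn the flow sets `toolChoice` to this value, so the
    // final turn must end with a `complete` tool call.
    finalToolChoice: { type: 'function', function: 'complete' },
  });

  // `result.toolCalls` contains only the final tool call, and `result.input`
  // is the sanitized conversation with planning tool calls removed.
  return result;
}
```

Per the rules in the `executeAsReasoningAgent` doc comment, omitting `finalToolChoice` instead makes the agent finish with plain summarization text once `complete()` is called.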