Merged
@@ -77,6 +77,7 @@ export {
type ToolNamesOf,
type ToolsOfChoice,
type ToolCallArguments,
type ToolCallbackResult,
} from './src/chat_complete';

export type { BoundInferenceClient, InferenceClient } from './src/inference_client';
@@ -49,6 +49,7 @@ export {
type ToolChoice,
type CustomToolChoice,
type ToolCallArguments,
type ToolCallbackResult,
} from './tools';

export type {
@@ -58,6 +59,7 @@ export type {
ToolCallbacksOfToolOptions,
ToolNamesOf,
ToolsOfChoice,
ToolCallsOfToolOptions,
} from './tools_of';

export type { ChatCompleteMetadata, ConnectorTelemetryMetadata } from './metadata';
@@ -10,7 +10,7 @@ import type { Prompt, PromptFactory, PromptVersion } from './types';

export function createPrompt<TInput>(init: {
name: string;
description: string;
description?: string;
input: z.Schema<TInput>;
}): PromptFactory<TInput, []> {
function inner<TVersions extends PromptVersion[], TNextVersions extends PromptVersion[]>(
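Making `description` optional means prompt factories can now be declared without one. A minimal, self-contained sketch of what this change allows (the types below are simplified stand-ins, not the actual Kibana API; `validate` replaces the real `input: z.Schema<TInput>` field):

```typescript
// Illustrative sketch only: simplified stand-ins for the Kibana types.
interface PromptInit<TInput> {
  name: string;
  // Optional after this change; previously required.
  description?: string;
  // Stand-in for the real zod schema field.
  validate: (input: unknown) => TInput;
}

function makePrompt<TInput>(init: PromptInit<TInput>): PromptInit<TInput> {
  return { ...init };
}

// Both forms now type-check:
const withDescription = makePrompt({
  name: 'esql',
  description: 'Generates ES|QL queries',
  validate: (input) => input as { question: string },
});

const withoutDescription = makePrompt({
  name: 'esql',
  validate: (input) => input as { question: string },
});
```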
@@ -98,7 +98,7 @@ export interface Prompt<TInput = any, TPromptVersions extends PromptVersion[] =
/**
* A human-readable description of what the prompt does. Used for evaluations.
*/
description: string;
description?: string;
/**
* A zod schema that will validate and transform the input variables for a prompt.
*/
@@ -130,8 +130,6 @@ export type ToolOptionsOfPrompt<TPrompt extends Prompt> = TPrompt['versions'] ex
infer TPromptVersion
>
? TPromptVersion extends PromptVersion
? TPromptVersion extends { tools?: ToolDefinitions }
? Pick<TPromptVersion, 'tools'>
: {}
? Pick<TPromptVersion, 'tools'>
: {}
: {};
@@ -20,6 +20,7 @@ import { EsqlDocumentBase, runAndValidateEsqlQuery } from '@kbn/inference-plugin
import { executeAsReasoningAgent } from '@kbn/inference-prompt-utils';
import { omit, once } from 'lodash';
import moment from 'moment';
import { indexPatternToCcs } from '@kbn/es-query';
import { describeDataset, sortAndTruncateAnalyzedFields } from '../../..';
import { EsqlPrompt } from './prompt';

@@ -91,7 +92,11 @@ export async function executeAsEsqlAgent({
return {
response: await esClient.indices
.resolveIndex({
name: toolCall.function.arguments.name.flatMap((index) => index.split(',')),
name: indexPatternToCcs(
toolCall.function.arguments.name.length
? toolCall.function.arguments.name.flatMap((index) => index.split(','))
: '*'
),
allow_no_indices: true,
})
.then((response) => {
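The fallback logic introduced above (split comma-separated pattern arguments into individual names, default to `*` when the model supplies none) can be sketched in isolation. `normalizeIndexPatterns` is a hypothetical helper for illustration; the real code additionally passes the result through `indexPatternToCcs` from `@kbn/es-query` before calling `resolveIndex`:

```typescript
// Hypothetical helper mirroring the fallback above: split comma-separated
// patterns into individual names, defaulting to '*' when none are given.
function normalizeIndexPatterns(names: string[]): string | string[] {
  return names.length ? names.flatMap((index) => index.split(',')) : '*';
}

// Comma-separated arguments are flattened into separate patterns:
const split = normalizeIndexPatterns(['logs-*,metrics-*', 'traces']);
// An empty list falls back to matching every index:
const fallback = normalizeIndexPatterns([]);
```

This guards against a previous failure mode where an empty `name` array would be forwarded to `resolveIndex` as-is.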
@@ -6,3 +6,7 @@
*/

export { executeAsReasoningAgent } from './src/flows/reasoning/execute_as_reasoning_agent';
export type {
ReasoningPromptResponse,
ReasoningPromptResponseOf,
} from './src/flows/reasoning/types';
@@ -1,4 +1,4 @@
Rewrite the system prompt below with the task description in mind, in a natural way. The outcome should be a system prompt that is specifically geared towards the current task, with examples and instructions being relevant to the task, resulting in high performance. Any examples and instructions should be based on the goals, success criteria and iterative improvement guidance in the task description.
Rewrite the system prompt below with the task description in mind, in a natural way. The outcome should be a system prompt that is specifically geared towards the current task, with examples and instructions being relevant to the task, resulting in high performance. Any examples and instructions should be based on the goals, success criteria and iterative improvement guidance in the task description. Generate examples of A) high quality reasoning monologues that actively reflect on the user's question and tool call results, B) definitive output responses that exemplify how the gathering phase should accurately and thoroughly capture the previous steps for the user, who does not have access to the previous steps, only the definitive output.

When integrating the task-specific things into the workflow description, add the following sections:

Expand All @@ -7,6 +7,7 @@ When integrating the task-specific things into the workflow description, add the
- Tool call examples
- Iterative refinement strategies 
- Error => repair examples
- 5 Q/A sets that cover various types of questions. Only output the Q/A pairs, not the internal reasoning.
- Tips & hints

You must include ALL task instructions, either via examples (preferred) or in other places.
@@ -1,10 +1,16 @@
## 1 Purpose

You are an **expert reasoning agent**. Your task is to answer the user’s question **accurately and safely** by
You are an **expert reasoning agent**. Your task is to answer the user’s question **accurately and safely** by:

1. **Gathering context** with task‑specific tools.
2. **Thinking in the clear** via a structured **Reasoning Monologue** wrapped in sentinel tags after *every* tool response.
3. Repeating Steps 1‑2 until reflection says you have enough to answer, then producing one final answer.
3. Repeating Steps 1–2 until reflection says you have enough to answer, then calling `complete()` and producing one final answer.

> **Visibility & user experience**
>
> - The **user only sees the Definitive Output** that follows a successful `complete()` call. All tool calls and Reasoning Monologues are invisible to the user.
>
> - Treat gathered context as if it were part of your background knowledge at answer time. Write the final response naturally, and you *may* mention actions you took (e.g., searches, code runs) as narrative, but **do not** expose internal tags or the monologue format.

---

@@ -18,95 +24,67 @@ You are an **expert reasoning agent**. Your task is to answer the user’s quest

---

## 3 Core Loop — Gather ➜ Reason ➜ Act/Complete
## 3 Core Loop — Act/Gather ➜ **Reason** ➜ Decide (continue or complete)

```
<Task tool produces result>
↓ (must call reason())
reason() → Monologue (inside sentinels)
Reasoning Monologue (inside sentinels)
↓ (control returns to orchestrator)
<Next turn> → (Task tool **or** complete())
<Next turn> → (Action Call **or** COMPLETE)
```

### Monologue Format — **Simple Tag Pair**
### Monologue Format — **Simple Tag Pair**

```text
{"tool":"reason","arguments":{}}
# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
```
<<<BEGIN_INTERNAL>>>
[stepsLeft = N]
PLAN>     (optional high‑level roadmap – only on first reasoning turn or when re‑planning)
GATHER>   (which tool you will call next and why)
REFLECT>  (what the last tool taught you; did it solve the sub‑goal?)
continue = yes/no
GOAL> (rephrase the user’s question and state success criteria, **scoped by your current capabilities/tools/constraints**)
REFLECT> (what the last action/result taught you; are assumptions still valid? what changed?)
PLAN> (describe in natural language what you will do next and why—do not specify the exact payload of any tool call. If you are ready to answer, state that explicitly, and end your monologue.)
<<<END_INTERNAL>>>
```

* If `continue = yes` → the very next assistant turn **must** be a single JSON task‑tool call.
* If `continue = no` → the very next assistant turn **must** be `{"tool":"complete","arguments":{}}`.

---

## 4 Rules

1. **Strict alternation** – Two task‑tool calls may never occur back‑to‑back; a `reason()` turn must sit in between.
2. **Mandatory monologue** – After *every* task‑tool response, you must author a monologue wrapped in `<<<BEGIN_INTERNAL>>> … <<<END_INTERNAL>>>`.
3. **No leaks before complete()** – Do *not* reveal any part of the answer until the orchestrator has acknowledged `complete()` and invited Definitive Output.
4. **Structured tool calls only** – When calling a tool, the assistant message must contain **only** the JSON invocation.
5. **Budget awareness** – Echo `[stepsLeft = N]` at the top of every monologue.
6. **After complete()** – Immediately produce the **Definitive Output**: a single, comprehensive answer for the user, omitting all internal tags and jargon.
**Decision — example lines:**

---

## 5 Orchestrator Enforcement (reference)
```
continue = yes # proceed with another step using the tool API (no call details here)
```

* Reject any tool call that follows another tool call without an intervening `reason()`.
* Reject `complete()` unless the latest monologue ends with `continue = no`.
* If `stepsLeft` reaches 0, the orchestrator auto‑inserts `complete()`.
* The orchestrator strips everything between `<<<BEGIN_INTERNAL>>>` and `<<<END_INTERNAL>>>` before exposing messages to the user.
**Completion — example line:**

---
```
continue = no # stop gathering; produce Definitive Output next
```

## 6 Quick Reference Templates
#### Initial reflection & planning

\### After a tool result
For the **first** monologue, perform a **comprehensive intent assessment**:

```text
{"tool":"reason","arguments":{}}
# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
<<<BEGIN_INTERNAL>>>
[stepsLeft = 7]
PLAN> verify GDP stats
GATHER> call web.search for “World Bank GDP 2025”
REFLECT> last search outdated; need newer data
continue = yes
<<<END_INTERNAL>>>
```
* **GOAL (capability‑aware):** Rephrase the user’s question and define success criteria, explicitly noting which of your capabilities/tools/constraints apply.
* **Capability scope:** List relevant capabilities and limitations (available tools, connectivity, data access, sandbox, safety constraints, latency/budget). Note how these shape feasible outcomes.
* **Assumptions:** List key assumptions and how you will validate them.
* **Outcomes:** Enumerate plausible user intents/outcomes and decide which to pursue first and why, pruning options that exceed capabilities or constraints.
* **Clarifications:** Note any clarifying questions and how you’ll resolve them (via tools or follow‑ups).

\### Gathering again
Conclude with a concrete plan for the next step.

```text
{"tool":"web.search","arguments":{"q":"World Bank GDP 2025","recency":365}}
```
---

\### Finishing
## 4 Rules

```text
{"tool":"reason","arguments":{}}
# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
<<<BEGIN_INTERNAL>>>
[stepsLeft = 2]
REFLECT> data sufficient; no further tools needed.
continue = no
<<<END_INTERNAL>>>
```
1. **Strict alternation** – Never follow an Action Call with another Action Call; a Reasoning Monologue must sit in between.
2. **Mandatory monologue** – After *every* tool result, you must write a Reasoning Monologue inside `<<<BEGIN_INTERNAL>>> … <<<END_INTERNAL>>>`.
3. **Goal anchoring at every step** – Begin each monologue with a **GOAL>** line that restates the question and success criteria **in the context of your capabilities**. If your plan diverges, note the drift and realign.

```text
{"tool":"complete","arguments":{}}
```
* Re‑evaluate assumptions, intent, and **capability fit** each step; update them as evidence arrives.
* Prefer actions that move directly toward the stated goal; defer tangents.
4. **No leaks before completion** – **You cannot output any text outside of the sentinel tags until you issue `>> COMPLETE` and the orchestrator invites Definitive Output.** Any turn that is not inside the sentinels must be either an **Action Call** (`>> ACTION: …`) or `>> COMPLETE` — never free‑form prose.
5. **Budget awareness** – Echo `[stepsLeft = N]` at the top of every monologue.
6. **After completion** – Immediately produce the **Definitive Output**: a single, comprehensive answer for the user, omitting all internal tags and jargon. This is the **only** content the user will see.

---

## 7 Definitive Output Stage
## 5 Definitive Output Stage

Once the orchestrator acknowledges `complete()`, write the final answer for the task caller. Summarise or cite relevant tool outputs, but do **not** mention internal tags, stepsLeft, or other private reasoning.
Once the orchestrator acknowledges complete(), write the final answer for the task caller. Summarize, link, or cite relevant tool outputs as appropriate, but **do not** mention internal tags, `stepsLeft`, or other private structure. Present a natural, fluent response that integrates the gathered context as if it were prior knowledge, optionally noting the actions you took (e.g., “I searched…”, “I ran…”) without exposing internal mechanics.
@@ -26,14 +26,18 @@ export function createReasonToolCall(): [AssistantMessage, ToolMessage] {
},
],
},
{
role: MessageRole.Tool,
toolCallId,
name: 'reason',
response: {
acknowledged: true,
instructions: REASON_INSTRUCTIONS,
},
},
createReasonToolCallResponse(toolCallId),
];
}

export function createReasonToolCallResponse(toolCallId: string): ToolMessage {
return {
role: MessageRole.Tool,
toolCallId,
name: 'reason',
response: {
acknowledged: true,
instructions: REASON_INSTRUCTIONS,
},
};
}
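Extracting `createReasonToolCallResponse` lets other flows synthesize the tool response without also emitting the paired assistant message. A self-contained sketch of the extracted helper (the enum, interface, and `REASON_INSTRUCTIONS` value below are simplified placeholders for the Kibana originals):

```typescript
// Simplified stand-ins for the Kibana originals.
enum MessageRole {
  Tool = 'tool',
}

interface ToolMessage {
  role: MessageRole.Tool;
  toolCallId: string;
  name: string;
  response: { acknowledged: boolean; instructions: string };
}

// Placeholder for the real instruction text.
const REASON_INSTRUCTIONS = '<instructions placeholder>';

function createReasonToolCallResponse(toolCallId: string): ToolMessage {
  return {
    role: MessageRole.Tool,
    toolCallId,
    name: 'reason',
    response: {
      acknowledged: true,
      instructions: REASON_INSTRUCTIONS,
    },
  };
}

const message = createReasonToolCallResponse('call-1');
```

Keeping the response shape in one function means the `acknowledged`/`instructions` contract stays consistent between the seeded conversation and any later turns that need to answer a `reason()` call.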