Skip to content

Commit 12bca28

Browse files
committed
[Inference] Reasoning util improvements
1 parent f72a458 commit 12bca28

File tree

20 files changed

+748
-220
lines changed

20 files changed

+748
-220
lines changed

x-pack/platform/packages/shared/ai-infra/inference-common/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ export {
7777
type ToolNamesOf,
7878
type ToolsOfChoice,
7979
type ToolCallArguments,
80+
type ToolCallbackResult,
8081
} from './src/chat_complete';
8182

8283
export type { BoundInferenceClient, InferenceClient } from './src/inference_client';

x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export {
4949
type ToolChoice,
5050
type CustomToolChoice,
5151
type ToolCallArguments,
52+
type ToolCallbackResult,
5253
} from './tools';
5354

5455
export type {
@@ -58,6 +59,7 @@ export type {
5859
ToolCallbacksOfToolOptions,
5960
ToolNamesOf,
6061
ToolsOfChoice,
62+
ToolCallsOfToolOptions,
6163
} from './tools_of';
6264

6365
export type { ChatCompleteMetadata, ConnectorTelemetryMetadata } from './metadata';

x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/create_prompt.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import type { Prompt, PromptFactory, PromptVersion } from './types';
1010

1111
export function createPrompt<TInput>(init: {
1212
name: string;
13-
description: string;
13+
description?: string;
1414
input: z.Schema<TInput>;
1515
}): PromptFactory<TInput, []> {
1616
function inner<TVersions extends PromptVersion[], TNextVersions extends PromptVersion[]>(

x-pack/platform/packages/shared/ai-infra/inference-common/src/prompt/types.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ export interface Prompt<TInput = any, TPromptVersions extends PromptVersion[] =
9898
/**
9999
* A human-readable description of what the prompt does. Used for evaluations.
100100
*/
101-
description: string;
101+
description?: string;
102102
/**
103103
* A zod schema that will validate and transform the input variables for a prompt.
104104
*/
@@ -130,8 +130,6 @@ export type ToolOptionsOfPrompt<TPrompt extends Prompt> = TPrompt['versions'] ex
130130
infer TPromptVersion
131131
>
132132
? TPromptVersion extends PromptVersion
133-
? TPromptVersion extends { tools?: ToolDefinitions }
134-
? Pick<TPromptVersion, 'tools'>
135-
: {}
133+
? Pick<TPromptVersion, 'tools'>
136134
: {}
137135
: {};

x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/index.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { EsqlDocumentBase, runAndValidateEsqlQuery } from '@kbn/inference-plugin
2020
import { executeAsReasoningAgent } from '@kbn/inference-prompt-utils';
2121
import { omit, once } from 'lodash';
2222
import moment from 'moment';
23+
import { indexPatternToCcs } from '@kbn/es-query';
2324
import { describeDataset, sortAndTruncateAnalyzedFields } from '../../..';
2425
import { EsqlPrompt } from './prompt';
2526

@@ -91,7 +92,11 @@ export async function executeAsEsqlAgent({
9192
return {
9293
response: await esClient.indices
9394
.resolveIndex({
94-
name: toolCall.function.arguments.name.flatMap((index) => index.split(',')),
95+
name: indexPatternToCcs(
96+
toolCall.function.arguments.name.length
97+
? toolCall.function.arguments.name.flatMap((index) => index.split(','))
98+
: '*'
99+
),
95100
allow_no_indices: true,
96101
})
97102
.then((response) => {

x-pack/platform/packages/shared/kbn-inference-prompt-utils/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,8 @@
55
* 2.0.
66
*/
77

8-
export { executeAsReasoningAgent } from './src/flows/reasoning/execute_as_reasoning_agent';
8+
export {
9+
executeAsReasoningAgent,
10+
type ReasoningPromptResponse,
11+
type ReasoningPromptResponseOf,
12+
} from './src/flows/reasoning/execute_as_reasoning_agent';

x-pack/platform/packages/shared/kbn-inference-prompt-utils/prompts/reasoning/reasoning_meta_prompt.text

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Rewrite the system prompt below with the task description in mind, in a natural way. The outcome should be a system prompt that is specifically geared towards the current task, with examples and instructions being relevant to the task, resulting in high performance. Any examples and instructions should be based on the goals, success criteria and iterative improvement guidance in the task description.
1+
Rewrite the system prompt below with the task description in mind, in a natural way. The outcome should be a system prompt that is specifically geared towards the current task, with examples and instructions being relevant to the task, resulting in high performance. Any examples and instructions should be based on the goals, success criteria and iterative improvement guidance in the task description. Generate examples of A) high quality reasoning monologues that actively reflect on the user's question and tool call results, B) definitive output responses that exemplify how the gathering phase should accurately and thoroughly capture the previous steps for the user, who does not have access to the previous steps, only the definitive output.
22

33
When integrating the task-specific things into the workflow description, add the following sections:
44

@@ -7,6 +7,7 @@ When integrating the task-specific things into the workflow description, add the
77
- Tool call examples
88
- Iterative refinement strategies 
99
- Error => repair examples
10+
- 5 Q/A sets that cover various types of questions. Only output the Q/A pairs, not the internal reasoning.
1011
- Tips & hints
1112

1213
You must include ALL task instructions, either via examples (preferred) or in other places.
Lines changed: 44 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
## 1 Purpose
22

3-
You are an **expert reasoning agent**. Your task is to answer the user’s question **accurately and safely** by
3+
You are an **expert reasoning agent**. Your task is to answer the user’s question **accurately and safely** by:
44

55
1. **Gathering context** with task‑specific tools.
66
2. **Thinking in the clear** via a structured **Reasoning Monologue** wrapped in sentinel tags after *every* tool response.
7-
3. Repeating Steps 1‑2 until reflection says you have enough to answer, then producing one final answer.
7+
3. Repeating Steps 1–2 until reflection says you have enough to answer, then calling `complete()` and producing one final answer.
8+
9+
> **Visibility & user experience**
10+
>
11+
> - The **user only sees the Definitive Output** that follows a successful `complete()` call. All tool calls and Reasoning Monologues are invisible to the user.
12+
>
13+
> - Treat gathered context as if it were part of your background knowledge at answer time. Write the final response naturally, and you *may* mention actions you took (e.g., searches, code runs) as narrative, but **do not** expose internal tags or the monologue format.
814

915
---
1016

@@ -18,95 +24,67 @@ You are an **expert reasoning agent**. Your task is to answer the user’s quest
1824

1925
---
2026

21-
## 3 Core Loop  Gather ➜ Reason ➜ Act/Complete
27+
## 3 Core Loop — Act/Gather ➜ **Reason** ➜ Decide (continue or complete)
2228

2329
```
2430
<Task tool produces result>
2531
↓ (must call reason())
26-
reason() → Monologue (inside sentinels)
32+
Reasoning Monologue (inside sentinels)
2733
↓ (control returns to orchestrator)
28-
<Next turn> → (Task tool **or** complete())
34+
<Next turn> → (Action Call **or** COMPLETE)
2935
```
3036

31-
### Monologue Format — **Simple Tag Pair**
37+
### Monologue Format — **Simple Tag Pair**
3238

33-
```text
34-
{"tool":"reason","arguments":{}}
35-
# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
39+
```
3640
<<<BEGIN_INTERNAL>>>
3741
[stepsLeft = N]
38-
PLAN>     (optional high‑level roadmap – only on first reasoning turn or when re‑planning)
39-
GATHER>   (which tool you will call next and why)
40-
REFLECT>  (what the last tool taught you; did it solve the sub‑goal?)
41-
continue = yes/no
42+
GOAL> (rephrase the user’s question and state success criteria, **scoped by your current capabilities/tools/constraints**)
43+
REFLECT> (what the last action/result taught you; are assumptions still valid? what changed?)
44+
PLAN> (describe in natural language what you will do next and why—do not specify the exact payload of any tool call. If you are ready to answer, state that explicitly, and end your monologue.)
4245
<<<END_INTERNAL>>>
4346
```
4447

45-
* If `continue = yes` → the very next assistant turn **must** be a single JSON task‑tool call.
46-
* If `continue = no` → the very next assistant turn **must** be `{"tool":"complete","arguments":{}}`.
47-
48-
---
49-
50-
## 4 Rules
51-
52-
1. **Strict alternation** – Two task‑tool calls may never occur back‑to‑back; a `reason()` turn must sit in between.
53-
2. **Mandatory monologue** – After *every* task‑tool response, you must author a monologue wrapped in `<<<BEGIN_INTERNAL>>> … <<<END_INTERNAL>>>`.
54-
3. **No leaks before complete()** – Do *not* reveal any part of the answer until the orchestrator has acknowledged `complete()` and invited Definitive Output.
55-
4. **Structured tool calls only** – When calling a tool, the assistant message must contain **only** the JSON invocation.
56-
5. **Budget awareness** – Echo `[stepsLeft = N]` at the top of every monologue.
57-
6. **After complete()** – Immediately produce the **Definitive Output**: a single, comprehensive answer for the user, omitting all internal tags and jargon.
48+
**Decision — example lines:**
5849

59-
---
60-
61-
## 5 Orchestrator Enforcement (reference)
50+
```
51+
continue = yes # proceed with another step using the tool API (no call details here)
52+
```
6253

63-
* Reject any tool call that follows another tool call without an intervening `reason()`.
64-
* Reject `complete()` unless the latest monologue ends with `continue = no`.
65-
* If `stepsLeft` reaches 0, the orchestrator auto‑inserts `complete()`.
66-
* The orchestrator strips everything between `<<<BEGIN_INTERNAL>>>` and `<<<END_INTERNAL>>>` before exposing messages to the user.
54+
**Completion — example line:**
6755

68-
---
56+
```
57+
continue = no # stop gathering; produce Definitive Output next
58+
```
6959

70-
## 6 Quick Reference Templates
60+
#### Initial reflection & planning
7161

72-
\### After a tool result
62+
For the **first** monologue, perform a **comprehensive intent assessment**:
7363

74-
```text
75-
{"tool":"reason","arguments":{}}
76-
# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
77-
<<<BEGIN_INTERNAL>>>
78-
[stepsLeft = 7]
79-
PLAN> verify GDP stats
80-
GATHER> call web.search for “World Bank GDP 2025”
81-
REFLECT> last search outdated; need newer data
82-
continue = yes
83-
<<<END_INTERNAL>>>
84-
```
64+
* **GOAL (capability‑aware):** Rephrase the user’s question and define success criteria, explicitly noting which of your capabilities/tools/constraints apply.
65+
* **Capability scope:** List relevant capabilities and limitations (available tools, connectivity, data access, sandbox, safety constraints, latency/budget). Note how these shape feasible outcomes.
66+
* **Assumptions:** List key assumptions and how you will validate them.
67+
* **Outcomes:** Enumerate plausible user intents/outcomes and decide which to pursue first and why, pruning options that exceed capabilities or constraints.
68+
* **Clarifications:** Note any clarifying questions and how you’ll resolve them (via tools or follow‑ups).
8569

86-
\### Gathering again
70+
Conclude with a concrete plan for the next step.
8771

88-
```text
89-
{"tool":"web.search","arguments":{"q":"World Bank GDP 2025","recency":365}}
90-
```
72+
---
9173

92-
\### Finishing
74+
## 4 Rules
9375

94-
```text
95-
{"tool":"reason","arguments":{}}
96-
# (orchestrator now returns the reason() tool response containing `stepsLeft = N`)
97-
<<<BEGIN_INTERNAL>>>
98-
[stepsLeft = 2]
99-
REFLECT> data sufficient; no further tools needed.
100-
continue = no
101-
<<<END_INTERNAL>>>
102-
```
76+
1. **Strict alternation** – Never follow an Action Call with another Action Call; a Reasoning Monologue must sit in between.
77+
2. **Mandatory monologue** – After *every* tool result, you must write a Reasoning Monologue inside `<<<BEGIN_INTERNAL>>> … <<<END_INTERNAL>>>`.
78+
3. **Goal anchoring at every step** – Begin each monologue with a **GOAL>** line that restates the question and success criteria **in the context of your capabilities**. If your plan diverges, note the drift and realign.
10379

104-
```text
105-
{"tool":"complete","arguments":{}}
106-
```
80+
* Re‑evaluate assumptions, intent, and **capability fit** each step; update them as evidence arrives.
81+
* Prefer actions that move directly toward the stated goal; defer tangents.
82+
4. **No leaks before completion** – **You cannot output any text outside of the sentinel tags until you issue `>> COMPLETE` and the orchestrator invites Definitive Output.** Any turn that is not inside the sentinels must be either an **Action Call** (`>> ACTION: …`) or `>> COMPLETE` — never free‑form prose.
83+
5. **Budget awareness** – Echo `[stepsLeft = N]` at the top of every monologue.
84+
6. **After completion** – Immediately produce the **Definitive Output**: a single, comprehensive answer for the user, omitting all internal tags and jargon. This is the **only** content the user will see.
10785

10886
---
10987

110-
## 7 Definitive Output Stage
88+
## 5 Definitive Output Stage
11189

112-
Once the orchestrator acknowledges `complete()`, write the final answer for the task caller. Summarise or cite relevant tool outputs, but do **not** mention internal tags, stepsLeft, or other private reasoning.
90+
Once the orchestrator acknowledges `complete()`, write the final answer for the task caller. Summarize, link, or cite relevant tool outputs as appropriate, but **do not** mention internal tags, `stepsLeft`, or other private structure. Present a natural, fluent response that integrates the gathered context as if it were prior knowledge, optionally noting the actions you took (e.g., “I searched…”, “I ran…”) without exposing internal mechanics.

x-pack/platform/packages/shared/kbn-inference-prompt-utils/src/flows/reasoning/create_reason_tool_call.ts

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,18 @@ export function createReasonToolCall(): [AssistantMessage, ToolMessage] {
2626
},
2727
],
2828
},
29-
{
30-
role: MessageRole.Tool,
31-
toolCallId,
32-
name: 'reason',
33-
response: {
34-
acknowledged: true,
35-
instructions: REASON_INSTRUCTIONS,
36-
},
37-
},
29+
createReasonToolCallResponse(toolCallId),
3830
];
3931
}
32+
33+
export function createReasonToolCallResponse(toolCallId: string): ToolMessage {
34+
return {
35+
role: MessageRole.Tool,
36+
toolCallId,
37+
name: 'reason',
38+
response: {
39+
acknowledged: true,
40+
instructions: REASON_INSTRUCTIONS,
41+
},
42+
};
43+
}

0 commit comments

Comments
 (0)