Enforce max_output_length for shell tool outputs (#2299)

gustavz · web-flow · commit b1ffad0efe32 · 2026-01-15T23:21:39.000+09:00
diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py
@@ -1739,6 +1739,9 @@ async def execute(
         shell_output_payload: list[dict[str, Any]] | None = None
         provider_meta: dict[str, Any] | None = None
         max_output_length: int | None = None
+        requested_max_output_length = _normalize_max_output_length(
+            shell_call.action.max_output_length
+        )
 
         try:
             executor_result = call.shell_tool.executor(request)
@@ -1748,15 +1751,31 @@ async def execute(
 
             if isinstance(result, ShellResult):
                 normalized = [_normalize_shell_output(entry) for entry in result.output]
+                result_max_output_length = _normalize_max_output_length(result.max_output_length)
+                if result_max_output_length is None:
+                    max_output_length = requested_max_output_length
+                elif requested_max_output_length is None:
+                    max_output_length = result_max_output_length
+                else:
+                    max_output_length = min(result_max_output_length, requested_max_output_length)
+                if max_output_length is not None:
+                    normalized = _truncate_shell_outputs(normalized, max_output_length)
                 output_text = _render_shell_outputs(normalized)
+                if max_output_length is not None:
+                    output_text = output_text[:max_output_length]
                 shell_output_payload = [_serialize_shell_output(entry) for entry in normalized]
                 provider_meta = dict(result.provider_data or {})
-                max_output_length = result.max_output_length
             else:
                 output_text = str(result)
+                if requested_max_output_length is not None:
+                    max_output_length = requested_max_output_length
+                    output_text = output_text[:max_output_length]
         except Exception as exc:
             status = "failed"
             output_text = _format_shell_error(exc)
+            if requested_max_output_length is not None:
+                max_output_length = requested_max_output_length
+                output_text = output_text[:max_output_length]
             logger.error("Shell executor failed: %s", exc, exc_info=True)
 
         await asyncio.gather(
@@ -2029,6 +2048,51 @@ def _render_shell_outputs(outputs: Sequence[ShellCommandOutput]) -> str:
     return "\n\n".join(rendered_chunks)
 
 
+def _truncate_shell_outputs(  # cap total stdout+stderr length across all outputs
+    outputs: Sequence[ShellCommandOutput], max_length: int
+) -> list[ShellCommandOutput]:  # returns new entries; the inputs are not mutated
+    if max_length <= 0:  # non-positive limit: blank both streams of every entry
+        return [
+            ShellCommandOutput(
+                stdout="",
+                stderr="",
+                outcome=output.outcome,  # outcome/command/provider_data carried over
+                command=output.command,
+                provider_data=output.provider_data,
+            )
+            for output in outputs
+        ]
+
+    remaining = max_length  # one budget shared by all entries, consumed in order
+    truncated: list[ShellCommandOutput] = []
+    for output in outputs:
+        stdout = ""  # streams stay empty once the budget is exhausted
+        stderr = ""
+        if remaining > 0 and output.stdout:
+            stdout = output.stdout[:remaining]  # stdout draws from the budget first
+            remaining -= len(stdout)
+        if remaining > 0 and output.stderr:
+            stderr = output.stderr[:remaining]  # stderr gets only what is left
+            remaining -= len(stderr)
+        truncated.append(
+            ShellCommandOutput(
+                stdout=stdout,
+                stderr=stderr,
+                outcome=output.outcome,  # non-text fields are copied unchanged
+                command=output.command,
+                provider_data=output.provider_data,
+            )
+        )
+
+    return truncated
+
+
+def _normalize_max_output_length(value: int | None) -> int | None:
+    if value is None:  # None passes through unchanged
+        return None
+    return max(0, value)  # negative limits are clamped to 0
+
+
 def _format_shell_error(error: Exception | BaseException | Any) -> str:
     if isinstance(error, Exception):
         message = str(error)
@@ -2078,9 +2142,9 @@ def _coerce_shell_call(tool_call: Any) -> ShellCallData:
     )
     timeout_ms = int(timeout_value) if isinstance(timeout_value, (int, float)) else None
 
-    max_length_value = _get_mapping_or_attr(
-        action_payload, "max_output_length"
-    ) or _get_mapping_or_attr(action_payload, "maxOutputLength")
+    max_length_value = _get_mapping_or_attr(action_payload, "max_output_length")
+    if max_length_value is None:
+        max_length_value = _get_mapping_or_attr(action_payload, "maxOutputLength")
     max_output_length = (
         int(max_length_value) if isinstance(max_length_value, (int, float)) else None
     )
diff --git a/tests/test_shell_tool.py b/tests/test_shell_tool.py
@@ -94,15 +94,19 @@ async def test_shell_tool_structured_output_is_rendered() -> None:
 async def test_shell_tool_executor_failure_returns_error() -> None:
     class ExplodingExecutor:
         def __call__(self, request):
-            raise RuntimeError("boom")
+            raise RuntimeError("boom" * 10)
 
     shell_tool = ShellTool(executor=ExplodingExecutor())
     tool_call = {
         "type": "shell_call",
         "id": "shell_call_fail",
         "call_id": "call_shell_fail",
         "status": "completed",
-        "action": {"commands": ["echo boom"], "timeout_ms": 1000},
+        "action": {
+            "commands": ["echo boom"],
+            "timeout_ms": 1000,
+            "max_output_length": 6,
+        },
     }
     tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
     agent = Agent(name="shell-agent", tools=[shell_tool])
@@ -117,12 +121,13 @@ def __call__(self, request):
     )
 
     assert isinstance(result, ToolCallOutputItem)
-    assert "boom" in result.output
+    assert result.output == "boombo"
     raw_item = cast(dict[str, Any], result.raw_item)
     assert raw_item["type"] == "shell_call_output"
     assert raw_item["status"] == "failed"
+    assert raw_item["max_output_length"] == 6
     assert isinstance(raw_item["output"], list)
-    assert "boom" in raw_item["output"][0]["stdout"]
+    assert raw_item["output"][0]["stdout"] == "boombo"
     first_output = raw_item["output"][0]
     assert first_output["outcome"]["type"] == "exit"
     assert first_output["outcome"]["exit_code"] == 1
@@ -135,3 +140,235 @@ def __call__(self, request):
     assert "status" not in payload_dict
     assert "shell_output" not in payload_dict
     assert "provider_data" not in payload_dict
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_output_respects_max_output_length() -> None:
+    shell_tool = ShellTool(
+        executor=lambda request: ShellResult(  # result sets no max_output_length
+            output=[
+                ShellCommandOutput(
+                    stdout="0123456789",
+                    stderr="abcdef",
+                    outcome=ShellCallOutcome(type="exit", exit_code=0),
+                )
+            ],
+        )
+    )
+
+    tool_call = {
+        "type": "shell_call",
+        "id": "shell_call",
+        "call_id": "call_shell",
+        "status": "completed",
+        "action": {
+            "commands": ["echo hi"],
+            "timeout_ms": 1000,
+            "max_output_length": 6,  # limit requested by the tool call itself
+        },
+    }
+
+    tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    result = await ShellAction.execute(
+        agent=agent,
+        call=tool_run,
+        hooks=RunHooks[Any](),
+        context_wrapper=context_wrapper,
+        config=RunConfig(),
+    )
+
+    assert isinstance(result, ToolCallOutputItem)
+    assert result.output == "012345"  # rendered text is cut to 6 characters
+    raw_item = cast(dict[str, Any], result.raw_item)
+    assert raw_item["max_output_length"] == 6
+    assert raw_item["output"][0]["stdout"] == "012345"  # stdout uses the whole budget
+    assert raw_item["output"][0]["stderr"] == ""  # so stderr is expected to be empty
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_uses_smaller_max_output_length() -> None:
+    shell_tool = ShellTool(
+        executor=lambda request: ShellResult(
+            output=[
+                ShellCommandOutput(
+                    stdout="0123456789",
+                    stderr="abcdef",
+                    outcome=ShellCallOutcome(type="exit", exit_code=0),
+                )
+            ],
+            max_output_length=8,  # executor-side limit, larger than the requested 6
+        )
+    )
+
+    tool_call = {
+        "type": "shell_call",
+        "id": "shell_call",
+        "call_id": "call_shell",
+        "status": "completed",
+        "action": {
+            "commands": ["echo hi"],
+            "timeout_ms": 1000,
+            "max_output_length": 6,  # limit requested by the tool call
+        },
+    }
+
+    tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    result = await ShellAction.execute(
+        agent=agent,
+        call=tool_run,
+        hooks=RunHooks[Any](),
+        context_wrapper=context_wrapper,
+        config=RunConfig(),
+    )
+
+    assert isinstance(result, ToolCallOutputItem)
+    assert result.output == "012345"
+    raw_item = cast(dict[str, Any], result.raw_item)
+    assert raw_item["max_output_length"] == 6  # the smaller of 8 and 6 is expected
+    assert raw_item["output"][0]["stdout"] == "012345"
+    assert raw_item["output"][0]["stderr"] == ""
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_executor_can_override_max_output_length_to_zero() -> None:
+    shell_tool = ShellTool(
+        executor=lambda request: ShellResult(
+            output=[
+                ShellCommandOutput(
+                    stdout="0123456789",
+                    stderr="abcdef",
+                    outcome=ShellCallOutcome(type="exit", exit_code=0),
+                )
+            ],
+            max_output_length=0,  # executor-side limit of zero
+        )
+    )
+
+    tool_call = {
+        "type": "shell_call",
+        "id": "shell_call",
+        "call_id": "call_shell",
+        "status": "completed",
+        "action": {
+            "commands": ["echo hi"],
+            "timeout_ms": 1000,
+            "max_output_length": 6,  # requested limit is larger than the executor's 0
+        },
+    }
+
+    tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    result = await ShellAction.execute(
+        agent=agent,
+        call=tool_run,
+        hooks=RunHooks[Any](),
+        context_wrapper=context_wrapper,
+        config=RunConfig(),
+    )
+
+    assert isinstance(result, ToolCallOutputItem)
+    assert result.output == ""  # zero limit: no rendered text expected
+    raw_item = cast(dict[str, Any], result.raw_item)
+    assert raw_item["max_output_length"] == 0  # 0 must be kept, not treated as unset
+    assert raw_item["output"][0]["stdout"] == ""
+    assert raw_item["output"][0]["stderr"] == ""
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_action_can_request_zero_max_output_length() -> None:
+    shell_tool = ShellTool(
+        executor=lambda request: ShellResult(  # result sets no max_output_length
+            output=[
+                ShellCommandOutput(
+                    stdout="0123456789",
+                    stderr="abcdef",
+                    outcome=ShellCallOutcome(type="exit", exit_code=0),
+                )
+            ],
+        )
+    )
+
+    tool_call = {
+        "type": "shell_call",
+        "id": "shell_call",
+        "call_id": "call_shell",
+        "status": "completed",
+        "action": {
+            "commands": ["echo hi"],
+            "timeout_ms": 1000,
+            "max_output_length": 0,  # requested limit of zero
+        },
+    }
+
+    tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    result = await ShellAction.execute(
+        agent=agent,
+        call=tool_run,
+        hooks=RunHooks[Any](),
+        context_wrapper=context_wrapper,
+        config=RunConfig(),
+    )
+
+    assert isinstance(result, ToolCallOutputItem)
+    assert result.output == ""  # zero limit: no rendered text expected
+    raw_item = cast(dict[str, Any], result.raw_item)
+    assert raw_item["max_output_length"] == 0  # 0 must be kept, not treated as unset
+    assert raw_item["output"][0]["stdout"] == ""
+    assert raw_item["output"][0]["stderr"] == ""
+
+
+@pytest.mark.asyncio
+async def test_shell_tool_action_negative_max_output_length_clamps_to_zero() -> None:
+    shell_tool = ShellTool(
+        executor=lambda request: ShellResult(  # result sets no max_output_length
+            output=[
+                ShellCommandOutput(
+                    stdout="0123456789",
+                    stderr="abcdef",
+                    outcome=ShellCallOutcome(type="exit", exit_code=0),
+                )
+            ],
+        )
+    )
+
+    tool_call = {
+        "type": "shell_call",
+        "id": "shell_call",
+        "call_id": "call_shell",
+        "status": "completed",
+        "action": {
+            "commands": ["echo hi"],
+            "timeout_ms": 1000,
+            "max_output_length": -5,  # negative request
+        },
+    }
+
+    tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
+    agent = Agent(name="shell-agent", tools=[shell_tool])
+    context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)
+
+    result = await ShellAction.execute(
+        agent=agent,
+        call=tool_run,
+        hooks=RunHooks[Any](),
+        context_wrapper=context_wrapper,
+        config=RunConfig(),
+    )
+
+    assert isinstance(result, ToolCallOutputItem)
+    assert result.output == ""  # expected to behave like a limit of zero
+    raw_item = cast(dict[str, Any], result.raw_item)
+    assert raw_item["max_output_length"] == 0  # reported limit is clamped from -5 to 0
+    assert raw_item["output"][0]["stdout"] == ""
+    assert raw_item["output"][0]["stderr"] == ""