@@ -220,6 +220,7 @@ async def step_stream_no_tokens(
            actor=self.actor,
        )
        stop_reason = None
+        job_update_metadata = None
        usage = LettaUsageStatistics()

        # span for request
@@ -367,6 +368,7 @@ async def step_stream_no_tokens(
        except Exception as e:
            # Handle any unexpected errors during step processing
            self.logger.error(f"Error during step processing: {e}")
+            job_update_metadata = {"error": str(e)}

            # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
            if not stop_reason:
@@ -429,7 +431,7 @@ async def step_stream_no_tokens(
                    self.logger.error("Invalid StepProgression value")

                if settings.track_stop_reason:
-                    await self._log_request(request_start_timestamp_ns, request_span)
+                    await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -447,7 +449,7 @@ async def step_stream_no_tokens(
                force=False,
            )

-        await self._log_request(request_start_timestamp_ns, request_span)
+        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        # Return back usage
        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
@@ -485,6 +487,7 @@ async def _step(
        request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

        stop_reason = None
+        job_update_metadata = None
        usage = LettaUsageStatistics()
        for i in range(max_steps):
            # If dry run, build request data and return it without making LLM call
@@ -622,6 +625,7 @@ async def _step(
        except Exception as e:
            # Handle any unexpected errors during step processing
            self.logger.error(f"Error during step processing: {e}")
+            job_update_metadata = {"error": str(e)}

            # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
            if not stop_reason:
@@ -680,7 +684,7 @@ async def _step(
                    self.logger.error("Invalid StepProgression value")

                if settings.track_stop_reason:
-                    await self._log_request(request_start_timestamp_ns, request_span)
+                    await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -698,7 +702,7 @@ async def _step(
                force=False,
            )

-        await self._log_request(request_start_timestamp_ns, request_span)
+        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        return current_in_context_messages, new_in_context_messages, stop_reason, usage

@@ -748,6 +752,7 @@ async def step_stream(
            actor=self.actor,
        )
        stop_reason = None
+        job_update_metadata = None
        usage = LettaUsageStatistics()
        first_chunk, request_span = True, None
        if request_start_timestamp_ns:
@@ -977,6 +982,7 @@ async def step_stream(
        except Exception as e:
            # Handle any unexpected errors during step processing
            self.logger.error(f"Error during step processing: {e}")
+            job_update_metadata = {"error": str(e)}

            # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
            if not stop_reason:
@@ -1039,7 +1045,7 @@ async def step_stream(

                # Do tracking for failure cases. Can consolidate with success conditions later.
                if settings.track_stop_reason:
-                    await self._log_request(request_start_timestamp_ns, request_span)
+                    await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)

            except Exception as e:
                self.logger.error("Failed to update step: %s", e)
@@ -1056,20 +1062,28 @@ async def step_stream(
                force=False,
            )

-        await self._log_request(request_start_timestamp_ns, request_span)
+        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)

        for finish_chunk in self.get_finish_chunks_for_stream(usage, stop_reason):
            yield f"data: {finish_chunk}\n\n"

-    async def _log_request(self, request_start_timestamp_ns: int, request_span: "Span | None"):
+    async def _log_request(
+        self, request_start_timestamp_ns: int, request_span: "Span | None", job_update_metadata: dict | None, is_error: bool
+    ):
        if request_start_timestamp_ns:
            now_ns, now = get_utc_timestamp_ns(), get_utc_time()
            duration_ns = now_ns - request_start_timestamp_ns
            if request_span:
                request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
            await self._update_agent_last_run_metrics(now, ns_to_ms(duration_ns))
-            if self.current_run_id:
+            if settings.track_agent_run and self.current_run_id:
                await self.job_manager.record_response_duration(self.current_run_id, duration_ns, self.actor)
+                await self.job_manager.safe_update_job_status_async(
+                    job_id=self.current_run_id,
+                    new_status=JobStatus.failed if is_error else JobStatus.completed,
+                    actor=self.actor,
+                    metadata=job_update_metadata,
+                )
        if request_span:
            request_span.end()

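Note (illustrative, not part of the commit): the widened _log_request signature above means each step path now threads the error metadata captured in its except block into the final log call, and the tracked run ends in a terminal status. A minimal sketch of the calling pattern, assuming the surrounding attributes (settings, job_manager, current_run_id, request_span) exist as in this class; do_step() is a hypothetical stand-in for the step body:

    job_update_metadata = None
    try:
        await do_step()  # hypothetical stand-in for the LLM step / streaming loop
    except Exception as e:
        # failure path: record the error and mark the tracked run JobStatus.failed
        job_update_metadata = {"error": str(e)}
        if settings.track_stop_reason:
            await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
        raise
    # success path: metadata stays None and the tracked run is marked JobStatus.completed
    await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=False)
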
@@ -1507,8 +1521,6 @@ async def _execute_tool(
                status="error",
            )

-        print(target_tool)
-
        # TODO: This temp. Move this logic and code to executors

        if agent_step_span: