
Commit 6011233

Avoid unexpected error when stream chat doesn't yield (run-llama#13422)
Fix nonyielding stream chat bug

Co-authored-by: Logan Markewich <[email protected]>
1 parent 662e0f6 commit 6011233

File tree

4 files changed: +41 -8 lines changed

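Why this matters: if the underlying stream_chat generator yields nothing, any code that reads the stream's loop variable after the loop sees an unbound name. A minimal, self-contained sketch of that failure mode and of the fix pattern follows; it is illustrative only and is not the actual llama_index callbacks code.

# Minimal sketch of the failure mode this commit fixes (illustrative only).

def empty_stream():
    # A stream_chat implementation that never yields a chunk.
    yield from []


def end_event_before_fix():
    for x in empty_stream():
        pass  # normally each chunk would be dispatched here
    # With zero iterations, `x` was never assigned, so this raises
    # UnboundLocalError instead of producing a well-formed end event.
    return {"response": x}


def end_event_after_fix():
    last_response = None
    for x in empty_stream():
        last_response = x
    # Safe: falls back to None when the stream produced nothing.
    return {"response": last_response}


if __name__ == "__main__":
    print(end_event_after_fix())  # {'response': None}
    try:
        end_event_before_fix()
    except UnboundLocalError as err:
        print(f"pre-fix behaviour: {err}")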

llama-index-core/llama_index/core/instrumentation/events/llm.py (+3 -4)

@@ -1,5 +1,4 @@
 from typing import Any, List, Optional
-
 from llama_index.core.bridge.pydantic import BaseModel
 from llama_index.core.base.llms.types import (
     ChatMessage,
@@ -138,7 +137,7 @@ class LLMChatInProgressEvent(BaseEvent):

     Args:
         messages (List[ChatMessage]): List of chat messages.
-        response (ChatResponse): Chat response currently beiung streamed.
+        response (ChatResponse): Chat response currently being streamed.
     """

     messages: List[ChatMessage]
@@ -155,11 +154,11 @@ class LLMChatEndEvent(BaseEvent):

     Args:
         messages (List[ChatMessage]): List of chat messages.
-        response (ChatResponse): Chat response.
+        response (Optional[ChatResponse]): Last chat response.
     """

     messages: List[ChatMessage]
-    response: ChatResponse
+    response: Optional[ChatResponse]

     @classmethod
     def class_name(cls):
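Because LLMChatEndEvent.response is now Optional[ChatResponse], downstream consumers should guard against None before reading the message. A hedged sketch of such a guard; the on_chat_end function is an illustrative assumption and is not part of this commit.

# Consumer-side guard for the now-Optional response field (illustrative).
from typing import Optional

from llama_index.core.base.llms.types import ChatResponse
from llama_index.core.instrumentation.events.llm import LLMChatEndEvent


def on_chat_end(event: LLMChatEndEvent) -> str:
    # response may now be None when the stream never yielded a chunk.
    response: Optional[ChatResponse] = event.response
    if response is None:
        return "<stream produced no chunks>"
    return response.message.content or ""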

llama-index-core/llama_index/core/llms/callbacks.py (+2 -2)

@@ -97,7 +97,7 @@ async def wrapped_gen() -> ChatResponseAsyncGen:
             dispatcher.event(
                 LLMChatEndEvent(
                     messages=messages,
-                    response=x,
+                    response=last_response,
                     span_id=span_id,
                 )
             )
@@ -173,7 +173,7 @@ def wrapped_gen() -> ChatResponseGen:
             dispatcher.event(
                 LLMChatEndEvent(
                     messages=messages,
-                    response=x,
+                    response=last_response,
                     span_id=span_id,
                 )
             )
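The hunks above only swap response=x for response=last_response; the surrounding wrapper (not shown in the diff) is assumed to track last_response as chunks are yielded. A simplified sketch of that pattern, with wrap_stream and emit_end_event as illustrative stand-ins for the real wrapped generator and dispatcher call:

# Simplified, hedged sketch of the generator-wrapping pattern (illustrative).
from typing import Generator, Optional

from llama_index.core.base.llms.types import ChatResponse


def wrap_stream(
    inner: Generator[ChatResponse, None, None],
) -> Generator[ChatResponse, None, None]:
    last_response: Optional[ChatResponse] = None
    for chunk in inner:
        yield chunk
        last_response = chunk
    # After exhaustion, report the final chunk (or None for an empty stream)
    # rather than the loop variable, which would be unbound for zero chunks.
    emit_end_event(last_response)


def emit_end_event(response: Optional[ChatResponse]) -> None:
    # Stand-in for dispatcher.event(LLMChatEndEvent(...)); illustrative only.
    print("stream finished; last response:", response)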

llama-index-core/llama_index/core/llms/mock.py (+10 -2)

@@ -1,13 +1,13 @@
 from typing import Any, Callable, Optional, Sequence
-
 from llama_index.core.base.llms.types import (
     ChatMessage,
+    ChatResponseGen,
     CompletionResponse,
     CompletionResponseGen,
     LLMMetadata,
 )
 from llama_index.core.callbacks import CallbackManager
-from llama_index.core.llms.callbacks import llm_completion_callback
+from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
 from llama_index.core.llms.custom import CustomLLM
 from llama_index.core.types import PydanticProgramMode

@@ -76,3 +76,11 @@ def gen_response(max_tokens: int) -> CompletionResponseGen:
                 )

         return gen_response(self.max_tokens) if self.max_tokens else gen_prompt()
+
+
+class MockLLMWithNonyieldingChatStream(MockLLM):
+    @llm_chat_callback()
+    def stream_chat(
+        self, messages: Sequence[ChatMessage], **kwargs: Any
+    ) -> ChatResponseGen:
+        yield from []
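The yield from [] body makes stream_chat a generator function that completes without producing a single chunk, which is exactly the edge case the fix targets. A quick, hedged usage sketch mirroring the new tests below (assuming llama-index-core with this commit installed):

# Usage sketch: exercising the non-yielding mock directly (illustrative).
from llama_index.core.base.llms.types import ChatMessage
from llama_index.core.llms.mock import MockLLMWithNonyieldingChatStream

llm = MockLLMWithNonyieldingChatStream()
chunks = list(llm.stream_chat([ChatMessage(role="user", content="hello")]))
# With this commit, draining the empty stream completes cleanly and the
# end-of-chat event carries response=None instead of raising an error.
assert chunks == []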

llama-index-core/tests/llms/test_callbacks.py (+26)

@@ -1,6 +1,13 @@
 import pytest
+from llama_index.core.base.llms.types import ChatMessage
 from llama_index.core.llms.llm import LLM
 from llama_index.core.llms.mock import MockLLM
+from llama_index.core.llms.mock import MockLLMWithNonyieldingChatStream
+
+
+@pytest.fixture()
+def nonyielding_llm() -> LLM:
+    return MockLLMWithNonyieldingChatStream()


 @pytest.fixture()
@@ -13,6 +20,25 @@ def prompt() -> str:
     return "test prompt"


+def test_llm_stream_chat_handles_nonyielding_stream(
+    nonyielding_llm: LLM, prompt: str
+) -> None:
+    response = nonyielding_llm.stream_chat([ChatMessage(role="user", content=prompt)])
+    for _ in response:
+        pass
+
+
+@pytest.mark.asyncio()
+async def test_llm_astream_chat_handles_nonyielding_stream(
+    nonyielding_llm: LLM, prompt: str
+) -> None:
+    response = await nonyielding_llm.astream_chat(
+        [ChatMessage(role="user", content=prompt)]
+    )
+    async for _ in response:
+        pass
+
+
 def test_llm_complete_prompt_arg(llm: LLM, prompt: str) -> None:
     res = llm.complete(prompt)
     expected_res_text = prompt
