Commit

Fix LLM special token issue
Signed-off-by: lvliang-intel <[email protected]>
lvliang-intel committed Nov 13, 2024
1 parent 9a50131 commit c885c48
Showing 3 changed files with 14 additions and 1 deletion.
9 changes: 8 additions & 1 deletion comps/llms/text-generation/tgi/llm.py
@@ -93,6 +93,8 @@ async def stream_generator():
         chat_response = ""
         async for text in text_generation:
             stream_gen_time.append(time.time() - start)
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
@@ -143,6 +145,8 @@ async def stream_generator():
         chat_response = ""
         async for text in text_generation:
             stream_gen_time.append(time.time() - start)
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
@@ -252,7 +256,10 @@ def stream_generator():
         for c in chat_completion:
             if logflag:
                 logger.info(c)
-            yield f"data: {c.model_dump_json()}\n\n"
+            chunk = c.model_dump_json()
+            for token in ["<|im_end|>", "<|endoftext|>"]:
+                chunk = chunk.replace(token, "")
+            yield f"data: {chunk}\n\n"
         yield "data: [DONE]\n\n"

     return StreamingResponse(stream_generator(), media_type="text/event-stream")
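
The TGI change drops streamed chunks that consist solely of a chat-template stop token before they are appended to the response. Below is a minimal, runnable sketch of that pattern under stated assumptions; fake_token_stream, SPECIAL_TOKENS, and main are illustrative names, not from this repository.

import asyncio

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

async def fake_token_stream():
    # Stand-in for the TGI text_generation async iterator (hypothetical).
    for chunk in ["Hello", ", ", "world", "<|endoftext|>"]:
        yield chunk

async def stream_generator():
    chat_response = ""
    async for text in fake_token_stream():
        # Drop chunks that are exactly a stop token, mirroring the check
        # added in this commit.
        if text in SPECIAL_TOKENS:
            text = ""
        chat_response += text
        # Skip emitting empty chunks (a simplification for this sketch).
        if text:
            yield f"data: {text}\n\n"
    yield "data: [DONE]\n\n"

async def main():
    async for event in stream_generator():
        print(event, end="")

if __name__ == "__main__":
    asyncio.run(main())
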
4 changes: 4 additions & 0 deletions comps/llms/text-generation/vllm/langchain/llm.py
@@ -90,6 +90,8 @@ async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, Searche
     async def stream_generator():
         chat_response = ""
         async for text in llm.astream(new_input.query, **parameters):
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
@@ -141,6 +143,8 @@ async def stream_generator():
     async def stream_generator():
         chat_response = ""
         async for text in llm.astream(prompt, **parameters):
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
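
The OpenAI-compatible streaming path in tgi/llm.py above takes a different approach: it scrubs the serialized chunk with str.replace, so a stop token embedded inside a longer delta is also removed. A small sketch of that approach, assuming a hypothetical chunk payload; scrub_sse_chunk and the sample JSON are illustrative.

import json

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

def scrub_sse_chunk(chunk_json: str) -> str:
    # Remove stop tokens anywhere in a serialized streaming chunk,
    # as done for the OpenAI-compatible path in tgi/llm.py above.
    for token in SPECIAL_TOKENS:
        chunk_json = chunk_json.replace(token, "")
    return chunk_json

# Hypothetical chunk resembling an OpenAI-style streaming delta.
raw = json.dumps({"choices": [{"delta": {"content": "Goodbye<|im_end|>"}}]})
print(f"data: {scrub_sse_chunk(raw)}\n\n")
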
2 changes: 2 additions & 0 deletions comps/llms/text-generation/vllm/llama_index/llm.py
@@ -58,6 +58,8 @@ async def llm_generate(input: LLMParamsDoc):

     async def stream_generator():
         async for text in llm.astream_complete(input.query):
+            if text.text in ["<|im_end|>", "<|endoftext|>"]:
+                text.text = ""
             output = text.text
             yield f"data: {output}\n\n"
             if logflag:
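
The llama_index path yields response objects rather than plain strings, so the filter blanks the .text attribute before it is read. A self-contained sketch under that assumption, using a dataclass stand-in for the llama_index response type; FakeCompletionResponse and fake_astream_complete are illustrative, not real llama_index APIs.

import asyncio
from dataclasses import dataclass

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

@dataclass
class FakeCompletionResponse:
    # Minimal stand-in for the response objects yielded by
    # llm.astream_complete(); only the .text attribute is modeled here.
    text: str

async def fake_astream_complete():
    for piece in ["Hi", " there", "<|im_end|>"]:
        yield FakeCompletionResponse(text=piece)

async def stream_generator():
    async for resp in fake_astream_complete():
        # Blank out chunks whose text is exactly a stop token, as in the
        # llama_index hunk above, then emit whatever remains.
        if resp.text in SPECIAL_TOKENS:
            resp.text = ""
        yield f"data: {resp.text}\n\n"
    yield "data: [DONE]\n\n"

async def main():
    async for event in stream_generator():
        print(event, end="")

if __name__ == "__main__":
    asyncio.run(main())
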
