Commit

Fix LLM special token issue
Signed-off-by: lvliang-intel <[email protected]>
lvliang-intel committed Nov 13, 2024
1 parent 9a50131 commit c885c48
Showing 3 changed files with 14 additions and 1 deletion.
9 changes: 8 additions & 1 deletion comps/llms/text-generation/tgi/llm.py
@@ -93,6 +93,8 @@ async def stream_generator():
         chat_response = ""
         async for text in text_generation:
             stream_gen_time.append(time.time() - start)
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
@@ -143,6 +145,8 @@ async def stream_generator():
         chat_response = ""
         async for text in text_generation:
             stream_gen_time.append(time.time() - start)
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
@@ -252,7 +256,10 @@ def stream_generator():
         for c in chat_completion:
             if logflag:
                 logger.info(c)
-            yield f"data: {c.model_dump_json()}\n\n"
+            chunk = c.model_dump_json()
+            for token in ["<|im_end|>", "<|endoftext|>"]:
+                chunk = chunk.replace(token, "")
+            yield f"data: {chunk}\n\n"
         yield "data: [DONE]\n\n"

     return StreamingResponse(stream_generator(), media_type="text/event-stream")
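
The TGI change drops streamed chunks that consist solely of a chat-template stop token before they are appended to the response. Below is a minimal, runnable sketch of that pattern under stated assumptions; fake_token_stream, SPECIAL_TOKENS, and main are illustrative names, not from this repository.

import asyncio

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

async def fake_token_stream():
    # Stand-in for the TGI text_generation async iterator (hypothetical).
    for chunk in ["Hello", ", ", "world", "<|endoftext|>"]:
        yield chunk

async def stream_generator():
    chat_response = ""
    async for text in fake_token_stream():
        # Drop chunks that are exactly a stop token, mirroring the check
        # added in this commit.
        if text in SPECIAL_TOKENS:
            text = ""
        chat_response += text
        # Skip emitting empty chunks (a simplification for this sketch).
        if text:
            yield f"data: {text}\n\n"
    yield "data: [DONE]\n\n"

async def main():
    async for event in stream_generator():
        print(event, end="")

if __name__ == "__main__":
    asyncio.run(main())
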
4 changes: 4 additions & 0 deletions comps/llms/text-generation/vllm/langchain/llm.py
@@ -90,6 +90,8 @@ async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, Searche
     async def stream_generator():
         chat_response = ""
         async for text in llm.astream(new_input.query, **parameters):
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
@@ -141,6 +143,8 @@ async def stream_generator():
     async def stream_generator():
         chat_response = ""
         async for text in llm.astream(prompt, **parameters):
+            if text in ["<|im_end|>", "<|endoftext|>"]:
+                text = ""
             chat_response += text
             chunk_repr = repr(text.encode("utf-8"))
             if logflag:
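
The OpenAI-compatible streaming path in tgi/llm.py above takes a different approach: it scrubs the serialized chunk with str.replace, so a stop token embedded inside a longer delta is also removed. A small sketch of that approach, assuming a hypothetical chunk payload; scrub_sse_chunk and the sample JSON are illustrative.

import json

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

def scrub_sse_chunk(chunk_json: str) -> str:
    # Remove stop tokens anywhere in a serialized streaming chunk,
    # as done for the OpenAI-compatible path in tgi/llm.py above.
    for token in SPECIAL_TOKENS:
        chunk_json = chunk_json.replace(token, "")
    return chunk_json

# Hypothetical chunk resembling an OpenAI-style streaming delta.
raw = json.dumps({"choices": [{"delta": {"content": "Goodbye<|im_end|>"}}]})
print(f"data: {scrub_sse_chunk(raw)}\n\n")
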
2 changes: 2 additions & 0 deletions comps/llms/text-generation/vllm/llama_index/llm.py
@@ -58,6 +58,8 @@ async def llm_generate(input: LLMParamsDoc):

     async def stream_generator():
         async for text in llm.astream_complete(input.query):
+            if text.text in ["<|im_end|>", "<|endoftext|>"]:
+                text.text = ""
             output = text.text
             yield f"data: {output}\n\n"
             if logflag:
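
The llama_index path yields response objects rather than plain strings, so the filter blanks the .text attribute before it is read. A self-contained sketch under that assumption, using a dataclass stand-in for the llama_index response type; FakeCompletionResponse and fake_astream_complete are illustrative, not real llama_index APIs.

import asyncio
from dataclasses import dataclass

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

@dataclass
class FakeCompletionResponse:
    # Minimal stand-in for the response objects yielded by
    # llm.astream_complete(); only the .text attribute is modeled here.
    text: str

async def fake_astream_complete():
    for piece in ["Hi", " there", "<|im_end|>"]:
        yield FakeCompletionResponse(text=piece)

async def stream_generator():
    async for resp in fake_astream_complete():
        # Blank out chunks whose text is exactly a stop token, as in the
        # llama_index hunk above, then emit whatever remains.
        if resp.text in SPECIAL_TOKENS:
            resp.text = ""
        yield f"data: {resp.text}\n\n"
    yield "data: [DONE]\n\n"

async def main():
    async for event in stream_generator():
        print(event, end="")

if __name__ == "__main__":
    asyncio.run(main())
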
