# ai_chat.py
import json
from typing import Generator

from llama_cpp import Llama

from knowledge_store import MarqoKnowledgeStore

# Initialize the Llama model.
LLM = Llama(
    model_path="models/8B/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
    n_ctx=4096,  # context window size (in tokens)
    n_gpu_layers=1,  # number of layers to offload to the GPU, if available
)


def answer(
    user_input: str, mks: MarqoKnowledgeStore, limit: int = 5
) -> Generator[bytes, None, None]:
    """Generate an answer to a query using the LLM and a Marqo knowledge store.

    Args:
        user_input (str): The user's query.
        mks (MarqoKnowledgeStore): Knowledge store to query for supporting context.
        limit (int, optional): Maximum number of context entries to retrieve.
            Defaults to 5.

    Yields:
        bytes: UTF-8 encoded chunks of the LLM's response.
    """
    # Retrieve the most relevant passages from Marqo to ground the answer.
    context = mks.query_for_content(user_input, "text", limit)
    sources = "\n".join(f"[{i + 1}] {source}" for i, source in enumerate(context))

    print(f"QUERY: {user_input}")
    print("Context from Marqo:", json.dumps(context, indent=4))

    # Build a simple retrieval-augmented prompt: numbered sources, then the question.
    prompt = f"""
{sources}

Q: {user_input}
A:"""

    # Stream the completion, yielding each chunk as it arrives. Stopping at "Q:"
    # keeps the model from inventing follow-up questions.
    for resp in LLM(prompt, max_tokens=512, stop=["Q:"], stream=True):
        yield resp["choices"][0]["text"].encode("utf-8")
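

# Minimal usage sketch. This assumes MarqoKnowledgeStore can be constructed
# with no arguments, which is a guess; the real constructor in
# knowledge_store.py may require an index name or Marqo client settings.
if __name__ == "__main__":
    mks = MarqoKnowledgeStore()  # assumed default constructor (hypothetical)
    for chunk in answer("What does the knowledge store contain?", mks):
        # answer() yields UTF-8 bytes, so decode before printing.
        print(chunk.decode("utf-8"), end="", flush=True)
    print()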