llm_engines.py
# LLM model classes that give completions using an OpenAI-like API.
import os

import dotenv
import requests
from urllib3.exceptions import InsecureRequestWarning

# Suppress the insecure-request warnings from urllib3 (requests below are
# made with verify=False).
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

# If a .env file is present, load environment variables from it (API keys etc.).
if os.path.exists(".env"):
    dotenv.load_dotenv(dotenv.find_dotenv())

# Default to an empty string so the header below can be built even when the
# key is unset; OpenAI requests will then simply be rejected by the API.
open_ai_key = os.getenv("OPENAI_API_KEY", "")

# Endpoints of a local OpenAI-compatible server.
url = "http://127.0.0.1:1200/v1/chat/completions"
model_info_url = "http://127.0.0.1:1200/v1/internal/model/info"
load_model_url = "http://127.0.0.1:1200/v1/internal/model/load"

headers = {
    "Content-Type": "application/json"
}
openai_headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + open_ai_key
}

# First, a dummy LLM class that just gives canned responses; it also carries
# the history / system-prompt bookkeeping shared by the real classes below.
class LLM:
    instance = None

    def __init__(self, history=None):
        self.headers = headers
        # Avoid the mutable-default-argument pitfall: each instance gets its
        # own history list.
        self.history = history if history is not None else []
        # Treat the first history entry as the system prompt if its role is "system".
        if len(self.history) > 0:
            self.system_prompt = self.history[0]["content"] if self.history[0]["role"] == "system" else ""
        else:
            self.system_prompt = ""

    def set_system_prompt(self, system_prompt):
        self.system_prompt = system_prompt
        if len(self.history) > 0 and self.history[0]["role"] == "system":
            self.history[0]["content"] = system_prompt
        else:
            self.history.insert(0, {"role": "system", "content": system_prompt})

    def get_system_prompt(self):
        return self.system_prompt

    def set_history(self, history):
        self.history = history
        # Update the cached system prompt from the new history.
        if len(self.history) > 0:
            self.system_prompt = self.history[0]["content"] if self.history[0]["role"] == "system" else ""
        else:
            self.system_prompt = ""

    def get_history(self):
        return self.history

    def generate_response(self, user_prompt):
        # Append the user prompt to the history.
        self.history.append({"role": "user", "content": user_prompt})
        # Generate a dummy response.
        response = "dummy response"
        # Append the response to the history, then reset the history so each
        # call is an independent exchange.
        self.history.append({"role": "assistant", "content": response})
        self.history = []
        return response
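
# A minimal usage sketch (illustration only, not part of the original
# module): the system prompt is pinned at index 0 of the history, and
# generate_response() resets the history after each exchange.
#
#   llm = LLM()
#   llm.set_system_prompt("You are a helpful assistant.")
#   llm.get_history()            # [{'role': 'system', 'content': ...}]
#   llm.generate_response("Hi")  # -> "dummy response"
#   llm.get_history()            # [] (cleared after the exchange)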

# Now a real LLM class that gives completions using a local OpenAI-like API.
class LLMApi(LLM):
    def __init__(self, history=None, model="turboderp_Mixtral-8x7B-instruct-exl2_5.0bpw"):
        super().__init__(history)
        self.url = url
        self.headers = headers
        self.model = model

    def get_current_model(self):
        # Ask the server which model is currently loaded.
        response = requests.get(model_info_url, headers=self.headers, verify=False)
        return response.json()["model_name"]

    def generate_response(self, user_prompt):
        assistant_message = ""
        # Make sure the requested model is loaded before querying.
        if self.get_current_model() != self.model:
            data = {
                "model_name": self.model
            }
            requests.post(load_model_url, headers=self.headers, json=data, verify=False)
        # Append the user prompt to the history.
        self.history.append({"role": "user", "content": user_prompt})
        # Query the API for a response.
        data = {
            "mode": "instruct",
            "messages": self.history,
        }
        response = requests.post(self.url, headers=self.headers, json=data, verify=False)
        assistant_message = response.json()['choices'][0]['message']['content']
        self.history.append({"role": "assistant", "content": assistant_message})
        # Keep only the system prompt (if any); otherwise clear the history.
        if self.system_prompt != "":
            self.history = self.history[:1]
        else:
            self.history = []
        return assistant_message
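
# For reference, a sketch of the request/response shapes LLMApi relies on.
# The /v1/internal/* endpoints and the "mode": "instruct" payload appear to
# match text-generation-webui's OpenAI-compatible API; treat the exact field
# names as assumptions inherited from this module rather than a spec:
#
#   POST /v1/internal/model/load   {"model_name": "<model>"}
#   POST /v1/chat/completions      {"mode": "instruct", "messages": [...]}
#   response.json()["choices"][0]["message"]["content"]  -> assistant text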

class ChatgptLLM(LLM):
    def __init__(self, history=None):
        super().__init__(history)
        self.url = "https://api.openai.com/v1/chat/completions"
        self.headers = openai_headers
        self.model = "gpt-3.5-turbo"
        self.temperature = None

    def generate_response(self, user_prompt):
        assistant_message = ""
        response = None
        try:
            self.history.append({"role": "user", "content": user_prompt})
            # Query the OpenAI API for a response.
            data = {
                "model": self.model,
                "messages": self.history
            }
            if self.temperature is not None:
                data["temperature"] = self.temperature
            response = requests.post(self.url, headers=self.headers, json=data, verify=False)
            assistant_message = response.json()['choices'][0]['message']['content']
            self.history.append({"role": "assistant", "content": assistant_message})
        except Exception as e:
            # Print the status code and error body if a response was received;
            # otherwise the request itself failed, so print the exception.
            if response is not None:
                print(response.status_code)
                print(response.json())
            else:
                print(e)
            return assistant_message
        # Keep only the system prompt (if any); otherwise clear the history.
        if self.system_prompt != "":
            self.history = self.history[:1]
        else:
            self.history = []
        return assistant_message
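
# A minimal, hedged demo (not part of the original module). The dummy engine
# always runs; the API-backed engines are commented out because they assume a
# local server on 127.0.0.1:1200 and an OPENAI_API_KEY, respectively.
if __name__ == "__main__":
    dummy = LLM()
    dummy.set_system_prompt("You are a helpful assistant.")
    print(dummy.generate_response("Hello!"))  # -> "dummy response"

    # local = LLMApi()  # uses the default Mixtral model name above
    # print(local.generate_response("Hello!"))

    # chatgpt = ChatgptLLM()
    # chatgpt.temperature = 0.7
    # print(chatgpt.generate_response("Hello!"))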