flask_api.py
import json
import os
import queue
import time

import flask
import numpy as np
from flask import Flask, Response, jsonify, request, send_from_directory

import prompt_creation_helpers
from call_model import Call
from chatgpt import ChatGPT
from chunk_handler import ChunkHandler
from conversation_handler import ConversationHandler
from db_handler import DB_Handler
from src.backend.logging_util import def_logger, prepare_log_file
from TtS import TextToSpeech
from voice_handler import VoiceHandler
from wav_handler import get_empty_wave_bytes, get_wave_header, split_wave_bytes_into_chunks
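
# Flask backend for the automated appointment-call assistant: it synthesizes the
# caller's speech with the TTS model, streams it to the frontend via /stream_audio,
# receives the callee's raw audio on /recieve_audio, transcribes it, and lets
# ChatGPT generate the next reply within the requested appointment time frame.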
logger = def_logger.getChild(__name__)
app = Flask(__name__, static_folder="./../frontend")
generate_debug_file = True
tts = TextToSpeech()
voice_handler = VoiceHandler()
chatgpt = ChatGPT()


def _init_conv():
    """Reset all per-conversation state stored on the Flask app object."""
    app.chunk_handler = ChunkHandler()
    app.conv_handler = ConversationHandler("conversation.md")
    app.db_handler = DB_Handler()
    app.listening_audio = b""
    app.count_to_write = 0
    app.data_queue = queue.Queue()
    app.writing_data = False
    app.conv_started = False
    app.opener_text = ""


def _get_listening_audio() -> bytes:
    # Prepend a WAV header to the accumulated raw audio so it can be transcribed
    # or written to disk as a standalone file.
    return get_wave_header(1, 32, 16000, len(app.listening_audio)) + app.listening_audio


# Initialise the per-conversation state once at import time.
_init_conv()


def _write_to_queue(wave_bytes):
    # Split the audio into chunks and enqueue them for the /stream_audio generator.
    for byte_chunk in split_wave_bytes_into_chunks(wave_bytes):
        app.data_queue.put(byte_chunk)


def _generate_audio():
    """Generator that yields queued audio chunks to the /stream_audio response."""
    print("generate_audio")
    # Give the producer a moment to enqueue data before deciding the stream is over.
    if not app.writing_data and app.data_queue.empty():
        time.sleep(1)
    while app.writing_data or not app.data_queue.empty():
        if not app.data_queue.empty():
            data = app.data_queue.get()
            yield data
        if app.writing_data and app.data_queue.empty():
            time.sleep(0.5)
            print("waiting for new audio data to stream...")
    print("finished gen audio")
@app.route("/reset_conv", methods=["POST"])
def reset_conv():
_init_conv()
response = {"message": "Reset performed successfully"}
return jsonify(response)
@app.route("/stream_audio")
def stream_audio():
return Response(_generate_audio(), mimetype="audio/x-wav")
@app.route("/calls", methods=["GET"])
def get_calls():
calls = app.db_handler.getAllCalls()
return [json.dumps(call.__dict__) for call in calls]
@app.route("/add_call", methods=["POST"])
def add_call():
print(request.headers)
call_json = request.get_json()
new_call = Call(**call_json)
app.db_handler.insertNewCall(new_call)
return "Call erfolgreich erstellt", 201
@app.route("/start_call", methods=["POST"])
def start_call():
app.writing_data = True
app.conv_started = False
app.chunk_handler.start_call()
data = request.json
# data = {'title': 'asdadasd', 'state': 1, 'receiverName': 'Deim', 'receiverPhonenr': 'dfdf', 'initiatorName': 'Hoffmann', 'possibleDatetimes': [{'selectedDate': '2023-05-20', 'selectedStartTime': '08:00', 'selectedEndTime': '12:00'}], 'result': None}
new_call = Call(**data)
app.db_handler.insertNewCall(new_call)
date_app_req = prompt_creation_helpers.date_to_string(
data["possibleDatetimes"][0]["selectedDate"]
)
start_time = data["possibleDatetimes"][0]["selectedStartTime"]
end_time = data["possibleDatetimes"][0]["selectedEndTime"]
date_text = prompt_creation_helpers.datum_text(
data["possibleDatetimes"][0]["selectedDate"]
)
start_time_text = prompt_creation_helpers.uhrzeit_text(start_time)
end_time_text = prompt_creation_helpers.uhrzeit_text(end_time)
receiver = data["receiverName"]
initiator = data["initiatorName"]
opener_text = f"Hallo, ich möchte gerne bei Doktor {receiver} einen Termin für {initiator} ausmachen. Haben Sie am {date_text} um {start_time_text} zeit?"
system_instruction = f"""
Act as participant in a conversation in german language between you and appointment manager.
The appointment managers responses will be delimited with {chatgpt.user_delimiter} characters.
Any previous reponses of you are delimited with {chatgpt.assistant_delimiter} characters.
Your Role setting is: you want to make an appointment at doctor {receiver},
the for you possible time-frame is on the {date_app_req} from {start_time_text} to {end_time_text}.
Accept all appointment offers in between this time-frame without any further questions.
For instance, if your possible time-frame is from 2pm to 4pm, accept an offer at 2:30pm or any offer between 2pm and 4pm.
Decline every offer which is not within the given time-frame. Don't forget that time in german is written in the 24h format.
Continue the following conversation by one response of you, the caller. Do not write any repsonse of the appointment manager."""
print(system_instruction)
app.conv_handler.append_initiator_text(opener_text)
chatgpt.add_system_message(system_instruction)
chatgpt.add_assistant_message(opener_text)
audio_segment = tts.text_to_speech_numpy_pmc(opener_text)
# print(delta)
bytes = audio_segment.tobytes()
app.data_queue.put(get_wave_header())
_write_to_queue(bytes)
response = {"message": "Alright alright alright!"}
print("start call success")
return jsonify(response)
@app.route("/static/<path:filename>")
def return_client_files(filename: str):
return send_from_directory("./../frontend", filename)
@app.route("/")
def index():
"""Displays the index page accessible at '/'"""
print(app.instance_path)
return flask.send_file("./../frontend/index.html")
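

# State machine handled by /recieve_audio (as implemented below):
#   waiting_in_queue / start_opener_speaking -- nothing to do per chunk.
#   listening        -- accumulate the callee's audio and keep the output stream
#                       fed with empty chunks.
#   start_speaking   -- transcribe the accumulated audio, query ChatGPT, synthesize
#                       the reply and enqueue it for streaming.
#   speaking         -- the reply is still being streamed; nothing to do.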
@app.route("/recieve_audio", methods=["POST"])
def recieve_audio():
"""Displays the index page accessible at '/'"""
data = request.data
# data_with_head = get_wave_header(sample_rate=16000, len_bytes=len(data)) + data
data_np = np.frombuffer(data, dtype=np.int32)
# print("chunk size", data_np.shape)
data_processed, can_speak = app.chunk_handler.process_chunk(data_np)
print("state: " + app.chunk_handler.state_machine.state)
if app.chunk_handler.state_machine.state == "waiting_in_queue":
pass
# print("waiting in queue")
# app.chunk_handler.transition_to_wait() #TODO maybe remove in future
elif app.chunk_handler.state_machine.state == "start_opener_speaking":
# moved to /start_call for now ..
# app.chunk_handler.transition_to_wait()
# print("from start_opener_speaking to wait")
pass
elif app.chunk_handler.state_machine.state == "listening":
# we detected speech and now are listening to the other person
app.listening_audio = app.listening_audio + data
if generate_debug_file and app.count_to_write != -1:
app.count_to_write += 1
if app.count_to_write >= 3:
with open("listening_test.wav", "wb") as f:
f.write(_get_listening_audio())
print("wrote example")
app.count_to_write = -1
# while listening, send empty bytes
n_chunks = data_np.shape[0] // 2000
_write_to_queue(get_empty_wave_bytes(header=False, n_chunks=n_chunks))
elif app.chunk_handler.state_machine.state == "start_speaking":
transcript = voice_handler.handle_input_byte_string(_get_listening_audio())
app.listening_audio = b""
if transcript is None or transcript == "":
print("nothing to transcript")
else:
app.conv_handler.append_receiver_text(transcript)
print("****\n****transcripted: " + transcript)
print("**** user_answer: " + transcript)
gpt_answer = " "
for delta in chatgpt.get_response_by_delimiter(transcript, with_history=True):
res_delta = delta.replace(" a ", "").replace("#a #","").replace(chatgpt.assistant_delimiter, "").replace("#","")
audio_segment = tts.text_to_speech_numpy_pmc(res_delta)
gpt_answer += " " + res_delta
bytes = audio_segment.tobytes()
_write_to_queue(bytes)
# app.chunk_handler.transition_to_wait()
app.conv_handler.append_initiator_text(gpt_answer)
print("******** assistance answer ***** " + gpt_answer)
elif app.chunk_handler.state_machine.state == "speaking":
pass
#when we are still in speaking mode we don't have to do anything
response = jsonify("Alright alright alright!")
response.headers.add('Access-Control-Allow-Origin', '*')
return response


if __name__ == "__main__":
    prepare_log_file(
        log_file_path=os.environ.get("LOG_FILE_PATH", "./log_backend.log"),
        overwrite=True,
    )
    # app.run(host=os.environ.get("FLASK_HOST_IP", "localhost"))
    app.run(host="0.0.0.0", port=5000)
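
# Example client usage (sketch only, not executed). Assuming the service runs on
# http://localhost:5000, a frontend could start a call with the example payload
# shown in start_call() and then stream the synthesized audio back; the exact
# payload fields depend on the Call model.
#
#   import requests
#
#   payload = {
#       "title": "asdadasd",
#       "state": 1,
#       "receiverName": "Deim",
#       "receiverPhonenr": "dfdf",
#       "initiatorName": "Hoffmann",
#       "possibleDatetimes": [
#           {"selectedDate": "2023-05-20", "selectedStartTime": "08:00", "selectedEndTime": "12:00"}
#       ],
#       "result": None,
#   }
#   requests.post("http://localhost:5000/start_call", json=payload)
#   audio = requests.get("http://localhost:5000/stream_audio", stream=True)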