-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspeechaction.py
More file actions
129 lines (110 loc) · 4.72 KB
/
speechaction.py
File metadata and controls
129 lines (110 loc) · 4.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import json
import os
import queue
import threading
import time

import numpy as np
import openai
import panda_py
import sounddevice as sd
import vosk
from panda_py import libfranka

from llm_tamp import RobotController
# Read the API key from the environment so real credentials never have to be
# committed; the original placeholder is kept as the fallback when the
# OPENAI_API_KEY variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-openai-key")

# Queues for inter-thread communication:
#   audio_q   - utterance text produced by the microphone callback
#   text_q    - non-empty utterances waiting to be parsed by the LLM
#   command_q - parsed command objects waiting to be executed
audio_q = queue.Queue()
text_q = queue.Queue()
command_q = queue.Queue()

# Audio device ID for microphone (passed as ``device=`` to the input stream)
DEVICE_ID = 1

# Load Vosk model
model = vosk.Model(lang="en-us")
# Voice input thread
def listen_audio():
    """Stream microphone audio into Vosk and queue each recognized utterance.

    Opens a 16 kHz, single-channel, int16 raw input stream on ``DEVICE_ID``
    and feeds every audio block to a ``KaldiRecognizer``. Whenever the
    recognizer reports a completed utterance, its ``"text"`` field (or ``""``
    if absent) is put on ``audio_q``. Never returns.
    """
    recognizer = vosk.KaldiRecognizer(model, 16000)

    # The third argument is named ``time_info`` rather than ``time`` so the
    # ``time`` module is not shadowed inside the callback. The stream passes
    # these arguments positionally, so the rename is invisible to it.
    def callback(indata, frames, time_info, status):
        if status:
            # Surface stream status flags instead of silently ignoring them.
            print(f"Audio status: {status}")
        # The raw stream hands over a buffer object; give Vosk plain bytes.
        if recognizer.AcceptWaveform(bytes(indata)):
            result = json.loads(recognizer.Result())
            audio_q.put(result.get("text", ""))

    with sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16',
                           channels=1, callback=callback, device=DEVICE_ID):
        # All work happens in the callback; this loop only keeps the stream
        # context open.
        while True:
            time.sleep(0.1)
# Speech to text thread
def speech_to_text():
    """Relay recognized utterances from ``audio_q`` to ``text_q``.

    Blocks on ``audio_q`` forever; falsy items (such as empty strings) are
    dropped, everything else is forwarded unchanged.
    """
    while True:
        utterance = audio_q.get()
        if not utterance:
            continue
        text_q.put(utterance)
# LLM parsing thread
def llm_parse_text():
    """Convert each utterance from ``text_q`` into a command dict on ``command_q``.

    Every utterance is sent to the ``gpt-4o`` chat model with a system prompt
    asking for JSON with the keys ``task``, ``location`` and ``modifiers``.
    The reply is parsed as JSON; if that fails, the text between the first
    ``{`` and the last ``}`` is parsed instead, which covers replies wrapped
    in a Markdown code fence or surrounded by prose. Only JSON objects are
    enqueued. Any failure is printed and the loop continues with the next
    utterance. Never returns.
    """
    while True:
        user_input = text_q.get()
        try:
            # NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0
            # SDK interface - confirm the installed openai version.
            response = openai.ChatCompletion.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": "Extract structured command in JSON format with keys: task, location, modifiers."},
                    {"role": "user", "content": user_input}
                ]
            )
            content = response['choices'][0]['message']['content']
            try:
                parsed_json = json.loads(content)
            except json.JSONDecodeError:
                # Fall back to the outermost {...} span so fenced or
                # prose-wrapped replies are still usable.
                start = content.find("{")
                end = content.rfind("}")
                if start == -1 or end <= start:
                    raise
                parsed_json = json.loads(content[start:end + 1])
            if not isinstance(parsed_json, dict):
                # The consumer calls ``.get`` on the command, so reject
                # anything that is not a JSON object here.
                raise ValueError(
                    f"expected a JSON object, got {type(parsed_json).__name__}"
                )
            command_q.put(parsed_json)
        except Exception as e:
            print(f"LLM parsing error: {e}")
# Command execution thread
def execute_command(robot, object_poses, object_grasp_pose):
    """Execute parsed commands from ``command_q`` on the robot. Never returns.

    Each command is read with ``.get`` for the keys ``task``, ``location``
    and ``modifiers``. ``task`` and ``location`` are matched
    case-insensitively with surrounding whitespace ignored; missing or null
    fields are treated as empty. A ``modifiers`` value given as a single
    string is treated as a one-element list.

    * ``draw``: prepares with the ``'marker'`` orientation, then for
      ``top right`` / ``top left`` moves to the matching writing location
      and calls ``robot.draw`` / ``robot.draw_two``.
    * ``erase``: prepares with ``'up_eraser_align'``, then for ``top`` /
      ``bottom`` moves to the matching erasing location and erases.
    * ``move``: calls ``robot.move_gen`` once per modifier, in order.

    For draw and erase, the first modifier (default ``'direct'``) is passed
    to ``robot.move_to`` as ``modifier``. Unrecognized tasks and locations
    are reported on stdout. Any exception raised while handling a command is
    printed and the loop continues.

    Args:
        robot: Object providing ``prepare``, ``move_to``, ``draw``,
            ``draw_two``, ``erase`` and ``move_gen``.
        object_poses: Mapping with the keys ``'writing_loc_top_right'``,
            ``'writing_loc_top_left'``, ``'up_erasing_loc'`` and
            ``'bottom_erasing_loc'``.
        object_grasp_pose: Mapping with the keys ``'marker'``,
            ``'up_eraser_align'`` and ``'down_eraser_align'``; values are
            wrapped in ``np.array`` before being passed as ``ort``.
    """
    while True:
        try:
            cmd = command_q.get()
            # Commands originate from an LLM, so tolerate null values, stray
            # whitespace and inconsistent capitalisation.
            task = (cmd.get("task") or "").strip().lower()
            location = (cmd.get("location") or "").strip().lower()
            modifiers = cmd.get("modifiers") or []
            if isinstance(modifiers, str):
                # A bare string would otherwise be indexed and iterated one
                # character at a time.
                modifiers = [modifiers]

            if task == "draw":
                robot.prepare(task='draw', ort=np.array(object_grasp_pose['marker']))
                approach = modifiers[0] if modifiers else 'direct'
                if location == "top right":
                    robot.move_to(object_poses['writing_loc_top_right'], modifier=approach)
                    robot.draw(object_poses['writing_loc_top_right'])
                elif location == "top left":
                    robot.move_to(object_poses['writing_loc_top_left'], modifier=approach)
                    robot.draw_two(object_poses['writing_loc_top_left'])
                else:
                    print(f"Unknown draw location: {location!r}")
            elif task == "erase":
                robot.prepare(task='erase', ort=np.array(object_grasp_pose['up_eraser_align']))
                approach = modifiers[0] if modifiers else 'direct'
                if location == "top":
                    robot.move_to(object_poses['up_erasing_loc'], modifier=approach)
                    robot.erase(time=5)
                elif location == "bottom":
                    # NOTE(review): this is a second prepare, issued after
                    # the 'up_eraser_align' prepare above - confirm that the
                    # double prepare is intended.
                    robot.prepare(task='erase', ort=np.array(object_grasp_pose['down_eraser_align']))
                    robot.move_to(object_poses['bottom_erasing_loc'], modifier=approach)
                    robot.erase(time=5)
                else:
                    print(f"Unknown erase location: {location!r}")
            elif task == "move":
                for mod in modifiers:
                    robot.move_gen(mod)
            else:
                print(f"Unknown task: {task!r}")
        except Exception as e:
            print(f"Execution error: {e}")
# Main function
def main():
    """Connect to the robot and start the voice-command pipeline.

    Builds a ``RobotController`` for the arm and gripper at a fixed host
    address, calls ``move_to_study()`` on it, then starts four daemon
    threads (audio capture, utterance relay, LLM parsing, command execution)
    and idles in a sleep loop. Never returns.
    """
    hostname = '192.168.1.11'
    robot = RobotController(
        panda=panda_py.Panda(hostname),
        gripper=libfranka.Gripper(hostname),
    )
    robot.move_to_study()

    # Positions looked up by the command executor.
    object_poses = dict(
        writing_loc_top_right=[0.711947263682881, 0.12706318489784938, 0.5052814249098799],
        writing_loc_top_left=[0.711947263682881, 0.24706318489784938, 0.5052814249098799],
        bottom_erasing_loc=[0.7421822003037818, 0.12347043377115799, 0.24469861149105856],
        up_erasing_loc=[0.7668475668639307, 0.11783189894165687, 0.5541111324550737],
    )
    # Orientations passed as ``ort`` when preparing a tool.
    object_grasp_pose = dict(
        down_eraser_align=[0.08793943573778525, 0.7047919107466506, -0.07203306288116887, 0.7002472758907934],
        up_eraser_align=[0.01796533906041393, 0.7122159863223589, -0.03631220920003831, 0.7007903040869043],
        marker=[0.08229516371960562, 0.7730591118216822, -0.06885761869359781, 0.6251925586127405],
    )

    # Pipeline stages in start-up order, each with its positional arguments.
    stages = (
        (listen_audio, ()),
        (speech_to_text, ()),
        (llm_parse_text, ()),
        (execute_command, (robot, object_poses, object_grasp_pose)),
    )
    for stage, stage_args in stages:
        worker = threading.Thread(target=stage, args=stage_args, daemon=True)
        worker.start()

    # Idle forever in the main thread.
    while True:
        time.sleep(1)
# Start the pipeline only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()