RealtimeTranscriber stops transcribing after 20-25 words #107

Open
yohankoshydt opened this issue Jan 24, 2025 · 0 comments

RealtimeTranscriber abruptly stops transcribing after listening to 1.5-2 minutes of speech.
Additionally, setting the end_utterance_silence_threshold as shown below seems to have no effect. The expected behaviour of finalising the utterance only after the given silence threshold is not achieved.

```python
import assemblyai as aai
from elevenlabs import stream
from elevenlabs.client import ElevenLabs
from openai import OpenAI


class AI_Assistant:
    def __init__(self):
        aai.settings.api_key = "xxx"
        self.openai_client = OpenAI(api_key="xxx")
        self.elevenlabs_api_key = "xxx"

        self.elevenlabs_client = ElevenLabs(api_key=self.elevenlabs_api_key)

        self.transcriber = None

        # Conversation history, seeded with the system prompt
        self.interaction = [
            {"role": "system", "content": "You are an interviewer conducting an interview for the role of a Software Developer. Please continue the conversation from the point where it is. Do not answer the questions yourself and have a professional tone, less friendly."},
        ]

    def stop_transcription(self):
        if self.transcriber:
            self.transcriber.close()
            self.transcriber = None

    def on_open(self, session_opened: aai.RealtimeSessionOpened):
        print("Session ID:", session_opened.session_id)

    def on_error(self, error: aai.RealtimeError):
        print("An error occurred:", error)

    def on_close(self):
        print("Closing Session")

    def on_data(self, transcript: aai.RealtimeTranscript):
        if not transcript.text:
            return
        if isinstance(transcript, aai.RealtimeFinalTranscript):
            self.generate_ai_response(transcript)
        else:
            print(transcript.text, end="\r")

    def start_transcription(self):
        self.transcriber = aai.RealtimeTranscriber(
            sample_rate=16000,
            on_data=self.on_data,
            on_error=self.on_error,
            on_open=self.on_open,
            on_close=self.on_close,
            end_utterance_silence_threshold=4000,
        )

        self.transcriber.configure_end_utterance_silence_threshold(4000)

        self.transcriber.connect()
        microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
        self.transcriber.stream(microphone_stream)

    def generate_ai_response(self, transcript):
        # Pause transcription while the assistant formulates and speaks its reply
        self.stop_transcription()

        self.interaction.append({"role": "user", "content": transcript.text})
        print(f"\nTourist: {transcript.text}", end="\r\n")

        response = self.openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=self.interaction
        )

        ai_response = response.choices[0].message.content

        self.generate_audio(ai_response)

        # Resume transcription once the spoken reply has finished
        self.start_transcription()
        print("\nReal-time transcription: ", end="\r\n")

    def generate_audio(self, text):
        self.interaction.append({"role": "assistant", "content": text})
        print(f"\nAI Guide: {text}")

        audio_stream = self.elevenlabs_client.generate(
            text=text,
            voice="Rachel",
            stream=True
        )

        stream(audio_stream)


greeting = "Hello Yohan"
ai_assistant = AI_Assistant()
ai_assistant.generate_audio(greeting)
ai_assistant.start_transcription()
```
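
For reference, I also tried calling configure_end_utterance_silence_threshold after connect() instead of before it, on the assumption that the SDK sends this setting over the websocket and it only applies to an already-open session. This is an assumption on my part, not something I have confirmed from the SDK source. A minimal sketch of that ordering:

```python
import assemblyai as aai

aai.settings.api_key = "xxx"

transcriber = aai.RealtimeTranscriber(
    sample_rate=16000,
    on_data=lambda t: print(t.text) if t.text else None,
    on_error=lambda e: print("Error:", e),
)

# Connect first, then configure the threshold (in milliseconds),
# assuming the setting is only applied to an open session.
transcriber.connect()
transcriber.configure_end_utterance_silence_threshold(4000)

microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
transcriber.stream(microphone_stream)
```

Even with this ordering, the transcriber still stops after roughly the same amount of speech.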
