Skip to content

Commit 6a4c090

Browse files
authored
Zg/tts python livekit example (#173)
1 parent 6215aef commit 6a4c090

File tree

9 files changed

+1717
-0
lines changed

9 files changed

+1717
-0
lines changed

tts/tts-python-livekit/.env.example

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
HUME_API_KEY=
2+
ANTHROPIC_API_KEY=
3+
GROQ_API_KEY=
4+
5+
LIVEKIT_URL=
6+
LIVEKIT_API_KEY=
7+
LIVEKIT_API_SECRET=

tts/tts-python-livekit/.gitignore

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Python internals
2+
__pycache__/
3+
*.py[cod]
4+
*.pyd
5+
*.pyo
6+
*.so
7+
*.dylib
8+
9+
# Virtual-environment directories
10+
.venv/
11+
venv/
12+
env/
13+
14+
# Secrets & local config
15+
.env
16+
17+
# Build / packaging artefacts
18+
build/
19+
dist/
20+
*.egg-info/
21+
*.egg
22+
*.whl
23+
24+
# Logs & runtime files
25+
*.log
26+
logs/
27+
28+
# OS-specific noise
29+
.DS_Store
30+
Thumbs.db
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.11

tts/tts-python-livekit/README.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
<div align="center">
2+
<img src="https://storage.googleapis.com/hume-public-logos/hume/hume-banner.png">
3+
<h1>Text-to-Speech | Python LiveKit Agents Example</h1>
4+
</div>
5+
6+
## Overview
7+
8+
This example demonstrates how to use the **Hume Python LiveKit plugin** to integrate:
9+
10+
1. **Speech-to-Text with Voice Activity Detection (VAD)** (Silero VAD + Groq Whisper)
11+
2. **A conversational LLM** (Anthropic Claude Haiku)
12+
3. **Low-latency Text-to-Speech** (Hume AI's streaming API for Octave)
13+
14+
…inside a LiveKit Agents worker that runs in **console mode** by default. No front-end required—just your terminal and microphone.
15+
16+
## Instructions
17+
18+
1. **Clone this examples repository**
19+
20+
```sh
21+
git clone https://github.com/humeai/hume-api-examples
22+
cd hume-api-examples/tts/tts-python-livekit
23+
```
24+
25+
2. **Set up the environment**
26+
27+
We recommend `uv` but you can adapt these commands to your preferred package manager.
28+
29+
```sh
30+
uv sync
31+
```
32+
33+
3. **Configure your API keys**
34+
35+
You’ll need accounts and credentials for:
36+
37+
- **Hume AI**: https://platform.hume.ai
38+
- **Anthropic**: https://console.anthropic.com
39+
- **Groq**: https://console.groq.com
40+
- **LiveKit**: https://livekit.com
41+
42+
Copy the example and fill in your credentials:
43+
44+
```sh
45+
cp .env.example .env
46+
```
47+
48+
Edit `.env` to include:
49+
50+
```dotenv
51+
HUME_API_KEY=… # from Hume AI
52+
GROQ_API_KEY=… # from Groq console
53+
ANTHROPIC_API_KEY=… # from Anthropic console
54+
LIVEKIT_URL=… # your LiveKit deployment URL
55+
LIVEKIT_API_KEY=… # your LiveKit API key
56+
LIVEKIT_API_SECRET=… # your LiveKit API secret
57+
```
58+
59+
4. **Run the demo**
60+
61+
Start the console-based assistant and begin talking:
62+
63+
```sh
64+
uv run python main.py
65+
```
66+
67+
Speak into your mic and the assistant will respond.
68+
69+
> **Optional**: Tweak additional demo settings in `settings.py` (e.g. models, prompt, voice, VAD thresholds).

tts/tts-python-livekit/main.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Demo: LiveKit Agents with STT (Groq), LLM (Claude Haiku), and TTS (Hume).
4+
"""
5+
6+
import sys
7+
8+
# third-party
9+
from livekit.agents import Agent, AgentSession, JobContext, WorkerOptions, cli
10+
from livekit.agents.stt.stream_adapter import StreamAdapter
11+
from livekit.plugins import hume, groq, anthropic, silero
12+
13+
# local
14+
from utils import validate_env_vars
15+
from settings import (
16+
STT_MODEL,
17+
LLM_MODEL,
18+
LLM_TEMPERATURE,
19+
LLM_PROMPT,
20+
GREETING,
21+
HUME_VOICE,
22+
VAD_SPEECH_DURATION,
23+
VAD_SILENCE_DURATION,
24+
)
25+
26+
27+
class VoiceAssistant(Agent):
    """LiveKit ``Agent`` whose instructions are the voice-assistant prompt.

    The prompt text comes from ``LLM_PROMPT`` in settings.py; the class adds
    no behavior of its own beyond supplying it to the base ``Agent``.
    """
    def __init__(self) -> None:
        """Initialise the base agent with ``LLM_PROMPT`` as its instructions."""
        super().__init__(instructions=LLM_PROMPT)
31+
32+
33+
async def entrypoint(ctx: JobContext):
    """Run one voice-assistant session for a LiveKit job.

    Connects to the job, builds the VAD -> STT -> LLM -> TTS pipeline from the
    values in settings.py, starts the session with a ``VoiceAssistant`` agent
    in the job's room, and then requests an opening reply.

    Args:
        ctx: Job context handed in by the LiveKit worker. This function uses
            its ``connect()`` coroutine and its ``room`` attribute.
    """
    await ctx.connect()

    # A single Silero VAD instance is shared: it is given to the STT adapter
    # below and to the session itself.
    speech_detector = silero.VAD.load(
        min_speech_duration=VAD_SPEECH_DURATION,
        min_silence_duration=VAD_SILENCE_DURATION,
    )

    # Groq STT is wrapped in a StreamAdapter together with the VAD
    # (voice-activity detection + buffering for non-streaming STT).
    # Transcription language is fixed to English.
    whisper = groq.STT(model=STT_MODEL, language="en")
    buffered_stt = StreamAdapter(stt=whisper, vad=speech_detector)

    claude = anthropic.LLM(model=LLM_MODEL, temperature=LLM_TEMPERATURE)
    # NOTE(review): instant_mode presumably selects Hume's low-latency
    # synthesis path -- confirm against the plugin documentation.
    hume_tts = hume.TTS(voice=HUME_VOICE, instant_mode=True)

    # assemble the pipeline
    pipeline = AgentSession(
        vad=speech_detector,
        stt=buffered_stt,
        llm=claude,
        tts=hume_tts,
    )

    await pipeline.start(agent=VoiceAssistant(), room=ctx.room)
    # GREETING is passed as instructions, not as literal text to speak.
    await pipeline.generate_reply(instructions=GREETING)
57+
58+
59+
def main():
    """Check the environment, then hand control to the LiveKit worker CLI.

    When the script is started without any command-line argument, the
    ``console`` subcommand is appended to ``sys.argv`` so the CLI defaults
    to console mode.
    """
    # Fail fast if keys/URLs are missing, before the worker starts.
    validate_env_vars()

    launched_without_args = len(sys.argv) == 1
    if launched_without_args:
        sys.argv.append("console")

    worker_options = WorkerOptions(entrypoint_fnc=entrypoint)
    cli.run_app(worker_options)
66+
67+
68+
if __name__ == "__main__":
69+
main()

tts/tts-python-livekit/pyproject.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[project]
2+
name = "tts-python-livekit"
3+
version = "0.1.0"
4+
description = "LiveKit Agents voice assistant demo: Groq Whisper STT, Anthropic Claude LLM, and Hume TTS"
5+
readme = "README.md"
6+
requires-python = ">=3.11"
7+
dependencies = [
8+
"livekit-agents[hume]>=1.0.20",
9+
"livekit-plugins-anthropic>=1.0.20",
10+
"livekit-plugins-groq>=1.0.20",
11+
"livekit-plugins-silero>=1.0.20",
12+
"python-dotenv>=1.1.0",
13+
]

tts/tts-python-livekit/settings.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""Settings for the LiveKit Agents demo."""
2+
3+
# — STT (speech-to-text) —
# The Groq Whisper model used for transcribing incoming audio
# (main.py passes this as `model` to groq.STT).
STT_MODEL = "whisper-large-v3-turbo"


# — LLM (language model) —
# The Anthropic Claude model for generating replies
# (main.py passes this as `model` to anthropic.LLM).
LLM_MODEL = "claude-3-5-haiku-latest"
# How "creative" the LLM should be: 0.0 = fully deterministic, higher = more varied
LLM_TEMPERATURE = 0.5
# The system prompt that sets persona & tone; main.py supplies it as the
# agent's `instructions`. The trailing backslash after the opening quotes
# keeps the string from starting with a newline.
LLM_PROMPT = """\
VOICE ASSISTANT GUIDELINES

CORE IDENTITY:
- Helpful, professional voice assistant communicating via audio
- Warm, conversational tone using short, clear sentences
- No references to underlying model or implementation

INTERACTION PATTERN:
- Keep responses concise (~50 words/30 seconds of spoken audio)
- Provide longer responses only when explicitly requested
- Ask one focused follow-up question if user request is unclear
- When interrupted, stop immediately and respond to new input

INFORMATION HANDLING:
- Prioritize accuracy over completeness
- Acknowledge uncertainty rather than guessing
- When unsure, offer to suggest next steps
"""


# — TTS (text-to-speech) —
# Pick a voice in the Hume Voice Library
# https://platform.hume.ai/tts/voice-library
# Use "HUME_AI" for Hume library voices or "CUSTOM_VOICE" for voices you’ve created
# (main.py passes this mapping as `voice` to hume.TTS).
HUME_VOICE = {
    "name": "Male English Actor",
    "provider": "HUME_AI",
}


# — Initial greeting —
# Instruction given to the LLM for its opening turn. main.py passes it as
# `instructions` to generate_reply, so the model generates the spoken
# greeting from this prompt; it is not text that is spoken verbatim.
GREETING = "Say 'Hi there! How can I help you today?'"


# — VAD (voice-activity detection) —
# Minimum seconds of continuous speech before sending to STT
# (passed as `min_speech_duration` to silero.VAD.load).
VAD_SPEECH_DURATION = 0.1
# Minimum seconds of silence to mark the end of a speech segment
# (passed as `min_silence_duration` to silero.VAD.load).
VAD_SILENCE_DURATION = 0.5

tts/tts-python-livekit/utils.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""
2+
Utility functions for the LiveKit Agents demo.
3+
"""
4+
5+
import os
6+
import sys
7+
8+
# third-party
9+
from dotenv import load_dotenv
10+
11+
12+
# Names of the environment variables the demo cannot run without
REQUIRED_ENV_VARS = [
    "HUME_API_KEY",
    "GROQ_API_KEY",
    "ANTHROPIC_API_KEY",
    "LIVEKIT_URL",
    "LIVEKIT_API_KEY",
    "LIVEKIT_API_SECRET",
]


def validate_env_vars():
    """
    Load .env into the process environment and verify REQUIRED_ENV_VARS.

    Values from .env are loaded with ``override=True``. A variable counts as
    missing when it is unset or empty. If any are missing, the process exits
    via ``sys.exit`` with a message naming them and showing a template of all
    required keys, pointing the user at .env.example.
    """
    load_dotenv(override=True)

    unset = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
    if not unset:
        return

    # The template lists every required key, not only the missing ones,
    # so it can be pasted as a complete .env skeleton.
    unset_names = ", ".join(unset)
    template = "\n".join(f" {name}=" for name in REQUIRED_ENV_VARS)
    sys.exit(
        f"\nERROR: Missing environment variables: {unset_names}"
        "\n\nPlease create a .env file in the project root "
        "based on .env.example and fill in the values:\n\n"
        f"{template}\n"
    )

0 commit comments

Comments
 (0)