-
-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add piperTTS in-browser text-to-speech
- Loading branch information
1 parent
cc594d4
commit 686c8c1
Showing
20 changed files
with
689 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
221 changes: 221 additions & 0 deletions
221
frontend/src/components/TextToSpeech/PiperTTSOptions/index.jsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
import { useState, useEffect, useRef } from "react"; | ||
import PiperTTSClient from "@/utils/piperTTS"; | ||
import { titleCase } from "text-case"; | ||
import { humanFileSize } from "@/utils/numbers"; | ||
import showToast from "@/utils/toast"; | ||
import { CircleNotch, PauseCircle, PlayCircle } from "@phosphor-icons/react"; | ||
|
||
export default function PiperTTSOptions({ settings }) { | ||
return ( | ||
<> | ||
<p className="text-sm font-base text-white text-opacity-60 mb-4"> | ||
All PiperTTS models will run in your browser locally. This can be | ||
resource intensive on lower-end devices. | ||
</p> | ||
<div className="flex gap-x-4 items-center"> | ||
<PiperTTSModelSelection settings={settings} /> | ||
</div> | ||
</> | ||
); | ||
} | ||
|
||
/**
 * Groups voices by the English name of their language.
 *
 * Voices without a `language.name_english` value are collected under
 * "Unlisted". Group order follows `Object.entries` ordering of the language
 * names (first appearance for ordinary names).
 *
 * @param {Array<object>|null} [voices=[]] - Voice records to group. `null` and
 *   `undefined` are treated as an empty list.
 * @returns {Array<[string, Array<object>]>} `[languageName, voices]` pairs.
 */
function voicesByLanguage(voices = []) {
  // A null-prototype object keeps language names such as "constructor" or
  // "toString" from colliding with members inherited from Object.prototype.
  const grouped = Object.create(null);

  for (const voice of voices ?? []) {
    const langName = voice?.language?.name_english ?? "Unlisted";
    if (!grouped[langName]) grouped[langName] = [];
    grouped[langName].push(voice);
  }

  return Object.entries(grouped);
}
|
||
/**
 * Builds the option label for a voice:
 * `[✔ ]<Title Cased Name>-<Low|HQ> (<human readable model size>)`.
 *
 * The size shown is the `size_bytes` of the first file whose key ends with
 * ".onnx"; it falls back to 0 when no such file (or no `files` map) exists.
 *
 * @param {object} voice - Voice record with `is_stored`, `name`, `quality`
 *   and an optional `files` map keyed by file name.
 * @returns {string} The label shown in the voice dropdown.
 */
function voiceDisplayName(voice) {
  const { is_stored, name, quality, files } = voice;
  // `files` may be absent; the size lookup already tolerated that, so the key
  // scan must as well instead of throwing from Object.keys(undefined).
  const onnxFileKey = Object.keys(files ?? {}).find((key) =>
    key.endsWith(".onnx")
  );
  const fileSize = files?.[onnxFileKey]?.size_bytes || 0;
  const storedMark = is_stored ? "✔ " : "";
  const qualityLabel = quality === "low" ? "Low" : "HQ";
  return `${storedMark}${titleCase(name)}-${qualityLabel} (${humanFileSize(fileSize)})`;
}
|
||
function PiperTTSModelSelection({ settings }) { | ||
const [loading, setLoading] = useState(true); | ||
const [voices, setVoices] = useState([]); | ||
const [selectedVoice, setSelectedVoice] = useState( | ||
settings?.TTSPiperTTSVoiceModel | ||
); | ||
|
||
function flushVoices() { | ||
PiperTTSClient.flush() | ||
.then(() => | ||
showToast("All voices flushed from browser storage", "info", { | ||
clear: true, | ||
}) | ||
) | ||
.catch((e) => console.error(e)); | ||
} | ||
|
||
useEffect(() => { | ||
PiperTTSClient.voices() | ||
.then((voices) => { | ||
if (voices?.length !== 0) return setVoices(voices); | ||
throw new Error("Could not fetch voices from web worker."); | ||
}) | ||
.catch((e) => { | ||
console.error(e); | ||
}) | ||
.finally(() => setLoading(false)); | ||
}, []); | ||
|
||
if (loading) { | ||
return ( | ||
<div className="flex flex-col w-60"> | ||
<label className="text-white text-sm font-semibold block mb-3"> | ||
Voice Model Selection | ||
</label> | ||
<select | ||
name="TTSPiperTTSVoiceModel" | ||
disabled={true} | ||
className="border-none bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" | ||
> | ||
<option disabled={true} selected={true}> | ||
-- loading available models -- | ||
</option> | ||
</select> | ||
</div> | ||
); | ||
} | ||
|
||
return ( | ||
<div className="flex flex-col w-fit"> | ||
<div className="flex flex-col w-60"> | ||
<label className="text-white text-sm font-semibold block mb-3"> | ||
Voice Model Selection | ||
</label> | ||
<div className="flex items-center w-fit gap-x-4 mb-2"> | ||
<select | ||
name="TTSPiperTTSVoiceModel" | ||
required={true} | ||
onChange={(e) => setSelectedVoice(e.target.value)} | ||
value={selectedVoice} | ||
className="border-none flex-shrink-0 bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" | ||
> | ||
{voicesByLanguage(voices).map(([lang, voices]) => { | ||
return ( | ||
<optgroup key={lang} label={lang}> | ||
{voices.map((voice) => ( | ||
<option | ||
selected={voice.key === selectedVoice} | ||
value={voice.key} | ||
> | ||
{voiceDisplayName(voice)} | ||
</option> | ||
))} | ||
</optgroup> | ||
); | ||
})} | ||
</select> | ||
<DemoVoiceSample voiceId={selectedVoice} /> | ||
</div> | ||
<p className="text-xs text-white/40"> | ||
The "✔" indicates this model is already stored in your browser and | ||
does not need to be downloaded | ||
</p> | ||
</div> | ||
{!!voices.find((voice) => voice.is_stored) && ( | ||
<button | ||
type="button" | ||
onClick={flushVoices} | ||
className="w-fit border-none hover:text-white hover:underline text-white/40 text-sm my-4" | ||
> | ||
Flush voice cache | ||
</button> | ||
)} | ||
</div> | ||
); | ||
} | ||
|
||
function DemoVoiceSample({ voiceId }) { | ||
const playerRef = useRef(null); | ||
const [speaking, setSpeaking] = useState(false); | ||
const [loading, setLoading] = useState(false); | ||
const [audioSrc, setAudioSrc] = useState(null); | ||
|
||
async function speakMessage(e) { | ||
e.preventDefault(); | ||
if (speaking) { | ||
playerRef?.current?.pause(); | ||
return; | ||
} | ||
|
||
try { | ||
if (!audioSrc) { | ||
setLoading(true); | ||
const client = new PiperTTSClient({ voiceId }); | ||
const blobUrl = await client.getAudioBlobForText( | ||
"Hello, welcome to AnythingLLM!" | ||
); | ||
setAudioSrc(blobUrl); | ||
setLoading(false); | ||
client.worker?.terminate(); | ||
PiperTTSClient._instance = null; | ||
} else { | ||
playerRef.current.play(); | ||
} | ||
} catch (e) { | ||
console.error(e); | ||
setLoading(false); | ||
setSpeaking(false); | ||
} | ||
} | ||
|
||
useEffect(() => { | ||
function setupPlayer() { | ||
if (!playerRef?.current) return; | ||
playerRef.current.addEventListener("play", () => { | ||
setSpeaking(true); | ||
}); | ||
|
||
playerRef.current.addEventListener("pause", () => { | ||
playerRef.current.currentTime = 0; | ||
setSpeaking(false); | ||
setAudioSrc(null); | ||
}); | ||
} | ||
setupPlayer(); | ||
}, []); | ||
|
||
return ( | ||
<button | ||
type="button" | ||
onClick={speakMessage} | ||
className="border-none text-zinc-300 flex items-center gap-x-1" | ||
> | ||
{speaking ? ( | ||
<> | ||
<PauseCircle size={20} className="flex-shrink-0" /> | ||
<p className="text-sm flex-shrink-0">Stop demo</p> | ||
</> | ||
) : ( | ||
<> | ||
{loading ? ( | ||
<> | ||
<CircleNotch size={20} className="animate-spin flex-shrink-0" /> | ||
<p className="text-sm flex-shrink-0">Loading voice</p> | ||
</> | ||
) : ( | ||
<> | ||
<PlayCircle size={20} className="flex-shrink-0" /> | ||
<p className="text-sm flex-shrink-0">Play sample</p> | ||
</> | ||
)} | ||
</> | ||
)} | ||
<audio | ||
ref={playerRef} | ||
hidden={true} | ||
src={audioSrc} | ||
autoPlay={true} | ||
controls={false} | ||
/> | ||
</button> | ||
); | ||
} |
21 changes: 18 additions & 3 deletions
21
...nts/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/index.jsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,38 @@ | ||
import { useEffect, useState } from "react"; | ||
import NativeTTSMessage from "./native"; | ||
import AsyncTTSMessage from "./asyncTts"; | ||
import PiperTTSMessage from "./piperTTS"; | ||
import System from "@/models/system"; | ||
|
||
export default function TTSMessage({ slug, chatId, message }) { | ||
const [settings, setSettings] = useState({}); | ||
const [provider, setProvider] = useState("native"); | ||
const [loading, setLoading] = useState(true); | ||
|
||
useEffect(() => { | ||
async function getSettings() { | ||
const _settings = await System.keys(); | ||
setProvider(_settings?.TextToSpeechProvider ?? "native"); | ||
setSettings(_settings); | ||
setLoading(false); | ||
} | ||
getSettings(); | ||
}, []); | ||
|
||
if (!chatId || loading) return null; | ||
if (provider !== "native") | ||
return <AsyncTTSMessage slug={slug} chatId={chatId} />; | ||
return <NativeTTSMessage message={message} />; | ||
|
||
switch (provider) { | ||
case "openai": | ||
case "elevenlabs": | ||
return <AsyncTTSMessage slug={slug} chatId={chatId} />; | ||
case "piper_local": | ||
return ( | ||
<PiperTTSMessage | ||
voiceId={settings?.TTSPiperTTSVoiceModel} | ||
message={message} | ||
/> | ||
); | ||
default: | ||
return <NativeTTSMessage message={message} />; | ||
} | ||
} |
90 changes: 90 additions & 0 deletions
90
.../WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/piperTTS.jsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import { useEffect, useState, useRef } from "react"; | ||
import { SpeakerHigh, PauseCircle, CircleNotch } from "@phosphor-icons/react"; | ||
import { Tooltip } from "react-tooltip"; | ||
import PiperTTSClient from "@/utils/piperTTS"; | ||
|
||
export default function PiperTTS({ voiceId = null, message }) { | ||
const playerRef = useRef(null); | ||
const [speaking, setSpeaking] = useState(false); | ||
const [loading, setLoading] = useState(false); | ||
const [audioSrc, setAudioSrc] = useState(null); | ||
|
||
async function speakMessage(e) { | ||
e.preventDefault(); | ||
if (speaking) { | ||
playerRef?.current?.pause(); | ||
return; | ||
} | ||
|
||
try { | ||
if (!audioSrc) { | ||
setLoading(true); | ||
const client = new PiperTTSClient({ voiceId }); | ||
const blobUrl = await client.getAudioBlobForText(message); | ||
setAudioSrc(blobUrl); | ||
setLoading(false); | ||
} else { | ||
playerRef.current.play(); | ||
} | ||
} catch (e) { | ||
console.error(e); | ||
setLoading(false); | ||
setSpeaking(false); | ||
} | ||
} | ||
|
||
useEffect(() => { | ||
function setupPlayer() { | ||
if (!playerRef?.current) return; | ||
playerRef.current.addEventListener("play", () => { | ||
setSpeaking(true); | ||
}); | ||
|
||
playerRef.current.addEventListener("pause", () => { | ||
playerRef.current.currentTime = 0; | ||
setSpeaking(false); | ||
}); | ||
} | ||
setupPlayer(); | ||
}, []); | ||
|
||
return ( | ||
<div className="mt-3 relative"> | ||
<button | ||
type="button" | ||
onClick={speakMessage} | ||
data-tooltip-id="message-to-speech" | ||
data-tooltip-content={ | ||
speaking ? "Pause TTS speech of message" : "TTS Speak message" | ||
} | ||
className="border-none text-zinc-300" | ||
aria-label={speaking ? "Pause speech" : "Speak message"} | ||
> | ||
{speaking ? ( | ||
<PauseCircle size={18} className="mb-1" /> | ||
) : ( | ||
<> | ||
{loading ? ( | ||
<CircleNotch size={18} className="mb-1 animate-spin" /> | ||
) : ( | ||
<SpeakerHigh size={18} className="mb-1" /> | ||
)} | ||
</> | ||
)} | ||
<audio | ||
ref={playerRef} | ||
hidden={true} | ||
src={audioSrc} | ||
autoPlay={true} | ||
controls={false} | ||
/> | ||
</button> | ||
<Tooltip | ||
id="message-to-speech" | ||
place="bottom" | ||
delayShow={300} | ||
className="tooltip !text-xs" | ||
/> | ||
</div> | ||
); | ||
} |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.