
Commit 6215aef

Zg/update evi typescript quickstart (#172)
1 parent 3fb3c2d commit 6215aef

File tree

14 files changed: +420 -498 lines changed

evi/evi-typescript-quickstart/README.md

Lines changed: 2 additions & 0 deletions
@@ -6,6 +6,8 @@
 </p>
 </div>

+![preview.png](preview.png)
+
 ## Overview

 This project features a sample implementation of Hume's [Empathic Voice Interface (EVI)](https://dev.hume.ai/docs/empathic-voice-interface-evi/overview) using Hume's [Typescript SDK](https://github.com/HumeAI/hume-typescript-sdk). It demonstrates how to authenticate, connect to, and display output from EVI in a frontend web application.

evi/evi-typescript-quickstart/index.html

Lines changed: 20 additions & 14 deletions
@@ -3,23 +3,29 @@
 <head>
   <meta charset="UTF-8" />
   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <link rel="icon" type="image/x-icon" href="/favicon.ico" />
   <title>Empathic Voice Interface</title>
 </head>
 <body>
-  <div id="app">
-    <div id="btn-container">
-      <button id="start-btn">Start</button>
-      <button id="stop-btn" disabled="true">Stop</button>
-    </div>
-    <div id="heading-container">
-      <h2>Empathic Voice Interface (EVI)</h2>
-      <p>
-        Welcome to our TypeScript sample implementation of the Empathic Voice Interface!
-        Click the "Start" button and begin talking to interact with EVI.
-      </p>
-    </div>
-    <div id="chat"></div>
-  </div>
+  <main id="app">
+    <header id="heading-container">
+      <div id="instructions-container">
+        <h1>EVI TypeScript Quickstart</h1>
+        <p id="instructions">
+          Click <strong>Start</strong> to connect, grant mic access, then speak.
+          Click <strong>Stop</strong> to end the session. <br />
+          ⚙️ Open your browser console to see socket logs and errors.
+        </p>
+      </div>
+      <div id="btn-container">
+        <button id="start-btn">Start</button>
+        <button id="stop-btn" disabled="true">Stop</button>
+      </div>
+    </header>
+
+    <section id="chat"></section>
+  </main>
+
   <script type="module" src="/src/main.ts"></script>
 </body>
 </html>

evi/evi-typescript-quickstart/package.json

Lines changed: 2 additions & 1 deletion
@@ -9,9 +9,10 @@
     "preview": "vite preview"
   },
   "dependencies": {
-    "hume": "^0.10.3"
+    "hume": "^0.11.0"
   },
   "devDependencies": {
+    "@types/node": "^22.15.18",
     "typescript": "^5.2.2",
     "vite": "^5.1.4"
   },

evi/evi-typescript-quickstart/pnpm-lock.yaml

Lines changed: 23 additions & 7 deletions
Some generated files are not rendered by default. (288 KB)

Binary file not shown. (4.19 KB)
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+import {
+  convertBlobToBase64,
+  ensureSingleValidAudioTrack,
+  getAudioStream,
+  getBrowserSupportedMimeType,
+  MimeType,
+} from "hume";
+import type { ChatSocket } from "hume/api/resources/empathicVoice/resources/chat";
+
+/**
+ * Begins capturing microphone audio and streams it into the given EVI ChatSocket.
+ *
+ * This function:
+ * 1. Prompts the user for microphone access and obtains a single valid audio track.
+ * 2. Creates a MediaRecorder using the best browser-supported MIME type, falling back to WEBM.
+ * 3. Slices the audio into blobs at the given interval, converts each blob to a base64 string,
+ *    and sends it over the provided WebSocket-like ChatSocket via `socket.sendAudioInput`.
+ * 4. Logs any recorder errors to the console.
+ *
+ * @param socket - The Hume EVI ChatSocket to which encoded audio frames will be sent.
+ * @param timeSliceMs - How often (in milliseconds) to emit audio blobs. Defaults to 80ms.
+ *
+ * @returns A MediaRecorder instance controlling the ongoing microphone capture.
+ *          Call `.stop()` on it to end streaming.
+ *
+ * @throws {DOMException} If the user denies microphone access or if no audio track is available.
+ * @throws {Error} If MediaRecorder cannot be constructed with the selected MIME type.
+ */
+export async function startAudioCapture(
+  socket: ChatSocket,
+  timeSliceMs = 80
+): Promise<MediaRecorder> {
+  const mimeTypeResult = getBrowserSupportedMimeType();
+  const mimeType = mimeTypeResult.success ? mimeTypeResult.mimeType : MimeType.WEBM;
+
+  const micAudioStream = await getAudioStream();
+  ensureSingleValidAudioTrack(micAudioStream);
+
+  const recorder = new MediaRecorder(micAudioStream, { mimeType });
+  recorder.ondataavailable = async (e: BlobEvent) => {
+    if (e.data.size > 0 && socket.readyState === WebSocket.OPEN) {
+      const data = await convertBlobToBase64(e.data);
+      socket.sendAudioInput({ data });
+    }
+  };
+  recorder.onerror = (e) => console.error("MediaRecorder error:", e);
+  recorder.start(timeSliceMs);
+
+  return recorder;
+}
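
A minimal usage sketch for this helper, not part of the commit itself: it assumes an already-open ChatSocket named `socket` (see the connection helper below).

    // Begin streaming microphone audio into the open EVI socket.
    const recorder = await startAudioCapture(socket);

    // Later, to end the session: stop the recorder and release the mic.
    recorder.stop();
    recorder.stream.getTracks().forEach((track) => track.stop());

Stopping the underlying tracks is what actually releases the microphone and turns off the browser's recording indicator; `recorder.stop()` alone only halts the data callbacks.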
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
+import { HumeClient } from "hume";
+import type { ChatSocket } from "hume/api/resources/empathicVoice/resources/chat";
+
+let client: HumeClient | null = null;
+
+function getClient(apiKey: string): HumeClient {
+  if (!client) {
+    client = new HumeClient({ apiKey });
+  }
+  return client;
+}
+
+/**
+ * Initializes and opens an Empathic Voice Interface (EVI) ChatSocket.
+ *
+ * This function ensures a singleton HumeClient is created using the provided API key,
+ * then connects to the EVI WebSocket endpoint (optionally with a specific config ID),
+ * and registers your event handlers for the socket's lifecycle events.
+ *
+ * @param apiKey Your Hume API key. Must be a non-empty string.
+ * @param handlers Callback handlers for socket events:
+ *   - open: Invoked when the connection is successfully established.
+ *   - message: Invoked for each incoming SubscribeEvent.
+ *   - error: Invoked on transport or protocol errors.
+ *   - close: Invoked when the socket is closed.
+ * @param configId (Optional) EVI configuration ID to apply; if omitted, the default EVI configuration is used.
+ *
+ * @returns The connected ChatSocket instance, ready for sending and receiving audio/text messages.
+ *
+ * @throws {Error} If `apiKey` is falsy or an empty string.
+ */
+export function connectEVI(
+  apiKey: string,
+  handlers: ChatSocket.EventHandlers,
+  configId?: string
+): ChatSocket {
+  if (!apiKey) {
+    throw new Error("VITE_HUME_API_KEY is not set.");
+  }
+
+  const client = getClient(apiKey);
+  const socket = client.empathicVoice.chat.connect({ configId });
+
+  socket.on("open", handlers.open);
+  socket.on("message", handlers.message);
+  socket.on("error", handlers.error);
+  socket.on("close", handlers.close);
+
+  return socket;
+}
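
A hedged connection sketch, again not from the commit: it reads the key from the Vite env variable named in the error message above and simply logs each lifecycle event. Handler signatures follow `ChatSocket.EventHandlers`.

    // Hypothetical call site; assumes connectEVI is imported and the app runs under Vite.
    const socket = connectEVI(import.meta.env.VITE_HUME_API_KEY, {
      open: () => console.log("EVI socket open"),
      message: (msg) => console.log("received event:", msg.type),
      error: (err) => console.error("socket error:", err),
      close: (event) => console.log("socket closed:", event),
    });
    // An EVI config ID can be passed as the optional third argument
    // to apply a saved voice/prompt configuration.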
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+export * from "./audio";
+export * from "./evi";
+export * from "./ui";
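
With this barrel file in place, the rest of the app can pull every helper from a single path. A one-line usage sketch (the directory name is an assumption; this extract does not show file paths):

    // Path is assumed; adjust to wherever the barrel file lives.
    import { connectEVI, startAudioCapture, appendChatMessage } from "./utils";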
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+import { AssistantMessage, UserMessage } from "hume/api/resources/empathicVoice";
+
+/**
+ * Extracts and returns the top three emotion scores from a prosody analysis.
+ *
+ * This function pulls the `scores` object out of `message.models.prosody`
+ * (if available), converts it into an array of `[emotion, numericScore]` entries,
+ * sorts that array in descending order by score, and then returns the top three
+ * as objects with the emotion name and a stringified score (two decimal places).
+ *
+ * @param message A `UserMessage` or `AssistantMessage` containing `models.prosody.scores`,
+ *   where keys are emotion labels and values are numeric scores.
+ * @returns An array of up to three `{ emotion, score }` objects, sorted by highest score first.
+ *   The `score` property is formatted as a string with exactly two decimal places.
+ */
+function extractTopThreeEmotions(
+  message: UserMessage | AssistantMessage
+): { emotion: string; score: string }[] {
+  const scores = message.models.prosody?.scores;
+  const scoresArray = Object.entries(scores || {});
+
+  scoresArray.sort((a, b) => b[1] - a[1]);
+
+  const topThreeEmotions = scoresArray.slice(0, 3).map(([emotion, score]) => ({
+    emotion,
+    score: Number(score).toFixed(2),
+  }));
+
+  return topThreeEmotions;
+}
+
+/**
+ * Appends a chat message bubble to the container and scrolls to show it.
+ *
+ * @param container - The element that holds chat messages.
+ * @param msg - A UserMessage or AssistantMessage with content and emotion scores.
+ */
+export function appendChatMessage(
+  container: HTMLElement | null,
+  msg: UserMessage | AssistantMessage
+): void {
+  if (!container || !msg) return;
+
+  const { role, content } = msg.message;
+  const timestamp = new Date().toLocaleTimeString();
+
+  const card = document.createElement("div");
+  card.className = `chat-card ${role}`;
+
+  card.innerHTML = `
+    <div class="role">${role[0].toUpperCase() + role.slice(1)}</div>
+    <div class="timestamp"><strong>${timestamp}</strong></div>
+    <div class="content">${content}</div>
+  `;
+
+  const scoresEl = document.createElement("div");
+  scoresEl.className = "scores";
+
+  const topEmotions = extractTopThreeEmotions(msg);
+  topEmotions.forEach(({ emotion, score }) => {
+    const item = document.createElement("div");
+    item.className = "score-item";
+    item.innerHTML = `${emotion}: <strong>${score}</strong>`;
+    scoresEl.appendChild(item);
+  });
+
+  card.appendChild(scoresEl);
+  container.appendChild(card);
+
+  container.scrollTop = container.scrollHeight;
+}
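
Taken together, these helpers cover the whole quickstart loop: connect, stream mic audio, and render transcripts. Below is a hedged sketch of how an entry point like the src/main.ts referenced in index.html might wire them up. The commit's actual main.ts is not shown in this extract, the "./utils" import path is assumed, and playback of EVI's audio output is omitted for brevity.

    import type { ChatSocket } from "hume/api/resources/empathicVoice/resources/chat";
    // Assumed path for the barrel file added in this commit.
    import { connectEVI, startAudioCapture, appendChatMessage } from "./utils";

    const startBtn = document.querySelector<HTMLButtonElement>("#start-btn")!;
    const stopBtn = document.querySelector<HTMLButtonElement>("#stop-btn")!;
    const chat = document.querySelector<HTMLElement>("#chat");

    let socket: ChatSocket | null = null;
    let recorder: MediaRecorder | null = null;

    startBtn.addEventListener("click", () => {
      socket = connectEVI(import.meta.env.VITE_HUME_API_KEY, {
        open: async () => {
          // Once the socket is open, begin streaming microphone audio.
          recorder = await startAudioCapture(socket!);
          startBtn.disabled = true;
          stopBtn.disabled = false;
        },
        message: (msg) => {
          // Render user and assistant transcripts with their top emotions.
          if (msg.type === "user_message" || msg.type === "assistant_message") {
            appendChatMessage(chat, msg);
          }
        },
        error: (err) => console.error("Socket error:", err),
        close: () => {
          recorder?.stop();
          startBtn.disabled = false;
          stopBtn.disabled = true;
        },
      });
    });

    stopBtn.addEventListener("click", () => {
      recorder?.stop();
      socket?.close();
    });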
