
Zg/update evi typescript quickstart #172


Merged · 7 commits · May 16, 2025
2 changes: 2 additions & 0 deletions evi/evi-typescript-quickstart/README.md
@@ -6,6 +6,8 @@
</p>
</div>

![preview.png](preview.png)

## Overview

This project features a sample implementation of Hume's [Empathic Voice Interface (EVI)](https://dev.hume.ai/docs/empathic-voice-interface-evi/overview) using Hume's [TypeScript SDK](https://github.com/HumeAI/hume-typescript-sdk). It demonstrates how to authenticate, connect to, and display output from EVI in a frontend web application.
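
In miniature, that flow is: create a `HumeClient` with your API key, open a chat socket, and handle the events it emits. A minimal sketch using the same SDK calls this PR's `src/lib` helpers wrap (the env-var lookup assumes Vite's `import.meta.env`):

```ts
import { HumeClient } from "hume";

// Assumes VITE_HUME_API_KEY is defined in your Vite env (e.g. .env.local).
const client = new HumeClient({ apiKey: import.meta.env.VITE_HUME_API_KEY });

// Omitting configId falls back to the default EVI configuration.
const socket = client.empathicVoice.chat.connect({});

// Every incoming SubscribeEvent carries a `type` discriminator.
socket.on("message", (msg) => console.log("EVI event:", msg.type));
```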
34 changes: 20 additions & 14 deletions evi/evi-typescript-quickstart/index.html
@@ -3,23 +3,29 @@
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="icon" type="image/x-icon" href="/favicon.ico" />
    <title>Empathic Voice Interface</title>
  </head>
  <body>
    <div id="app">
      <div id="btn-container">
        <button id="start-btn">Start</button>
        <button id="stop-btn" disabled="true">Stop</button>
      </div>
      <div id="heading-container">
        <h2>Empathic Voice Interface (EVI)</h2>
        <p>
          Welcome to our TypeScript sample implementation of the Empathic Voice Interface!
          Click the "Start" button and begin talking to interact with EVI.
        </p>
      </div>
      <div id="chat"></div>
    </div>
    <main id="app">
      <header id="heading-container">
        <div id="instructions-container">
          <h1>EVI TypeScript Quickstart</h1>
          <p id="instructions">
            Click <strong>Start</strong> to connect, grant mic access, then speak.
            Click <strong>Stop</strong> to end the session.<br />
            ⚙️ Open your browser console to see socket logs and errors.
          </p>
        </div>
        <div id="btn-container">
          <button id="start-btn">Start</button>
          <button id="stop-btn" disabled="true">Stop</button>
        </div>
      </header>

      <section id="chat"></section>
    </main>

    <script type="module" src="/src/main.ts"></script>
  </body>
</html>
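
The markup defers all behavior to `/src/main.ts`, which this diff doesn't show. A hypothetical sketch of the wiring it implies, composed from the `src/lib` helpers added below (everything other than those exported helpers is an assumption):

```ts
// Hypothetical src/main.ts wiring (not part of this diff); connectEVI,
// startAudioCapture, and appendChatMessage are the helpers added under src/lib.
import { connectEVI, startAudioCapture, appendChatMessage } from "./lib";

const startBtn = document.querySelector<HTMLButtonElement>("#start-btn")!;
const stopBtn = document.querySelector<HTMLButtonElement>("#stop-btn")!;
const chat = document.querySelector<HTMLElement>("#chat");

let recorder: MediaRecorder | null = null;

startBtn.addEventListener("click", () => {
  const socket = connectEVI(import.meta.env.VITE_HUME_API_KEY, {
    open: async () => {
      // Start streaming mic audio once the socket is live.
      recorder = await startAudioCapture(socket);
      startBtn.disabled = true;
      stopBtn.disabled = false;
    },
    message: (msg) => {
      // Render only transcript events; other SubscribeEvents are ignored here.
      if (msg.type === "user_message" || msg.type === "assistant_message") {
        appendChatMessage(chat, msg);
      }
    },
    error: (err) => console.error("Socket error:", err),
    close: () => {
      recorder?.stop();
      startBtn.disabled = false;
      stopBtn.disabled = true;
    },
  });

  stopBtn.onclick = () => socket.close();
});
```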
3 changes: 2 additions & 1 deletion evi/evi-typescript-quickstart/package.json
@@ -9,9 +9,10 @@
"preview": "vite preview"
},
"dependencies": {
"hume": "^0.10.3"
"hume": "^0.11.0"
},
"devDependencies": {
"@types/node": "^22.15.18",
"typescript": "^5.2.2",
"vite": "^5.1.4"
},
30 changes: 23 additions & 7 deletions evi/evi-typescript-quickstart/pnpm-lock.yaml

Some generated files are not rendered by default.

Binary file added evi/evi-typescript-quickstart/preview.png
Binary file added evi/evi-typescript-quickstart/public/favicon.ico
Binary file not shown.
51 changes: 51 additions & 0 deletions evi/evi-typescript-quickstart/src/lib/audio.ts
@@ -0,0 +1,51 @@
import {
  convertBlobToBase64,
  ensureSingleValidAudioTrack,
  getAudioStream,
  getBrowserSupportedMimeType,
  MimeType,
} from "hume";
import type { ChatSocket } from "hume/api/resources/empathicVoice/resources/chat";

/**
 * Begins capturing microphone audio and streams it into the given EVI ChatSocket.
 *
 * This function:
 * 1. Prompts the user for microphone access and obtains a single valid audio track.
 * 2. Creates a MediaRecorder using a browser-supported MIME type, falling back to WEBM.
 * 3. Slices the audio into blobs at the given interval, converts each blob to a base64 string,
 *    and sends it over the provided WebSocket-like ChatSocket via `socket.sendAudioInput`.
 * 4. Logs any recorder errors to the console.
 *
 * @param socket - The Hume EVI ChatSocket to which encoded audio frames will be sent.
 * @param timeSliceMs - How often (in milliseconds) to emit audio blobs. Defaults to 80ms.
 *
 * @returns A MediaRecorder instance controlling the ongoing microphone capture.
 *          Call `.stop()` on it to end streaming.
 *
 * @throws {DOMException} If the user denies microphone access or if no audio track is available.
 * @throws {Error} If MediaRecorder cannot be constructed with the detected MIME type.
 */
export async function startAudioCapture(
  socket: ChatSocket,
  timeSliceMs = 80
): Promise<MediaRecorder> {
  const mimeTypeResult = getBrowserSupportedMimeType();
  const mimeType = mimeTypeResult.success ? mimeTypeResult.mimeType : MimeType.WEBM;

  const micAudioStream = await getAudioStream();
  ensureSingleValidAudioTrack(micAudioStream);

  const recorder = new MediaRecorder(micAudioStream, { mimeType });
  recorder.ondataavailable = async (e: BlobEvent) => {
    if (e.data.size > 0 && socket.readyState === WebSocket.OPEN) {
      const data = await convertBlobToBase64(e.data);
      socket.sendAudioInput({ data });
    }
  };
  recorder.onerror = (e) => console.error("MediaRecorder error:", e);
  recorder.start(timeSliceMs);

  return recorder;
}
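
Stopping is left to the caller. A usage sketch, assuming an already-connected `socket`; releasing the mic via the recorder's `stream` is standard MediaStream API, not something this module does for you:

```ts
const recorder = await startAudioCapture(socket); // 80 ms slices by default

// Later, to end streaming and release the microphone:
recorder.stop();
recorder.stream.getTracks().forEach((track) => track.stop());
```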
50 changes: 50 additions & 0 deletions evi/evi-typescript-quickstart/src/lib/evi.ts
@@ -0,0 +1,50 @@
import { HumeClient } from "hume";
import type { ChatSocket } from "hume/api/resources/empathicVoice/resources/chat";

let client: HumeClient | null = null;

function getClient(apiKey: string): HumeClient {
  if (!client) {
    client = new HumeClient({ apiKey });
  }
  return client;
}

/**
 * Initializes and opens an Empathic Voice Interface (EVI) ChatSocket.
 *
 * This function ensures a singleton HumeClient is created using the provided API key,
 * then connects to the EVI WebSocket endpoint (optionally with a specific config ID),
 * and registers your event handlers for the socket's lifecycle events.
 *
 * @param apiKey Your Hume API key. Must be a non-empty string.
 * @param handlers Callback handlers for socket events:
 *   - open: Invoked when the connection is successfully established.
 *   - message: Invoked for each incoming SubscribeEvent.
 *   - error: Invoked on transport or protocol errors.
 *   - close: Invoked when the socket is closed.
 * @param configId (Optional) EVI configuration ID to apply; if omitted, the default EVI configuration is used.
 *
 * @returns The connected ChatSocket instance, ready for sending and receiving audio/text messages.
 *
 * @throws {Error} If `apiKey` is falsy or an empty string.
 */
export function connectEVI(
  apiKey: string,
  handlers: ChatSocket.EventHandlers,
  configId?: string
): ChatSocket {
  if (!apiKey) {
    throw new Error("VITE_HUME_API_KEY is not set.");
  }

  const client = getClient(apiKey);
  const socket = client.empathicVoice.chat.connect({ configId });

  socket.on("open", handlers.open);
  socket.on("message", handlers.message);
  socket.on("error", handlers.error);
  socket.on("close", handlers.close);

  return socket;
}
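
A call-site sketch showing the handler shape `ChatSocket.EventHandlers` expects (the handler bodies and config ID are placeholders):

```ts
import { connectEVI } from "./evi";
import type { ChatSocket } from "hume/api/resources/empathicVoice/resources/chat";

const handlers: ChatSocket.EventHandlers = {
  open: () => console.log("Socket opened"),
  message: (msg) => console.log("Received event:", msg.type),
  error: (err) => console.error("Socket error:", err),
  close: () => console.log("Socket closed"),
};

// configId is optional; this placeholder shows where a custom EVI config goes.
const socket = connectEVI(import.meta.env.VITE_HUME_API_KEY, handlers, "<your-config-id>");
```

One design note: `getClient` memoizes the first `HumeClient`, so a later call with a different `apiKey` silently reuses the original client.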
3 changes: 3 additions & 0 deletions evi/evi-typescript-quickstart/src/lib/index.ts
@@ -0,0 +1,3 @@
export * from "./audio";
export * from "./evi";
export * from "./ui";
71 changes: 71 additions & 0 deletions evi/evi-typescript-quickstart/src/lib/ui.ts
@@ -0,0 +1,71 @@
import { AssistantMessage, UserMessage } from "hume/api/resources/empathicVoice";

/**
 * Extracts and returns the top three emotion scores from a prosody analysis.
 *
 * This function pulls the `scores` object out of `message.models.prosody`
 * (if available), converts it into an array of `[emotion, numericScore]` entries,
 * sorts that array in descending order by score, and then returns the top three
 * as objects with the emotion name and a stringified score (two decimal places).
 *
 * @param message A `UserMessage` or `AssistantMessage` containing `models.prosody.scores`,
 *   where keys are emotion labels and values are numeric scores.
 * @returns An array of up to three `{ emotion, score }` objects, sorted by highest score first.
 *   The `score` property is formatted as a string with exactly two decimal places.
 */
function extractTopThreeEmotions(
  message: UserMessage | AssistantMessage
): { emotion: string; score: string }[] {
  const scores = message.models.prosody?.scores;
  const scoresArray = Object.entries(scores || {});

  scoresArray.sort((a, b) => b[1] - a[1]);

  const topThreeEmotions = scoresArray.slice(0, 3).map(([emotion, score]) => ({
    emotion,
    score: Number(score).toFixed(2),
  }));

  return topThreeEmotions;
}

/**
 * Appends a chat message bubble to the container and scrolls to show it.
 *
 * @param container - The element that holds chat messages.
 * @param msg - A UserMessage or AssistantMessage with content and emotion scores.
 */
export function appendChatMessage(
  container: HTMLElement | null,
  msg: UserMessage | AssistantMessage
): void {
  if (!container || !msg) return;

  const { role, content } = msg.message;
  const timestamp = new Date().toLocaleTimeString();

  const card = document.createElement("div");
  card.className = `chat-card ${role}`;

  card.innerHTML = `
    <div class="role">${role[0].toUpperCase() + role.slice(1)}</div>
    <div class="timestamp"><strong>${timestamp}</strong></div>
    <div class="content">${content}</div>
  `;

  const scoresEl = document.createElement("div");
  scoresEl.className = "scores";

  const topEmotions = extractTopThreeEmotions(msg);
  topEmotions.forEach(({ emotion, score }) => {
    const item = document.createElement("div");
    item.className = "score-item";
    item.innerHTML = `${emotion}: <strong>${score}</strong>`;
    scoresEl.appendChild(item);
  });

  card.appendChild(scoresEl);
  container.appendChild(card);

  container.scrollTop = container.scrollHeight;
}
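
For a sense of the rendered data: given prosody scores such as `{ Joy: 0.82, Interest: 0.47, Calmness: 0.31, ... }` (labels illustrative), `extractTopThreeEmotions` returns:

```ts
[
  { emotion: "Joy", score: "0.82" },
  { emotion: "Interest", score: "0.47" },
  { emotion: "Calmness", score: "0.31" },
]
```

which `appendChatMessage` renders as one `score-item` per entry beneath the message content.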