From cab4441e4119f495e8174a2707fd31816a34220e Mon Sep 17 00:00:00 2001 From: Freddy Boulton <41651716+freddyaboulton@users.noreply.github.com> Date: Mon, 28 Apr 2025 12:47:13 -0400 Subject: [PATCH 01/70] Commit --- demo/dia_dialogue_demo/app.py | 108 +++++ gradio/__init__.py | 2 + gradio/components/__init__.py | 2 + gradio/components/dialogue.py | 135 ++++++ gradio/stubs/anyio.pyi | 2 +- js/dialogue/Dialogue.svelte | 499 ++++++++++++++++++++++ js/dialogue/DropdownOptions.svelte | 165 +++++++ js/dialogue/Example.svelte | 19 + js/dialogue/Index.svelte | 96 +++++ js/dialogue/main.ts | 2 + js/dialogue/package.json | 42 ++ js/dialogue/utils.ts | 4 + js/dropdown/Index.svelte | 1 + js/dropdown/shared/DropdownOptions.svelte | 12 +- package.json | 1 + pnpm-lock.yaml | 28 ++ 16 files changed, 1115 insertions(+), 3 deletions(-) create mode 100644 demo/dia_dialogue_demo/app.py create mode 100644 gradio/components/dialogue.py create mode 100644 js/dialogue/Dialogue.svelte create mode 100644 js/dialogue/DropdownOptions.svelte create mode 100644 js/dialogue/Example.svelte create mode 100644 js/dialogue/Index.svelte create mode 100644 js/dialogue/main.ts create mode 100644 js/dialogue/package.json create mode 100644 js/dialogue/utils.ts diff --git a/demo/dia_dialogue_demo/app.py b/demo/dia_dialogue_demo/app.py new file mode 100644 index 0000000000..181aae15be --- /dev/null +++ b/demo/dia_dialogue_demo/app.py @@ -0,0 +1,108 @@ +import gradio as gr +import httpx + + +emotions = [ + "(laughs)", + "(clears throat)", + "(sighs)", + "(gasps)", + "(coughs)", + "(singing)", + "(sings)", + "(mumbles)", + "(beep)", + "(groans)", + "(sniffs)", + "(claps)", + "(screams)", + "(inhales)", + "(exhales)", + "(applause)", + "(burps)", + "(humming)", + "(sneezes)", + "(chuckle)", + "(whistles)", +] +speakers = ["Speaker 1", "Speaker 2"] + +client = httpx.AsyncClient(timeout=180) +API_URL = "https://router.huggingface.co/fal-ai/fal-ai/dia-tts" + + +async def query(dialogue: str, token: gr.OAuthToken | None): + if token is None: + raise gr.Error( + "No token provided. Use Sign in with Hugging Face to get a token." + ) + headers = { + "Authorization": f"Bearer {token.token}", + } + response = await client.post(API_URL, headers=headers, json={"text": dialogue}) + url = response.json()["audio"]["url"] + print("URL: ", url) + return url + + +def formatter(speaker, text): + speaker = speaker.split(" ")[1] + return f"[S{speaker}] {text}" + + +with gr.Blocks() as demo: + with gr.Sidebar(): + login_button = gr.LoginButton() + gr.HTML( + """ +

+ Dancing Huggy Dia Dialogue Generation Model +

+

Model by Nari Labs. Powered by HF and Fal AI API.

+

Dia is a dialogue generation model that can generate realistic dialogue between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life .

+ """ + ) + with gr.Row(): + with gr.Column(): + dialogue = gr.Dialogue( + speakers=speakers, emotions=emotions, formatter=formatter + ) + with gr.Column(): + with gr.Row(): + audio = gr.Audio(label="Audio") + with gr.Row(): + gr.DeepLinkButton(value="Share Audio via Link") + with gr.Row(): + gr.Examples( + examples=[ + [ + [ + { + "speaker": "Speaker 1", + "text": "Why did the chicken cross the road?", + }, + {"speaker": "Speaker 2", "text": "I don't know!"}, + { + "speaker": "Speaker 1", + "text": "to get to the other side! (laughs)", + }, + ] + ], + [ + [ + { + "speaker": "Speaker 1", + "text": "I am a little tired today (sighs).", + }, + {"speaker": "Speaker 2", "text": "Hang in there!"}, + ] + ], + ], + inputs=[dialogue], + cache_examples=False, + ) + + dialogue.submit(query, [dialogue], audio) + +if __name__ == "__main__": + demo.launch() diff --git a/gradio/__init__.py b/gradio/__init__.py index e6ab8bfbef..c5d7b1f9f1 100644 --- a/gradio/__init__.py +++ b/gradio/__init__.py @@ -31,6 +31,7 @@ Dataset, DateTime, DeepLinkButton, + Dialogue, DownloadButton, Dropdown, DuplicateButton, @@ -145,6 +146,7 @@ "Dataframe", "Dataset", "DateTime", + "Dialogue", "DeletedFileData", "DownloadButton", "DownloadData", diff --git a/gradio/components/__init__.py b/gradio/components/__init__.py index 5252175f3b..2456093686 100644 --- a/gradio/components/__init__.py +++ b/gradio/components/__init__.py @@ -21,6 +21,7 @@ from gradio.components.dataset import Dataset from gradio.components.datetime import DateTime from gradio.components.deep_link_button import DeepLinkButton +from gradio.components.dialogue import Dialogue from gradio.components.download_button import DownloadButton from gradio.components.dropdown import Dropdown from gradio.components.duplicate_button import DuplicateButton @@ -78,6 +79,7 @@ "Dataframe", "DataFrame", "Dataset", + "Dialogue", "DownloadButton", "DuplicateButton", "Fallback", diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py new file mode 100644 index 0000000000..65e8f17c35 --- /dev/null +++ b/gradio/components/dialogue.py @@ -0,0 +1,135 @@ +from collections.abc import Callable +from typing import List + +from gradio.components.base import server +from gradio.components.textbox import Textbox +from gradio.data_classes import GradioModel, GradioRootModel +from gradio.events import Events + + +class DialogueLine(GradioModel): + speaker: str + text: str + +class DialogueModel(GradioRootModel): + root: List[DialogueLine] + +class Dialogue(Textbox): + """ + Creates a dialogue components for users to enter dialogue between speakers. + + Demos: dia_dialogue_demo + """ + + EVENTS = [ + Events.change, + Events.input, + Events.submit, + ] + + data_model = DialogueModel + def __init__(self, + value: list[dict[str, str]] | Callable | None = None, + *, + speakers: list[str] | None = None, + formatter: Callable | None = None, + emotions: list[str] | None = None, + separator: str = " ", + label: str | None = "Dialogue", + info: str | None = "Type colon (:) in the dialogue line to see the available emotion and intonation tags", + placeholder: str | None = "Enter dialogue here...", + show_label: bool | None = None, + container: bool = True, + scale: int | None = None, + min_width: int = 160, + interactive: bool | None = None, + visible: bool = True, + elem_id: str | None = None, + autofocus: bool = False, + autoscroll: bool = True, + elem_classes: list[str] | str | None = None, + render: bool = True, + key: int | str | None = None, + max_lines: int | None = None, + show_submit_button: bool = True, + show_copy_button: bool = True, + ): + """ + Parameters: + value: Value of the dialogue. It is a list of dictionaries, each containing a 'speaker' key and a 'text' key. If a function is provided, the function will be called each time the app loads to set the initial value of this component. + speakers: The different speakers allowed in the dialogue. + formatter: A function that formats the dialogue line dictionary, e.g. {"speaker": "Speaker 1", "text": "Hello, how are you?"} into a string, e.g. "Speaker 1: Hello, how are you?". + emotions: The different emotions and intonation allowed in the dialogue. Emotions are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact emotion name expected by the AI model or inference function. + separator: The separator between the different dialogue lines used to join the formatted dialogue lines into a single string. For example, a newline character or empty string. + max_lines: maximum number of lines allowed in the dialogue. + placeholder: placeholder hint to provide behind textarea. + label: the label for this component, displayed above the component if `show_label` is `True` and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component corresponds to. + show_label: if True, will display the label. If False, the copy button is hidden as well as well as the label. + container: if True, will place the component in a container - providing some extra padding around the border. + scale: relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True. + min_width: minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first. + interactive: if True, will be rendered as an editable textbox; if False, editing will be disabled. If not provided, this is inferred based on whether the component is used as an input or output. + visible: If False, component will be hidden. + autofocus: If True, will focus on the textbox when the page loads. Use this carefully, as it can cause usability issues for sighted and non-sighted users. + elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles. + elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles. + render: If False, component will not render be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later. + key: if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved. + show_copy_button: If True, includes a copy button to copy the text in the textbox. Only applies if show_label is True. + show_submit_button: If True, includes a submit button to submit the dialogue. + autoscroll: If True, will automatically scroll to the bottom of the textbox when the value changes, unless the user scrolls up. If False, will not scroll to the bottom of the textbox when the value changes. + """ + super().__init__(value="", + label=label, info=info, placeholder=placeholder, show_label=show_label, container=container, scale=scale, min_width=min_width, interactive=interactive, visible=visible, elem_id=elem_id, autofocus=autofocus, autoscroll=autoscroll, elem_classes=elem_classes, render=render, key=key, max_lines=max_lines) + self.speakers = speakers + self.emotions = emotions or [] + self.formatter = formatter + self.separator = separator + self.show_submit_button = show_submit_button + self.show_copy_button = show_copy_button + if isinstance(value, Callable): + value = value() + self.value = self.preprocess(DialogueModel(root=value)) if value is not None else value # type: ignore + + def preprocess(self, payload: DialogueModel) -> str: + """ + This docstring is used to generate the docs for this custom component. + Parameters: + payload: the data to be preprocessed, sent from the frontend + Returns: + the data after preprocessing, sent to the user's function in the backend + """ + formatter = self.formatter + if not formatter: + formatter = self.default_formatter + return self.separator.join([formatter(line.speaker, line.text) for line in payload.root]) + + @staticmethod + def default_formatter(speaker: str, text: str) -> str: + return f"[{speaker}] {text}" + + @server + async def format(self, value: list[dict]): + """Format the dialogue in the frontend into a string that's copied to the clipboard.""" + data = DialogueModel(root=value) # type: ignore + return self.preprocess(data) + + def postprocess(self, value): + """ + This docstring is used to generate the docs for this custom component. + Parameters: + payload: the data to be postprocessed, sent from the user's function in the backend + Returns: + the data after postprocessing, sent to the frontend + """ + return value + + def as_example(self, value): + return self.preprocess(DialogueModel(root=value)) + + def example_payload(self): + return [{"speaker": "Speaker 1", "text": "Hello, how are you?"}, {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}] + + def example_value(self): + return [{"speaker": "Speaker 1", "text": "Hello, how are you?"}, {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}] + diff --git a/gradio/stubs/anyio.pyi b/gradio/stubs/anyio.pyi index 0a354c32ef..d83eb5143a 100644 --- a/gradio/stubs/anyio.pyi +++ b/gradio/stubs/anyio.pyi @@ -1,6 +1,6 @@ """ This module contains type hints for the anyio library. It was auto-generated so may include errors.""" -from typing import Any, Callable, Coroutine, TypeVar, overload, Optional, Union from types import TracebackType +from typing import Any, Callable, Coroutine, Optional, TypeVar, Union, overload T = TypeVar('T') T_Retval = TypeVar('T_Retval') diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte new file mode 100644 index 0000000000..0b9e692dda --- /dev/null +++ b/js/dialogue/Dialogue.svelte @@ -0,0 +1,499 @@ + + + + + + + diff --git a/js/dialogue/DropdownOptions.svelte b/js/dialogue/DropdownOptions.svelte new file mode 100644 index 0000000000..fb53ce203a --- /dev/null +++ b/js/dialogue/DropdownOptions.svelte @@ -0,0 +1,165 @@ + + + + +
+{#if show_options && !disabled} +
    dispatch("change", e)} + on:scroll={(e) => (list_scroll_y = e.currentTarget.scrollTop)} + style:top + style:bottom + style:max-height={`calc(${max_height}px - var(--window-padding))`} + style:width={input_width + "px"} + bind:this={listElement} + role="listbox" + > + {#each filtered_indices as index} +
  • + + ✓ + + {choices[index][0]} +
  • + {/each} +
+{/if} + + diff --git a/js/dialogue/Example.svelte b/js/dialogue/Example.svelte new file mode 100644 index 0000000000..6b6a4a08c9 --- /dev/null +++ b/js/dialogue/Example.svelte @@ -0,0 +1,19 @@ + + +
+ {value} +
+ + diff --git a/js/dialogue/Index.svelte b/js/dialogue/Index.svelte new file mode 100644 index 0000000000..b999988815 --- /dev/null +++ b/js/dialogue/Index.svelte @@ -0,0 +1,96 @@ + + + + + + + + + + {#if loading_status} + gradio.dispatch("clear_status", loading_status)} + /> + {/if} + + gradio.dispatch("change", value)} + on:input={() => gradio.dispatch("input")} + on:submit={() => gradio.dispatch("submit")} + on:blur={() => gradio.dispatch("blur")} + on:select={(e) => gradio.dispatch("select", e.detail)} + on:focus={() => gradio.dispatch("focus")} + on:copy={(e) => gradio.dispatch("copy", e.detail)} + disabled={!interactive} + /> + diff --git a/js/dialogue/main.ts b/js/dialogue/main.ts new file mode 100644 index 0000000000..7bdb9b1777 --- /dev/null +++ b/js/dialogue/main.ts @@ -0,0 +1,2 @@ +import { default as Index } from "./Index.svelte"; +export default Index; \ No newline at end of file diff --git a/js/dialogue/package.json b/js/dialogue/package.json new file mode 100644 index 0000000000..8d7988777f --- /dev/null +++ b/js/dialogue/package.json @@ -0,0 +1,42 @@ +{ + "name": "@gradio/dialogue", + "version": "0.0.1", + "description": "Gradio dialogue component", + "type": "module", + "author": "Gradio", + "license": "ISC", + "private": false, + "main_changeset": true, + "exports": { + ".": { + "gradio": "./Index.svelte", + "svelte": "./dist/Index.svelte", + "types": "./dist/Index.svelte.d.ts" + }, + "./example": { + "gradio": "./Example.svelte", + "svelte": "./dist/Example.svelte", + "types": "./dist/Example.svelte.d.ts" + }, + "./package.json": "./package.json" + }, + "dependencies": { + "@gradio/atoms": "workspace:^", + "@gradio/icons": "workspace:^", + "@gradio/statustracker": "workspace:^", + "@gradio/utils": "workspace:^", + "@gradio/dropdown": "workspace:^" + }, + "devDependencies": { + "@gradio/preview": "workspace:^" + }, + "peerDependencies": { + "svelte": "^4.0.0" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/gradio-app/gradio.git", + "directory": "js/dialogue" + } +} + diff --git a/js/dialogue/utils.ts b/js/dialogue/utils.ts new file mode 100644 index 0000000000..4908866429 --- /dev/null +++ b/js/dialogue/utils.ts @@ -0,0 +1,4 @@ +export interface DialogueLine { + speaker: string; + text: string; +} diff --git a/js/dropdown/Index.svelte b/js/dropdown/Index.svelte index 76708b053d..a3f0d36673 100644 --- a/js/dropdown/Index.svelte +++ b/js/dropdown/Index.svelte @@ -1,5 +1,6 @@ diff --git a/js/dropdown/shared/DropdownOptions.svelte b/js/dropdown/shared/DropdownOptions.svelte index 93cb90bec1..fb53ce203a 100644 --- a/js/dropdown/shared/DropdownOptions.svelte +++ b/js/dropdown/shared/DropdownOptions.svelte @@ -8,6 +8,8 @@ export let selected_indices: (string | number)[] = []; export let active_index: number | null = null; export let remember_scroll = false; + export let offset_from_top = 0; + export let from_top = false; let distance_from_top: number; let distance_from_bottom: number; @@ -19,10 +21,15 @@ let innerHeight: number; let list_scroll_y = 0; + function calculate_window_distance(): void { const { top: ref_top, bottom: ref_bottom } = refElement.getBoundingClientRect(); - distance_from_top = ref_top; + if (from_top) { + distance_from_top = offset_from_top; + } else { + distance_from_top = ref_top; + } distance_from_bottom = innerHeight - ref_bottom; } @@ -66,7 +73,8 @@ input_height = rect?.height || 0; input_width = rect?.width || 0; } - if (distance_from_bottom > distance_from_top) { + if (distance_from_bottom > distance_from_top || from_top) { + console.log("distance_from_top", distance_from_top); top = `${distance_from_top}px`; max_height = distance_from_bottom; bottom = null; diff --git a/package.json b/package.json index f9e169c3fc..82d1c28d5f 100644 --- a/package.json +++ b/package.json @@ -106,6 +106,7 @@ "@gradio/colorpicker": "workspace:^", "@gradio/column": "workspace:^", "@gradio/core": "workspace:^", + "@gradio/dialogue": "workspace:^", "@gradio/dataframe": "workspace:^", "@gradio/dataset": "workspace:^", "@gradio/datetime": "workspace:^", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e56d286b5f..d9ee766f65 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -239,6 +239,9 @@ importers: '@gradio/datetime': specifier: workspace:^ version: link:js/datetime + '@gradio/dialogue': + specifier: workspace:^ + version: link:js/dialogue '@gradio/downloadbutton': specifier: workspace:^ version: link:js/downloadbutton @@ -1312,6 +1315,31 @@ importers: specifier: workspace:^ version: link:../preview + js/dialogue: + dependencies: + '@gradio/atoms': + specifier: workspace:^ + version: link:../atoms + '@gradio/dropdown': + specifier: workspace:^ + version: link:../dropdown + '@gradio/icons': + specifier: workspace:^ + version: link:../icons + '@gradio/statustracker': + specifier: workspace:^ + version: link:../statustracker + '@gradio/utils': + specifier: workspace:^ + version: link:../utils + svelte: + specifier: ^4.0.0 + version: 4.2.15 + devDependencies: + '@gradio/preview': + specifier: workspace:^ + version: link:../preview + js/downloadbutton: dependencies: '@gradio/button': From 9d3fccfef8ca594476c6d782f02c09b246a50e7d Mon Sep 17 00:00:00 2001 From: gradio-pr-bot Date: Mon, 28 Apr 2025 16:50:59 +0000 Subject: [PATCH 02/70] add changeset --- .changeset/spotty-sides-sneeze.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/spotty-sides-sneeze.md diff --git a/.changeset/spotty-sides-sneeze.md b/.changeset/spotty-sides-sneeze.md new file mode 100644 index 0000000000..6f4c09999f --- /dev/null +++ b/.changeset/spotty-sides-sneeze.md @@ -0,0 +1,7 @@ +--- +"@gradio/dialogue": minor +"@gradio/dropdown": minor +"gradio": minor +--- + +feat:Add gr.Dialogue component From a56dd562b6ab0f63fb028559341098b6e84369d2 Mon Sep 17 00:00:00 2001 From: Freddy Boulton <41651716+freddyaboulton@users.noreply.github.com> Date: Mon, 28 Apr 2025 12:51:46 -0400 Subject: [PATCH 03/70] Add code --- js/dialogue/DropdownOptions.svelte | 165 ----------------------------- 1 file changed, 165 deletions(-) delete mode 100644 js/dialogue/DropdownOptions.svelte diff --git a/js/dialogue/DropdownOptions.svelte b/js/dialogue/DropdownOptions.svelte deleted file mode 100644 index fb53ce203a..0000000000 --- a/js/dialogue/DropdownOptions.svelte +++ /dev/null @@ -1,165 +0,0 @@ - - - - -
-{#if show_options && !disabled} -
    dispatch("change", e)} - on:scroll={(e) => (list_scroll_y = e.currentTarget.scrollTop)} - style:top - style:bottom - style:max-height={`calc(${max_height}px - var(--window-padding))`} - style:width={input_width + "px"} - bind:this={listElement} - role="listbox" - > - {#each filtered_indices as index} -
  • - - ✓ - - {choices[index][0]} -
  • - {/each} -
-{/if} - - From 632535cd07f8f920466626cb0143ba49e942a701 Mon Sep 17 00:00:00 2001 From: Freddy Boulton <41651716+freddyaboulton@users.noreply.github.com> Date: Mon, 28 Apr 2025 12:54:32 -0400 Subject: [PATCH 04/70] rename --- demo/dia_dialogue_demo/run.ipynb | 1 + demo/dia_dialogue_demo/{app.py => run.py} | 0 2 files changed, 1 insertion(+) create mode 100644 demo/dia_dialogue_demo/run.ipynb rename demo/dia_dialogue_demo/{app.py => run.py} (100%) diff --git a/demo/dia_dialogue_demo/run.ipynb b/demo/dia_dialogue_demo/run.ipynb new file mode 100644 index 0000000000..a016628a25 --- /dev/null +++ b/demo/dia_dialogue_demo/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dia_dialogue_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import httpx\n", "\n", "\n", "emotions = [\n", " \"(laughs)\",\n", " \"(clears throat)\",\n", " \"(sighs)\",\n", " \"(gasps)\",\n", " \"(coughs)\",\n", " \"(singing)\",\n", " \"(sings)\",\n", " \"(mumbles)\",\n", " \"(beep)\",\n", " \"(groans)\",\n", " \"(sniffs)\",\n", " \"(claps)\",\n", " \"(screams)\",\n", " \"(inhales)\",\n", " \"(exhales)\",\n", " \"(applause)\",\n", " \"(burps)\",\n", " \"(humming)\",\n", " \"(sneezes)\",\n", " \"(chuckle)\",\n", " \"(whistles)\",\n", "]\n", "speakers = [\"Speaker 1\", \"Speaker 2\"]\n", "\n", "client = httpx.AsyncClient(timeout=180)\n", "API_URL = \"https://router.huggingface.co/fal-ai/fal-ai/dia-tts\"\n", "\n", "\n", "async def query(dialogue: str, token: gr.OAuthToken | None):\n", " if token is None:\n", " raise gr.Error(\n", " \"No token provided. Use Sign in with Hugging Face to get a token.\"\n", " )\n", " headers = {\n", " \"Authorization\": f\"Bearer {token.token}\",\n", " }\n", " response = await client.post(API_URL, headers=headers, json={\"text\": dialogue})\n", " url = response.json()[\"audio\"][\"url\"]\n", " print(\"URL: \", url)\n", " return url\n", "\n", "\n", "def formatter(speaker, text):\n", " speaker = speaker.split(\" \")[1]\n", " return f\"[S{speaker}] {text}\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Sidebar():\n", " login_button = gr.LoginButton()\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " \"Dancing Dia Dialogue Generation Model\n", "

\n", "

Model by Nari Labs. Powered by HF and Fal AI API.

\n", "

Dia is a dialogue generation model that can generate realistic dialogue between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life .

\n", " \"\"\"\n", " )\n", " with gr.Row():\n", " with gr.Column():\n", " dialogue = gr.Dialogue(\n", " speakers=speakers, emotions=emotions, formatter=formatter\n", " )\n", " with gr.Column():\n", " with gr.Row():\n", " audio = gr.Audio(label=\"Audio\")\n", " with gr.Row():\n", " gr.DeepLinkButton(value=\"Share Audio via Link\")\n", " with gr.Row():\n", " gr.Examples(\n", " examples=[\n", " [\n", " [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Why did the chicken cross the road?\",\n", " },\n", " {\"speaker\": \"Speaker 2\", \"text\": \"I don't know!\"},\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"to get to the other side! (laughs)\",\n", " },\n", " ]\n", " ],\n", " [\n", " [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"I am a little tired today (sighs).\",\n", " },\n", " {\"speaker\": \"Speaker 2\", \"text\": \"Hang in there!\"},\n", " ]\n", " ],\n", " ],\n", " inputs=[dialogue],\n", " cache_examples=False,\n", " )\n", "\n", " dialogue.submit(query, [dialogue], audio)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dia_dialogue_demo/app.py b/demo/dia_dialogue_demo/run.py similarity index 100% rename from demo/dia_dialogue_demo/app.py rename to demo/dia_dialogue_demo/run.py From d18c6bd5daae0d15cb9f12dc5df6c3af12d6922e Mon Sep 17 00:00:00 2001 From: Freddy Boulton <41651716+freddyaboulton@users.noreply.github.com> Date: Mon, 28 Apr 2025 13:24:01 -0400 Subject: [PATCH 05/70] lint --- gradio/components/dialogue.py | 58 ++++++-- js/dialogue/Dialogue.svelte | 174 ++++++++++++---------- js/dialogue/Index.svelte | 5 +- js/dialogue/main.ts | 2 +- js/dialogue/package.json | 79 +++++----- js/dialogue/utils.ts | 4 +- js/dropdown/shared/DropdownOptions.svelte | 1 - 7 files changed, 183 insertions(+), 140 deletions(-) diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py index 65e8f17c35..05c9dedce5 100644 --- a/gradio/components/dialogue.py +++ b/gradio/components/dialogue.py @@ -1,5 +1,6 @@ +from __future__ import annotations + from collections.abc import Callable -from typing import List from gradio.components.base import server from gradio.components.textbox import Textbox @@ -11,8 +12,10 @@ class DialogueLine(GradioModel): speaker: str text: str + class DialogueModel(GradioRootModel): - root: List[DialogueLine] + root: list[DialogueLine] + class Dialogue(Textbox): """ @@ -28,7 +31,9 @@ class Dialogue(Textbox): ] data_model = DialogueModel - def __init__(self, + + def __init__( + self, value: list[dict[str, str]] | Callable | None = None, *, speakers: list[str] | None = None, @@ -36,7 +41,8 @@ def __init__(self, emotions: list[str] | None = None, separator: str = " ", label: str | None = "Dialogue", - info: str | None = "Type colon (:) in the dialogue line to see the available emotion and intonation tags", + info: str + | None = "Type colon (:) in the dialogue line to see the available emotion and intonation tags", placeholder: str | None = "Enter dialogue here...", show_label: bool | None = None, container: bool = True, @@ -53,7 +59,7 @@ def __init__(self, max_lines: int | None = None, show_submit_button: bool = True, show_copy_button: bool = True, - ): + ): """ Parameters: value: Value of the dialogue. It is a list of dictionaries, each containing a 'speaker' key and a 'text' key. If a function is provided, the function will be called each time the app loads to set the initial value of this component. @@ -79,8 +85,25 @@ def __init__(self, show_submit_button: If True, includes a submit button to submit the dialogue. autoscroll: If True, will automatically scroll to the bottom of the textbox when the value changes, unless the user scrolls up. If False, will not scroll to the bottom of the textbox when the value changes. """ - super().__init__(value="", - label=label, info=info, placeholder=placeholder, show_label=show_label, container=container, scale=scale, min_width=min_width, interactive=interactive, visible=visible, elem_id=elem_id, autofocus=autofocus, autoscroll=autoscroll, elem_classes=elem_classes, render=render, key=key, max_lines=max_lines) + super().__init__( + value="", + label=label, + info=info, + placeholder=placeholder, + show_label=show_label, + container=container, + scale=scale, + min_width=min_width, + interactive=interactive, + visible=visible, + elem_id=elem_id, + autofocus=autofocus, + autoscroll=autoscroll, + elem_classes=elem_classes, + render=render, + key=key, + max_lines=max_lines, + ) self.speakers = speakers self.emotions = emotions or [] self.formatter = formatter @@ -89,7 +112,9 @@ def __init__(self, self.show_copy_button = show_copy_button if isinstance(value, Callable): value = value() - self.value = self.preprocess(DialogueModel(root=value)) if value is not None else value # type: ignore + self.value = ( + self.preprocess(DialogueModel(root=value)) if value is not None else value + ) # type: ignore def preprocess(self, payload: DialogueModel) -> str: """ @@ -102,7 +127,9 @@ def preprocess(self, payload: DialogueModel) -> str: formatter = self.formatter if not formatter: formatter = self.default_formatter - return self.separator.join([formatter(line.speaker, line.text) for line in payload.root]) + return self.separator.join( + [formatter(line.speaker, line.text) for line in payload.root] + ) @staticmethod def default_formatter(speaker: str, text: str) -> str: @@ -111,7 +138,7 @@ def default_formatter(speaker: str, text: str) -> str: @server async def format(self, value: list[dict]): """Format the dialogue in the frontend into a string that's copied to the clipboard.""" - data = DialogueModel(root=value) # type: ignore + data = DialogueModel(root=value) # type: ignore return self.preprocess(data) def postprocess(self, value): @@ -128,8 +155,13 @@ def as_example(self, value): return self.preprocess(DialogueModel(root=value)) def example_payload(self): - return [{"speaker": "Speaker 1", "text": "Hello, how are you?"}, {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}] + return [ + {"speaker": "Speaker 1", "text": "Hello, how are you?"}, + {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}, + ] def example_value(self): - return [{"speaker": "Speaker 1", "text": "Hello, how are you?"}, {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}] - + return [ + {"speaker": "Speaker 1", "text": "Hello, how are you?"}, + {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}, + ] diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index 0b9e692dda..2ceaab506a 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -1,8 +1,5 @@ - Date: Mon, 28 Apr 2025 14:21:57 -0400 Subject: [PATCH 06/70] lint --- gradio/components/dialogue.py | 4 ++-- js/dialogue/Dialogue.svelte | 2 +- js/dialogue/Index.svelte | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py index 05c9dedce5..c0423edaeb 100644 --- a/gradio/components/dialogue.py +++ b/gradio/components/dialogue.py @@ -113,8 +113,8 @@ def __init__( if isinstance(value, Callable): value = value() self.value = ( - self.preprocess(DialogueModel(root=value)) if value is not None else value - ) # type: ignore + self.preprocess(DialogueModel(root=value)) if value is not None else value # type: ignore + ) def preprocess(self, payload: DialogueModel) -> str: """ diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index 2ceaab506a..19f6c4c68a 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -309,7 +309,7 @@ />
- {#if !!!max_lines || (max_lines && i < max_lines - 1)} + {#if max_lines == undefined || (max_lines && i < max_lines - 1)}
{#if max_lines == undefined || (max_lines && i < max_lines - 1)} -
+
{/if} {#if i > 0} -
+
- {#if show_submit_button} + {#if show_submit_button && !disabled}
{/if} - {#if i > 0} -
- -
- {/if} +
+ +
{/each}
@@ -578,7 +568,12 @@ {disabled} on:input={(event) => { handle_input(event, 0); - value = textbox_value; + if (speakers.length === 0) { + value = textbox_value; + } else { + const parsed_lines = string_to_dialogue_lines(textbox_value); + value = [...parsed_lines]; + } }} on:focus={(event) => handle_input(event, 0)} on:keydown={(event) => { @@ -749,7 +744,6 @@ } .action-column { - flex: 0 0 40px; display: flex; justify-content: center; } From 6919e9605e65ceae2794b753935743980870d3e7 Mon Sep 17 00:00:00 2001 From: Dawood Date: Mon, 4 Aug 2025 12:09:27 -0400 Subject: [PATCH 62/70] fixes --- demo/dialogue_component/run.ipynb | 1 + demo/dialogue_component/run.py | 6 ++++++ gradio/components/dialogue.py | 6 ++++++ js/dialogue/Dialogue.svelte | 7 +------ 4 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 demo/dialogue_component/run.ipynb create mode 100644 demo/dialogue_component/run.py diff --git a/demo/dialogue_component/run.ipynb b/demo/dialogue_component/run.ipynb new file mode 100644 index 0000000000..3771eeb1f5 --- /dev/null +++ b/demo/dialogue_component/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "with gr.Blocks() as demo:\n", "\n", " gr.Dialogue(speakers=[\"Speaker 1\", \"Speaker 2\"], formatter=None, tags=[\"laughs\", \"sighs\", \"clears throat\"])\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_component/run.py b/demo/dialogue_component/run.py new file mode 100644 index 0000000000..1115c52213 --- /dev/null +++ b/demo/dialogue_component/run.py @@ -0,0 +1,6 @@ +import gradio as gr + +with gr.Blocks() as demo: + + gr.Dialogue(speakers=["Speaker 1", "Speaker 2"], formatter=None, tags=["laughs", "sighs", "clears throat"]) +demo.launch() diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py index 6fe944495f..e09050a24a 100644 --- a/gradio/components/dialogue.py +++ b/gradio/components/dialogue.py @@ -122,6 +122,12 @@ def preprocess(self, payload: DialogueModel) -> str: # type: ignore Returns: Returns the dialogue as a string. """ + if (isinstance(payload.root, str) and payload.root == "") or ( + isinstance(payload.root, list) + and len(payload.root) == 1 + and payload.root[0].text == "" + ): + return "" formatter = self.formatter if not formatter: formatter = self.default_formatter diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index a8700f87df..535a0decf5 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -43,10 +43,6 @@ checked = true; } - $: if (value.length === 0 && dialogue_lines.length === 0) { - dialogue_lines = [{ speaker: speakers[0], text: "" }]; - } - $: { if (dialogue_lines.length > input_elements.length) { input_elements = [ @@ -343,7 +339,7 @@ $: if (JSON.stringify(value) !== old_value) { old_value = JSON.stringify(value); - if (typeof value !== "string") { + if (value && typeof value !== "string") { dialogue_lines = [...value]; const formatted = value .map((line: DialogueLine) => `${line.speaker}: ${line.text}`) @@ -371,7 +367,6 @@ if (!text.trim()) { return [{ speaker: speakers[0] || "", text: "" }]; } - const dialogueLines: DialogueLine[] = []; const speakerMatches = []; const speakerRegex = /\b(Speaker\s+\d+):\s*/g; From dcdd692636ffd868133ae361462cbc207c970185 Mon Sep 17 00:00:00 2001 From: Dawood Date: Mon, 4 Aug 2025 15:02:36 -0400 Subject: [PATCH 63/70] changes --- gradio/components/dialogue.py | 2 ++ js/dialogue/Dialogue.svelte | 68 +++++++++++++++++++++++++++-------- js/dialogue/Index.svelte | 2 +- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py index e09050a24a..c06c47d098 100644 --- a/gradio/components/dialogue.py +++ b/gradio/components/dialogue.py @@ -114,6 +114,8 @@ def __init__( self.value = ( self.preprocess(DialogueModel(root=value)) if value is not None else value # type: ignore ) + if not interactive: + self.info = None def preprocess(self, payload: DialogueModel) -> str: # type: ignore """ diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index 535a0decf5..18a025aef8 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -10,9 +10,9 @@ export let speakers: string[] = []; export let tags: string[] = []; - export let value: DialogueLine[] | string = ""; + export let value: DialogueLine[] | string = []; export let value_is_output = false; - export let placeholder = "Type here..."; + export let placeholder: string | undefined = undefined; export let label: string; export let info: string | undefined = undefined; export let disabled = false; @@ -38,9 +38,29 @@ let textarea_element: HTMLTextAreaElement; let old_value = JSON.stringify(value); let offset_from_top = 0; + let copied = false; + let timer: any; + let textbox_value = ""; if (speakers.length === 0) { checked = true; + value = ""; + } + + $: if ( + value && + value.length === 0 && + dialogue_lines.length === 0 && + speakers.length !== 0 + ) { + dialogue_lines = [{ speaker: speakers[0], text: "" }]; + value = [...dialogue_lines]; + if (typeof value !== "string") { + const formatted = value + .map((line: DialogueLine) => `${line.speaker}: ${line.text}`) + .join(" "); + textbox_value = formatted; + } } $: { @@ -70,6 +90,12 @@ { speaker: newSpeaker, text: "" }, ...dialogue_lines.slice(index + 1) ]; + if (typeof value !== "string") { + const formatted = value + .map((line: DialogueLine) => `${line.speaker}: ${line.text}`) + .join(" "); + textbox_value = formatted; + } tick().then(() => { if (input_elements[index + 1]) { @@ -83,6 +109,12 @@ ...dialogue_lines.slice(0, index), ...dialogue_lines.slice(index + 1) ]; + if (typeof value !== "string") { + const formatted = value + .map((line: DialogueLine) => `${line.speaker}: ${line.text}`) + .join(" "); + textbox_value = formatted; + } } function update_line( @@ -92,6 +124,12 @@ ): void { dialogue_lines[index][key] = value; dialogue_lines = [...dialogue_lines]; + if (typeof value !== "string") { + const formatted = dialogue_lines + .map((line: DialogueLine) => `${line.speaker}: ${line.text}`) + .join(" "); + textbox_value = formatted; + } } function handle_input(event: Event, index: number): void { @@ -303,10 +341,6 @@ } } - let copied = false; - let timer: any; - let textbox_value = ""; - const dispatch = createEventDispatcher<{ change: DialogueLine[] | string; submit: undefined; @@ -325,19 +359,25 @@ } function sync_value(dialogueLines: DialogueLine[]): void { - value = [...dialogueLines]; - if (JSON.stringify(value) !== old_value) { - handle_change(); - old_value = JSON.stringify(value); - value_to_string(value).then((result) => { - textbox_value = result; - }); + if (speakers.length !== 0) { + value = [...dialogueLines]; + if (JSON.stringify(value) !== old_value) { + handle_change(); + old_value = JSON.stringify(value); + const formatted = value + .map((line: DialogueLine) => `${line.speaker}: ${line.text}`) + .join(" "); + textbox_value = formatted; + } } } $: sync_value(dialogue_lines); $: if (JSON.stringify(value) !== old_value) { + if (value == null) { + dialogue_lines = []; + } old_value = JSON.stringify(value); if (value && typeof value !== "string") { dialogue_lines = [...value]; @@ -347,7 +387,7 @@ textbox_value = formatted; } else { textbox_value = value; - if (!checked && speakers.length > 0) { + if (!checked && speakers.length > 0 && value) { dialogue_lines = string_to_dialogue_lines(value); } } diff --git a/js/dialogue/Index.svelte b/js/dialogue/Index.svelte index 2ed2984c0e..7e7a9f92bf 100644 --- a/js/dialogue/Index.svelte +++ b/js/dialogue/Index.svelte @@ -35,7 +35,7 @@ export let elem_id = ""; export let elem_classes: string[] = []; export let visible = true; - export let value: DialogueLine[] | string = ""; + export let value: DialogueLine[] | string = []; export let show_label: boolean; export let max_lines: number | undefined = undefined; export let container = true; From d63a6eb561515654ac6b26d96af2defedce0a425 Mon Sep 17 00:00:00 2001 From: Dawood Date: Mon, 4 Aug 2025 15:18:54 -0400 Subject: [PATCH 64/70] demos --- demo/dialogue_component/run.ipynb | 2 +- demo/dialogue_component/run.py | 2 +- .../requirements.txt | 9 ++ demo/dialogue_diarization_demo/run.ipynb | 1 + demo/dialogue_diarization_demo/run.py | 125 ++++++++++++++++++ 5 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 demo/dialogue_diarization_demo/requirements.txt create mode 100644 demo/dialogue_diarization_demo/run.ipynb create mode 100644 demo/dialogue_diarization_demo/run.py diff --git a/demo/dialogue_component/run.ipynb b/demo/dialogue_component/run.ipynb index 3771eeb1f5..1383cb2f97 100644 --- a/demo/dialogue_component/run.ipynb +++ b/demo/dialogue_component/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "with gr.Blocks() as demo:\n", "\n", " gr.Dialogue(speakers=[\"Speaker 1\", \"Speaker 2\"], formatter=None, tags=[\"laughs\", \"sighs\", \"clears throat\"])\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "with gr.Blocks() as demo:\n", "\n", " gr.Dialogue(speakers=[\"Speaker 1\", \"Speaker 2\"], formatter=None, tags=[\"(laughs)\", \"(sighs)\", \"(clears throat)\"])\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_component/run.py b/demo/dialogue_component/run.py index 1115c52213..f09ddaf11b 100644 --- a/demo/dialogue_component/run.py +++ b/demo/dialogue_component/run.py @@ -2,5 +2,5 @@ with gr.Blocks() as demo: - gr.Dialogue(speakers=["Speaker 1", "Speaker 2"], formatter=None, tags=["laughs", "sighs", "clears throat"]) + gr.Dialogue(speakers=["Speaker 1", "Speaker 2"], formatter=None, tags=["(laughs)", "(sighs)", "(clears throat)"]) demo.launch() diff --git a/demo/dialogue_diarization_demo/requirements.txt b/demo/dialogue_diarization_demo/requirements.txt new file mode 100644 index 0000000000..548affd021 --- /dev/null +++ b/demo/dialogue_diarization_demo/requirements.txt @@ -0,0 +1,9 @@ +gradio +torch +torchaudio +pyannote.audio +openai-whisper +librosa +numpy +transformers +speechbrain \ No newline at end of file diff --git a/demo/dialogue_diarization_demo/run.ipynb b/demo/dialogue_diarization_demo/run.ipynb new file mode 100644 index 0000000000..b2d4890d11 --- /dev/null +++ b/demo/dialogue_diarization_demo/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_diarization_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio torch torchaudio pyannote.audio openai-whisper librosa numpy transformers speechbrain "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from pyannote.audio import Pipeline\n", "import whisper\n", "\n", "diarization_pipeline = None\n", "whisper_model = None\n", "\n", "\n", "def load_models():\n", " global diarization_pipeline, whisper_model # noqa: PLW0603\n", "\n", " if diarization_pipeline is None:\n", " diarization_pipeline = Pipeline.from_pretrained(\n", " \"pyannote/speaker-diarization-3.1\", use_auth_token=True\n", " )\n", "\n", " if whisper_model is None:\n", " whisper_model = whisper.load_model(\"base\")\n", "\n", "\n", "def real_diarization(audio_file_path: str) -> list[dict[str, str]]:\n", " try:\n", " load_models()\n", "\n", " if diarization_pipeline is None or whisper_model is None:\n", " raise Exception(\"Failed to load models\")\n", "\n", " diarization = diarization_pipeline(audio_file_path)\n", "\n", " transcription = whisper_model.transcribe(audio_file_path)\n", " segments = transcription[\"segments\"]\n", "\n", " dialogue_segments = []\n", " speaker_mapping = {}\n", " speaker_counter = 1\n", "\n", " for segment in segments:\n", " start_time = segment[\"start\"]\n", " end_time = segment[\"end\"]\n", " text = segment[\"text\"].strip()\n", "\n", " speaker = \"Speaker 1\"\n", " for turn, _, speaker_label in diarization.itertracks(yield_label=True):\n", " if (\n", " turn.start <= start_time <= turn.end\n", " or turn.start <= end_time <= turn.end\n", " ):\n", " if speaker_label not in speaker_mapping:\n", " speaker_mapping[speaker_label] = f\"Speaker {speaker_counter}\"\n", " speaker_counter += 1\n", " speaker = speaker_mapping[speaker_label]\n", " break\n", "\n", " if text:\n", " dialogue_segments.append({\"speaker\": speaker, \"text\": text})\n", "\n", " return dialogue_segments\n", "\n", " except Exception as e:\n", " print(f\"Error in diarization: {str(e)}\")\n", " return []\n", "\n", "\n", "def process_audio(audio_file):\n", " if audio_file is None:\n", " gr.Warning(\"Please upload an audio file first.\")\n", " return []\n", "\n", " try:\n", " dialogue_segments = real_diarization(audio_file)\n", " return dialogue_segments\n", " except Exception as e:\n", " gr.Error(f\"Error processing audio: {str(e)}\")\n", " return []\n", "\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", " \"Speaker 3\",\n", " \"Speaker 4\",\n", " \"Speaker 5\",\n", " \"Speaker 6\",\n", "]\n", "tags = [\n", " \"(pause)\",\n", " \"(background noise)\",\n", " \"(unclear)\",\n", " \"(overlap)\",\n", " \"(phone ringing)\",\n", " \"(door closing)\",\n", " \"(music)\",\n", " \"(applause)\",\n", " \"(laughter)\",\n", "]\n", "\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "\n", "with gr.Blocks(title=\"Audio Diarization Demo\") as demo:\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " audio_input = gr.Audio(\n", " label=\"Upload Audio File\",\n", " type=\"filepath\",\n", " sources=[\"upload\", \"microphone\"],\n", " )\n", "\n", " process_btn = gr.Button(\"\ud83d\udd0d Analyze Speakers\", variant=\"primary\", size=\"lg\")\n", "\n", " with gr.Column(scale=2):\n", " dialogue_output = gr.Dialogue(\n", " speakers=speakers,\n", " tags=tags,\n", " formatter=format_speaker,\n", " label=\"AI-generated speaker-separated conversation\",\n", " value=[],\n", " )\n", "\n", " process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output])\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_diarization_demo/run.py b/demo/dialogue_diarization_demo/run.py new file mode 100644 index 0000000000..bff68034d5 --- /dev/null +++ b/demo/dialogue_diarization_demo/run.py @@ -0,0 +1,125 @@ +import gradio as gr +from pyannote.audio import Pipeline +import whisper + +diarization_pipeline = None +whisper_model = None + + +def load_models(): + global diarization_pipeline, whisper_model # noqa: PLW0603 + + if diarization_pipeline is None: + diarization_pipeline = Pipeline.from_pretrained( + "pyannote/speaker-diarization-3.1", use_auth_token=True + ) + + if whisper_model is None: + whisper_model = whisper.load_model("base") + + +def real_diarization(audio_file_path: str) -> list[dict[str, str]]: + try: + load_models() + + if diarization_pipeline is None or whisper_model is None: + raise Exception("Failed to load models") + + diarization = diarization_pipeline(audio_file_path) + + transcription = whisper_model.transcribe(audio_file_path) + segments = transcription["segments"] + + dialogue_segments = [] + speaker_mapping = {} + speaker_counter = 1 + + for segment in segments: + start_time = segment["start"] + end_time = segment["end"] + text = segment["text"].strip() + + speaker = "Speaker 1" + for turn, _, speaker_label in diarization.itertracks(yield_label=True): + if ( + turn.start <= start_time <= turn.end + or turn.start <= end_time <= turn.end + ): + if speaker_label not in speaker_mapping: + speaker_mapping[speaker_label] = f"Speaker {speaker_counter}" + speaker_counter += 1 + speaker = speaker_mapping[speaker_label] + break + + if text: + dialogue_segments.append({"speaker": speaker, "text": text}) + + return dialogue_segments + + except Exception as e: + print(f"Error in diarization: {str(e)}") + return [] + + +def process_audio(audio_file): + if audio_file is None: + gr.Warning("Please upload an audio file first.") + return [] + + try: + dialogue_segments = real_diarization(audio_file) + return dialogue_segments + except Exception as e: + gr.Error(f"Error processing audio: {str(e)}") + return [] + + +speakers = [ + "Speaker 1", + "Speaker 2", + "Speaker 3", + "Speaker 4", + "Speaker 5", + "Speaker 6", +] +tags = [ + "(pause)", + "(background noise)", + "(unclear)", + "(overlap)", + "(phone ringing)", + "(door closing)", + "(music)", + "(applause)", + "(laughter)", +] + + +def format_speaker(speaker, text): + return f"{speaker}: {text}" + + +with gr.Blocks(title="Audio Diarization Demo") as demo: + with gr.Row(): + with gr.Column(scale=1): + audio_input = gr.Audio( + label="Upload Audio File", + type="filepath", + sources=["upload", "microphone"], + ) + + process_btn = gr.Button("🔍 Analyze Speakers", variant="primary", size="lg") + + with gr.Column(scale=2): + dialogue_output = gr.Dialogue( + speakers=speakers, + tags=tags, + formatter=format_speaker, + label="AI-generated speaker-separated conversation", + value=[], + ) + + process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output]) + +if __name__ == "__main__": + demo.launch() From ab81ab1bc82d37a4db0b211f03a2afc9217230a6 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Mon, 4 Aug 2025 13:21:06 -0700 Subject: [PATCH 65/70] visual changes --- demo/dialogue_mock_diarization/run.ipynb | 1 + demo/dialogue_mock_diarization/run.py | 48 ++++++++++++++++++++++++ js/dialogue/Dialogue.svelte | 35 ++++++++++------- 3 files changed, 71 insertions(+), 13 deletions(-) create mode 100644 demo/dialogue_mock_diarization/run.ipynb create mode 100644 demo/dialogue_mock_diarization/run.py diff --git a/demo/dialogue_mock_diarization/run.ipynb b/demo/dialogue_mock_diarization/run.ipynb new file mode 100644 index 0000000000..b2d4890d11 --- /dev/null +++ b/demo/dialogue_mock_diarization/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_diarization_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio torch torchaudio pyannote.audio openai-whisper librosa numpy transformers speechbrain "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from pyannote.audio import Pipeline\n", "import whisper\n", "\n", "diarization_pipeline = None\n", "whisper_model = None\n", "\n", "\n", "def load_models():\n", " global diarization_pipeline, whisper_model # noqa: PLW0603\n", "\n", " if diarization_pipeline is None:\n", " diarization_pipeline = Pipeline.from_pretrained(\n", " \"pyannote/speaker-diarization-3.1\", use_auth_token=True\n", " )\n", "\n", " if whisper_model is None:\n", " whisper_model = whisper.load_model(\"base\")\n", "\n", "\n", "def real_diarization(audio_file_path: str) -> list[dict[str, str]]:\n", " try:\n", " load_models()\n", "\n", " if diarization_pipeline is None or whisper_model is None:\n", " raise Exception(\"Failed to load models\")\n", "\n", " diarization = diarization_pipeline(audio_file_path)\n", "\n", " transcription = whisper_model.transcribe(audio_file_path)\n", " segments = transcription[\"segments\"]\n", "\n", " dialogue_segments = []\n", " speaker_mapping = {}\n", " speaker_counter = 1\n", "\n", " for segment in segments:\n", " start_time = segment[\"start\"]\n", " end_time = segment[\"end\"]\n", " text = segment[\"text\"].strip()\n", "\n", " speaker = \"Speaker 1\"\n", " for turn, _, speaker_label in diarization.itertracks(yield_label=True):\n", " if (\n", " turn.start <= start_time <= turn.end\n", " or turn.start <= end_time <= turn.end\n", " ):\n", " if speaker_label not in speaker_mapping:\n", " speaker_mapping[speaker_label] = f\"Speaker {speaker_counter}\"\n", " speaker_counter += 1\n", " speaker = speaker_mapping[speaker_label]\n", " break\n", "\n", " if text:\n", " dialogue_segments.append({\"speaker\": speaker, \"text\": text})\n", "\n", " return dialogue_segments\n", "\n", " except Exception as e:\n", " print(f\"Error in diarization: {str(e)}\")\n", " return []\n", "\n", "\n", "def process_audio(audio_file):\n", " if audio_file is None:\n", " gr.Warning(\"Please upload an audio file first.\")\n", " return []\n", "\n", " try:\n", " dialogue_segments = real_diarization(audio_file)\n", " return dialogue_segments\n", " except Exception as e:\n", " gr.Error(f\"Error processing audio: {str(e)}\")\n", " return []\n", "\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", " \"Speaker 3\",\n", " \"Speaker 4\",\n", " \"Speaker 5\",\n", " \"Speaker 6\",\n", "]\n", "tags = [\n", " \"(pause)\",\n", " \"(background noise)\",\n", " \"(unclear)\",\n", " \"(overlap)\",\n", " \"(phone ringing)\",\n", " \"(door closing)\",\n", " \"(music)\",\n", " \"(applause)\",\n", " \"(laughter)\",\n", "]\n", "\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "\n", "with gr.Blocks(title=\"Audio Diarization Demo\") as demo:\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " audio_input = gr.Audio(\n", " label=\"Upload Audio File\",\n", " type=\"filepath\",\n", " sources=[\"upload\", \"microphone\"],\n", " )\n", "\n", " process_btn = gr.Button(\"\ud83d\udd0d Analyze Speakers\", variant=\"primary\", size=\"lg\")\n", "\n", " with gr.Column(scale=2):\n", " dialogue_output = gr.Dialogue(\n", " speakers=speakers,\n", " tags=tags,\n", " formatter=format_speaker,\n", " label=\"AI-generated speaker-separated conversation\",\n", " value=[],\n", " )\n", "\n", " process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output])\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_mock_diarization/run.py b/demo/dialogue_mock_diarization/run.py new file mode 100644 index 0000000000..58ec0f9b5d --- /dev/null +++ b/demo/dialogue_mock_diarization/run.py @@ -0,0 +1,48 @@ +import gradio as gr + +speakers = [ + "Speaker 1", + "Speaker 2", +] + +def format_speaker(speaker, text): + return f"{speaker}: {text}" + +def mock_diarization(audio): + return [ + { + "speaker": "Speaker 1", + "text": "Hello, how are you?", + }, + { + "speaker": "Speaker 2", + "text": "I'm fine, thank you!", + }, + { + "speaker": "Speaker 1", + "text": "What's your name?", + }, + { + "speaker": "Speaker 2", + "text": "My name is John Doe.", + }, + { + "speaker": "Speaker 1", + "text": "Nice to meet you!", + }, + { + "speaker": "Speaker 2", + "text": "Nice to meet you!", + }, + ] + +demo = gr.Interface( + fn=mock_diarization, + inputs=[gr.Audio(sources=["microphone"])], + outputs=[gr.Dialogue(speakers=speakers, tags=None, formatter=format_speaker)], + title="Mock Speech Diarization", + description="Mock speech diarization", +) + +if __name__ == "__main__": + demo.launch() diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index 18a025aef8..7fc68691a2 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -495,6 +495,11 @@ {label} + {#if speakers.length !== 0} +
+ +
+ {/if} {#if !checked}
{#each dialogue_lines as line, i} @@ -656,19 +661,13 @@
{/if} -
-
- + {#if show_submit_button && !disabled} +
+
- - {#if show_submit_button && !disabled} -
- -
- {/if} -
+ {/if} From ffa588d0c63570e6cff9d9e0127a8528644cd423 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Mon, 4 Aug 2025 13:26:41 -0700 Subject: [PATCH 66/70] background color --- js/dialogue/Dialogue.svelte | 45 ++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index 7fc68691a2..a74aaec526 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -41,6 +41,29 @@ let copied = false; let timer: any; let textbox_value = ""; + + // Default color palette for speakers + const defaultColors = [ + "#fef3c7", // amber-100 + "#dbeafe", // blue-100 + "#d1fae5", // emerald-100 + "#fce7f3", // pink-100 + "#e9d5ff", // purple-100 + "#fed7aa", // orange-100 + "#e0e7ff", // indigo-100 + "#cffafe", // cyan-100 + "#f3e8ff", // violet-100 + "#fecaca" // red-100 + ]; + + // Create color mapping for speakers + let speakerColors: Record = {}; + $: { + speakerColors = {}; + speakers.forEach((speaker, index) => { + speakerColors[speaker] = defaultColors[index % defaultColors.length]; + }); + } if (speakers.length === 0) { checked = true; @@ -503,7 +526,7 @@ {#if !checked}
{#each dialogue_lines as line, i} -
+
Date: Mon, 4 Aug 2025 13:41:49 -0700 Subject: [PATCH 67/70] changes --- demo/dialogue_diarization_demo/run.ipynb | 2 +- demo/dialogue_diarization_demo/run.py | 1 + demo/dialogue_mock_diarization/run.ipynb | 2 +- demo/diff_texts/run.ipynb | 2 +- demo/diff_texts/run.py | 4 +- gradio/components/dialogue.py | 3 ++ js/dialogue/Dialogue.svelte | 65 ++++++++++++++++-------- 7 files changed, 52 insertions(+), 27 deletions(-) diff --git a/demo/dialogue_diarization_demo/run.ipynb b/demo/dialogue_diarization_demo/run.ipynb index b2d4890d11..c38e55a4fa 100644 --- a/demo/dialogue_diarization_demo/run.ipynb +++ b/demo/dialogue_diarization_demo/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_diarization_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio torch torchaudio pyannote.audio openai-whisper librosa numpy transformers speechbrain "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from pyannote.audio import Pipeline\n", "import whisper\n", "\n", "diarization_pipeline = None\n", "whisper_model = None\n", "\n", "\n", "def load_models():\n", " global diarization_pipeline, whisper_model # noqa: PLW0603\n", "\n", " if diarization_pipeline is None:\n", " diarization_pipeline = Pipeline.from_pretrained(\n", " \"pyannote/speaker-diarization-3.1\", use_auth_token=True\n", " )\n", "\n", " if whisper_model is None:\n", " whisper_model = whisper.load_model(\"base\")\n", "\n", "\n", "def real_diarization(audio_file_path: str) -> list[dict[str, str]]:\n", " try:\n", " load_models()\n", "\n", " if diarization_pipeline is None or whisper_model is None:\n", " raise Exception(\"Failed to load models\")\n", "\n", " diarization = diarization_pipeline(audio_file_path)\n", "\n", " transcription = whisper_model.transcribe(audio_file_path)\n", " segments = transcription[\"segments\"]\n", "\n", " dialogue_segments = []\n", " speaker_mapping = {}\n", " speaker_counter = 1\n", "\n", " for segment in segments:\n", " start_time = segment[\"start\"]\n", " end_time = segment[\"end\"]\n", " text = segment[\"text\"].strip()\n", "\n", " speaker = \"Speaker 1\"\n", " for turn, _, speaker_label in diarization.itertracks(yield_label=True):\n", " if (\n", " turn.start <= start_time <= turn.end\n", " or turn.start <= end_time <= turn.end\n", " ):\n", " if speaker_label not in speaker_mapping:\n", " speaker_mapping[speaker_label] = f\"Speaker {speaker_counter}\"\n", " speaker_counter += 1\n", " speaker = speaker_mapping[speaker_label]\n", " break\n", "\n", " if text:\n", " dialogue_segments.append({\"speaker\": speaker, \"text\": text})\n", "\n", " return dialogue_segments\n", "\n", " except Exception as e:\n", " print(f\"Error in diarization: {str(e)}\")\n", " return []\n", "\n", "\n", "def process_audio(audio_file):\n", " if audio_file is None:\n", " gr.Warning(\"Please upload an audio file first.\")\n", " return []\n", "\n", " try:\n", " dialogue_segments = real_diarization(audio_file)\n", " return dialogue_segments\n", " except Exception as e:\n", " gr.Error(f\"Error processing audio: {str(e)}\")\n", " return []\n", "\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", " \"Speaker 3\",\n", " \"Speaker 4\",\n", " \"Speaker 5\",\n", " \"Speaker 6\",\n", "]\n", "tags = [\n", " \"(pause)\",\n", " \"(background noise)\",\n", " \"(unclear)\",\n", " \"(overlap)\",\n", " \"(phone ringing)\",\n", " \"(door closing)\",\n", " \"(music)\",\n", " \"(applause)\",\n", " \"(laughter)\",\n", "]\n", "\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "\n", "with gr.Blocks(title=\"Audio Diarization Demo\") as demo:\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " audio_input = gr.Audio(\n", " label=\"Upload Audio File\",\n", " type=\"filepath\",\n", " sources=[\"upload\", \"microphone\"],\n", " )\n", "\n", " process_btn = gr.Button(\"\ud83d\udd0d Analyze Speakers\", variant=\"primary\", size=\"lg\")\n", "\n", " with gr.Column(scale=2):\n", " dialogue_output = gr.Dialogue(\n", " speakers=speakers,\n", " tags=tags,\n", " formatter=format_speaker,\n", " label=\"AI-generated speaker-separated conversation\",\n", " value=[],\n", " )\n", "\n", " process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output])\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_diarization_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio torch torchaudio pyannote.audio openai-whisper librosa numpy transformers speechbrain "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# type: ignore\n", "import gradio as gr\n", "from pyannote.audio import Pipeline\n", "import whisper\n", "\n", "diarization_pipeline = None\n", "whisper_model = None\n", "\n", "\n", "def load_models():\n", " global diarization_pipeline, whisper_model # noqa: PLW0603\n", "\n", " if diarization_pipeline is None:\n", " diarization_pipeline = Pipeline.from_pretrained(\n", " \"pyannote/speaker-diarization-3.1\", use_auth_token=True\n", " )\n", "\n", " if whisper_model is None:\n", " whisper_model = whisper.load_model(\"base\")\n", "\n", "\n", "def real_diarization(audio_file_path: str) -> list[dict[str, str]]:\n", " try:\n", " load_models()\n", "\n", " if diarization_pipeline is None or whisper_model is None:\n", " raise Exception(\"Failed to load models\")\n", "\n", " diarization = diarization_pipeline(audio_file_path)\n", "\n", " transcription = whisper_model.transcribe(audio_file_path)\n", " segments = transcription[\"segments\"]\n", "\n", " dialogue_segments = []\n", " speaker_mapping = {}\n", " speaker_counter = 1\n", "\n", " for segment in segments:\n", " start_time = segment[\"start\"]\n", " end_time = segment[\"end\"]\n", " text = segment[\"text\"].strip()\n", "\n", " speaker = \"Speaker 1\"\n", " for turn, _, speaker_label in diarization.itertracks(yield_label=True):\n", " if (\n", " turn.start <= start_time <= turn.end\n", " or turn.start <= end_time <= turn.end\n", " ):\n", " if speaker_label not in speaker_mapping:\n", " speaker_mapping[speaker_label] = f\"Speaker {speaker_counter}\"\n", " speaker_counter += 1\n", " speaker = speaker_mapping[speaker_label]\n", " break\n", "\n", " if text:\n", " dialogue_segments.append({\"speaker\": speaker, \"text\": text})\n", "\n", " return dialogue_segments\n", "\n", " except Exception as e:\n", " print(f\"Error in diarization: {str(e)}\")\n", " return []\n", "\n", "\n", "def process_audio(audio_file):\n", " if audio_file is None:\n", " gr.Warning(\"Please upload an audio file first.\")\n", " return []\n", "\n", " try:\n", " dialogue_segments = real_diarization(audio_file)\n", " return dialogue_segments\n", " except Exception as e:\n", " gr.Error(f\"Error processing audio: {str(e)}\")\n", " return []\n", "\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", " \"Speaker 3\",\n", " \"Speaker 4\",\n", " \"Speaker 5\",\n", " \"Speaker 6\",\n", "]\n", "tags = [\n", " \"(pause)\",\n", " \"(background noise)\",\n", " \"(unclear)\",\n", " \"(overlap)\",\n", " \"(phone ringing)\",\n", " \"(door closing)\",\n", " \"(music)\",\n", " \"(applause)\",\n", " \"(laughter)\",\n", "]\n", "\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "\n", "with gr.Blocks(title=\"Audio Diarization Demo\") as demo:\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " audio_input = gr.Audio(\n", " label=\"Upload Audio File\",\n", " type=\"filepath\",\n", " sources=[\"upload\", \"microphone\"],\n", " )\n", "\n", " process_btn = gr.Button(\"\ud83d\udd0d Analyze Speakers\", variant=\"primary\", size=\"lg\")\n", "\n", " with gr.Column(scale=2):\n", " dialogue_output = gr.Dialogue(\n", " speakers=speakers,\n", " tags=tags,\n", " formatter=format_speaker,\n", " label=\"AI-generated speaker-separated conversation\",\n", " value=[],\n", " )\n", "\n", " process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output])\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_diarization_demo/run.py b/demo/dialogue_diarization_demo/run.py index bff68034d5..5a0a6e9720 100644 --- a/demo/dialogue_diarization_demo/run.py +++ b/demo/dialogue_diarization_demo/run.py @@ -1,3 +1,4 @@ +# type: ignore import gradio as gr from pyannote.audio import Pipeline import whisper diff --git a/demo/dialogue_mock_diarization/run.ipynb b/demo/dialogue_mock_diarization/run.ipynb index b2d4890d11..32f413192b 100644 --- a/demo/dialogue_mock_diarization/run.ipynb +++ b/demo/dialogue_mock_diarization/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_diarization_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio torch torchaudio pyannote.audio openai-whisper librosa numpy transformers speechbrain "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from pyannote.audio import Pipeline\n", "import whisper\n", "\n", "diarization_pipeline = None\n", "whisper_model = None\n", "\n", "\n", "def load_models():\n", " global diarization_pipeline, whisper_model # noqa: PLW0603\n", "\n", " if diarization_pipeline is None:\n", " diarization_pipeline = Pipeline.from_pretrained(\n", " \"pyannote/speaker-diarization-3.1\", use_auth_token=True\n", " )\n", "\n", " if whisper_model is None:\n", " whisper_model = whisper.load_model(\"base\")\n", "\n", "\n", "def real_diarization(audio_file_path: str) -> list[dict[str, str]]:\n", " try:\n", " load_models()\n", "\n", " if diarization_pipeline is None or whisper_model is None:\n", " raise Exception(\"Failed to load models\")\n", "\n", " diarization = diarization_pipeline(audio_file_path)\n", "\n", " transcription = whisper_model.transcribe(audio_file_path)\n", " segments = transcription[\"segments\"]\n", "\n", " dialogue_segments = []\n", " speaker_mapping = {}\n", " speaker_counter = 1\n", "\n", " for segment in segments:\n", " start_time = segment[\"start\"]\n", " end_time = segment[\"end\"]\n", " text = segment[\"text\"].strip()\n", "\n", " speaker = \"Speaker 1\"\n", " for turn, _, speaker_label in diarization.itertracks(yield_label=True):\n", " if (\n", " turn.start <= start_time <= turn.end\n", " or turn.start <= end_time <= turn.end\n", " ):\n", " if speaker_label not in speaker_mapping:\n", " speaker_mapping[speaker_label] = f\"Speaker {speaker_counter}\"\n", " speaker_counter += 1\n", " speaker = speaker_mapping[speaker_label]\n", " break\n", "\n", " if text:\n", " dialogue_segments.append({\"speaker\": speaker, \"text\": text})\n", "\n", " return dialogue_segments\n", "\n", " except Exception as e:\n", " print(f\"Error in diarization: {str(e)}\")\n", " return []\n", "\n", "\n", "def process_audio(audio_file):\n", " if audio_file is None:\n", " gr.Warning(\"Please upload an audio file first.\")\n", " return []\n", "\n", " try:\n", " dialogue_segments = real_diarization(audio_file)\n", " return dialogue_segments\n", " except Exception as e:\n", " gr.Error(f\"Error processing audio: {str(e)}\")\n", " return []\n", "\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", " \"Speaker 3\",\n", " \"Speaker 4\",\n", " \"Speaker 5\",\n", " \"Speaker 6\",\n", "]\n", "tags = [\n", " \"(pause)\",\n", " \"(background noise)\",\n", " \"(unclear)\",\n", " \"(overlap)\",\n", " \"(phone ringing)\",\n", " \"(door closing)\",\n", " \"(music)\",\n", " \"(applause)\",\n", " \"(laughter)\",\n", "]\n", "\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "\n", "with gr.Blocks(title=\"Audio Diarization Demo\") as demo:\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " audio_input = gr.Audio(\n", " label=\"Upload Audio File\",\n", " type=\"filepath\",\n", " sources=[\"upload\", \"microphone\"],\n", " )\n", "\n", " process_btn = gr.Button(\"\ud83d\udd0d Analyze Speakers\", variant=\"primary\", size=\"lg\")\n", "\n", " with gr.Column(scale=2):\n", " dialogue_output = gr.Dialogue(\n", " speakers=speakers,\n", " tags=tags,\n", " formatter=format_speaker,\n", " label=\"AI-generated speaker-separated conversation\",\n", " value=[],\n", " )\n", "\n", " process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output])\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_mock_diarization"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", "]\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "def mock_diarization(audio):\n", " return [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Hello, how are you?\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 2\",\n", " \"text\": \"I'm fine, thank you!\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"What's your name?\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 2\",\n", " \"text\": \"My name is John Doe.\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Nice to meet you!\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 2\",\n", " \"text\": \"Nice to meet you!\",\n", " },\n", " ]\n", "\n", "demo = gr.Interface(\n", " fn=mock_diarization,\n", " inputs=[gr.Audio(sources=[\"microphone\"])],\n", " outputs=[gr.Dialogue(speakers=speakers, tags=None, formatter=format_speaker)],\n", " title=\"Mock Speech Diarization\",\n", " description=\"Mock speech diarization\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/diff_texts/run.ipynb b/demo/diff_texts/run.ipynb index 2e5d180f5b..e1893c54bf 100644 --- a/demo/diff_texts/run.ipynb +++ b/demo/diff_texts/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: diff_texts"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["from difflib import Differ\n", "\n", "import gradio as gr\n", "\n", "def diff_texts(text1, text2):\n", " d = Differ()\n", " return [\n", " (token[2:], token[0] if token[0] != \" \" else None)\n", " for token in d.compare(text1, text2)\n", " ]\n", "\n", "demo = gr.Interface(\n", " diff_texts,\n", " [\n", " gr.Textbox(\n", " label=\"Text 1\",\n", " info=\"Initial text\",\n", " lines=3,\n", " value=\"The quick brown fox jumped over the lazy dogs.\",\n", " ),\n", " gr.Textbox(\n", " label=\"Text 2\",\n", " info=\"Text to compare\",\n", " lines=3,\n", " value=\"The fast brown fox jumps over lazy dogs.\",\n", " ),\n", " ],\n", " gr.HighlightedText(\n", " label=\"Diff\",\n", " combine_adjacent=True,\n", " show_legend=True,\n", " color_map={\"+\": \"red\", \"-\": \"green\"}),\n", " theme=gr.themes.Base()\n", ")\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: diff_texts"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["from difflib import Differ\n", "\n", "import gradio as gr\n", "\n", "def diff_texts(text1, text2):\n", " d = Differ()\n", " return [\n", " (token[2:], token[0] if token[0] != \" \" else None)\n", " for token in d.compare(text1, text2)\n", " ]\n", "\n", "demo = gr.Interface(\n", " diff_texts,\n", " [\n", " gr.Textbox(\n", " label=\"Text 1\",\n", " info=\"Initial text\",\n", " lines=3,\n", " value=\"The quick brown fox jumped over the lazy dogs.\",\n", " ),\n", " gr.Textbox(\n", " label=\"Text 2\",\n", " info=\"Text to compare\",\n", " lines=3,\n", " value=\"The fast brown fox jumps over lazy dogs.\",\n", " ),\n", " ],\n", " gr.HighlightedText(\n", " label=\"Diff\",\n", " combine_adjacent=True,\n", " show_legend=True,\n", " ),\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/diff_texts/run.py b/demo/diff_texts/run.py index 7475a49190..a83f2da580 100644 --- a/demo/diff_texts/run.py +++ b/demo/diff_texts/run.py @@ -29,8 +29,8 @@ def diff_texts(text1, text2): label="Diff", combine_adjacent=True, show_legend=True, - color_map={"+": "red", "-": "green"}), - theme=gr.themes.Base() + ), ) + if __name__ == "__main__": demo.launch() diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py index c06c47d098..e2d6e94d7b 100644 --- a/gradio/components/dialogue.py +++ b/gradio/components/dialogue.py @@ -39,6 +39,7 @@ def __init__( formatter: Callable | None = None, tags: list[str] | None = None, separator: str = " ", + color_map: dict[str, str] | None = None, label: str | None = "Dialogue", info: str | None = "Type colon (:) in the dialogue line to see the available tags", @@ -66,6 +67,7 @@ def __init__( formatter: A function that formats the dialogue line dictionary, e.g. {"speaker": "Speaker 1", "text": "Hello, how are you?"} into a string, e.g. "Speaker 1: Hello, how are you?". This function is run on user input and the resulting string is passed into the prediction function. tags: The different tags allowed in the dialogue. Tags are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact tag name expected by the AI model or inference function. separator: The separator between the different dialogue lines used to join the formatted dialogue lines into a single string. For example, a newline character or empty string. + color_map: A dictionary mapping speaker names to colors. The colors may be specified as hex codes or by their names. For example: {"Speaker 1": "red", "Speaker 2": "#FFEE22"}. If not provided, default colors will be assigned to speakers. max_lines: maximum number of lines allowed in the dialogue. placeholder: placeholder hint to provide behind textarea. label: the label for this component, displayed above the component if `show_label` is `True` and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component corresponds to. @@ -107,6 +109,7 @@ def __init__( self.tags = tags or [] self.formatter = formatter self.separator = separator + self.color_map = color_map self.show_submit_button = show_submit_button self.show_copy_button = show_copy_button if isinstance(value, Callable): diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index a74aaec526..b509c270f7 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -21,6 +21,7 @@ export let max_lines: number | undefined = undefined; export let show_copy_button = false; export let show_submit_button = true; + export let color_map: Record | null = null; let checked = false; export let server: { @@ -41,28 +42,44 @@ let copied = false; let timer: any; let textbox_value = ""; - - // Default color palette for speakers - const defaultColors = [ - "#fef3c7", // amber-100 - "#dbeafe", // blue-100 - "#d1fae5", // emerald-100 - "#fce7f3", // pink-100 - "#e9d5ff", // purple-100 - "#fed7aa", // orange-100 - "#e0e7ff", // indigo-100 - "#cffafe", // cyan-100 - "#f3e8ff", // violet-100 - "#fecaca" // red-100 + + const defaultColorNames = [ + "red", + "green", + "blue", + "yellow", + "purple", + "teal", + "orange", + "cyan", + "lime", + "pink" ]; - - // Create color mapping for speakers + + const colorNameToHex: Record = { + red: "rgba(254, 202, 202, 0.7)", + green: "rgba(209, 250, 229, 0.7)", + blue: "rgba(219, 234, 254, 0.7)", + yellow: "rgba(254, 243, 199, 0.7)", + purple: "rgba(233, 213, 255, 0.7)", + teal: "rgba(204, 251, 241, 0.7)", + orange: "rgba(254, 215, 170, 0.7)", + cyan: "rgba(207, 250, 254, 0.7)", + lime: "rgba(217, 249, 157, 0.7)", + pink: "rgba(252, 231, 243, 0.7)" + }; + let speakerColors: Record = {}; $: { - speakerColors = {}; - speakers.forEach((speaker, index) => { - speakerColors[speaker] = defaultColors[index % defaultColors.length]; - }); + if (color_map) { + speakerColors = { ...color_map }; + } else { + speakerColors = {}; + speakers.forEach((speaker, index) => { + const colorName = defaultColorNames[index % defaultColorNames.length]; + speakerColors[speaker] = colorNameToHex[colorName]; + }); + } } if (speakers.length === 0) { @@ -526,7 +543,11 @@ {#if !checked}
{#each dialogue_lines as line, i} -
+
Date: Mon, 4 Aug 2025 13:48:54 -0700 Subject: [PATCH 68/70] changes --- demo/diff_texts/run.ipynb | 2 +- demo/diff_texts/run.py | 4 ++-- js/dialogue/Dialogue.svelte | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/demo/diff_texts/run.ipynb b/demo/diff_texts/run.ipynb index e1893c54bf..2e5d180f5b 100644 --- a/demo/diff_texts/run.ipynb +++ b/demo/diff_texts/run.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: diff_texts"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["from difflib import Differ\n", "\n", "import gradio as gr\n", "\n", "def diff_texts(text1, text2):\n", " d = Differ()\n", " return [\n", " (token[2:], token[0] if token[0] != \" \" else None)\n", " for token in d.compare(text1, text2)\n", " ]\n", "\n", "demo = gr.Interface(\n", " diff_texts,\n", " [\n", " gr.Textbox(\n", " label=\"Text 1\",\n", " info=\"Initial text\",\n", " lines=3,\n", " value=\"The quick brown fox jumped over the lazy dogs.\",\n", " ),\n", " gr.Textbox(\n", " label=\"Text 2\",\n", " info=\"Text to compare\",\n", " lines=3,\n", " value=\"The fast brown fox jumps over lazy dogs.\",\n", " ),\n", " ],\n", " gr.HighlightedText(\n", " label=\"Diff\",\n", " combine_adjacent=True,\n", " show_legend=True,\n", " ),\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: diff_texts"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["from difflib import Differ\n", "\n", "import gradio as gr\n", "\n", "def diff_texts(text1, text2):\n", " d = Differ()\n", " return [\n", " (token[2:], token[0] if token[0] != \" \" else None)\n", " for token in d.compare(text1, text2)\n", " ]\n", "\n", "demo = gr.Interface(\n", " diff_texts,\n", " [\n", " gr.Textbox(\n", " label=\"Text 1\",\n", " info=\"Initial text\",\n", " lines=3,\n", " value=\"The quick brown fox jumped over the lazy dogs.\",\n", " ),\n", " gr.Textbox(\n", " label=\"Text 2\",\n", " info=\"Text to compare\",\n", " lines=3,\n", " value=\"The fast brown fox jumps over lazy dogs.\",\n", " ),\n", " ],\n", " gr.HighlightedText(\n", " label=\"Diff\",\n", " combine_adjacent=True,\n", " show_legend=True,\n", " color_map={\"+\": \"red\", \"-\": \"green\"}),\n", " theme=gr.themes.Base()\n", ")\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/diff_texts/run.py b/demo/diff_texts/run.py index a83f2da580..7475a49190 100644 --- a/demo/diff_texts/run.py +++ b/demo/diff_texts/run.py @@ -29,8 +29,8 @@ def diff_texts(text1, text2): label="Diff", combine_adjacent=True, show_legend=True, - ), + color_map={"+": "red", "-": "green"}), + theme=gr.themes.Base() ) - if __name__ == "__main__": demo.launch() diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index b509c270f7..df989f6873 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -42,6 +42,7 @@ let copied = false; let timer: any; let textbox_value = ""; + let hoveredSpeaker: string | null = null; const defaultColorNames = [ "red", @@ -545,10 +546,16 @@ {#each dialogue_lines as line, i}
-
+
(hoveredSpeaker = line.speaker)} + on:mouseleave={() => (hoveredSpeaker = null)} + > update_line(i, "speaker", line.speaker)} @@ -755,6 +762,7 @@ .speaker-column :global(.wrap) { background-color: var(--speaker-bg-color) !important; border-radius: var(--radius-sm); + transition: background-color 0.2s ease; } .speaker-column :global(.wrap input) { @@ -773,6 +781,7 @@ border-radius: var(--radius-sm); color: var(--body-text-color); background: var(--speaker-bg-color); + transition: background-color 0.2s ease; height: auto; min-height: 30px; max-height: none; From 187eb9b206e8363c9c25dc0125889f859a4e041d Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Mon, 4 Aug 2025 13:54:44 -0700 Subject: [PATCH 69/70] add pytests --- js/dialogue/Dialogue.svelte | 2 + test/components/test_dialogue.py | 138 +++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 test/components/test_dialogue.py diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index df989f6873..527a564bc7 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -553,6 +553,8 @@ >
(hoveredSpeaker = line.speaker)} on:mouseleave={() => (hoveredSpeaker = null)} > diff --git a/test/components/test_dialogue.py b/test/components/test_dialogue.py new file mode 100644 index 0000000000..2c80ebb4c3 --- /dev/null +++ b/test/components/test_dialogue.py @@ -0,0 +1,138 @@ +import gradio as gr +from gradio.components.dialogue import DialogueLine + + +class TestDialogue: + def test_component_functions(self): + """ + Test preprocess, postprocess, and basic functionality + """ + dialogue = gr.Dialogue(speakers=["Speaker 1", "Speaker 2"]) + + dialogue_data = [ + DialogueLine(speaker="Speaker 1", text="Hello there!"), + DialogueLine(speaker="Speaker 2", text="Hi, how are you?"), + ] + + preprocessed = dialogue.preprocess(gr.Dialogue.data_model(root=dialogue_data)) + assert preprocessed == "[Speaker 1] Hello there! [Speaker 2] Hi, how are you?" + + postprocessed = dialogue.postprocess( + [ + {"speaker": "Speaker 1", "text": "Hello there!"}, + {"speaker": "Speaker 2", "text": "Hi, how are you?"}, + ] + ) + assert postprocessed is not None + assert isinstance(postprocessed.root, list) + assert len(postprocessed.root) == 2 + assert postprocessed.root[0].speaker == "Speaker 1" + assert postprocessed.root[0].text == "Hello there!" + + postprocessed_str = dialogue.postprocess("Hello world") + assert postprocessed_str is not None + assert isinstance(postprocessed_str.root, str) + assert postprocessed_str.root == "Hello world" + + assert dialogue.postprocess(None) is None + + def test_dialogue_with_tags(self): + """ + Test dialogue with tags parameter + """ + dialogue = gr.Dialogue( + speakers=["Agent", "Customer"], + tags=["greeting", "question", "answer", "closing"], + ) + + assert dialogue.tags == ["greeting", "question", "answer", "closing"] + assert dialogue.speakers == ["Agent", "Customer"] + + def test_dialogue_with_color_map(self): + """ + Test dialogue with custom color map + """ + color_map = {"Speaker 1": "#ff0000", "Speaker 2": "#00ff00"} + dialogue = gr.Dialogue(speakers=["Speaker 1", "Speaker 2"], color_map=color_map) + + assert dialogue.color_map == color_map + + def test_dialogue_with_formatter(self): + """ + Test dialogue with custom formatter + """ + + def custom_formatter(speaker, text): + return f"{speaker}: {text}" + + dialogue = gr.Dialogue(speakers=["Alice", "Bob"], formatter=custom_formatter) + + dialogue_data = [ + DialogueLine(speaker="Alice", text="Hello!"), + DialogueLine(speaker="Bob", text="Hi there!"), + ] + + preprocessed = dialogue.preprocess(gr.Dialogue.data_model(root=dialogue_data)) + assert preprocessed == "Alice: Hello! Bob: Hi there!" + + def test_dialogue_without_speakers(self): + """ + Test dialogue without speakers (plain text mode) + """ + dialogue = gr.Dialogue(speakers=None) + + assert dialogue.speakers is None + + preprocessed = dialogue.preprocess( + gr.Dialogue.data_model(root="Just some text") + ) + assert preprocessed == "Just some text" + + def test_get_config(self): + """ + Test get_config returns expected configuration + """ + dialogue = gr.Dialogue( + speakers=["A", "B"], + label="Test Dialogue", + show_copy_button=True, + max_lines=10, + ) + + config = dialogue.get_config() + assert config["speakers"] == ["A", "B"] + assert config["label"] == "Test Dialogue" + assert config["show_copy_button"] is True + assert config["max_lines"] == 10 + assert config["name"] == "dialogue" + + def test_dialogue_separator(self): + """ + Test dialogue with custom separator + """ + dialogue = gr.Dialogue(speakers=["A", "B"], separator="\n") + + dialogue_data = [ + DialogueLine(speaker="A", text="First line"), + DialogueLine(speaker="B", text="Second line"), + ] + + preprocessed = dialogue.preprocess(gr.Dialogue.data_model(root=dialogue_data)) + assert preprocessed == "[A] First line\n[B] Second line" + + def test_example_value(self): + """ + Test example_value and as_example methods + """ + dialogue = gr.Dialogue(speakers=["Speaker 1", "Speaker 2"]) + + example = dialogue.example_value() + assert isinstance(example, list) + assert len(example) == 2 + assert example[0]["speaker"] == "Speaker 1" + assert example[0]["text"] == "Hello, how are you?" + + example_str = dialogue.as_example(example) + assert isinstance(example_str, str) + assert "Speaker 1" in example_str + assert "Hello, how are you?" in example_str From db7460a5c258ab01af90c6416c453d664c3e1c27 Mon Sep 17 00:00:00 2001 From: Abubakar Abid Date: Mon, 4 Aug 2025 14:29:16 -0700 Subject: [PATCH 70/70] dark mode fixes --- gradio/components/dialogue.py | 2 +- js/dialogue/Dialogue.svelte | 74 +++++++++++++++++++++++++---------- 2 files changed, 55 insertions(+), 21 deletions(-) diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py index e2d6e94d7b..3e38b127bb 100644 --- a/gradio/components/dialogue.py +++ b/gradio/components/dialogue.py @@ -67,7 +67,7 @@ def __init__( formatter: A function that formats the dialogue line dictionary, e.g. {"speaker": "Speaker 1", "text": "Hello, how are you?"} into a string, e.g. "Speaker 1: Hello, how are you?". This function is run on user input and the resulting string is passed into the prediction function. tags: The different tags allowed in the dialogue. Tags are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact tag name expected by the AI model or inference function. separator: The separator between the different dialogue lines used to join the formatted dialogue lines into a single string. For example, a newline character or empty string. - color_map: A dictionary mapping speaker names to colors. The colors may be specified as hex codes or by their names. For example: {"Speaker 1": "red", "Speaker 2": "#FFEE22"}. If not provided, default colors will be assigned to speakers. + color_map: A dictionary mapping speaker names to colors. The colors may be specified as hex codes or by their names. For example: {"Speaker 1": "red", "Speaker 2": "#FFEE22"}. If not provided, default colors will be assigned to speakers. This is only used if `interactive` is False. max_lines: maximum number of lines allowed in the dialogue. placeholder: placeholder hint to provide behind textarea. label: the label for this component, displayed above the component if `show_label` is `True` and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component corresponds to. diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte index 527a564bc7..e130073142 100644 --- a/js/dialogue/Dialogue.svelte +++ b/js/dialogue/Dialogue.svelte @@ -546,8 +546,8 @@ {#each dialogue_lines as line, i}
@@ -555,18 +555,26 @@ class="speaker-column" role="button" tabindex="0" - on:mouseenter={() => (hoveredSpeaker = line.speaker)} - on:mouseleave={() => (hoveredSpeaker = null)} + on:mouseenter={() => disabled && (hoveredSpeaker = line.speaker)} + on:mouseleave={() => disabled && (hoveredSpeaker = null)} > - update_line(i, "speaker", line.speaker)} - {disabled} - choices={speakers.map((s) => [s, s])} - show_label={false} - container={true} - label={""} - /> + {#if disabled} +