Skip to content

Add gr.Dialogue component #11092

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 43 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
cab4441
Commit
freddyaboulton Apr 28, 2025
9d3fccf
add changeset
gradio-pr-bot Apr 28, 2025
a56dd56
Add code
freddyaboulton Apr 28, 2025
632535c
rename
freddyaboulton Apr 28, 2025
d18c6bd
lint
freddyaboulton Apr 28, 2025
2638540
lint
freddyaboulton Apr 28, 2025
a91c861
typecheck
freddyaboulton Apr 28, 2025
ea455c5
Add code
freddyaboulton Apr 29, 2025
c26e950
change to github url (#11083)
aliabd Apr 25, 2025
5a38378
chore: update versions (#11077)
gradio-pr-bot Apr 25, 2025
32616c8
fix: ensure all translation files work as expected (#11088)
Col0ring Apr 27, 2025
d8fd6c8
Fix scaling issue when setting height in Image component (#11091)
freddyaboulton Apr 28, 2025
3e87f24
Update client.py to always send file data, even for files without ext…
edmcman Apr 28, 2025
2199902
chore: update versions (#11089)
gradio-pr-bot Apr 28, 2025
f6537b7
Adding requirements for mcp (#11096)
abidlabs Apr 29, 2025
bcb4a61
changes
abidlabs Apr 29, 2025
8999bdd
Fix #10320: Chatbot - Ensure all messages in a group are editable (#1…
eduardopalricas33 Apr 29, 2025
7a0156b
Let Gradio apps also be MCP Servers (#10984)
abidlabs Apr 29, 2025
a4b6b1b
chore: update versions (#11100)
gradio-pr-bot Apr 29, 2025
fd6f4eb
switch to correct link (#11104)
aliabd Apr 30, 2025
bc86fb1
Fix #10281: Dragging image replaces existing instead of opening new t…
Martim-Rito Apr 30, 2025
d6266b9
Update MCP docs (#11106)
abidlabs Apr 30, 2025
50c5404
Fix file upload progress (#11102)
freddyaboulton Apr 30, 2025
8e64b7f
Add `openapi.json` route for Gradio apps as well as docs on the "view…
abidlabs May 1, 2025
9cdc4ac
Fix gradio cc environment (#11111)
freddyaboulton May 1, 2025
3f328f9
Fix passing datetime.datetime instance to gr.Datetime (#11112)
freddyaboulton May 1, 2025
60e8467
update STDIO instructions to specify sse-only transport (#11115)
evalstate May 1, 2025
73737ac
Docs mcp server (#11120)
aliabd May 1, 2025
3b435cb
Fix markdown change event (#11113)
freddyaboulton May 2, 2025
a523deb
Replace invalid characters in MCP tool name (#11119)
abidlabs May 2, 2025
6418c4c
chore: update versions (#11107)
gradio-pr-bot May 2, 2025
e1fda40
Ocean theme quickfix (#11129)
aliabid94 May 5, 2025
6038e0b
Fix python client SSE decoding issue (#11172)
freddyaboulton May 12, 2025
57c6582
Adds docs for `gr.api()` which were previously missing from the websi…
abidlabs May 12, 2025
055b2fb
Fix builtin functions for gr.State value (#11151)
freddyaboulton May 12, 2025
14ff6ed
Fix "delete" diff instruction in JS client (#11147)
freddyaboulton May 12, 2025
2578be6
chore: Support Path type for the favicon (#11159)
wdroz May 12, 2025
b8408ce
Remove warning from gr.File about specifying file_types when file_cou…
freddyaboulton May 12, 2025
b714ccd
fix changelogs
abidlabs May 13, 2025
9eddc37
Parallelize pytests (#11182)
abidlabs May 13, 2025
56c53d7
Add code
freddyaboulton May 13, 2025
54c8f2d
Merge branch 'main' into dialogue-component
freddyaboulton May 13, 2025
ed79ab4
empty
freddyaboulton May 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/spotty-sides-sneeze.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@gradio/dialogue": minor
"@gradio/dropdown": minor
"gradio": minor
---

feat:Add gr.Dialogue component
1 change: 1 addition & 0 deletions demo/dia_dialogue_demo/run.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dia_dialogue_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import httpx\n", "\n", "\n", "emotions = [\n", " \"(laughs)\",\n", " \"(clears throat)\",\n", " \"(sighs)\",\n", " \"(gasps)\",\n", " \"(coughs)\",\n", " \"(singing)\",\n", " \"(sings)\",\n", " \"(mumbles)\",\n", " \"(beep)\",\n", " \"(groans)\",\n", " \"(sniffs)\",\n", " \"(claps)\",\n", " \"(screams)\",\n", " \"(inhales)\",\n", " \"(exhales)\",\n", " \"(applause)\",\n", " \"(burps)\",\n", " \"(humming)\",\n", " \"(sneezes)\",\n", " \"(chuckle)\",\n", " \"(whistles)\",\n", "]\n", "speakers = [\"Speaker 1\", \"Speaker 2\"]\n", "\n", "client = httpx.AsyncClient(timeout=180)\n", "API_URL = \"https://router.huggingface.co/fal-ai/fal-ai/dia-tts\"\n", "\n", "\n", "async def query(dialogue: str, token: gr.OAuthToken | None):\n", " if token is None:\n", " raise gr.Error(\n", " \"No token provided. 
Use Sign in with Hugging Face to get a token.\"\n", " )\n", " headers = {\n", " \"Authorization\": f\"Bearer {token.token}\",\n", " }\n", " response = await client.post(API_URL, headers=headers, json={\"text\": dialogue})\n", " url = response.json()[\"audio\"][\"url\"]\n", " print(\"URL: \", url)\n", " return url\n", "\n", "\n", "def formatter(speaker, text):\n", " speaker = speaker.split(\" \")[1]\n", " return f\"[S{speaker}] {text}\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Sidebar():\n", " login_button = gr.LoginButton()\n", " gr.HTML(\n", " \"\"\"\n", " <h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>\n", " <img src=\"https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/dancing_huggy.gif\" alt=\"Dancing Huggy\" style=\"height: 100px; margin-right: 10px\"> Dia Dialogue Generation Model\n", " </h1>\n", " <h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by <a href=\"https://huggingface.co/nari-labs/Dia-1.6B\"> Nari Labs</a>. Powered by HF and <a href=\"https://fal.ai/\">Fal AI</a> API.</h2>\n", " <h3>Dia is a dialogue generation model that can generate realistic dialogue between two speakers. 
Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life .</h3>\n", " \"\"\"\n", " )\n", " with gr.Row():\n", " with gr.Column():\n", " dialogue = gr.Dialogue(\n", " speakers=speakers, emotions=emotions, formatter=formatter\n", " )\n", " with gr.Column():\n", " with gr.Row():\n", " audio = gr.Audio(label=\"Audio\")\n", " with gr.Row():\n", " gr.DeepLinkButton(value=\"Share Audio via Link\")\n", " with gr.Row():\n", " gr.Examples(\n", " examples=[\n", " [\n", " [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Why did the chicken cross the road?\",\n", " },\n", " {\"speaker\": \"Speaker 2\", \"text\": \"I don't know!\"},\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"to get to the other side! (laughs)\",\n", " },\n", " ]\n", " ],\n", " [\n", " [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"I am a little tired today (sighs).\",\n", " },\n", " {\"speaker\": \"Speaker 2\", \"text\": \"Hang in there!\"},\n", " ]\n", " ],\n", " ],\n", " inputs=[dialogue],\n", " cache_examples=False,\n", " )\n", "\n", " dialogue.submit(query, [dialogue], audio)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
108 changes: 108 additions & 0 deletions demo/dia_dialogue_demo/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import gradio as gr
import httpx


# Non-verbal tags offered to the user; passed as `emotions=` to gr.Dialogue below.
emotions = [
"(laughs)",
"(clears throat)",
"(sighs)",
"(gasps)",
"(coughs)",
"(singing)",
"(sings)",
"(mumbles)",
"(beep)",
"(groans)",
"(sniffs)",
"(claps)",
"(screams)",
"(inhales)",
"(exhales)",
"(applause)",
"(burps)",
"(humming)",
"(sneezes)",
"(chuckle)",
"(whistles)",
]
# Speaker labels passed as `speakers=` to gr.Dialogue; `formatter` below keeps only
# the piece after the space ("1" / "2").
speakers = ["Speaker 1", "Speaker 2"]

# Shared async HTTP client used by `query` for every request.
# NOTE(review): timeout=180 is presumably seconds (httpx convention) - confirm.
client = httpx.AsyncClient(timeout=180)
# Endpoint that `query` POSTs the formatted dialogue text to.
API_URL = "https://router.huggingface.co/fal-ai/fal-ai/dia-tts"


async def query(dialogue: str, token: gr.OAuthToken | None):
    """Send the formatted dialogue to the Dia TTS endpoint and return the audio URL.

    Parameters:
        dialogue: The dialogue text; sent as the ``text`` field of the JSON body.
        token: OAuth token of the signed-in user, or ``None`` when nobody is signed in.
    Returns:
        The value found at ``["audio"]["url"]`` in the JSON response.
    Raises:
        gr.Error: If no token is available, or if the endpoint answers with an
            HTTP error status.
    """
    if token is None:
        raise gr.Error(
            "No token provided. Use Sign in with Hugging Face to get a token."
        )
    headers = {
        "Authorization": f"Bearer {token.token}",
    }
    response = await client.post(API_URL, headers=headers, json={"text": dialogue})
    try:
        # Without this check an error reply would only surface further down as a
        # confusing KeyError (missing "audio") or a JSON decoding failure.
        response.raise_for_status()
    except httpx.HTTPStatusError as err:
        raise gr.Error(
            f"Audio generation failed with HTTP status {err.response.status_code}."
        ) from err
    url = response.json()["audio"]["url"]
    print("URL: ", url)
    return url


def formatter(speaker, text):
    """Render one dialogue line as ``[S<n>] <text>``.

    ``<n>`` is the second space-separated piece of ``speaker``, so
    ``"Speaker 1"`` becomes ``"[S1] ..."``.
    """
    pieces = speaker.split(" ")
    return "[S{}] {}".format(pieces[1], text)


with gr.Blocks() as demo:
with gr.Sidebar():
login_button = gr.LoginButton()
gr.HTML(
"""
<h1 style='text-align: center; display: flex; align-items: center; justify-content: center;'>
<img src="https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/dancing_huggy.gif" alt="Dancing Huggy" style="height: 100px; margin-right: 10px"> Dia Dialogue Generation Model
</h1>
<h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by <a href="https://huggingface.co/nari-labs/Dia-1.6B"> Nari Labs</a>. Powered by HF and <a href="https://fal.ai/">Fal AI</a> API.</h2>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For some reason I'm not seeing spaces before and after the links:

image
Suggested change
<h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by <a href="https://huggingface.co/nari-labs/Dia-1.6B"> Nari Labs</a>. Powered by HF and <a href="https://fal.ai/">Fal AI</a> API.</h2>
<h2 style='text-align: center; display: flex; align-items: center; justify-content: center;'>Model by &nbsp;<a href="https://huggingface.co/nari-labs/Dia-1.6B"> Nari Labs</a>. Powered by HF and &nbsp; <a href="https://fal.ai/">Fal AI</a>&nbsp; API.</h2>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really great demo! Btw for some reason I'm getting some strange outputs. The output audio is always exactly 30 seconds long and has long periods of silence and other artifacts. Not sure if it's an issue with their API or something with our preprocessing:

image

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's the Fal API. The zero-gpu demo in their org is a lot better.

<h3>Dia is a dialogue generation model that can generate realistic dialogue between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life .</h3>
"""
)
with gr.Row():
with gr.Column():
dialogue = gr.Dialogue(
speakers=speakers, emotions=emotions, formatter=formatter
)
with gr.Column():
with gr.Row():
audio = gr.Audio(label="Audio")
with gr.Row():
gr.DeepLinkButton(value="Share Audio via Link")
with gr.Row():
gr.Examples(
examples=[
[
[
{
"speaker": "Speaker 1",
"text": "Why did the chicken cross the road?",
},
{"speaker": "Speaker 2", "text": "I don't know!"},
{
"speaker": "Speaker 1",
"text": "to get to the other side! (laughs)",
},
]
],
[
[
{
"speaker": "Speaker 1",
"text": "I am a little tired today (sighs).",
},
{"speaker": "Speaker 2", "text": "Hang in there!"},
]
],
],
inputs=[dialogue],
cache_examples=False,
)

dialogue.submit(query, [dialogue], audio)

if __name__ == "__main__":
demo.launch()
2 changes: 2 additions & 0 deletions gradio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
Dataset,
DateTime,
DeepLinkButton,
Dialogue,
DownloadButton,
Dropdown,
DuplicateButton,
Expand Down Expand Up @@ -145,6 +146,7 @@
"Dataframe",
"Dataset",
"DateTime",
"Dialogue",
"DeletedFileData",
"DownloadButton",
"DownloadData",
Expand Down
2 changes: 2 additions & 0 deletions gradio/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from gradio.components.dataset import Dataset
from gradio.components.datetime import DateTime
from gradio.components.deep_link_button import DeepLinkButton
from gradio.components.dialogue import Dialogue
from gradio.components.download_button import DownloadButton
from gradio.components.dropdown import Dropdown
from gradio.components.duplicate_button import DuplicateButton
Expand Down Expand Up @@ -78,6 +79,7 @@
"Dataframe",
"DataFrame",
"Dataset",
"Dialogue",
"DownloadButton",
"DuplicateButton",
"Fallback",
Expand Down
155 changes: 155 additions & 0 deletions gradio/components/dialogue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from __future__ import annotations

from collections.abc import Callable

from gradio.components.base import server
from gradio.components.textbox import Textbox
from gradio.data_classes import GradioModel, GradioRootModel
from gradio.events import Events


# One line of a dialogue: who speaks and what they say.
class DialogueLine(GradioModel):
# Label of the speaker for this line.
speaker: str
# The text of this line.
text: str


# Payload for the Dialogue component: either a list of DialogueLine entries or a
# single string (Dialogue.preprocess returns a string payload unchanged).
class DialogueModel(GradioRootModel):
root: list[DialogueLine] | str


class Dialogue(Textbox):
Copy link
Member

@abidlabs abidlabs May 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the benefit of inheriting from gr.Textbox? If there's not a strong benefit, consider keeping separate as it couples these components in a way that we might need to change later.

"""
Creates a dialogue component for users to enter dialogue between speakers.

Demos: dia_dialogue_demo
"""

# Events listed for this component: change, input and submit.
EVENTS = [
Events.change,
Events.input,
Events.submit,
]

# Payload model: a list of DialogueLine entries or a plain string (see DialogueModel).
data_model = DialogueModel

def __init__(
self,
value: list[dict[str, str]] | Callable | None = None,
*,
speakers: list[str] | None = None,
formatter: Callable | None = None,
emotions: list[str] | None = None,
separator: str = " ",
label: str | None = "Dialogue",
info: str
| None = "Type colon (:) in the dialogue line to see the available emotion and intonation tags",
placeholder: str | None = "Enter dialogue here...",
Comment on lines +44 to +46
Copy link
Member

@abidlabs abidlabs May 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really want to have default values for info and placeholder? For consistency with other components, it may be better to leave as None

show_label: bool | None = None,
container: bool = True,
scale: int | None = None,
min_width: int = 160,
interactive: bool | None = None,
visible: bool = True,
elem_id: str | None = None,
autofocus: bool = False,
autoscroll: bool = True,
elem_classes: list[str] | str | None = None,
render: bool = True,
key: int | str | None = None,
max_lines: int | None = None,
show_submit_button: bool = True,
show_copy_button: bool = True,
):
"""
Parameters:
value: Value of the dialogue. It is a list of dictionaries, each containing a 'speaker' key and a 'text' key. If a function is provided, the function will be called each time the app loads to set the initial value of this component.
speakers: The different speakers allowed in the dialogue.
formatter: A function that formats the dialogue line dictionary, e.g. {"speaker": "Speaker 1", "text": "Hello, how are you?"} into a string, e.g. "Speaker 1: Hello, how are you?".
emotions: The different emotions and intonation allowed in the dialogue. Emotions are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact emotion name expected by the AI model or inference function.
Copy link
Member

@abidlabs abidlabs May 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as discussed in slack somewhere, consider a more general name for this parameter like completions or tags

Suggested change
emotions: The different emotions and intonation allowed in the dialogue. Emotions are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact emotion name expected by the AI model or inference function.
emotions: The different emotions and intonation allowed in the dialogue. Emotions are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact emotion name expected by the AI model or inference function.

separator: The separator between the different dialogue lines used to join the formatted dialogue lines into a single string. For example, a newline character or empty string.
max_lines: maximum number of lines allowed in the dialogue.
placeholder: placeholder hint to provide behind textarea.
label: the label for this component, displayed above the component if `show_label` is `True` and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component corresponds to.
show_label: if True, will display the label. If False, the copy button is hidden as well as the label.
container: if True, will place the component in a container - providing some extra padding around the border.
scale: relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True.
min_width: minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first.
interactive: if True, will be rendered as an editable textbox; if False, editing will be disabled. If not provided, this is inferred based on whether the component is used as an input or output.
visible: If False, component will be hidden.
autofocus: If True, will focus on the textbox when the page loads. Use this carefully, as it can cause usability issues for sighted and non-sighted users.
elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles.
elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles.
render: If False, component will not be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later.
key: if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved.
show_copy_button: If True, includes a copy button to copy the text in the textbox. Only applies if show_label is True.
show_submit_button: If True, includes a submit button to submit the dialogue.
autoscroll: If True, will automatically scroll to the bottom of the textbox when the value changes, unless the user scrolls up. If False, will not scroll to the bottom of the textbox when the value changes.
"""
super().__init__(
value="",
label=label,
info=info,
placeholder=placeholder,
show_label=show_label,
container=container,
scale=scale,
min_width=min_width,
interactive=interactive,
visible=visible,
elem_id=elem_id,
autofocus=autofocus,
autoscroll=autoscroll,
elem_classes=elem_classes,
render=render,
key=key,
max_lines=max_lines,
)
self.speakers = speakers
self.emotions = emotions or []
self.formatter = formatter
self.separator = separator
self.show_submit_button = show_submit_button
self.show_copy_button = show_copy_button
if isinstance(value, Callable):
value = value()
self.value = (
self.preprocess(DialogueModel(root=value)) if value is not None else value # type: ignore
)

def preprocess(self, payload: DialogueModel) -> str:  # type: ignore
    """Flatten a dialogue payload into a single string.

    A payload whose root is already a string is returned as-is. Otherwise each
    line is rendered with ``self.formatter`` (falling back to
    ``self.default_formatter`` when no formatter is set) and the rendered
    lines are joined with ``self.separator``.
    """
    render_line = self.formatter or self.default_formatter
    content = payload.root
    if isinstance(content, str):
        return content
    return self.separator.join(
        render_line(entry.speaker, entry.text) for entry in content
    )

@staticmethod
def default_formatter(speaker: str, text: str) -> str:
return f"[{speaker}] {text}"

@server
async def format(self, value: list[dict]):
    """Turn dialogue lines sent by the frontend into the string that gets copied to the clipboard.

    ``value`` is wrapped in a ``DialogueModel`` and run through ``preprocess``,
    so the result uses the same formatter and separator.
    """
    return self.preprocess(DialogueModel(root=value))  # type: ignore

# Returns `value` unchanged; no conversion is applied here.
def postprocess(self, value):
return value

def as_example(self, value):
    """Return the formatted-string form of an example value.

    ``value`` is wrapped in a ``DialogueModel`` and passed through ``preprocess``.
    """
    wrapped = DialogueModel(root=value)
    return self.preprocess(wrapped)

def example_payload(self):
    """Return a sample two-line dialogue as a list of speaker/text dicts."""
    turns = (
        ("Speaker 1", "Hello, how are you?"),
        ("Speaker 2", "I'm fine, thank you!"),
    )
    return [{"speaker": who, "text": said} for who, said in turns]

def example_value(self):
    """Return a sample two-line dialogue value as a list of speaker/text dicts."""
    opening = dict(speaker="Speaker 1", text="Hello, how are you?")
    reply = dict(speaker="Speaker 2", text="I'm fine, thank you!")
    return [opening, reply]
2 changes: 1 addition & 1 deletion gradio/stubs/anyio.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" This module contains type hints for the anyio library. It was auto-generated so may include errors."""
from typing import Any, Callable, Coroutine, TypeVar, overload, Optional, Union
from types import TracebackType
from typing import Any, Callable, Coroutine, Optional, TypeVar, Union, overload

T = TypeVar('T')
T_Retval = TypeVar('T_Retval')
Expand Down
Loading
Loading