diff --git a/.changeset/spotty-sides-sneeze.md b/.changeset/spotty-sides-sneeze.md new file mode 100644 index 0000000000..6f4c09999f --- /dev/null +++ b/.changeset/spotty-sides-sneeze.md @@ -0,0 +1,7 @@ +--- +"@gradio/dialogue": minor +"@gradio/dropdown": minor +"gradio": minor +--- + +feat:Add gr.Dialogue component diff --git a/demo/dia_dialogue_demo/run.ipynb b/demo/dia_dialogue_demo/run.ipynb new file mode 100644 index 0000000000..1b182627aa --- /dev/null +++ b/demo/dia_dialogue_demo/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dia_dialogue_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import httpx\n", "\n", "\n", "tags = [\n", " \"(laughs)\",\n", " \"(clears throat)\",\n", " \"(sighs)\",\n", " \"(gasps)\",\n", " \"(coughs)\",\n", " \"(singing)\",\n", " \"(sings)\",\n", " \"(mumbles)\",\n", " \"(beep)\",\n", " \"(groans)\",\n", " \"(sniffs)\",\n", " \"(claps)\",\n", " \"(screams)\",\n", " \"(inhales)\",\n", " \"(exhales)\",\n", " \"(applause)\",\n", " \"(burps)\",\n", " \"(humming)\",\n", " \"(sneezes)\",\n", " \"(chuckle)\",\n", " \"(whistles)\",\n", "]\n", "speakers = [\"Speaker 1\", \"Speaker 2\"]\n", "\n", "client = httpx.AsyncClient(timeout=180)\n", "API_URL = \"https://router.huggingface.co/fal-ai/fal-ai/dia-tts\"\n", "\n", "\n", "async def query(dialogue: str, token: gr.OAuthToken | None):\n", " if token is None:\n", " raise gr.Error(\n", " \"No token provided. Use Sign in with Hugging Face to get a token.\"\n", " )\n", " headers = {\n", " \"Authorization\": f\"Bearer {token.token}\",\n", " }\n", " response = await client.post(API_URL, headers=headers, json={\"text\": dialogue})\n", " url = response.json()[\"audio\"][\"url\"]\n", " print(\"URL: \", url)\n", " return url\n", "\n", "\n", "def formatter(speaker, text):\n", " speaker = speaker.split(\" \")[1]\n", " return f\"[S{speaker}] {text}\"\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Sidebar():\n", " login_button = gr.LoginButton()\n", " gr.HTML(\n", " \"\"\"\n", "

\n", " \"Dancing Dia Dialogue Generation Model\n", "

\n", "

Model by Nari Labs. Powered by HF and the Fal AI API.

\n", "

Dia is a dialogue generation model that produces realistic conversations between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life.

\n", " \"\"\"\n", " )\n", " with gr.Row():\n", " with gr.Column():\n", " dialogue = gr.Dialogue(\n", " speakers=speakers, tags=tags, formatter=formatter\n", " )\n", " with gr.Column():\n", " with gr.Row():\n", " audio = gr.Audio(label=\"Audio\")\n", " with gr.Row():\n", " gr.DeepLinkButton(value=\"Share Audio via Link\")\n", " with gr.Row():\n", " gr.Examples(\n", " examples=[\n", " [\n", " [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Why did the chicken cross the road?\",\n", " },\n", " {\"speaker\": \"Speaker 2\", \"text\": \"I don't know!\"},\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"to get to the other side! (laughs)\",\n", " },\n", " ]\n", " ],\n", " [\n", " [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"I am a little tired today (sighs).\",\n", " },\n", " {\"speaker\": \"Speaker 2\", \"text\": \"Hang in there!\"},\n", " ]\n", " ],\n", " ],\n", " inputs=[dialogue],\n", " cache_examples=False,\n", " )\n", "\n", " dialogue.submit(query, [dialogue], audio)\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dia_dialogue_demo/run.py b/demo/dia_dialogue_demo/run.py new file mode 100644 index 0000000000..5e6aa47273 --- /dev/null +++ b/demo/dia_dialogue_demo/run.py @@ -0,0 +1,108 @@ +import gradio as gr +import httpx + + +tags = [ + "(laughs)", + "(clears throat)", + "(sighs)", + "(gasps)", + "(coughs)", + "(singing)", + "(sings)", + "(mumbles)", + "(beep)", + "(groans)", + "(sniffs)", + "(claps)", + "(screams)", + "(inhales)", + "(exhales)", + "(applause)", + "(burps)", + "(humming)", + "(sneezes)", + "(chuckle)", + "(whistles)", +] +speakers = ["Speaker 1", "Speaker 2"] + +client = httpx.AsyncClient(timeout=180) +API_URL = "https://router.huggingface.co/fal-ai/fal-ai/dia-tts" + + +async def query(dialogue: str, token: gr.OAuthToken | None): + if token is None: + raise gr.Error( + "No token provided. Use Sign in with Hugging Face to get a token." + ) + headers = { + "Authorization": f"Bearer {token.token}", + } + response = await client.post(API_URL, headers=headers, json={"text": dialogue}) + url = response.json()["audio"]["url"] + print("URL: ", url) + return url + + +def formatter(speaker, text): + speaker = speaker.split(" ")[1] + return f"[S{speaker}] {text}" + + +with gr.Blocks() as demo: + with gr.Sidebar(): + login_button = gr.LoginButton() + gr.HTML( + """ +

+ Dancing Huggy Dia Dialogue Generation Model +

+

Model by Nari Labs. Powered by HF and the Fal AI API.

+

Dia is a dialogue generation model that produces realistic conversations between two speakers. Use the dialogue component to create a conversation and then hit the submit button in the bottom right corner to see it come to life.

+ """ + ) + with gr.Row(): + with gr.Column(): + dialogue = gr.Dialogue( + speakers=speakers, tags=tags, formatter=formatter + ) + with gr.Column(): + with gr.Row(): + audio = gr.Audio(label="Audio") + with gr.Row(): + gr.DeepLinkButton(value="Share Audio via Link") + with gr.Row(): + gr.Examples( + examples=[ + [ + [ + { + "speaker": "Speaker 1", + "text": "Why did the chicken cross the road?", + }, + {"speaker": "Speaker 2", "text": "I don't know!"}, + { + "speaker": "Speaker 1", + "text": "to get to the other side! (laughs)", + }, + ] + ], + [ + [ + { + "speaker": "Speaker 1", + "text": "I am a little tired today (sighs).", + }, + {"speaker": "Speaker 2", "text": "Hang in there!"}, + ] + ], + ], + inputs=[dialogue], + cache_examples=False, + ) + + dialogue.submit(query, [dialogue], audio) + +if __name__ == "__main__": + demo.launch() diff --git a/demo/dialogue_component/run.ipynb b/demo/dialogue_component/run.ipynb new file mode 100644 index 0000000000..1383cb2f97 --- /dev/null +++ b/demo/dialogue_component/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_component"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "with gr.Blocks() as demo:\n", "\n", " gr.Dialogue(speakers=[\"Speaker 1\", \"Speaker 2\"], formatter=None, tags=[\"(laughs)\", \"(sighs)\", \"(clears throat)\"])\n", "demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_component/run.py b/demo/dialogue_component/run.py new file mode 100644 index 0000000000..f09ddaf11b --- /dev/null +++ b/demo/dialogue_component/run.py @@ -0,0 +1,6 @@ +import gradio as gr + +with gr.Blocks() as demo: + + gr.Dialogue(speakers=["Speaker 1", "Speaker 2"], formatter=None, tags=["(laughs)", "(sighs)", "(clears throat)"]) +demo.launch() diff --git a/demo/dialogue_diarization_demo/requirements.txt b/demo/dialogue_diarization_demo/requirements.txt new file mode 100644 index 0000000000..548affd021 --- /dev/null +++ b/demo/dialogue_diarization_demo/requirements.txt @@ -0,0 +1,9 @@ +gradio +torch +torchaudio +pyannote.audio +openai-whisper +librosa +numpy +transformers +speechbrain \ No newline at end of file diff --git a/demo/dialogue_diarization_demo/run.ipynb b/demo/dialogue_diarization_demo/run.ipynb new file mode 100644 index 0000000000..c38e55a4fa --- /dev/null +++ b/demo/dialogue_diarization_demo/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_diarization_demo"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio gradio torch torchaudio pyannote.audio openai-whisper librosa numpy transformers speechbrain "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# type: ignore\n", "import gradio as gr\n", "from pyannote.audio import Pipeline\n", "import whisper\n", "\n", "diarization_pipeline = None\n", "whisper_model = None\n", "\n", "\n", "def load_models():\n", " 
global diarization_pipeline, whisper_model # noqa: PLW0603\n", "\n", " if diarization_pipeline is None:\n", " diarization_pipeline = Pipeline.from_pretrained(\n", " \"pyannote/speaker-diarization-3.1\", use_auth_token=True\n", " )\n", "\n", " if whisper_model is None:\n", " whisper_model = whisper.load_model(\"base\")\n", "\n", "\n", "def real_diarization(audio_file_path: str) -> list[dict[str, str]]:\n", " try:\n", " load_models()\n", "\n", " if diarization_pipeline is None or whisper_model is None:\n", " raise Exception(\"Failed to load models\")\n", "\n", " diarization = diarization_pipeline(audio_file_path)\n", "\n", " transcription = whisper_model.transcribe(audio_file_path)\n", " segments = transcription[\"segments\"]\n", "\n", " dialogue_segments = []\n", " speaker_mapping = {}\n", " speaker_counter = 1\n", "\n", " for segment in segments:\n", " start_time = segment[\"start\"]\n", " end_time = segment[\"end\"]\n", " text = segment[\"text\"].strip()\n", "\n", " speaker = \"Speaker 1\"\n", " for turn, _, speaker_label in diarization.itertracks(yield_label=True):\n", " if (\n", " turn.start <= start_time <= turn.end\n", " or turn.start <= end_time <= turn.end\n", " ):\n", " if speaker_label not in speaker_mapping:\n", " speaker_mapping[speaker_label] = f\"Speaker {speaker_counter}\"\n", " speaker_counter += 1\n", " speaker = speaker_mapping[speaker_label]\n", " break\n", "\n", " if text:\n", " dialogue_segments.append({\"speaker\": speaker, \"text\": text})\n", "\n", " return dialogue_segments\n", "\n", " except Exception as e:\n", " print(f\"Error in diarization: {str(e)}\")\n", " return []\n", "\n", "\n", "def process_audio(audio_file):\n", " if audio_file is None:\n", " gr.Warning(\"Please upload an audio file first.\")\n", " return []\n", "\n", " try:\n", " dialogue_segments = real_diarization(audio_file)\n", " return dialogue_segments\n", " except Exception as e:\n", " gr.Error(f\"Error processing audio: {str(e)}\")\n", " return []\n", "\n", "\n", "speakers = [\n", " \"Speaker 1\",\n", " \"Speaker 2\",\n", " \"Speaker 3\",\n", " \"Speaker 4\",\n", " \"Speaker 5\",\n", " \"Speaker 6\",\n", "]\n", "tags = [\n", " \"(pause)\",\n", " \"(background noise)\",\n", " \"(unclear)\",\n", " \"(overlap)\",\n", " \"(phone ringing)\",\n", " \"(door closing)\",\n", " \"(music)\",\n", " \"(applause)\",\n", " \"(laughter)\",\n", "]\n", "\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "\n", "with gr.Blocks(title=\"Audio Diarization Demo\") as demo:\n", " with gr.Row():\n", " with gr.Column(scale=1):\n", " audio_input = gr.Audio(\n", " label=\"Upload Audio File\",\n", " type=\"filepath\",\n", " sources=[\"upload\", \"microphone\"],\n", " )\n", "\n", " process_btn = gr.Button(\"\ud83d\udd0d Analyze Speakers\", variant=\"primary\", size=\"lg\")\n", "\n", " with gr.Column(scale=2):\n", " dialogue_output = gr.Dialogue(\n", " speakers=speakers,\n", " tags=tags,\n", " formatter=format_speaker,\n", " label=\"AI-generated speaker-separated conversation\",\n", " value=[],\n", " )\n", "\n", " process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output])\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_diarization_demo/run.py b/demo/dialogue_diarization_demo/run.py new file mode 100644 index 0000000000..5a0a6e9720 --- /dev/null +++ b/demo/dialogue_diarization_demo/run.py @@ -0,0 +1,126 @@ +# type: ignore +import gradio 
as gr +from pyannote.audio import Pipeline +import whisper + +diarization_pipeline = None +whisper_model = None + + +def load_models(): + global diarization_pipeline, whisper_model # noqa: PLW0603 + + if diarization_pipeline is None: + diarization_pipeline = Pipeline.from_pretrained( + "pyannote/speaker-diarization-3.1", use_auth_token=True + ) + + if whisper_model is None: + whisper_model = whisper.load_model("base") + + +def real_diarization(audio_file_path: str) -> list[dict[str, str]]: + try: + load_models() + + if diarization_pipeline is None or whisper_model is None: + raise Exception("Failed to load models") + + diarization = diarization_pipeline(audio_file_path) + + transcription = whisper_model.transcribe(audio_file_path) + segments = transcription["segments"] + + dialogue_segments = [] + speaker_mapping = {} + speaker_counter = 1 + + for segment in segments: + start_time = segment["start"] + end_time = segment["end"] + text = segment["text"].strip() + + speaker = "Speaker 1" + for turn, _, speaker_label in diarization.itertracks(yield_label=True): + if ( + turn.start <= start_time <= turn.end + or turn.start <= end_time <= turn.end + ): + if speaker_label not in speaker_mapping: + speaker_mapping[speaker_label] = f"Speaker {speaker_counter}" + speaker_counter += 1 + speaker = speaker_mapping[speaker_label] + break + + if text: + dialogue_segments.append({"speaker": speaker, "text": text}) + + return dialogue_segments + + except Exception as e: + print(f"Error in diarization: {str(e)}") + return [] + + +def process_audio(audio_file): + if audio_file is None: + gr.Warning("Please upload an audio file first.") + return [] + + try: + dialogue_segments = real_diarization(audio_file) + return dialogue_segments + except Exception as e: + gr.Error(f"Error processing audio: {str(e)}") + return [] + + +speakers = [ + "Speaker 1", + "Speaker 2", + "Speaker 3", + "Speaker 4", + "Speaker 5", + "Speaker 6", +] +tags = [ + "(pause)", + "(background noise)", + "(unclear)", + "(overlap)", + "(phone ringing)", + "(door closing)", + "(music)", + "(applause)", + "(laughter)", +] + + +def format_speaker(speaker, text): + return f"{speaker}: {text}" + + +with gr.Blocks(title="Audio Diarization Demo") as demo: + with gr.Row(): + with gr.Column(scale=1): + audio_input = gr.Audio( + label="Upload Audio File", + type="filepath", + sources=["upload", "microphone"], + ) + + process_btn = gr.Button("🔍 Analyze Speakers", variant="primary", size="lg") + + with gr.Column(scale=2): + dialogue_output = gr.Dialogue( + speakers=speakers, + tags=tags, + formatter=format_speaker, + label="AI-generated speaker-separated conversation", + value=[], + ) + + process_btn.click(fn=process_audio, inputs=[audio_input], outputs=[dialogue_output]) + +if __name__ == "__main__": + demo.launch() diff --git a/demo/dialogue_mock_diarization/run.ipynb b/demo/dialogue_mock_diarization/run.ipynb new file mode 100644 index 0000000000..32f413192b --- /dev/null +++ b/demo/dialogue_mock_diarization/run.ipynb @@ -0,0 +1 @@ +{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: dialogue_mock_diarization"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "\n", "speakers = [\n", " \"Speaker 
1\",\n", " \"Speaker 2\",\n", "]\n", "\n", "def format_speaker(speaker, text):\n", " return f\"{speaker}: {text}\"\n", "\n", "def mock_diarization(audio):\n", " return [\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Hello, how are you?\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 2\",\n", " \"text\": \"I'm fine, thank you!\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"What's your name?\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 2\",\n", " \"text\": \"My name is John Doe.\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 1\",\n", " \"text\": \"Nice to meet you!\",\n", " },\n", " {\n", " \"speaker\": \"Speaker 2\",\n", " \"text\": \"Nice to meet you!\",\n", " },\n", " ]\n", "\n", "demo = gr.Interface(\n", " fn=mock_diarization,\n", " inputs=[gr.Audio(sources=[\"microphone\"])],\n", " outputs=[gr.Dialogue(speakers=speakers, tags=None, formatter=format_speaker)],\n", " title=\"Mock Speech Diarization\",\n", " description=\"Mock speech diarization\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5} \ No newline at end of file diff --git a/demo/dialogue_mock_diarization/run.py b/demo/dialogue_mock_diarization/run.py new file mode 100644 index 0000000000..58ec0f9b5d --- /dev/null +++ b/demo/dialogue_mock_diarization/run.py @@ -0,0 +1,48 @@ +import gradio as gr + +speakers = [ + "Speaker 1", + "Speaker 2", +] + +def format_speaker(speaker, text): + return f"{speaker}: {text}" + +def mock_diarization(audio): + return [ + { + "speaker": "Speaker 1", + "text": "Hello, how are you?", + }, + { + "speaker": "Speaker 2", + "text": "I'm fine, thank you!", + }, + { + "speaker": "Speaker 1", + "text": "What's your name?", + }, + { + "speaker": "Speaker 2", + "text": "My name is John Doe.", + }, + { + "speaker": "Speaker 1", + "text": "Nice to meet you!", + }, + { + "speaker": "Speaker 2", + "text": "Nice to meet you!", + }, + ] + +demo = gr.Interface( + fn=mock_diarization, + inputs=[gr.Audio(sources=["microphone"])], + outputs=[gr.Dialogue(speakers=speakers, tags=None, formatter=format_speaker)], + title="Mock Speech Diarization", + description="Mock speech diarization", +) + +if __name__ == "__main__": + demo.launch() diff --git a/gradio/__init__.py b/gradio/__init__.py index 7bc3395a81..5512d27324 100644 --- a/gradio/__init__.py +++ b/gradio/__init__.py @@ -31,6 +31,7 @@ Dataset, DateTime, DeepLinkButton, + Dialogue, DownloadButton, Dropdown, DuplicateButton, @@ -149,6 +150,7 @@ "Dataframe", "Dataset", "DateTime", + "Dialogue", "DeletedFileData", "DownloadButton", "DownloadData", diff --git a/gradio/components/__init__.py b/gradio/components/__init__.py index 57028d3c87..d2f6a81fd0 100644 --- a/gradio/components/__init__.py +++ b/gradio/components/__init__.py @@ -21,6 +21,7 @@ from gradio.components.dataset import Dataset from gradio.components.datetime import DateTime from gradio.components.deep_link_button import DeepLinkButton +from gradio.components.dialogue import Dialogue from gradio.components.download_button import DownloadButton from gradio.components.dropdown import Dropdown from gradio.components.duplicate_button import DuplicateButton @@ -78,6 +79,7 @@ "Dataframe", "DataFrame", "Dataset", + "Dialogue", "DownloadButton", "DuplicateButton", "Fallback", diff --git a/gradio/components/dialogue.py b/gradio/components/dialogue.py new file mode 100644 index 0000000000..3e38b127bb --- /dev/null +++ b/gradio/components/dialogue.py @@ -0,0 +1,188 @@ +from __future__ import 
annotations + +from collections.abc import Callable + +from gradio.components.base import Component, server +from gradio.data_classes import GradioModel, GradioRootModel +from gradio.events import Events + + +class DialogueLine(GradioModel): + speaker: str + text: str + + +class DialogueModel(GradioRootModel): + root: list[DialogueLine] | str + + +class Dialogue(Component): + """ + Creates a Dialogue component for displaying or collecting multi-speaker conversations. This component can be used as input to allow users to enter dialogue involving multiple speakers, or as output to display diarized speech, such as the result of a transcription or speaker identification model. Each message can be associated with a specific speaker, making it suitable for use cases like conversations, interviews, or meetings. + + Demos: dia_dialogue_demo + """ + + EVENTS = [ + Events.change, + Events.input, + Events.submit, + ] + + data_model = DialogueModel + + def __init__( + self, + value: list[dict[str, str]] | Callable | None = None, + *, + speakers: list[str] | None = None, + formatter: Callable | None = None, + tags: list[str] | None = None, + separator: str = " ", + color_map: dict[str, str] | None = None, + label: str | None = "Dialogue", + info: str + | None = "Type colon (:) in the dialogue line to see the available tags", + placeholder: str | None = None, + show_label: bool | None = None, + container: bool = True, + scale: int | None = None, + min_width: int = 160, + interactive: bool | None = None, + visible: bool = True, + elem_id: str | None = None, + autofocus: bool = False, + autoscroll: bool = True, + elem_classes: list[str] | str | None = None, + render: bool = True, + key: int | str | None = None, + max_lines: int | None = None, + show_submit_button: bool = True, + show_copy_button: bool = True, + ): + """ + Parameters: + value: Value of the dialogue. It is a list of dictionaries, each containing a 'speaker' key and a 'text' key. If a function is provided, the function will be called each time the app loads to set the initial value of this component. + speakers: The different speakers allowed in the dialogue. If `None` or an empty list, no speakers will be displayed. Instead, the component will be a standard textarea that optionally supports `tags` autocompletion. + formatter: A function that formats the dialogue line dictionary, e.g. {"speaker": "Speaker 1", "text": "Hello, how are you?"} into a string, e.g. "Speaker 1: Hello, how are you?". This function is run on user input and the resulting string is passed into the prediction function. + tags: The different tags allowed in the dialogue. Tags are displayed in an autocomplete menu below the input textbox when the user starts typing `:`. Use the exact tag name expected by the AI model or inference function. + separator: The separator between the different dialogue lines used to join the formatted dialogue lines into a single string. For example, a newline character or empty string. + color_map: A dictionary mapping speaker names to colors. The colors may be specified as hex codes or by their names. For example: {"Speaker 1": "red", "Speaker 2": "#FFEE22"}. If not provided, default colors will be assigned to speakers. This is only used if `interactive` is False. + max_lines: maximum number of lines allowed in the dialogue. + placeholder: placeholder hint to provide behind textarea. 
+            label: the label for this component, displayed above the component if `show_label` is `True` and is also used as the header if there is a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component corresponds to. +            show_label: if True, will display the label. If False, the copy button is hidden as well as the label. +            container: if True, will place the component in a container - providing some extra padding around the border. +            scale: relative size compared to adjacent Components. For example if Components A and B are in a Row, and A has scale=2, and B has scale=1, A will be twice as wide as B. Should be an integer. scale applies in Rows, and to top-level Components in Blocks where fill_height=True. +            min_width: minimum pixel width, will wrap if not sufficient screen space to satisfy this value. If a certain scale value results in this Component being narrower than min_width, the min_width parameter will be respected first. +            interactive: if True, will be rendered as an editable textbox; if False, editing will be disabled. If not provided, this is inferred based on whether the component is used as an input or output. +            visible: If False, component will be hidden. +            autofocus: If True, will focus on the textbox when the page loads. Use this carefully, as it can cause usability issues for sighted and non-sighted users. +            elem_id: An optional string that is assigned as the id of this component in the HTML DOM. Can be used for targeting CSS styles. +            elem_classes: An optional list of strings that are assigned as the classes of this component in the HTML DOM. Can be used for targeting CSS styles. +            render: If False, component will not be rendered in the Blocks context. Should be used if the intention is to assign event listeners now but render the component later. +            key: if assigned, will be used to assume identity across a re-render. Components that have the same key across a re-render will have their value preserved. +            show_copy_button: If True, includes a copy button to copy the text in the textbox. Only applies if show_label is True. +            show_submit_button: If True, includes a submit button to submit the dialogue. +            autoscroll: If True, will automatically scroll to the bottom of the textbox when the value changes, unless the user scrolls up. If False, will not scroll to the bottom of the textbox when the value changes. +        """ +        super().__init__( +            value="", +            label=label, +            info=info, +            show_label=show_label, +            container=container, +            scale=scale, +            min_width=min_width, +            interactive=interactive, +            visible=visible, +            elem_id=elem_id, +            elem_classes=elem_classes, +            render=render, +            key=key, +        ) +        self.placeholder = placeholder +        self.autofocus = autofocus +        self.autoscroll = autoscroll +        self.max_lines = max_lines +        self.speakers = speakers +        self.tags = tags or [] +        self.formatter = formatter +        self.separator = separator +        self.color_map = color_map +        self.show_submit_button = show_submit_button +        self.show_copy_button = show_copy_button +        if isinstance(value, Callable): +            value = value() +        self.value = ( +            self.preprocess(DialogueModel(root=value)) if value is not None else value  # type: ignore +        ) +        if not interactive: +            self.info = None + +    def preprocess(self, payload: DialogueModel) -> str:  # type: ignore +        """ +        Parameters: +            payload: Expects a `DialogueModel` object or string. +        Returns: +            Returns the dialogue as a string.
+ """ + if (isinstance(payload.root, str) and payload.root == "") or ( + isinstance(payload.root, list) + and len(payload.root) == 1 + and payload.root[0].text == "" + ): + return "" + formatter = self.formatter + if not formatter: + formatter = self.default_formatter + if isinstance(payload.root, str): + return payload.root + return self.separator.join( + [formatter(line.speaker, line.text) for line in payload.root] + ) + + @staticmethod + def default_formatter(speaker: str, text: str) -> str: + return f"[{speaker}] {text}" + + @server + async def format(self, value: list[dict] | str): + """Format the dialogue in the frontend into a string that's copied to the clipboard.""" + data = DialogueModel(root=value) # type: ignore + return self.preprocess(data) + + def postprocess( # type: ignore + self, value: list[dict[str, str]] | str | None + ) -> DialogueModel | None: + """ + Parameters: + value: Expects a string or a list of dictionaries of dialogue lines, where each dictionary contains 'speaker' and 'text' keys, or a string. + Returns: + Returns the dialogue as a `DialogueModel` object for the frontend. + """ + if value is None: + return None + + if isinstance(value, str): + return DialogueModel(root=value) + + dialogue_lines = [ + DialogueLine(speaker=line["speaker"], text=line["text"]) for line in value + ] + return DialogueModel(root=dialogue_lines) + + def as_example(self, value): + return self.preprocess(DialogueModel(root=value)) + + def example_payload(self): + return [ + {"speaker": "Speaker 1", "text": "Hello, how are you?"}, + {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}, + ] + + def example_value(self): + return [ + {"speaker": "Speaker 1", "text": "Hello, how are you?"}, + {"speaker": "Speaker 2", "text": "I'm fine, thank you!"}, + ] diff --git a/gradio/stubs/anyio.pyi b/gradio/stubs/anyio.pyi index 0a354c32ef..d83eb5143a 100644 --- a/gradio/stubs/anyio.pyi +++ b/gradio/stubs/anyio.pyi @@ -1,6 +1,6 @@ """ This module contains type hints for the anyio library. It was auto-generated so may include errors.""" -from typing import Any, Callable, Coroutine, TypeVar, overload, Optional, Union from types import TracebackType +from typing import Any, Callable, Coroutine, Optional, TypeVar, Union, overload T = TypeVar('T') T_Retval = TypeVar('T_Retval') diff --git a/js/dialogue/Dialogue.svelte b/js/dialogue/Dialogue.svelte new file mode 100644 index 0000000000..e130073142 --- /dev/null +++ b/js/dialogue/Dialogue.svelte @@ -0,0 +1,1003 @@ + + + + +