Add Info modal

PierreMesure · PierreMesure · commit 2e684a320816 · 2025-04-28T13:05:13.000+01:00
diff --git a/src/components/AudioManager.tsx b/src/components/AudioManager.tsx
@@ -14,7 +14,7 @@ import { Transcriber } from "../hooks/useTranscriber";
 import Progress from "./Progress";
 import AudioRecorder from "./AudioRecorder";
 import { t } from "i18next";
-import { useTranslation } from "react-i18next";
+import { Trans, useTranslation } from "react-i18next";
 
 function titleCase(str: string) {
     str = str.toLowerCase();
@@ -215,6 +215,90 @@ export function AudioManager(props: { transcriber: Transcriber }) {
                 </>
             )}
 
+            <InfoTile
+                className='fixed bottom-4 right-28'
+                icon={<InfoIcon />}
+                title={t("manager.info_title")}
+                content={
+                    <Trans i18nKey='manager.info_content'>
+                        Whisper-web is a small website to help you transcribe
+                        audio speech into text.
+                        <br />
+                        The first time you give it a file, it will download an
+                        open AI model and perform the transcription locally in
+                        your browser. This means that your audio file never
+                        leaves your device. It also means that the transcription
+                        will be slow or fail if your computer/smartphone is not
+                        powerful enough to perform it. In the settings (bottom
+                        right corner), you can pick among different models and
+                        various quantisation levels. A smaller model with a
+                        lower quantisation will be faster but make more
+                        mistakes. By default, Whisper-web uses small models but
+                        you can try a bigger one and see if it works on your
+                        device. For most languages, it is best to use{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://openai.com/index/whisper/'
+                        >
+                            OpenAI's official models
+                        </a>{" "}
+                        (Multilingual) but for Swedish or Norwegian, it is
+                        recommended to use versions that have been specifically
+                        trained for them. The Swedish models are called{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://huggingface.co/KBLab/kb-whisper-tiny'
+                        >
+                            KB-whisper
+                        </a>{" "}
+                        and have been trained by the{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://kb.se/samverkan-och-utveckling/nytt-fran-kb/nyheter-samverkan-och-utveckling/2025-02-20-valtranad-ai-modell-forvandlar-tal-till-text.html'
+                        >
+                            national library
+                        </a>{" "}
+                        on data from parliament debates and the Swedish public
+                        service. The Norwegian ones are named{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://huggingface.co/collections/NbAiLab/nb-whisper-65cb8322877f943912afcd9f'
+                        >
+                            nb-whisper
+                        </a>{" "}
+                        and have also been trained by the country's{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://arxiv.org/abs/2402.01917'
+                        >
+                            national library
+                        </a>
+                        . This project's source code is available on{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://github.com/PierreMesure/whisper-web'
+                        >
+                            Github
+                        </a>
+                        . Feel free to reuse or contribute to it. The website is
+                        hosted on{" "}
+                        <a
+                            className='underline'
+                            target='_blank'
+                            href='https://www.statichost.eu'
+                        >
+                            statichost.eu
+                        </a>
+                        , a privacy-friendly service to host static sites.
+                    </Trans>
+                }
+            />
             <SettingsTile
                 className='fixed bottom-4 right-4'
                 transcriber={props.transcriber}
@@ -224,6 +308,36 @@ export function AudioManager(props: { transcriber: Transcriber }) {
     );
 }
 
+/**
+ * Tile that opens an information modal when clicked.
+ *
+ * The modal is shown with `submitEnabled={false}` and no `onSubmit`
+ * handler; its `onClose` callback hides it again.
+ */
+function InfoTile(props: {
+    icon: JSX.Element;
+    className?: string;
+    title: string;
+    content: string | JSX.Element;
+}) {
+    const { icon, className, title, content } = props;
+    // Visibility flag for the modal; hidden until the tile is clicked.
+    const [isOpen, setIsOpen] = useState(false);
+
+    return (
+        <div className={className}>
+            <Tile icon={icon} onClick={() => setIsOpen(true)} />
+            <Modal
+                show={isOpen}
+                submitEnabled={false}
+                onClose={() => setIsOpen(false)}
+                title={title}
+                content={content}
+            />
+        </div>
+    );
+}
+
 function SettingsTile(props: {
     icon: JSX.Element;
     className?: string;
@@ -511,7 +625,6 @@ function UrlModal(props: {
             onClose={props.onClose}
             submitText={t("manager.submit")}
             onSubmit={onSubmit}
-            cacheSize={0}
         />
     );
 }
@@ -641,7 +754,6 @@ function RecordModal(props: {
             submitText={t("manager.submit")}
             submitEnabled={audioBlob !== undefined}
             onSubmit={onSubmit}
-            cacheSize={0}
         />
     );
 }
@@ -708,7 +820,7 @@ function SettingsIcon() {
             xmlns='http://www.w3.org/2000/svg'
             fill='none'
             viewBox='0 0 24 24'
-            strokeWidth='1.25'
+            strokeWidth='1.75'
             stroke='currentColor'
         >
             <path
@@ -725,6 +837,18 @@ function SettingsIcon() {
     );
 }
 
+/**
+ * SVG glyph passed as the `icon` of the info tile; drawn with `currentColor`.
+ */
+function InfoIcon() {
+    const glyph = "M12 17q.425 0 .713-.288T13 16v-4q0-.425-.288-.712T12 11t-.712.288T11 12v4q0 .425.288.713T12 17m0-8q.425 0 .713-.288T13 8t-.288-.712T12 7t-.712.288T11 8t.288.713T12 9m0 13q-2.075 0-3.9-.788t-3.175-2.137T2.788 15.9T2 12t.788-3.9t2.137-3.175T8.1 2.788T12 2t3.9.788t3.175 2.137T21.213 8.1T22 12t-.788 3.9t-2.137 3.175t-3.175 2.138T12 22m0-2q3.35 0 5.675-2.325T20 12t-2.325-5.675T12 4T6.325 6.325T4 12t2.325 5.675T12 20m0-8";
+    return (
+        <svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='currentColor'>
+            <path d={glyph} />
+        </svg>
+    );
+}
+
 function MicrophoneIcon() {
     return (
         <svg
diff --git a/src/components/modal/Modal.tsx b/src/components/modal/Modal.tsx
@@ -5,12 +5,12 @@ import { Fragment, JSX } from "react";
 export interface Props {
     show: boolean;
     onClose: () => void;
-    onSubmit: () => void;
+    onSubmit?: () => void; // optional: InfoTile renders this modal without a submit handler
     submitText?: string;
     submitEnabled?: boolean;
     title: string | JSX.Element;
     content: string | JSX.Element;
-    cacheSize: number;
+    cacheSize?: number; // optional: UrlModal and RecordModal no longer pass it
 }
 
 export default function Modal({
@@ -62,7 +62,7 @@ export default function Modal({
                                 >
                                     {title}
                                 </DialogTitle>
-                                <div className='mt-3 text-sm text-gray-500'>
+                                <div className='modal-content mt-3 text-sm text-gray-500'>
                                     {content}
                                 </div>
 
diff --git a/src/locale/en.json b/src/locale/en.json
@@ -25,7 +25,9 @@
             "settings": "Settings",
             "gpu": "GPU",
             "gpu_disabled": "GPU (unsupported browser)",
-            "clear_cache": "Clear cache"
+            "clear_cache": "Clear cache",
+            "info_title": "Whisper-web",
+            "info_content": "Whisper-web is a small website to help you transcribe audio speech into text.<1></1><1></1>The first time you give it a file, it will download an open AI model and perform the transcription locally in your browser. This means that your audio file never leaves your device. It also means that the transcription will be slow or fail if your computer/smartphone is not powerful enough to perform it.<1></1><1></1>In the settings (bottom right corner), you can pick among different models and various quantisation levels. A smaller model with a lower quantisation will be faster but make more mistakes. By default, Whisper-web uses small models but you can try a bigger one and see if it works on your device.<1></1><1></1>For most languages, it is best to use <4>OpenAI's official models</4> (Multilingual) but for Swedish or Norwegian, it is recommended to use versions that have been specifically trained for them.<1></1><1></1>The Swedish models are called <8>KB-whisper</8> and have been trained by the <12>national library</12> on data from parliament debates and the Swedish public service. The Norwegian ones are named <16>nb-whisper</16> and have also been trained by the country's <20>national library</20>.<1></1><1></1>This project's source code is available on <23>GitHub</23>. Feel free to reuse or contribute to it.<1></1><1></1>The website is hosted on <26>statichost.eu</26>, a privacy-friendly service to host static sites."
         },
         "recorder": {
             "start_recording": "Start recording",
diff --git a/src/locale/es.json b/src/locale/es.json
@@ -25,7 +25,9 @@
             "settings": "Configuraciones",
             "gpu": "GPU",
             "gpu_disabled": "GPU (navegador no compatible)",
-            "clear_cache": "Borrar caché"
+            "clear_cache": "Borrar caché",
+            "info_title": "Whisper-web",
+            "info_content": "Whisper-web es un pequeño sitio web para ayudarte a transcribir audio a texto.<1></1><1></1>La primera vez que le das un archivo, descargará un modelo de IA abierta y realizará la transcripción localmente en tu navegador. Esto significa que tu archivo de audio nunca saldrá de tu dispositivo. También significa que la transcripción será lenta o fallará si tu computadora/teléfono inteligente no es lo suficientemente potente para realizarla.<1></1><1></1>En la configuración (esquina inferior derecha), puedes elegir entre diferentes modelos y varios niveles de cuantización. Un modelo más pequeño con menor cuantización será más rápido pero cometerá más errores. Por defecto, Whisper-web utiliza modelos pequeños, pero puedes probar uno más grande y ver si funciona en tu dispositivo.<1></1><1></1>Para la mayoría de los idiomas, es mejor usar <4>los modelos oficiales de OpenAI</4> (Multilingües), pero para el sueco o el noruego se recomienda usar versiones que han sido entrenadas específicamente para ellos.<1></1><1></1>Los modelos suecos se llaman <8>KB-whisper</8> y han sido entrenados por la <12>biblioteca nacional</12> con datos de debates parlamentarios y el servicio público sueco. Los noruegos se llaman <16>nb-whisper</16> y también han sido entrenados por la <20>biblioteca nacional</20> del país.<1></1><1></1>El código fuente de este proyecto está disponible en <23>Github</23>. Siéntete libre de reutilizarlo o contribuir a él.<1></1><1></1>El sitio web está alojado en <26>statichost.eu</26>, un servicio amigable con la privacidad para alojar sitios estáticos."
         },
         "recorder": {
             "start_recording": "Iniciar grabación",
diff --git a/src/locale/no.json b/src/locale/no.json
@@ -25,7 +25,9 @@
             "settings": "Innstillinger",
             "gpu": "GPU",
             "gpu_disabled": "GPU (ikke kompatibel med nettleseren)",
-            "clear_cache": "Tøm hurtigbuffer"
+            "clear_cache": "Tøm hurtigbuffer",
+            "info_title": "Whisper-web",
+            "info_content": "Whisper-web er et lite nettsted for å hjelpe deg med å transkribere lydtale til tekst.<1></1><1></1>Første gang du gir det en fil, vil det laste ned en åpen AI-modell og utføre transkripsjonen lokalt i nettleseren din. Dette betyr at lydfilen din aldri forlater enheten din. Det betyr også at transkripsjonen vil gå sakte eller mislykkes hvis datamaskinen/smarttelefonen din ikke er kraftig nok til å utføre den.<1></1><1></1>I innstillingene (nede i høyre hjørne) kan du velge blant forskjellige modeller og ulike kvantiseringsnivåer. En mindre modell med lavere kvantisering vil være raskere, men gjøre flere feil. Som standard bruker Whisper-web små modeller, men du kan prøve en større og se om det fungerer på enheten din.<1></1><1></1>For de fleste språk er det best å bruke <4>OpenAIs offisielle modeller</4> (Multilingual), men for svensk eller norsk anbefales det å bruke versjoner som har blitt spesifikt trent for dem.<1></1><1></1>De svenske modellene kalles <8>KB-whisper</8> og har blitt trent av <12>det nasjonale biblioteket</12> på data fra parlamentsdebatter og svensk offentlig tjeneste. De norske kalles <16>nb-whisper</16> og har også blitt trent av landets <20>nasjonalbibliotek</20>.<1></1><1></1>Prosjektets kildekode er tilgjengelig på <23>Github</23>. Føl deg fri til å gjenbruke eller bidra til det.<1></1><1></1>Nettstedet er vert på <26>statichost.eu</26>, en personvernvennlig tjeneste for å hoste statiske sider."
         },
         "recorder": {
             "start_recording": "Start opptak",
diff --git a/src/locale/sv.json b/src/locale/sv.json
@@ -25,7 +25,9 @@
             "settings": "Inställningar",
             "gpu": "GPU",
             "gpu_disabled": "GPU (ej kompatibel med webbläsaren)",
-            "clear_cache": "Rensa cache"
+            "clear_cache": "Rensa cache",
+            "info_title": "Whisper-web",
+            "info_content": "Whisper-web är en liten tjänst för att transkribera ljudtal till text.<1></1><1></1>Första gången du anger en ljudfil kommer en öppen AI-modell att laddas ner så transkriptionen kan ske lokalt i din webbläsare. På detta sätt lämnar din ljudfil aldrig din enhet. Det betyder också att transkriptionen kan bli långsam eller misslyckas om din dator/smartphone inte är tillräckligt kraftfull.<1></1><1></1>I inställningarna (nere till höger) kan du välja mellan olika modeller och olika kvantiseringsnivåer. En mindre modell med lägre kvantisering kommer att vara snabbare men göra fler språkmisstag. Som standard använder Whisper-web små modeller men du kan prova en större och se om det fungerar på din enhet.<1></1><1></1>För de flesta språk är det bäst att använda <4>OpenAI:s officiella modeller</4> (Multilingual) men för svenska eller norska rekommenderas särskilda versioner som har tränats specifikt för dem.<1></1><1></1>De svenska modellerna heter <8>KB-whisper</8> och har tränats av <12>Kungliga biblioteket</12> på ljud från riksdagsdebatter, SVT m.fl. De norska heter <16>nb-whisper</16> och har också tränats av landets <20>nationalbibliotek</20>.<1></1><1></1>Projektets källkod finns tillgänglig på <23>Github</23>. Du får gärna återanvända eller bidra till den.<1></1><1></1>Webbplatsen hostas på <26>statichost.eu</26>, en integritetsvänlig tjänst för statiska webbsidor."
         },
         "recorder": {
             "start_recording": "Starta inspelning",