Skip to content

Commit 43cf0f1

Browse files
author
chuxij
committed
add application description
1 parent 4358d7a commit 43cf0f1

File tree

2 files changed

+122
-23
lines changed

2 files changed

+122
-23
lines changed

index.html

Lines changed: 122 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -373,15 +373,119 @@
373373
sectionTitle = 'Variations Generation';
374374
} else if (sectionId === 'Controlability-repaint') {
375375
sectionTitle = 'Repaint';
376+
} else if (sectionId === 'Controlability-edit') {
377+
sectionTitle = 'Edit';
376378
} else if (sectionId === 'Application-Lyric2Vocal') {
377379
sectionTitle = 'Lyric2Vocal (LoRA)';
378380
} else if (sectionId === 'Text2Sample') {
379381
sectionTitle = 'Text2Samples (LoRA)';
380382
}
381383

382-
sectionCell.textContent = sectionTitle;
384+
// Clear existing content
385+
sectionCell.textContent = '';
386+
387+
// Create title element with more prominent styling
388+
const titleElement = document.createElement('div');
389+
titleElement.textContent = sectionTitle;
390+
titleElement.style.marginBottom = '15px';
391+
titleElement.style.fontSize = '1.3em';
392+
titleElement.style.fontWeight = 'bold';
393+
titleElement.style.color = '#333';
394+
titleElement.style.letterSpacing = '1px';
395+
sectionCell.appendChild(titleElement);
396+
397+
// Add note div based on section ID with distinct styling
398+
const noteDiv = document.createElement('div');
399+
noteDiv.style.borderLeft = '4px solid #097EFF';
400+
noteDiv.style.paddingLeft = '15px';
401+
noteDiv.style.marginBottom = '10px';
402+
noteDiv.style.textAlign = 'left';
403+
noteDiv.style.fontSize = '0.85em';
404+
noteDiv.style.color = '#606c71';
405+
noteDiv.style.backgroundColor = '#f8f9fa';
406+
noteDiv.style.padding = '8px 15px';
407+
noteDiv.style.borderRadius = '0 4px 4px 0';
408+
409+
const noteTitle = document.createElement('p');
410+
noteTitle.style.fontWeight = 'bold';
411+
noteTitle.style.marginBottom = '8px';
412+
noteTitle.style.color = '#097EFF';
413+
noteTitle.style.fontSize = '1em';
414+
noteTitle.style.borderBottom = '1px solid #dee2e6';
415+
noteTitle.style.paddingBottom = '4px';
416+
noteTitle.textContent = 'Note:';
417+
noteDiv.appendChild(noteTitle);
418+
419+
// Add specific notes based on section ID
420+
if (sectionId === 'DiverseStyles') {
421+
addNoteItem(noteDiv, 'Supports all mainstream music styles with various description formats including short tags, descriptive text, or use-case scenarios.');
422+
} else if (sectionId === 'InstrumentalStyles') {
423+
addNoteItem(noteDiv, 'Supports various instrumental music generation across different genres and styles.');
424+
addNoteItem(noteDiv, 'Capable of producing realistic instrumental tracks with appropriate timbre and expression for each instrument.');
425+
addNoteItem(noteDiv, 'Can generate complex arrangements with multiple instruments while maintaining musical coherence.');
426+
} else if (sectionId === 'VocalTechniques') {
427+
addNoteItem(noteDiv, 'Capable of rendering various vocal styles and techniques with good quality.');
428+
addNoteItem(noteDiv, 'While audio quality may have some limitations, the system shows promising potential in vocal synthesis.');
429+
addNoteItem(noteDiv, 'Supports different vocal expressions including various singing techniques and styles.');
430+
} else if (sectionId === 'MultipleLang') {
431+
addNoteItem(noteDiv, '19 languages are supported. But due to data imbalance, less common languages may underperform. Top 10 well-performing languages are:');
432+
const langList = document.createElement('p');
433+
langList.style.margin = '0';
434+
langList.style.paddingLeft = '15px';
435+
langList.innerHTML = '• English<br>• Chinese<br>• Russian<br>• Spanish<br>• Japanese<br>• German<br>• French<br>• Portuguese<br>• Italian<br>• Korean';
436+
noteDiv.appendChild(langList);
437+
} else if (sectionId === 'Controlability-retake') {
438+
addNoteItem(noteDiv, 'This feature is implemented using training-free, inference-time optimization techniques.');
439+
addNoteItem(noteDiv, 'Our flow-matching model generates initial noise, then uses trigFlow\'s noise formula to add additional Gaussian noise.');
440+
addNoteItem(noteDiv, 'By controlling the mixing ratio between the original initial noise and the new Gaussian noise, we can adjust the degree of variation in the generated output.');
441+
} else if (sectionId === 'Controlability-repaint') {
442+
addNoteItem(noteDiv, 'Implemented by adding noise to the target audio input and applying mask constraints during the ODE process.');
443+
addNoteItem(noteDiv, 'When input conditions change from the original generation, only specific aspects can be modified while preserving the rest.');
444+
addNoteItem(noteDiv, 'Combined with Variations Generation techniques, it can also create localized variations in style, lyrics, or vocals.');
445+
} else if (sectionId === 'Controlability-edit') {
446+
addNoteItem(noteDiv, 'We\'ve innovatively applied flow-edit technology to enable localized lyric modifications while preserving melody, vocals, and accompaniment.');
447+
addNoteItem(noteDiv, 'Works with both generated content and uploaded audio, greatly enhancing creative possibilities.');
448+
addNoteItem(noteDiv, 'Current limitation: can only modify small segments of lyrics at once to avoid distortion, but multiple edits can be applied sequentially.');
449+
} else if (sectionId === 'Application-Lyric2Vocal') {
450+
addNoteItem(noteDiv, 'Based on a LoRA fine-tuned on pure vocal data, allowing direct generation of vocal samples from lyrics.');
451+
addNoteItem(noteDiv, 'Offers numerous practical applications such as vocal demos, guide tracks, songwriting assistance, and vocal arrangement experimentation.');
452+
addNoteItem(noteDiv, 'Provides a quick way to test how lyrics might sound when sung, helping songwriters iterate faster.');
453+
} else if (sectionId === 'Text2Sample') {
454+
addNoteItem(noteDiv, 'Similar to Lyric2Vocal, but fine-tuned on pure instrumental and sample data.');
455+
addNoteItem(noteDiv, 'Capable of generating conceptual music production samples from text descriptions.');
456+
addNoteItem(noteDiv, 'Useful for quickly creating instrument loops, sound effects, and musical elements for production.');
457+
} else if (sectionId === 'RapMachine') {
458+
addNoteItem(noteDiv, 'Fine-tuned on pure rap data to create an AI system specialized in rap generation.');
459+
addNoteItem(noteDiv, 'Expected capabilities include AI rap battles and narrative expression through rap.');
460+
addNoteItem(noteDiv, 'Rap has exceptional storytelling and expressive capabilities, offering extraordinary application potential.');
461+
} else if (sectionId === 'StemGen') {
462+
addNoteItem(noteDiv, 'A controlnet-lora trained on multi-track data to generate individual instrument stems.');
463+
addNoteItem(noteDiv, 'Takes a reference track and specified instrument (or instrument reference audio) as input.');
464+
addNoteItem(noteDiv, 'Outputs an instrument stem that complements the reference track, such as creating a piano accompaniment for a flute melody or adding jazz drums to a lead guitar.');
465+
} else if (sectionId === 'Singing2Accompaniment') {
466+
addNoteItem(noteDiv, 'The reverse process of StemGen, generating a mixed master track from a single vocal track.');
467+
addNoteItem(noteDiv, 'Takes a vocal track and specified style as input to produce a complete vocal accompaniment.');
468+
addNoteItem(noteDiv, 'Creates full instrumental backing that complements the input vocals, making it easy to add professional-sounding accompaniment to any vocal recording.');
469+
}
470+
471+
// Add the note div to the section cell if it has content
472+
if (noteDiv.childNodes.length > 1) {
473+
sectionCell.appendChild(noteDiv);
474+
}
475+
383476
sectionRow.appendChild(sectionCell);
384477
tbody.appendChild(sectionRow);
478+
479+
// Helper function to add note items
480+
function addNoteItem(parent, text) {
481+
const noteItem = document.createElement('p');
482+
noteItem.style.margin = '0 0 6px 0';
483+
noteItem.style.lineHeight = '1.4';
484+
noteItem.style.textIndent = '-12px';
485+
noteItem.style.paddingLeft = '12px';
486+
noteItem.textContent = '- ' + text;
487+
parent.appendChild(noteItem);
488+
}
385489

386490
// Add samples for this section
387491
samples.forEach(sample => {
@@ -920,6 +1024,13 @@ <h1 id="">
9201024
<span style="font-size: 24px;">Hugging Face</span>
9211025
</a>
9221026

1027+
<a href="#" class="arxiv-link" target="_blank" rel="noopener noreferrer" style="margin-right: 20px;" onclick="return false;">
1028+
<svg fill="currentColor" height="28" width="28" viewBox="0 0 512 512">
1029+
<path d="M128 0C74.98 0 32 42.98 32 96v320c0 53.02 42.98 96 96 96h256c53.02 0 96-42.98 96-96V96c0-53.02-42.98-96-96-96H128zM400 432H112c-8.836 0-16-7.164-16-16V96c0-8.838 7.164-16 16-16h288c8.836 0 16 7.162 16 16v320c0 8.836-7.164 16-16 16zM192 128h-48c-8.836 0-16 7.162-16 16v32c0 8.836 7.164 16 16 16h48c8.836 0 16-7.164 16-16v-32c0-8.838-7.164-16-16-16zm176 0h-48c-8.836 0-16 7.162-16 16v32c0 8.836 7.164 16 16 16h48c8.836 0 16-7.164 16-16v-32c0-8.838-7.164-16-16-16zM192 224h-48c-8.836 0-16 7.164-16 16v32c0 8.836 7.164 16 16 16h48c8.836 0 16-7.164 16-16v-32c0-8.836-7.164-16-16-16zm176 0h-48c-8.836 0-16 7.164-16 16v32c0 8.836 7.164 16 16 16h48c8.836 0 16-7.164 16-16v-32c0-8.836-7.164-16-16-16zM192 320h-48c-8.836 0-16 7.164-16 16v32c0 8.836 7.164 16 16 16h48c8.836 0 16-7.164 16-16v-32c0-8.836-7.164-16-16-16z"/>
1030+
</svg>
1031+
<span style="font-size: 24px;">Paper (Coming Soon)</span>
1032+
</a>
1033+
9231034
<a href="https://huggingface.co/spaces/ACE-Step/ACE-Step" class="huggingface-link" target="_blank"
9241035
rel="noopener noreferrer">
9251036
<svg class="size-8 mr-1.5 dark:mr-2 dark:drop-shadow-md" xmlns="http://www.w3.org/2000/svg"
@@ -963,7 +1074,7 @@ <h2 id="abstract" style="text-align: center;">Abstract<a name="abstract"></a></h
9631074
exceptional musical coherence and lyric alignment across metrics for melody, harmony, and rhythmic consistency. By
9641075
preserving fine-grained acoustic details,
9651076
ACE-Step supports sophisticated control mechanisms, including voice cloning, lyric-editing, remixing, and track
966-
generation (e.g., lyric2vocal or singing2bgm).
1077+
generation (e.g., Lyric2Vocal or Singing2Accompaniment).
9671078
</p>
9681079

9691080
<p style="text-align: justify;">
@@ -1026,12 +1137,18 @@ <h3>Table of contents</h3>
10261137
<ul>
10271138
<li><a href="#Application-Lyric2Vocal">Lyric2Vocal</a></li>
10281139
<li><a href="#Text2Sample">Text2Sample</a></li>
1140+
</ul>
1141+
<li><a href="#CommingSoon">Coming Soon</a></li>
1142+
<ul>
10291143
<li><a href="#RapMachine">RapMachine</a></li>
10301144
<li><a href="#StemGen">StemGen</a></li>
1031-
<li><a href="#Singing2bgm">Singing2bgm</a></li>
1145+
<li><a href="#Singing2Accompaniment">Singing2Accompaniment</a></li>
10321146
</ul>
10331147
</ul>
10341148
</div>
1149+
<figure>
1150+
<img src="raw/fig/application_map.png" alt="framework" width="1000" height="600">
1151+
</figure>
10351152
<h1 id="BaselineQuality" style="text-align: center;">Baseline Quality<a name="BaselineQuality"></a></h1>
10361153

10371154
<h2 id="DiverseStyles">Modeling Diverse Genres & Vocal Styles<a name="DiverseStyles"></a></h2>
@@ -1040,11 +1157,9 @@ <h2 id="DiverseStyles">Modeling Diverse Genres & Vocal Styles<a name="DiverseSty
10401157
<p style="margin: 0;"> - Lyrics are randomly picked from the AI music generation community or the internet and are not in our training set.</p>
10411158
<p style="margin: 0;"> - Existing models either lack length control (LLMs) or are fixed-length (diffusion). We
10421159
enable flexible length for practical music composition.</p>
1043-
<p style="margin: 0;"> - Unlike rigid academic tags in open-source models, ours adapt to natural
1044-
language—supporting comma-separated tags tags, long descriptions, or scene-based inputs.</p>
1045-
<p style="margin: 0;"> - 19 languages are supported. But due to data imbalance, less common languages may underperform. Here are the top 10 best-performing languages</p>
10461160
<p style="margin: 0;"> - B.T.W., the project page is vibe coded by Roocode. 😊</p>
10471161
</div>
1162+
10481163
<div class="fixed-toggle-container">
10491164
<button id="toggle-all-lyrics" class="toggle-all-button">Collapse All Lyrics</button>
10501165
</div>
@@ -1061,22 +1176,6 @@ <h2 id="DiverseStyles">Modeling Diverse Genres & Vocal Styles<a name="DiverseSty
10611176
<!-- Table rows will be dynamically generated by JavaScript -->
10621177
</tbody>
10631178
</table>
1064-
1065-
<!-- <h2 id="MultipleLang">Multiple Languages<a name="MultipleLang"></a></h2>
1066-
<div style="border-left: 4px solid #FFD702; padding-left: 15px; margin-bottom: 20px;">
1067-
<p style="font-weight: bold; margin-bottom: 5px;">Note:</p>
1068-
<p style="margin: 0;"> - Our model theoretically supports 19 languages, but due to data imbalance, less common
1069-
languages may underperform. Here are the top 10 best-performing languages:
1070-
- English
1071-
- Chinese
1072-
- Russian
1073-
- Spanish
1074-
- Japanese
1075-
- German
1076-
- French
1077-
- Portuguese
1078-
- Italian
1079-
- Korean</p> -->
10801179
</div>
10811180

10821181
</section>
@@ -1136,4 +1235,4 @@ <h2 id="limitations" style="text-align: center;">Limitations & Future Improvemen
11361235
</section>
11371236
</body>
11381237

1139-
</html>
1238+
</html>

raw/fig/application_map.png

253 KB
Loading

0 commit comments

Comments
 (0)