-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquick_start.py
110 lines (93 loc) · 3.75 KB
/
quick_start.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from pathlib import Path
import yaml
from tqdm import tqdm
import shutil
import librosa
from modules.ds_tools.loudness_norm import loudness_norm_file
from modules.ds_tools.slicer import slice_audio
from modules.ds_tools.wav2words import funasr_folder, pinyin_folder
from modules.SOFA.SOFA_infer import sofa_infer
from validate_labels import validate_labels
from summary_pitch import summary_pitch
from modules.FoxBreatheLabeler.FBL_infer import export
from modules.ds_tools.filter_bad import move_bad
from modules.ds_tools.textgrid2ds import textgrid2ds
from ds_dataset import ds_dataset
def _audio_duration(audio_path):
    """Return the duration of ``audio_path`` in seconds as reported by librosa.

    librosa 0.10 renamed the ``filename`` keyword of ``get_duration`` to
    ``path`` and deprecated the old spelling, so the new keyword is tried
    first and the old one is used only when the installed librosa rejects it.
    """
    try:
        return librosa.get_duration(path=str(audio_path))
    except TypeError:
        # Older librosa releases only accept the ``filename`` keyword.
        return librosa.get_duration(filename=str(audio_path))


def quick_start():
    """Run the whole dataset-preparation pipeline for one singer.

    Settings are read from ``tools_config.yaml`` in the current working
    directory (keys ``singer``, ``dictionary``, ``SOFA_ckpt``, ``FBL_ckpt``
    and ``SOME_ckpt``).  Input recordings are picked up from ``work_audios``
    and every intermediate folder is created under ``data/<singer>``.

    Steps, in order: loudness normalisation, slicing, lab generation,
    textgrid generation (SOFA), breath labelling (FBL), filtering of bad
    labels, ds generation and dataset building.

    Raises:
        SystemExit: with status 1 when a required config key is missing, or
            when wav files are waiting in ``work_audios`` while the singer's
            ``original`` folder already contains wav files.
    """
    with open('tools_config.yaml', 'r', encoding='utf-8') as file:
        # An empty YAML document loads as None; treat it as "no keys set".
        data = yaml.safe_load(file) or {}

    # Fail early on missing settings: str(None) would otherwise silently turn
    # into the literal "None" and only blow up several steps later.
    required_keys = ('singer', 'dictionary', 'SOFA_ckpt', 'FBL_ckpt', 'SOME_ckpt')
    missing = [key for key in required_keys if data.get(key) is None]
    if missing:
        print(f"Error: missing {', '.join(missing)} in tools_config.yaml")
        raise SystemExit(1)

    singer = str(data['singer'])
    dictionary = str(data['dictionary'])
    SOFA_ckpt = str(data['SOFA_ckpt'])
    FBL_ckpt = str(data['FBL_ckpt'])
    SOME_ckpt = str(data['SOME_ckpt'])

    work_audios = Path('work_audios')
    base_path = Path('data') / singer
    original = base_path / 'original'
    norm = base_path / 'norm'
    wavs = base_path / 'wavs'
    lab = base_path / 'lab'
    textgrids = base_path / 'textgrids'
    bad = base_path / 'bad'
    ds = base_path / 'ds'
    dataset = base_path / (singer + '_dataset')
    for folder in [work_audios, base_path, original, norm, wavs, lab, textgrids, bad, ds, dataset]:
        folder.mkdir(parents=True, exist_ok=True)

    # Move the incoming audio into the singer's "original" folder.
    # The listing is snapshotted because the loop below empties the folder
    # that is being listed.
    incoming = sorted(work_audios.glob('*.wav'))
    if incoming and any(original.glob('*.wav')):
        print(f"Error: folder {original} is not empty, change singer in tools_config.yaml or clear folder")
        # SystemExit is raised directly: the bare exit() helper is injected by
        # the site module and is not guaranteed to exist.
        raise SystemExit(1)
    for file in incoming:
        destination_file = original / file.name
        shutil.move(file, destination_file)

    # Loudness matching.
    for f in tqdm(sorted(original.glob('*.wav'))):
        norm_audio = norm / f.name
        loudness_norm_file(f, norm_audio)
    print("Step 1: loudness_norm complete")

    # Audio slicing, first pass: files longer than 10 s are sliced into
    # "wavs", shorter ones are moved there unchanged.  Snapshot the listing
    # because short files are moved out of "norm" while iterating.
    for f in tqdm(sorted(norm.glob('*.wav'))):
        if _audio_duration(f) > 10:
            slice_audio(f, wavs, db_threshold=-32)
        else:
            destination_f = wavs / f.name
            shutil.move(f, destination_f)
    # Second pass: anything in "wavs" still longer than 15 s is sliced again
    # with a higher threshold and the long file is removed.  Snapshot the
    # listing because this loop both adds files to and deletes files from
    # "wavs".
    for f in tqdm(sorted(wavs.glob('*.wav'))):
        if _audio_duration(f) > 15:
            slice_audio(f, wavs, db_threshold=-24)
            f.unlink()
    print("Step 2: slice complete")

    # Generate lab files.
    funasr_folder(wavs, lab)
    pinyin_folder(lab, lab, "dictionary/phrases_dict.txt")
    validate_labels(lab, wavs, dictionary, 'lab', True)
    print("Step 3: lab complete")

    # Generate textgrids.
    sofa_infer(SOFA_ckpt, wavs, lab, textgrids, "force", "Dictionary", "NoneAPDetector", "lab", "textgrid", True, dictionary=dictionary)
    print("Step 4: SOFA complete")

    # Breath labelling.
    export(FBL_ckpt, wavs, textgrids, textgrids)
    print("Step 5: FBL complete")

    # Filter out bad labels.
    confidence = textgrids / "confidence.csv"
    move_bad(wavs, textgrids, bad, confidence, ratio=0.4)
    print("Step 6: filter_bad complete")

    # Generate ds files.
    validate_labels(textgrids, wavs, dictionary, 'textgrid', True)
    summary_pitch(wavs, textgrids)
    textgrid2ds(textgrids, wavs, ds, dictionary, use_some=True, some_model=SOME_ckpt, round_midi=True)
    print("Step 7: build_ds complete")

    # Build the dataset.
    ds_dataset(wavs, ds, dataset)
    validate_labels(dataset, wavs, dictionary, 'csv', summary=True)
    print("Step 8: dataset complete")
    print("Congratulations! All the steps have been completed.\nThis project is produced by Bai_Shuo.")

    # Optional cleanup of the intermediate folders (disabled; uncomment to enable).
    # shutil.rmtree(norm)
    # shutil.rmtree(wavs)
    # shutil.rmtree(lab)
    # shutil.rmtree(textgrids)
    # shutil.rmtree(bad)
    # shutil.rmtree(ds)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    quick_start()