Skip to content

Commit 7bd25c4

Browse files
authored
Add files via upload
1 parent e8d92c3 commit 7bd25c4

File tree

4 files changed

+46
-35
lines changed

4 files changed

+46
-35
lines changed

MDXNet.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ def demix_base(self, mixes, margin_size):
141141
# del self.model
142142
progress_bar.close()
143143
return _sources
144-
def prediction(self, m,vocal_root,others_root):
144+
def prediction(self, m,vocal_root,others_root,format):
145145
os.makedirs(vocal_root,exist_ok=True)
146146
os.makedirs(others_root,exist_ok=True)
147147
basename = os.path.basename(m)
@@ -151,8 +151,8 @@ def prediction(self, m,vocal_root,others_root):
151151
mix = mix.T
152152
sources = self.demix(mix.T)
153153
opt=sources[0].T
154-
sf.write("%s/%s_main_vocal.wav"%(vocal_root,basename), mix-opt, rate)
155-
sf.write("%s/%s_others.wav"%(others_root,basename), opt , rate)
154+
sf.write("%s/%s_main_vocal.%s"%(vocal_root,basename,format), mix-opt, rate)
155+
sf.write("%s/%s_others.%s"%(others_root,basename,format), opt , rate)
156156

157157
class MDXNetDereverb():
158158
def __init__(self,chunks):
@@ -167,8 +167,8 @@ def __init__(self,chunks):
167167
self.denoise=True
168168
self.pred=Predictor(self)
169169

170-
def _path_audio_(self,input,vocal_root,others_root):
171-
self.pred.prediction(input,vocal_root,others_root)
170+
def _path_audio_(self,input,vocal_root,others_root,format):
171+
self.pred.prediction(input,vocal_root,others_root,format)
172172

173173
if __name__ == '__main__':
174174
dereverb=MDXNetDereverb(15)

infer-web.py

+25-10
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@
7777
SynthesizerTrnMs768NSFsid,
7878
SynthesizerTrnMs768NSFsid_nono,
7979
)
80-
from scipy.io import wavfile
80+
import soundfile as sf
8181
from fairseq import checkpoint_utils
8282
import gradio as gr
8383
import logging
@@ -235,7 +235,8 @@ def vc_multi(
235235
filter_radius,
236236
resample_sr,
237237
rms_mix_rate,
238-
protect
238+
protect,
239+
format1
239240
):
240241
try:
241242
dir_path = (
@@ -271,8 +272,8 @@ def vc_multi(
271272
if "Success" in info:
272273
try:
273274
tgt_sr, audio_opt = opt
274-
wavfile.write(
275-
"%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt
275+
sf.write(
276+
"%s/%s.%s" % (opt_root, os.path.basename(path),format1), audio_opt,tgt_sr
276277
)
277278
except:
278279
info += traceback.format_exc()
@@ -283,7 +284,7 @@ def vc_multi(
283284
yield traceback.format_exc()
284285

285286

286-
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
287+
def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg,format0):
287288
infos = []
288289
try:
289290
inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
@@ -318,7 +319,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
318319
and info["streams"][0]["sample_rate"] == "44100"
319320
):
320321
need_reformat = 0
321-
pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
322+
pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal,format0)
322323
done = 1
323324
except:
324325
need_reformat = 1
@@ -332,7 +333,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
332333
inp_path = tmp_path
333334
try:
334335
if done == 0:
335-
pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
336+
pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal,format0)
336337
infos.append("%s->Success" % (os.path.basename(inp_path)))
337338
yield "\n".join(infos)
338339
except:
@@ -1341,6 +1342,12 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
13411342
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
13421343
)
13431344
with gr.Row():
1345+
format1= gr.Radio(
1346+
label=i18n("导出文件格式"),
1347+
choices=["wav", "flac","mp3","m4a"],
1348+
value="flac",
1349+
interactive=True,
1350+
)
13441351
but1 = gr.Button(i18n("转换"), variant="primary")
13451352
vc_output3 = gr.Textbox(label=i18n("输出信息"))
13461353
but1.click(
@@ -1359,7 +1366,8 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
13591366
filter_radius1,
13601367
resample_sr1,
13611368
rms_mix_rate1,
1362-
protect1
1369+
protect1,
1370+
format1
13631371
],
13641372
[vc_output3],
13651373
)
@@ -1402,9 +1410,15 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
14021410
visible=False, # 先不开放调整
14031411
)
14041412
opt_vocal_root = gr.Textbox(
1405-
label=i18n("指定输出人声文件夹"), value="opt"
1413+
label=i18n("指定输出主人声文件夹"), value="opt"
1414+
)
1415+
opt_ins_root = gr.Textbox(label=i18n("指定输出非主人声文件夹"), value="opt")
1416+
format0= gr.Radio(
1417+
label=i18n("导出文件格式"),
1418+
choices=["wav", "flac","mp3","m4a"],
1419+
value="flac",
1420+
interactive=True,
14061421
)
1407-
opt_ins_root = gr.Textbox(label=i18n("指定输出乐器文件夹"), value="opt")
14081422
but2 = gr.Button(i18n("转换"), variant="primary")
14091423
vc_output4 = gr.Textbox(label=i18n("输出信息"))
14101424
but2.click(
@@ -1416,6 +1430,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
14161430
wav_inputs,
14171431
opt_ins_root,
14181432
agg,
1433+
format0
14191434
],
14201435
[vc_output4],
14211436
)

infer_uvr5.py

+15-19
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from uvr5_pack.lib_v5 import spec_utils
1212
from uvr5_pack.utils import _get_name_params, inference
1313
from uvr5_pack.lib_v5.model_param_init import ModelParameters
14-
from scipy.io import wavfile
14+
import soundfile as sf
1515
from uvr5_pack.lib_v5.nets_new import CascadedNet
1616
from uvr5_pack.lib_v5 import nets_61968KB as nets
1717

@@ -41,7 +41,7 @@ def __init__(self, agg, model_path, device, is_half):
4141
self.mp = mp
4242
self.model = model
4343

44-
def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
44+
def _path_audio_(self, music_file, ins_root=None, vocal_root=None,format="flac"):
4545
if ins_root is None and vocal_root is None:
4646
return "No save root."
4747
name = os.path.basename(music_file)
@@ -120,12 +120,11 @@ def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
120120
else:
121121
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
122122
print("%s instruments done" % name)
123-
wavfile.write(
123+
sf.write(
124124
os.path.join(
125-
ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
125+
ins_root, "instrument_{}_{}.{}".format(name, self.data["agg"],format)
126126
),
127-
self.mp.param["sr"],
128-
(np.array(wav_instrument) * 32768).astype("int16"),
127+
(np.array(wav_instrument) * 32768).astype("int16"), self.mp.param["sr"],
129128
) #
130129
if vocal_root is not None:
131130
if self.data["high_end_process"].startswith("mirroring"):
@@ -138,12 +137,11 @@ def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
138137
else:
139138
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
140139
print("%s vocals done" % name)
141-
wavfile.write(
140+
sf.write(
142141
os.path.join(
143-
vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
142+
vocal_root, "vocal_{}_{}.{}".format(name, self.data["agg"],format)
144143
),
145-
self.mp.param["sr"],
146-
(np.array(wav_vocals) * 32768).astype("int16"),
144+
(np.array(wav_vocals) * 32768).astype("int16"), self.mp.param["sr"],
147145
)
148146

149147
class _audio_pre_new:
@@ -173,7 +171,7 @@ def __init__(self, agg, model_path, device, is_half):
173171
self.mp = mp
174172
self.model = model
175173

176-
def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型vocal和ins是反的
174+
def _path_audio_(self, music_file, vocal_root=None, ins_root=None,format="flac"):#3个VR模型vocal和ins是反的
177175
if ins_root is None and vocal_root is None:
178176
return "No save root."
179177
name = os.path.basename(music_file)
@@ -252,12 +250,11 @@ def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型
252250
else:
253251
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
254252
print("%s instruments done" % name)
255-
wavfile.write(
253+
sf.write(
256254
os.path.join(
257-
ins_root, "main_vocal_{}_{}.wav".format(name, self.data["agg"])
255+
ins_root, "main_vocal_{}_{}.{}".format(name, self.data["agg"],format)
258256
),
259-
self.mp.param["sr"],
260-
(np.array(wav_instrument) * 32768).astype("int16"),
257+
(np.array(wav_instrument) * 32768).astype("int16"),self.mp.param["sr"],
261258
) #
262259
if vocal_root is not None:
263260
if self.data["high_end_process"].startswith("mirroring"):
@@ -270,12 +267,11 @@ def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型
270267
else:
271268
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
272269
print("%s vocals done" % name)
273-
wavfile.write(
270+
sf.write(
274271
os.path.join(
275-
vocal_root, "others_{}_{}.wav".format(name, self.data["agg"])
272+
vocal_root, "others_{}_{}.{}".format(name, self.data["agg"],format)
276273
),
277-
self.mp.param["sr"],
278-
(np.array(wav_vocals) * 32768).astype("int16"),
274+
(np.array(wav_vocals) * 32768).astype("int16"),self.mp.param["sr"],
279275
)
280276

281277

requirements.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -40,4 +40,4 @@ colorama>=0.4.5
4040
pyworld>=0.3.2
4141
httpx==0.23.0
4242
onnxruntime-gpu
43-
torchcrepe
43+
torchcrepe

0 commit comments

Comments
 (0)