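Add files via upload: add a selectable export format (wav/flac/mp3/m4a) for batch voice conversion and UVR5/MDXNet separation outputs, and switch infer-web.py and infer_uvr5.py from scipy.io.wavfile to soundfile for writing audio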

RVC-Boss · web-flow · commit 7bd25c4623a1 · 2023-05-28T23:40:54.000+08:00
diff --git a/MDXNet.py b/MDXNet.py
@@ -141,7 +141,7 @@ def demix_base(self, mixes, margin_size):
         # del self.model
         progress_bar.close()
         return _sources
-    def prediction(self, m,vocal_root,others_root):
+    def prediction(self, m,vocal_root,others_root,format):
         os.makedirs(vocal_root,exist_ok=True)
         os.makedirs(others_root,exist_ok=True)
         basename = os.path.basename(m)
@@ -151,8 +151,8 @@ def prediction(self, m,vocal_root,others_root):
         mix = mix.T
         sources = self.demix(mix.T)
         opt=sources[0].T
-        sf.write("%s/%s_main_vocal.wav"%(vocal_root,basename), mix-opt, rate)
-        sf.write("%s/%s_others.wav"%(others_root,basename), opt , rate)
+        sf.write("%s/%s_main_vocal.%s"%(vocal_root,basename,format), mix-opt, rate)
+        sf.write("%s/%s_others.%s"%(others_root,basename,format), opt , rate)
 
 class MDXNetDereverb():
     def __init__(self,chunks):
@@ -167,8 +167,8 @@ def __init__(self,chunks):
         self.denoise=True
         self.pred=Predictor(self)
 
-    def _path_audio_(self,input,vocal_root,others_root):
-        self.pred.prediction(input,vocal_root,others_root)
+    def _path_audio_(self,input,vocal_root,others_root,format):
+        self.pred.prediction(input,vocal_root,others_root,format)
 
 if __name__ == '__main__':
     dereverb=MDXNetDereverb(15)
diff --git a/infer-web.py b/infer-web.py
@@ -77,7 +77,7 @@
     SynthesizerTrnMs768NSFsid,
     SynthesizerTrnMs768NSFsid_nono,
 )
-from scipy.io import wavfile
+import soundfile as sf
 from fairseq import checkpoint_utils
 import gradio as gr
 import logging
@@ -235,7 +235,8 @@ def vc_multi(
     filter_radius,
     resample_sr,
     rms_mix_rate,
-    protect
+    protect,
+    format1
 ):
     try:
         dir_path = (
@@ -271,8 +272,8 @@ def vc_multi(
             if "Success" in info:
                 try:
                     tgt_sr, audio_opt = opt
-                    wavfile.write(
-                        "%s/%s" % (opt_root, os.path.basename(path)), tgt_sr, audio_opt
+                    sf.write(
+                        "%s/%s.%s" % (opt_root, os.path.basename(path),format1), audio_opt,tgt_sr
                     )
                 except:
                     info += traceback.format_exc()
@@ -283,7 +284,7 @@ def vc_multi(
         yield traceback.format_exc()
 
 
-def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
+def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg,format0):
     infos = []
     try:
         inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
@@ -318,7 +319,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
                     and info["streams"][0]["sample_rate"] == "44100"
                 ):
                     need_reformat = 0
-                    pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
+                    pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal,format0)
                     done = 1
             except:
                 need_reformat = 1
@@ -332,7 +333,7 @@ def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg):
                 inp_path = tmp_path
             try:
                 if done == 0:
-                    pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal)
+                    pre_fun._path_audio_(inp_path, save_root_ins, save_root_vocal,format0)
                 infos.append("%s->Success" % (os.path.basename(inp_path)))
                 yield "\n".join(infos)
             except:
@@ -1341,6 +1342,12 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
                             file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
                         )
                     with gr.Row():
+                        format1= gr.Radio(
+                            label=i18n("导出文件格式"),
+                            choices=["wav", "flac","mp3","m4a"],
+                            value="flac",
+                            interactive=True,
+                        )
                         but1 = gr.Button(i18n("转换"), variant="primary")
                         vc_output3 = gr.Textbox(label=i18n("输出信息"))
                     but1.click(
@@ -1359,7 +1366,8 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
                             filter_radius1,
                             resample_sr1,
                             rms_mix_rate1,
-                            protect1
+                            protect1,
+                            format1
                         ],
                         [vc_output3],
                     )
@@ -1402,9 +1410,15 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
                             visible=False,  # 先不开放调整
                         )
                         opt_vocal_root = gr.Textbox(
-                            label=i18n("指定输出人声文件夹"), value="opt"
+                            label=i18n("指定输出主人声文件夹"), value="opt"
+                        )
+                        opt_ins_root = gr.Textbox(label=i18n("指定输出非主人声文件夹"), value="opt")
+                        format0= gr.Radio(
+                            label=i18n("导出文件格式"),
+                            choices=["wav", "flac","mp3","m4a"],
+                            value="flac",
+                            interactive=True,
                         )
-                        opt_ins_root = gr.Textbox(label=i18n("指定输出乐器文件夹"), value="opt")
                     but2 = gr.Button(i18n("转换"), variant="primary")
                     vc_output4 = gr.Textbox(label=i18n("输出信息"))
                     but2.click(
@@ -1416,6 +1430,7 @@ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
                             wav_inputs,
                             opt_ins_root,
                             agg,
+                            format0
                         ],
                         [vc_output4],
                     )
diff --git a/infer_uvr5.py b/infer_uvr5.py
@@ -11,7 +11,7 @@
 from uvr5_pack.lib_v5 import spec_utils
 from uvr5_pack.utils import _get_name_params, inference
 from uvr5_pack.lib_v5.model_param_init import ModelParameters
-from scipy.io import wavfile
+import soundfile as sf
 from uvr5_pack.lib_v5.nets_new import CascadedNet
 from uvr5_pack.lib_v5 import nets_61968KB as nets
 
@@ -41,7 +41,7 @@ def __init__(self, agg, model_path, device, is_half):
         self.mp = mp
         self.model = model
 
-    def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
+    def _path_audio_(self, music_file, ins_root=None, vocal_root=None,format="flac"):
         if ins_root is None and vocal_root is None:
             return "No save root."
         name = os.path.basename(music_file)
@@ -120,12 +120,11 @@ def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
             else:
                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
             print("%s instruments done" % name)
-            wavfile.write(
+            sf.write(
                 os.path.join(
-                    ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
+                    ins_root, "instrument_{}_{}.{}".format(name, self.data["agg"],format)
                 ),
-                self.mp.param["sr"],
-                (np.array(wav_instrument) * 32768).astype("int16"),
+                (np.array(wav_instrument) * 32768).astype("int16"),                self.mp.param["sr"],
             )  #
         if vocal_root is not None:
             if self.data["high_end_process"].startswith("mirroring"):
@@ -138,12 +137,11 @@ def _path_audio_(self, music_file, ins_root=None, vocal_root=None):
             else:
                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
             print("%s vocals done" % name)
-            wavfile.write(
+            sf.write(
                 os.path.join(
-                    vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
+                    vocal_root, "vocal_{}_{}.{}".format(name, self.data["agg"],format)
                 ),
-                self.mp.param["sr"],
-                (np.array(wav_vocals) * 32768).astype("int16"),
+                (np.array(wav_vocals) * 32768).astype("int16"),                self.mp.param["sr"],
             )
 
 class _audio_pre_new:
@@ -173,7 +171,7 @@ def __init__(self, agg, model_path, device, is_half):
         self.mp = mp
         self.model = model
 
-    def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型vocal和ins是反的
+    def _path_audio_(self, music_file, vocal_root=None, ins_root=None,format="flac"):#3个VR模型vocal和ins是反的
         if ins_root is None and vocal_root is None:
             return "No save root."
         name = os.path.basename(music_file)
@@ -252,12 +250,11 @@ def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型
             else:
                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
             print("%s instruments done" % name)
-            wavfile.write(
+            sf.write(
                 os.path.join(
-                    ins_root, "main_vocal_{}_{}.wav".format(name, self.data["agg"])
+                    ins_root, "main_vocal_{}_{}.{}".format(name, self.data["agg"],format)
                 ),
-                self.mp.param["sr"],
-                (np.array(wav_instrument) * 32768).astype("int16"),
+                (np.array(wav_instrument) * 32768).astype("int16"),self.mp.param["sr"],
             )  #
         if vocal_root is not None:
             if self.data["high_end_process"].startswith("mirroring"):
@@ -270,12 +267,11 @@ def _path_audio_(self, music_file, vocal_root=None, ins_root=None):#3个VR模型
             else:
                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
             print("%s vocals done" % name)
-            wavfile.write(
+            sf.write(
                 os.path.join(
-                    vocal_root, "others_{}_{}.wav".format(name, self.data["agg"])
+                    vocal_root, "others_{}_{}.{}".format(name, self.data["agg"],format)
                 ),
-                self.mp.param["sr"],
-                (np.array(wav_vocals) * 32768).astype("int16"),
+                (np.array(wav_vocals) * 32768).astype("int16"),self.mp.param["sr"],
             )
 
 
diff --git a/requirements.txt b/requirements.txt
@@ -40,4 +40,4 @@ colorama>=0.4.5
 pyworld>=0.3.2
 httpx==0.23.0
 onnxruntime-gpu
-torchcrepe
+torchcrepe