"""Singing-voice-conversion ("meisheng") pipeline driver.

Loads the embedding / HuBERT / SVC / RMVPE / gender models, estimates the
mean pitch of a target-timbre recording, runs the voice conversion, and
post-processes the result with external reverb and mixer tools.
"""
import os
import sys

# Make sibling modules importable when this file is run as a script.
sys.path.append(os.path.dirname(__file__))
import time
import shutil
import glob
import hashlib
import subprocess
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
from gender_classify import load_gender_model
from AIMeiSheng.docker_demo.common import gs_svc_model_path, gs_embed_model_path, gs_rmvpe_model_path, gs_err_code_target_silence
from slicex.slice_set_silence import del_noise

gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"  # mixer executable
tmp_workspace_name = "batch_test_ocean_fi"  # workspace name
song_folder = "./data_meisheng/"  # song folder
gs_work_dir = f"./data_meisheng/{tmp_workspace_name}"  # workspace path (relative to the CWD)
pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"  # model file
cur_dir = os.path.abspath(os.path.dirname(__file__))
# Workspace path anchored at this file's directory (with trailing slash).
abs_path = os.path.join(cur_dir, song_folder, tmp_workspace_name) + '/'
# Lazily-initialised RMVPE pitch extractor; set by load_model() or on first use.
f0_method = None


def _run_cmd(args):
    """Run an external command given as an argument list, best-effort.

    The command is executed without a shell, so paths containing spaces or
    shell metacharacters are passed through verbatim.

    :param args: list of str, executable followed by its arguments
    :return: the process exit code, or -1 if the process could not be started
    """
    try:
        return subprocess.run(args, check=False).returncode
    except OSError as err:
        # Mirror os.system(): a missing tool must not raise into the pipeline.
        print("failed to run {}: {}".format(args[0], err))
        return -1


def mix(in_path, acc_path, dst_path):
    """Transcode the converted vocal and mix it with the accompaniment.

    :param in_path: path of the vocal file to transcode
    :param acc_path: path of the accompaniment file
    :param dst_path: path handed to the mixer as its output argument
    :return: -1 if the transcoded file does not exist after running ffmpeg,
             otherwise None
    """
    # Transcode the svc output to 44100 Hz / 2 channels.
    svc_442_file = in_path + "_442.wav"
    st = time.time()
    _run_cmd(["ffmpeg", "-i", in_path, "-ar", "44100", "-ac", "2", "-y",
              svc_442_file, "-loglevel", "fatal"])
    if not os.path.exists(svc_442_file):
        return -1
    print("transcode,{},sp={}".format(in_path, time.time() - st))

    # Mix. NOTE(review): the meaning of the trailing "1" argument is defined
    # by the simple_mixer tool and is not visible here.
    st = time.time()
    _run_cmd([gs_simple_mixer_path, svc_442_file, acc_path, dst_path, "1"])
    print("mixer,{},sp={}".format(in_path, time.time() - st))


def load_model():
    """Load the embedding, HuBERT, SVC and RMVPE models.

    Sets the module-level ``f0_method`` as a side effect. The return value of
    ``get_vc`` is not kept here (presumably it stores the SVC model
    internally -- TODO confirm).

    :return: (embed_model, hubert_model)
    """
    global f0_method
    embed_model = get_embed_model(gs_embed_model_path)
    hubert_model = load_hubert()
    get_vc(gs_svc_model_path)
    f0_method = get_rmvpe(gs_rmvpe_model_path)
    print("model preload finish!!!")
    return embed_model, hubert_model  # ,svc_model


def meisheng_init():
    """Preload every model the pipeline needs.

    :return: (embed_model, hubert_model, gender_model)
    """
    embed_model, hubert_model = load_model()  # load models ahead of time
    gender_model = load_gender_model()
    return embed_model, hubert_model, gender_model


def _mean_voiced_pitch(audio, min_frames=10):
    """Return the mean f0 of *audio*, or 0 when too few frames are voiced.

    Only f0 values strictly between 50 and 600 are kept. If no more than
    ``min_frames`` values survive, 0 is returned instead of averaging a
    (possibly empty) array, which would yield NaN.
    NOTE(review): the original constant was named ``f0_limit_10ms``, which
    suggests one f0 value per 10 ms -- confirm against the RMVPE hop size.
    """
    f0 = f0_method.infer_from_audio(audio, thred=0.03)
    f0 = f0[f0 < 600]
    valid_f0 = f0[f0 > 50]
    if len(valid_f0) > min_frames:
        return np.mean(valid_f0)
    return 0


def pyin_process_single_rmvpe(input_file):
    """Estimate the mean pitch of an audio file with RMVPE.

    The file is loaded at 16 kHz. For audio longer than 15 s, only the first
    and the last 15 s are analysed and the two means are combined:

    * if either segment has no usable pitch (mean 0), the larger of the two
      is returned (0 when both are unusable);
    * if the two means differ by more than 55, the smaller one is returned;
    * otherwise their average is returned.

    :param input_file: path of the audio file
    :return: mean pitch, or 0 when too few voiced frames were found
    """
    global f0_method
    if f0_method is None:
        # Use the same model path as load_model() so both initialisation
        # routes load the same extractor.
        f0_method = get_rmvpe(gs_rmvpe_model_path)

    rate = 16000  # 44100
    # Load the audio file.
    y, sr = librosa.load(input_file, sr=rate)
    len_s = len(y) / sr
    lim_s = 15  # 10

    if len_s <= lim_s:
        return _mean_voiced_pitch(y)

    mean_pitch1 = _mean_voiced_pitch(y[:sr * lim_s])
    mean_pitch2 = _mean_voiced_pitch(y[-sr * lim_s:])

    if mean_pitch1 == 0 or mean_pitch2 == 0:
        # Covers "both unusable" (-> 0) and "one unusable" (-> the other one).
        return max(mean_pitch1, mean_pitch2)
    if abs(mean_pitch1 - mean_pitch2) > 55:
        return min(mean_pitch1, mean_pitch2)
    return (mean_pitch1 + mean_pitch2) / 2


def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras):
    """Run the conversion for one song / target-timbre pair.

    :param song_wav: path of the source vocal
    :param target_wav: path of the target-timbre recording
    :param svc_out_path: path where the converted vocal is written
    :param embed_npy: path passed to ``get_embed`` for the speaker embedding
    :param embed_md: embedding model
    :param hubert_md: HuBERT model
    :param paras: extra parameters forwarded to ``svc_main`` and ``del_noise``
    :return: ``gs_err_code_target_silence`` when the target's mean pitch is
             below 40 or NaN, otherwise 0
    """
    # Compute the pitch of the target recording.
    f0up_key = pyin_process_single_rmvpe(target_wav)
    if f0up_key < 40 or np.isnan(f0up_key):  # unvoiced
        return gs_err_code_target_silence

    # Get the embedding (timbre).
    get_embed(target_wav, embed_npy, embed_md)

    print("svc main start...")
    svc_main(song_wav, svc_out_path, embed_npy, f0up_key, hubert_md, paras)
    print("svc main finished!!")

    del_noise(song_wav, svc_out_path, paras)
    print("del noise in silence")
    return 0


def process_svc_online(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """Run ``meisheng_svc`` using the given paths as-is.

    The embedding is stored next to ``target_wav`` with a ``.npy`` extension.

    :return: the error code returned by ``meisheng_svc``
    """
    # splitext handles extensions of any length (e.g. ".flac"), unlike [:-4].
    embed_npy = os.path.splitext(target_wav)[0] + '.npy'  # where the embedding npy is stored
    err_code = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)
    return err_code


def process_svc(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """Run ``meisheng_svc`` with target/output paths re-rooted at ``abs_path``.

    Only the base names of ``target_wav`` and ``svc_out_path`` are kept;
    ``song_wav`` is used unchanged.

    :return: the error code returned by ``meisheng_svc``
    """
    # Absolute paths inside the workspace.
    target_wav = abs_path + os.path.basename(target_wav)
    svc_out_path = abs_path + os.path.basename(svc_out_path)
    embed_npy = os.path.splitext(target_wav)[0] + '.npy'  # where the embedding npy is stored

    similar = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)
    return similar


def get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras):
    '''
    Convert a song to the target timbre, then add reverb and accompaniment.

    :param target_yinse_wav: target-timbre recording
    :param song_name: song name
    :param embed_model: embedding model
    :param hubert_model: HuBERT model
    :param paras: other parameters; ``paras['gender']`` selects the song folder
    :return: svc output path (the file may be missing if the conversion failed)
    '''

    # Clear the temporary workspace (same entries the shell glob "*" matches).
    if os.path.exists(gs_work_dir):
        for entry in glob.glob(os.path.join(gs_work_dir, "*")):
            if os.path.isdir(entry) and not os.path.islink(entry):
                shutil.rmtree(entry, ignore_errors=True)
            else:
                try:
                    os.remove(entry)
                except OSError as err:
                    print("failed to remove {}: {}".format(entry, err))
    else:
        os.makedirs(gs_work_dir)

    gender = paras['gender']  # used to pick the song

    # Copy the target-timbre recording into the workspace.
    f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
    shutil.copy(target_yinse_wav, f_dst)
    target_yinse_wav = f_dst

    # Song vocal / accompaniment (paths may need adjusting).
    song_wav = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)  # song vocal
    inf_acc_path = "{}{}/{}/acc.wav".format(song_folder, gender, song_name)
    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # svc result name
    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)

    # svc process
    # NOTE(review): process_svc writes to abs_path (anchored at this file's
    # directory) while the steps below read from gs_work_dir (relative to the
    # CWD); the two only coincide when the CWD is this file's directory.
    st = time.time()
    print("start inference...")
    err_code = process_svc(song_wav, target_yinse_wav, svc_out_path, embed_model, hubert_model, paras)
    if err_code != 0:
        # Conversion produced no output; reverb and mixing would only fail.
        print("svc failed, err_code = {}".format(err_code))
        return svc_out_path
    print("svc finished!!")
    print("time cost = {}".format(time.time() - st))
    print("out path name {} ".format(svc_out_path))

    # Add reverb.
    print("add reverbration...")
    svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
    effect_cmd = ["/data/gpu_env_common/bin/effect_tool", svc_out_path, svc_out_path_effect]
    print("cmd :", " ".join(effect_cmd))
    _run_cmd(effect_cmd)

    # Merge vocal and accompaniment.
    print("add acc...")
    out_path = svc_out_path_effect[:-4] + '_music.wav'
    mix(svc_out_path_effect, inf_acc_path, out_path)

    print("time cost = {}".format(time.time() - st))
    print("out path name {} ".format(out_path))

    return svc_out_path


def meisheng_func(target_yinse_wav, song_name, paras):
    """Initialise the models, predict the gender and run ``get_svc``.

    ``paras['gender']`` is overwritten with 'female' or 'male': a predicted
    gender of 0 maps to 'female', 1 to 'male'; any other value falls back to
    ``female_rate > 0.5``.

    :param target_yinse_wav: target-timbre recording
    :param song_name: song name
    :param paras: parameter dict, updated in place
    :return: the path returned by ``get_svc``
    """
    # init
    embed_model, hubert_model, gender_model = meisheng_init()

    # gender predict
    gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
    print('=====================')
    print("gender:{}, female_rate:{},is_pure:{}".format(gender, female_rate, is_pure))
    if gender == 0:
        gender = 'female'
    elif gender == 1:
        gender = 'male'
    elif female_rate > 0.5:
        gender = 'female'
    else:
        gender = 'male'
    print("modified gender:{} ".format(gender))
    print('=====================')

    # meisheng main
    paras['gender'] = gender  # all units are ms
    return get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras)


if __name__ == '__main__':
    # target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav"  # needs a full path
    target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"

    song_name = "lost_stars"  # song name
    paras = {'gender': None, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None}
    # paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0}  # partial-segment svc test
    meisheng_func(target_yinse_wav, song_name, paras)