diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py index f952346..b196832 100644 --- a/AIMeiSheng/docker_demo/svc_online.py +++ b/AIMeiSheng/docker_demo/svc_online.py @@ -1,162 +1,170 @@ # -*- coding: UTF-8 -*- """ SVC的核心处理逻辑 """ import os import shutil import hashlib import time -from AIMeiSheng.meisheng_svc_final import get_svc, process_svc +from AIMeiSheng.meisheng_svc_final import load_model, process_svc_online +from AIMeiSheng.meisheng_env_preparex import meisheng_env_prepare from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model from AIMeiSheng.myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe from AIMeiSheng.docker_demo.common import * gs_resource_cache_dir = "/tmp/gs_svc_resource_cache" gs_tmp_dir = "/tmp/gs_svc_tmp" gs_model_dir = "/tmp/models" if os.path.exists(gs_tmp_dir): shutil.rmtree(gs_tmp_dir) os.makedirs(gs_model_dir, exist_ok=True) # 预设参数 gs_gender_models_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/models.zip" gs_svc_emb_url = "" gs_svc_model_url = "" gs_volume_bin_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/dataset/AIMeiSheng/ebur128_tool" class GSWorkerAttr: def __init__(self, input_data): vocal_url = input_data["vocal_url"] female_svc_source_url = input_data["female_svc_url"] male_svc_source_url = input_data["male_svc_url"] st_tm = input_data["st_tm"] # 单位是s ed_tm = input_data["ed_tm"] # 单位是s - self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest() + self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()#对url进行哈希(通过哈希值建立索引,提高数据库的响应速度) self.vocal_url = vocal_url self.target_url = input_data["target_url"] ext = vocal_url.split(".")[-1] self.vocal_path = os.path.join(gs_tmp_dir, self.distinct_id + f"_in.{ext}") self.target_wav_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.wav") self.target_wav_ad_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out_ad.wav") self.target_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.m4a") self.female_svc_source_url = female_svc_source_url self.male_svc_source_url = male_svc_source_url ext = female_svc_source_url.split(".")[-1] self.female_svc_source_path = hashlib.md5(female_svc_source_url.encode()).hexdigest() + "." + ext ext = male_svc_source_url.split(".")[-1] self.male_svc_source_path = hashlib.md5(male_svc_source_url.encode()).hexdigest() + "." + ext self.st_tm = st_tm self.ed_tm = ed_tm self.target_loudness = input_data["target_loudness"] self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id) if os.path.exists(self.tmp_dir): shutil.rmtree(self.tmp_dir) os.makedirs(self.tmp_dir) def __del__(self): if os.path.exists(self.tmp_dir): shutil.rmtree(self.tmp_dir) def init_gender_model(): """ 下载模型 :return: """ dst_model_dir = os.path.join(gs_model_dir, "voice_classification") if not os.path.exists(dst_model_dir): dst_zip_path = os.path.join(gs_model_dir, "models.zip") if not download2disk(gs_gender_models_url, dst_zip_path): logging.fatal(f"download gender_model err={gs_gender_models_url}") cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}" os.system(cmd) if not os.path.exists(dst_model_dir): logging.fatal(f"unzip {dst_zip_path} err") music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth") music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth") gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth") gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth") vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) return vc -def init_svc_model(): - emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt") - if not os.path.exists(emb_model_path): - if not download2disk(gs_svc_emb_url, emb_model_path): - logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}") - embed_model = get_embed_model(emb_model_path) - hubert_model = load_hubert() +# def init_svc_model(): +# emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt") +# if not os.path.exists(emb_model_path): +# if not download2disk(gs_svc_emb_url, emb_model_path): +# logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}") +# embed_model = get_embed_model(emb_model_path) +# hubert_model = load_hubert() +# +# svc_filename = gs_svc_model_url.split("/")[-1] +# svc_model_path = os.path.join(gs_model_dir, svc_filename) +# if not os.path.exists(svc_model_path): +# if not download2disk(gs_svc_model_url, svc_model_path): +# logging.fatal(f"download svc_model err={gs_svc_model_url}") +# +# # 此处内部会生成全局模型 +# get_vc(svc_model_path) +# return embed_model, hubert_model - svc_filename = gs_svc_model_url.split("/")[-1] - svc_model_path = os.path.join(gs_model_dir, svc_filename) - if not os.path.exists(svc_model_path): - if not download2disk(gs_svc_model_url, svc_model_path): - logging.fatal(f"download svc_model err={gs_svc_model_url}") - # 此处内部会生成全局模型 - get_vc(svc_model_path) - return embed_model, hubert_model +def init_svc_model(): + meisheng_env_prepare(logging) + embed_model, hubert_model = load_model() + return embed_model, hubert_model def volume_adjustment(wav_path, target_loudness, out_path): """ 音量调整 :param wav_path: :param target_loudness: :param out_path: :return: """ volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool") if not os.path.exists(volume_bin_path): if not download2disk(gs_volume_bin_url, volume_bin_path): logging.fatal(f"download volume_bin err={gs_volume_bin_url}") cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}" os.system(cmd) class SVCOnline: def __init__(self): st = time.time() self.gender_model = init_gender_model() self.embed_model, self.hubert_model = init_svc_model() logging.info(f"svc init finished, sp = {time.time() - st}") def gender_process(self, worker_attr): st = time.time() gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path) logging.info( f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, " f"gender_process sp = {time.time() - st}") if gender == 0: gender = 'female' elif gender == 1: gender = 'male' elif female_rate > 0.5: gender = 'female' else: gender = 'male' logging.info(f"{worker_attr.vocal_url}, modified gender={gender}") return gender def process(self, worker_attr): gender = self.gender_process(worker_attr) song_path = worker_attr.female_svc_source_path if gender == "male": song_path = worker_attr.male_svc_source_path params = {'gender': gender, 'tst': worker_attr.st_ms, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None} st = time.time() - similar = process_svc(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, params) + similar = process_svc_online(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, self.embed_model, + self.hubert_model, params) logging.info(f"{worker_attr.vocal_url}, similar={similar} process svc sp = {time.time() - st}") diff --git a/AIMeiSheng/meisheng_env_preparex.py b/AIMeiSheng/meisheng_env_preparex.py new file mode 100644 index 0000000..bf6aa2e --- /dev/null +++ b/AIMeiSheng/meisheng_env_preparex.py @@ -0,0 +1,38 @@ +import os +from AIMeiSheng.docker_demo.common import * + +def meisheng_env_prepare(logging,AIMeiSheng_Path='./'): + + cos_path = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/" + + + rmvpe_model_path = os.path.join(AIMeiSheng_Path, 'rmvpe.pt') + gs_rmvpe_model_url = cos_path + "rmvpe.pt" + if not os.path.exists(rmvpe_model_path): + if not download2disk(gs_rmvpe_model_url, rmvpe_model_path): + logging.fatal(f"download rmvpe_model err={gs_rmvpe_model_url}") + + hubert_model_path = os.path.join(AIMeiSheng_Path, 'hubert_base.pt') + gs_hubert_model_url = cos_path + "hubert_base.pt" + if not os.path.exists(hubert_model_path): + if not download2disk(gs_hubert_model_url, hubert_model_path): + logging.fatal(f"download hubert_model err={gs_hubert_model_url}") + + model_svc = "xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" + svc_model_path = os.path.join(AIMeiSheng_Path, f'weights/{model_svc}') + gs_svc_model_url = cos_path + model_svc + if not os.path.exists(svc_model_path): + if not download2disk(gs_svc_model_url, svc_model_path): + logging.fatal(f"download svc_model err={gs_svc_model_url}") + + + model_embed = "model.pt" + embed_model_path = os.path.join(AIMeiSheng_Path, f'RawNet3/models/weights/{model_embed}') + gs_embed_model_url = cos_path + model_embed + if not os.path.exists(embed_model_path): + if not download2disk(gs_embed_model_url, embed_model_path): + logging.fatal(f"download embed_model err={gs_embed_model_url}") + + +if __name__ == "__main__": + meisheng_env_prepare() diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py index e5a6b3f..1ecaaf7 100644 --- a/AIMeiSheng/meisheng_svc_final.py +++ b/AIMeiSheng/meisheng_svc_final.py @@ -1,215 +1,227 @@ import os,sys import time import shutil import glob import hashlib import librosa import soundfile import gradio as gr import pandas as pd import numpy as np sys.path.append('./RawNet3/') from infererence_fang_meisheng import get_embed, get_embed_model from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe from gender_classify import load_gender_model + gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##混音执行文件 tmp_workspace_name = "batch_test_ocean_fi"#工作空间名 song_folder = "./data_meisheng/" ##song folder gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" #工作空间路径 pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" ##模型文件 cur_dir = os.path.abspath(os.path.dirname(__file__)) abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/' f0_method = None def mix(in_path, acc_path, dst_path): # svc转码到442 svc_442_file = in_path + "_442.wav" st = time.time() cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file) os.system(cmd) if not os.path.exists(svc_442_file): return -1 print("transcode,{},sp={}".format(in_path, time.time() - st)) # 混合 st = time.time() cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path) os.system(cmd) print("mixer,{},sp={}".format(in_path, time.time() - st)) def load_model(): global f0_method embed_model = get_embed_model() hubert_model = load_hubert() get_vc(pth_model_path) f0_method = get_rmvpe() print("model preload finish!!!") return embed_model, hubert_model#,svc_model -embed_model, hubert_model = load_model() ##提前加载模型 -gender_model = load_gender_model() +def meisheng_init(): + embed_model, hubert_model = load_model() ##提前加载模型 + gender_model = load_gender_model() + return embed_model, hubert_model, gender_model def pyin_process_single_rmvpe(input_file): global f0_method if f0_method is None: f0_method = get_rmvpe() rate = 16000 #44100 # 读取音频文件 y, sr = librosa.load(input_file, sr=rate) len_s = len(y)/sr lim_s = 15 #10 if(len_s > lim_s): y1 = y[:sr*lim_s] y2 = y[-sr*lim_s:] f0 = f0_method.infer_from_audio(y1, thred=0.03) f0 = f0[f0 < 600] valid_f0 = f0[f0 > 50] mean_pitch1 = np.mean(valid_f0) f0 = f0_method.infer_from_audio(y2, thred=0.03) f0 = f0[f0 < 600] valid_f0 = f0[f0 > 50] mean_pitch2 = np.mean(valid_f0) if abs(mean_pitch1 - mean_pitch2) > 55: mean_pitch_cur = min(mean_pitch1, mean_pitch2) else: mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2 else: f0 = f0_method.infer_from_audio(y, thred=0.03) f0 = f0[f0 < 600] valid_f0 = f0[f0 > 50] mean_pitch_cur = np.mean(valid_f0) return mean_pitch_cur -def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras): +def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras): ##计算pitch f0up_key = pyin_process_single_rmvpe(target_wav) ## get embed - get_embed(target_wav, embed_npy, embed_model) - - print("svc main start...") - svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model,paras) + get_embed(target_wav, embed_npy, embed_md) + + print("svc main start...") + svc_main(song_wav, svc_out_path, pth_model_path, embed_npy, f0up_key, hubert_md, paras) print("svc main finished!!") return 0 -def process_svc(song_wav, target_wav, svc_out_path,paras): + +def process_svc_online(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras): + + embed_npy = target_wav[:-4] + '.npy' ##embd npy存储位置 + similar = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras) + + return similar + +def process_svc(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras): song_wav1, target_wav, svc_out_path = os.path.basename(song_wav), os.path.basename( target_wav), os.path.basename(svc_out_path) #绝对路径 song_wav, target_wav, svc_out_path = song_wav, abs_path + target_wav, abs_path + svc_out_path embed_npy = target_wav[:-4] + '.npy' ##embd npy存储位置 - similar = meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy,paras) - + # similar = meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy,paras) + similar = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras) return similar -def get_svc(target_yinse_wav, song_name, paras): +def get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras): ''' :param target_yinse_wav: 目标音色 :param song_name: 歌曲名字 ;param paras: 其他参数 :return: svc路径名 ''' ##清空工作空间临时路径 if os.path.exists(gs_work_dir): #shutil.rmtree(gs_work_dir) cmd = f"rm -rf {gs_work_dir}/*" os.system(cmd) else: os.makedirs(gs_work_dir) gender = paras['gender']##为了确定歌曲 ##目标音色读取 f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav)) #print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav) #shutil.move(target_yinse_wav, f_dst) ##放在工作目录 shutil.copy(target_yinse_wav, f_dst) target_yinse_wav = f_dst ##歌曲/伴奏 读取(路径需要修改) song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)) # 歌曲vocal inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name)) #song_wav = './xusong_long.wav' svc_out_path = os.path.join(gs_work_dir, "svc.wav") ###svc结果名字 print("inputMsg:", song_wav, target_yinse_wav, svc_out_path) ## svc process st = time.time() print("start inference...") - similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras) + similar = process_svc(song_wav, target_yinse_wav, svc_out_path, embed_model, hubert_model,paras) print("svc finished!!") print("time cost = {}".format(time.time() - st)) print("out path name {} ".format(svc_out_path)) #''' ##加混响 print("add reverbration...") svc_out_path_effect = svc_out_path[:-4] + '_effect.wav' cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}" print("cmd :", cmd) os.system(cmd) # # 人声伴奏合并 print("add acc...") out_path = svc_out_path_effect[:-4] + '_music.wav' mix(svc_out_path_effect, inf_acc_path, out_path) print("time cost = {}".format(time.time() - st)) print("out path name {} ".format(out_path)) #''' return svc_out_path -if __name__=='__main__': +def meisheng_func(target_yinse_wav,song_name, paras): + + ##init + embed_model, hubert_model, gender_model = meisheng_init() ###gender predict - target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" gender, female_rate, is_pure = gender_model.process(target_yinse_wav) print('=====================') - print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure)) + print("gender:{}, female_rate:{},is_pure:{}".format(gender, female_rate, is_pure)) if gender == 0: gender = 'female' elif gender == 1: gender = 'male' elif female_rate > 0.5: gender = 'female' else: gender = 'male' print("modified gender:{} ".format(gender)) print('=====================') - ###接口函数 - ''' - target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" #需要完整路径 - song_name = "drivers_license" #"Levitating" ##路径会自动添加(要更改) - paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} ##单位都是ms - #paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0} ###片段svc测试 - #''' + ##美声main + paras['gender'] = gender ##单位都是ms + get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras) + + +if __name__=='__main__': + + #target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" # 需要完整路径 + target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" + song_name = "lost_stars" ##歌曲名字 + paras = {'gender': None, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} + # paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0} ###片段svc测试 + meisheng_func(target_yinse_wav, song_name, paras) - #''' - #target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" - song_name = "lost_stars" - #paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} - paras = {'gender': gender, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None } - get_svc(target_yinse_wav, song_name, paras) - #'''