Page MenuHomePhabricator

No OneTemporary

diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
index f952346..b196832 100644
--- a/AIMeiSheng/docker_demo/svc_online.py
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -1,162 +1,170 @@
# -*- coding: UTF-8 -*-
"""
SVC的核心处理逻辑
"""
import os
import shutil
import hashlib
import time
-from AIMeiSheng.meisheng_svc_final import get_svc, process_svc
+from AIMeiSheng.meisheng_svc_final import load_model, process_svc_online
+from AIMeiSheng.meisheng_env_preparex import meisheng_env_prepare
from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass
from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
from AIMeiSheng.myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
from AIMeiSheng.docker_demo.common import *
# Scratch locations used by the online SVC worker.
gs_resource_cache_dir = "/tmp/gs_svc_resource_cache"
gs_tmp_dir = "/tmp/gs_svc_tmp"
gs_model_dir = "/tmp/models"
# Import-time side effect: anything already present in the temp directory is
# removed. Only the model directory is re-created here; the per-request
# sub-directories are created by GSWorkerAttr.
if os.path.exists(gs_tmp_dir):
    shutil.rmtree(gs_tmp_dir)
os.makedirs(gs_model_dir, exist_ok=True)
# Preset parameters
gs_gender_models_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/models.zip"
# NOTE(review): the two SVC URLs below are empty strings — confirm whether
# anything still reads them.
gs_svc_emb_url = ""
gs_svc_model_url = ""
gs_volume_bin_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"
class GSWorkerAttr:
    """Per-request attributes (URLs, local paths, time range) of one SVC job.

    All local file names are derived from the MD5 of the vocal URL, and a
    private scratch directory ``<gs_tmp_dir>/<md5>`` is (re)created on
    construction and removed again when the object is finalized.

    ``input_data`` must provide the keys ``vocal_url``, ``female_svc_url``,
    ``male_svc_url``, ``st_tm``, ``ed_tm``, ``target_url`` and
    ``target_loudness``; a missing key raises ``KeyError``.
    """

    def __init__(self, input_data):
        vocal_url = input_data["vocal_url"]
        female_svc_source_url = input_data["female_svc_url"]
        male_svc_source_url = input_data["male_svc_url"]
        st_tm = input_data["st_tm"]  # unit: seconds
        ed_tm = input_data["ed_tm"]  # unit: seconds

        # Hash of the URL, used as the identifier of this request.
        self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
        self.vocal_url = vocal_url
        self.target_url = input_data["target_url"]

        # The extension is whatever follows the last "." of the URL.
        ext = vocal_url.split(".")[-1]
        self.vocal_path = os.path.join(gs_tmp_dir, self.distinct_id + f"_in.{ext}")
        self.target_wav_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.wav")
        self.target_wav_ad_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out_ad.wav")
        self.target_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.m4a")

        self.female_svc_source_url = female_svc_source_url
        self.male_svc_source_url = male_svc_source_url

        # NOTE(review): these two are bare file names (md5 + extension), not
        # joined to any directory — confirm the caller prefixes a cache dir.
        ext = female_svc_source_url.split(".")[-1]
        self.female_svc_source_path = hashlib.md5(female_svc_source_url.encode()).hexdigest() + "." + ext
        ext = male_svc_source_url.split(".")[-1]
        self.male_svc_source_path = hashlib.md5(male_svc_source_url.encode()).hexdigest() + "." + ext

        self.st_tm = st_tm
        self.ed_tm = ed_tm
        self.target_loudness = input_data["target_loudness"]

        # Start from an empty per-request scratch directory.
        self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.makedirs(self.tmp_dir)

    def __del__(self):
        # __init__ may have raised before tmp_dir was assigned (e.g. a missing
        # key in input_data); the finalizer still runs in that case, so it
        # must not assume the attribute exists.
        tmp_dir = getattr(self, "tmp_dir", None)
        if tmp_dir and os.path.exists(tmp_dir):
            # Best-effort cleanup: a finalizer has nowhere to report errors.
            shutil.rmtree(tmp_dir, ignore_errors=True)
def init_gender_model():
    """
    Fetch the voice-classification weights if they are not on disk yet and
    build the classifier from them.

    The archive is downloaded to ``<gs_model_dir>/models.zip``, unpacked with
    the ``unzip`` command, and the extracted ``models`` directory is renamed
    to ``voice_classification``. Failures are only logged.
    :return: the VoiceClass instance built from the four weight files
    """
    model_root = os.path.join(gs_model_dir, "voice_classification")
    if not os.path.exists(model_root):
        dst_zip_path = os.path.join(gs_model_dir, "models.zip")
        fetched = download2disk(gs_gender_models_url, dst_zip_path)
        if not fetched:
            logging.fatal(f"download gender_model err={gs_gender_models_url}")
        # Unpack, rename and drop the archive in a single shell invocation.
        os.system(f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}")
        if not os.path.exists(model_root):
            logging.fatal(f"unzip {dst_zip_path} err")

    # Order matters: it is the positional argument order of VoiceClass.
    weight_files = (
        "voice_005_rec_v5.pth",           # music/voice, pure
        "voice_10_v5.pth",                # music/voice, not pure
        "gender_8k_ratev5_v6_adam.pth",   # gender, pure
        "gender_8k_v6_adam.pth",          # gender, not pure
    )
    weight_paths = [os.path.join(model_root, name) for name in weight_files]
    return VoiceClass(*weight_paths)
-def init_svc_model():
- emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt")
- if not os.path.exists(emb_model_path):
- if not download2disk(gs_svc_emb_url, emb_model_path):
- logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}")
- embed_model = get_embed_model(emb_model_path)
- hubert_model = load_hubert()
+# def init_svc_model():
+# emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt")
+# if not os.path.exists(emb_model_path):
+# if not download2disk(gs_svc_emb_url, emb_model_path):
+# logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}")
+# embed_model = get_embed_model(emb_model_path)
+# hubert_model = load_hubert()
+#
+# svc_filename = gs_svc_model_url.split("/")[-1]
+# svc_model_path = os.path.join(gs_model_dir, svc_filename)
+# if not os.path.exists(svc_model_path):
+# if not download2disk(gs_svc_model_url, svc_model_path):
+# logging.fatal(f"download svc_model err={gs_svc_model_url}")
+#
+# # 此处内部会生成全局模型
+# get_vc(svc_model_path)
+# return embed_model, hubert_model
- svc_filename = gs_svc_model_url.split("/")[-1]
- svc_model_path = os.path.join(gs_model_dir, svc_filename)
- if not os.path.exists(svc_model_path):
- if not download2disk(gs_svc_model_url, svc_model_path):
- logging.fatal(f"download svc_model err={gs_svc_model_url}")
- # 此处内部会生成全局模型
- get_vc(svc_model_path)
- return embed_model, hubert_model
def init_svc_model():
    """
    Prepare the model files and load the SVC models.

    ``meisheng_env_prepare`` is called first with the module's ``logging``
    object, then ``load_model`` does the actual loading.
    :return: tuple ``(embed_model, hubert_model)`` as produced by load_model
    """
    meisheng_env_prepare(logging)
    loaded = load_model()
    embed_model, hubert_model = loaded
    return embed_model, hubert_model
def volume_adjustment(wav_path, target_loudness, out_path):
    """
    Adjust the loudness of a wav file with the external ``ebur128_tool``.

    The tool is downloaded into ``gs_model_dir`` on first use. Nothing is
    raised on failure: problems are logged and ``out_path`` is simply not
    produced.
    :param wav_path: input wav file
    :param target_loudness: loudness target handed to the tool as its 2nd argument
    :param out_path: where the tool should write its result
    :return: None
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
    if not os.path.exists(volume_bin_path):
        if not download2disk(gs_volume_bin_url, volume_bin_path):
            logging.fatal(f"download volume_bin err={gs_volume_bin_url}")
            # Without the binary there is nothing to run.
            return
    # A downloaded file normally has no execute bit; set it if it is missing.
    if not os.access(volume_bin_path, os.X_OK):
        os.chmod(volume_bin_path, 0o755)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through untouched.
    cmd = [volume_bin_path, str(wav_path), str(target_loudness), str(out_path)]
    try:
        subprocess.run(cmd, check=False)
    except OSError as err:
        logging.error(f"run volume_bin err={err}")
class SVCOnline:
    """Online SVC service object: models are loaded once in the constructor
    and reused for every request handled by ``process``."""

    def __init__(self):
        st = time.time()
        self.gender_model = init_gender_model()
        self.embed_model, self.hubert_model = init_svc_model()
        logging.info(f"svc init finished, sp = {time.time() - st}")

    def gender_process(self, worker_attr):
        """Classify the vocal at ``worker_attr.vocal_path``.

        :param worker_attr: object exposing ``vocal_path`` and ``vocal_url``
        :return: ``'female'`` or ``'male'``
        """
        st = time.time()
        gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
        logging.info(
            f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
            f"gender_process sp = {time.time() - st}")
        if gender == 0:
            gender = 'female'
        elif gender == 1:
            gender = 'male'
        elif female_rate > 0.5:
            # Any other class id: fall back to the female probability.
            gender = 'female'
        else:
            gender = 'male'

        logging.info(f"{worker_attr.vocal_url}, modified gender={gender}")
        return gender

    @staticmethod
    def _svc_params(worker_attr, gender):
        """Build the parameter dict handed to ``process_svc_online``."""
        # GSWorkerAttr exposes st_tm / ed_tm; the previous `st_ms` attribute
        # does not exist and raised AttributeError on every request.
        # NOTE(review): GSWorkerAttr documents these as seconds — confirm the
        # unit expected for 'tst' / 'tnd' downstream.
        return {'gender': gender, 'tst': worker_attr.st_tm, "tnd": worker_attr.ed_tm, 'delay': 0,
                'song_path': None}

    def process(self, worker_attr):
        """Run gender detection followed by SVC for one request.

        The source song is the female or male variant depending on the
        detected gender; the result is written to
        ``worker_attr.target_wav_path``.
        """
        gender = self.gender_process(worker_attr)
        song_path = worker_attr.female_svc_source_path
        if gender == "male":
            song_path = worker_attr.male_svc_source_path

        params = self._svc_params(worker_attr, gender)
        st = time.time()
        similar = process_svc_online(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, self.embed_model,
                                     self.hubert_model, params)
        logging.info(f"{worker_attr.vocal_url}, similar={similar} process svc sp = {time.time() - st}")
diff --git a/AIMeiSheng/meisheng_env_preparex.py b/AIMeiSheng/meisheng_env_preparex.py
new file mode 100644
index 0000000..bf6aa2e
--- /dev/null
+++ b/AIMeiSheng/meisheng_env_preparex.py
@@ -0,0 +1,38 @@
+import os
+from AIMeiSheng.docker_demo.common import *
+
def meisheng_env_prepare(logging, AIMeiSheng_Path='./'):
    """Download every model file that is not yet present under the project root.

    Files that already exist are left alone. A failed download is reported
    through ``logging.fatal`` and the remaining files are still attempted.

    :param logging: logger-like object; only its ``fatal`` method is used
    :param AIMeiSheng_Path: directory the relative model paths are resolved against
    :return: None
    """
    cos_path = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/"
    model_svc = "xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"
    model_embed = "model.pt"

    # (label used in the log message, remote file name, path relative to the root)
    resources = (
        ("rmvpe_model", "rmvpe.pt", "rmvpe.pt"),
        ("hubert_model", "hubert_base.pt", "hubert_base.pt"),
        ("svc_model", model_svc, f"weights/{model_svc}"),
        ("embed_model", model_embed, f"RawNet3/models/weights/{model_embed}"),
    )

    for label, remote_name, relative_path in resources:
        local_path = os.path.join(AIMeiSheng_Path, relative_path)
        if os.path.exists(local_path):
            continue
        # The nested weight directories may not exist on a fresh checkout;
        # create them so the download has somewhere to write.
        os.makedirs(os.path.dirname(local_path) or ".", exist_ok=True)
        url = cos_path + remote_name
        if not download2disk(url, local_path):
            logging.fatal(f"download {label} err={url}")
+
+
if __name__ == "__main__":
    # meisheng_env_prepare requires a logger as its first argument; calling it
    # with no arguments raised TypeError. The standard-library module offers
    # the `fatal` function the callee uses.
    import logging

    meisheng_env_prepare(logging)
diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py
index e5a6b3f..1ecaaf7 100644
--- a/AIMeiSheng/meisheng_svc_final.py
+++ b/AIMeiSheng/meisheng_svc_final.py
@@ -1,215 +1,227 @@
import os,sys
import time
import shutil
import glob
import hashlib
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
sys.path.append('./RawNet3/')
from infererence_fang_meisheng import get_embed, get_embed_model
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe
from gender_classify import load_gender_model
+
gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"  ## mixing executable
tmp_workspace_name = "batch_test_ocean_fi"  # workspace name
song_folder = "./data_meisheng/"  ## song folder
gs_work_dir = f"./data_meisheng/{tmp_workspace_name}"  # workspace path (relative to the current working directory)
pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"  ## model file
cur_dir = os.path.abspath(os.path.dirname(__file__))
# Workspace path anchored at this file's directory, with a trailing "/" so
# that file names can be appended by plain string concatenation.
abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/'
# Pitch extractor; stays None until load_model() or the first call of
# pyin_process_single_rmvpe() assigns it.
f0_method = None
def mix(in_path, acc_path, dst_path):
    """Transcode the SVC vocal to 44.1 kHz stereo and mix it with the accompaniment.

    :param in_path: SVC vocal file; the transcoded copy is written next to it
        as ``<in_path>_442.wav``
    :param acc_path: accompaniment file handed to the mixer
    :param dst_path: output file handed to the mixer
    :return: ``-1`` when the transcoded file was not produced, otherwise ``None``
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    # Transcode the SVC output to 44.1 kHz / 2 channels.
    svc_442_file = in_path + "_442.wav"
    st = time.time()
    # Argument lists instead of shell strings: paths with spaces or shell
    # metacharacters are passed through unchanged.
    transcode_cmd = ["ffmpeg", "-i", in_path, "-ar", "44100", "-ac", "2", "-y", svc_442_file,
                     "-loglevel", "fatal"]
    try:
        subprocess.run(transcode_cmd, check=False)
    except OSError as err:
        # e.g. ffmpeg is not installed; reported as a transcode failure below.
        print("transcode failed,{},err={}".format(in_path, err))
    if not os.path.exists(svc_442_file):
        return -1
    print("transcode,{},sp={}".format(in_path, time.time() - st))

    # Mix vocal and accompaniment.
    st = time.time()
    mixer_cmd = [gs_simple_mixer_path, svc_442_file, acc_path, dst_path, "1"]
    try:
        subprocess.run(mixer_cmd, check=False)
    except OSError as err:
        print("mixer failed,{},err={}".format(in_path, err))
    print("mixer,{},sp={}".format(in_path, time.time() - st))
def load_model():
    """Preload the models used by the SVC pipeline.

    Loads the embedding model and the hubert model, calls ``get_vc`` with
    ``pth_model_path``, and stores the result of ``get_rmvpe()`` in the
    module-level ``f0_method``.

    :return: tuple ``(embed_model, hubert_model)``
    """
    global f0_method
    embed_model = get_embed_model()
    hubert_model = load_hubert()
    # The return value is discarded; presumably get_vc keeps the SVC model as
    # state of its own module — TODO confirm.
    get_vc(pth_model_path)
    f0_method = get_rmvpe()
    print("model preload finish!!!")
    return embed_model, hubert_model  # the SVC model itself is not returned
-embed_model, hubert_model = load_model() ##提前加载模型
-gender_model = load_gender_model()
def meisheng_init():
    """Load every model needed by the offline pipeline.

    :return: tuple ``(embed_model, hubert_model, gender_model)``
    """
    # Load the SVC models up front, then the gender classifier.
    svc_models = load_model()
    gender_model = load_gender_model()
    embed_model, hubert_model = svc_models
    return embed_model, hubert_model, gender_model
def pyin_process_single_rmvpe(input_file):
    """Estimate the mean pitch of an audio file with the rmvpe extractor.

    The file is loaded at 16 kHz. Clips longer than 15 s are analysed on
    their first and last 15 s only: when the two means differ by more than
    55 the lower one is returned, otherwise their average. Shorter clips are
    analysed as a whole. Only f0 values strictly between 50 and 600
    (presumably Hz) contribute to a mean.

    :param input_file: path of the audio file
    :return: the mean pitch as computed by ``np.mean``
    """
    global f0_method
    if f0_method is None:
        f0_method = get_rmvpe()

    def _mean_voiced_pitch(samples):
        # Mean of the f0 track restricted to the open interval (50, 600).
        track = f0_method.infer_from_audio(samples, thred=0.03)
        track = track[track < 600]
        return np.mean(track[track > 50])

    rate = 16000  # 44100
    # Read the audio file.
    y, sr = librosa.load(input_file, sr=rate)
    lim_s = 15  # 10
    if len(y) / sr <= lim_s:
        return _mean_voiced_pitch(y)

    window = sr * lim_s
    head_pitch = _mean_voiced_pitch(y[:window])
    tail_pitch = _mean_voiced_pitch(y[-window:])
    if abs(head_pitch - tail_pitch) > 55:
        return min(head_pitch, tail_pitch)
    return (head_pitch + tail_pitch) / 2
def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras):
    """Run one SVC conversion.

    :param song_wav: source vocal passed to ``svc_main``
    :param target_wav: target-voice audio; used for pitch and embedding extraction
    :param svc_out_path: output path passed to ``svc_main``
    :param embed_npy: path where ``get_embed`` stores the embedding
    :param embed_md: embedding model
    :param hubert_md: hubert model
    :param paras: extra parameters forwarded to ``svc_main``
    :return: always 0
    """
    # Pitch of the target voice.
    pitch_key = pyin_process_single_rmvpe(target_wav)

    # Speaker embedding of the target voice.
    get_embed(target_wav, embed_npy, embed_md)

    print("svc main start...")
    svc_main(song_wav, svc_out_path, pth_model_path, embed_npy, pitch_key, hubert_md, paras)
    print("svc main finished!!")

    return 0
-def process_svc(song_wav, target_wav, svc_out_path,paras):
+
def process_svc_online(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """SVC entry point for the online service; paths are used exactly as given.

    :param song_wav: source vocal
    :param target_wav: target-voice audio
    :param svc_out_path: output path
    :param embed_md: embedding model
    :param hubert_md: hubert model
    :param paras: extra parameters forwarded to ``meisheng_svc``
    :return: the value returned by ``meisheng_svc``
    """
    # The embedding is stored next to the target audio: the last four
    # characters of its path are replaced by ".npy".
    npy_path = target_wav[:-4] + '.npy'
    return meisheng_svc(song_wav, target_wav, svc_out_path, npy_path, embed_md, hubert_md, paras)
+
def process_svc(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """SVC entry point for the offline workspace.

    Only the base names of ``target_wav`` and ``svc_out_path`` are kept and
    re-rooted under the module-level ``abs_path``; ``song_wav`` is used as
    given.

    :param song_wav: source vocal
    :param target_wav: target-voice audio (its base name must exist under ``abs_path``)
    :param svc_out_path: output file (its base name is created under ``abs_path``)
    :param embed_md: embedding model
    :param hubert_md: hubert model
    :param paras: extra parameters forwarded to ``meisheng_svc``
    :return: the value returned by ``meisheng_svc``
    """
    # Absolute paths inside the workspace (abs_path ends with "/").
    target_wav = abs_path + os.path.basename(target_wav)
    svc_out_path = abs_path + os.path.basename(svc_out_path)

    # Embedding file: last four characters of the target path replaced by ".npy".
    embed_npy = target_wav[:-4] + '.npy'
    similar = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)
    return similar
def get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras):
    '''
    Convert one song to the target voice inside the workspace, then add
    reverb and mix in the accompaniment.

    :param target_yinse_wav: target-voice audio file
    :param song_name: song name
    :param embed_model: embedding model
    :param hubert_model: hubert model
    :param paras: other parameters; ``paras['gender']`` selects the song folder
    :return: path of the raw SVC wav (before reverb and mixing)
    '''
    # Empty the workspace, or create it on first use.
    if not os.path.exists(gs_work_dir):
        os.makedirs(gs_work_dir)
    else:
        os.system(f"rm -rf {gs_work_dir}/*")

    gender = paras['gender']  # decides which song folder is used

    # Copy the target voice into the workspace and continue with the copy.
    workspace_copy = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
    shutil.copy(target_yinse_wav, workspace_copy)
    target_yinse_wav = workspace_copy

    # Song vocal and accompaniment (these paths may need adjusting).
    song_dir = "{}{}/{}".format(song_folder, gender, song_name)
    song_wav = song_dir + "/vocal321.wav"
    inf_acc_path = song_dir + "/acc.wav"
    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # name of the SVC result

    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)

    # SVC conversion.
    started = time.time()
    print("start inference...")
    process_svc(song_wav, target_yinse_wav, svc_out_path, embed_model, hubert_model, paras)
    print("svc finished!!")
    print(f"time cost = {time.time() - started}")
    print(f"out path name {svc_out_path} ")

    # Add reverb.
    print("add reverbration...")
    svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
    cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}"
    print("cmd :", cmd)
    os.system(cmd)

    # Merge vocal and accompaniment.
    print("add acc...")
    out_path = svc_out_path_effect[:-4] + '_music.wav'
    mix(svc_out_path_effect, inf_acc_path, out_path)
    print(f"time cost = {time.time() - started}")
    print(f"out path name {out_path} ")

    return svc_out_path
-if __name__=='__main__':
def meisheng_func(target_yinse_wav, song_name, paras):
    """Full offline pipeline: load models, detect the gender, run ``get_svc``.

    :param target_yinse_wav: target-voice audio file
    :param song_name: song name
    :param paras: parameter dict; its ``'gender'`` entry is overwritten in place
    :return: None
    """
    # Initialise the models.
    embed_model, hubert_model, gender_model = meisheng_init()

    # Gender prediction.
    gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
    separator = '====================='
    print(separator)
    print(f"gender:{gender}, female_rate:{female_rate},is_pure:{is_pure}")
    if gender == 0:
        gender = 'female'
    elif gender == 1:
        gender = 'male'
    else:
        # Any other class id: decide by the female probability.
        gender = 'female' if female_rate > 0.5 else 'male'
    print(f"modified gender:{gender} ")
    print(separator)

    # Main conversion. NOTE(review): the original comment says all units in
    # paras are ms — confirm against the callers.
    paras['gender'] = gender
    get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras)
+
+
if __name__=='__main__':
    # Manual test run with a fixed target voice and song.

    #target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav"  # a complete path is required
    target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
    song_name = "lost_stars"  ## song name
    # 'gender' is filled in by meisheng_func after gender detection.
    paras = {'gender': None, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None}
    # paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0}  ### SVC test on a segment
    meisheng_func(target_yinse_wav, song_name, paras)
- #'''
- #target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
- song_name = "lost_stars"
- #paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None}
- paras = {'gender': gender, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None }
- get_svc(target_yinse_wav, song_name, paras)
- #'''

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:33 (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1347138
Default Alt Text
(19 KB)

Event Timeline