Page MenuHomePhabricator

meisheng_svc_final_test.py
No OneTemporary

meisheng_svc_final_test.py

import os,sys
import time
import shutil
import glob
import hashlib
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
sys.path.append('./RawNet3/')
from cal_cos_distance_folder import load_and_cal_distance #del
from infererence_fang_meisheng import get_embed, get_embed_model
# Benchmark timing state shared between get_svc and meisheng_svc:
#   times_st  - timestamp taken by get_svc at the start of each iteration
#   times_sum - accumulated time from iteration start until just before svc_main
times_st = 0
times_sum = 0
gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"  # mixer executable
tmp_workspace_name = "batch_test_ocean_fi"  # workspace name
song_folder = "./data_meisheng/"  # song folder
gs_work_dir = f"./data_meisheng/{tmp_workspace_name}"  # workspace path (relative to the current working directory)
#abs_path = f"/data/bingxiao.fang/voice_conversion/SVC_MEISHENG/svc_vits-diff/Retrieval-based-Voice-Conversion-WebUI/data_meisheng/{tmp_workspace_name}/"
cur_dir = os.path.abspath(os.path.dirname(__file__))  # directory containing this script
#par_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
# Workspace path anchored at this script's directory, with a trailing slash so
# that file names can be appended by plain string concatenation.
abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/'
# Checkpoint handed to get_vc() and svc_main().
pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe
def mix(in_path, acc_path, dst_path):
    """Transcode a converted vocal with ffmpeg, then run the external mixer.

    The file at ``in_path`` is transcoded to 44100 Hz / 2 channels and written
    next to the input as ``<in_path>_442.wav``. The executable at
    ``gs_simple_mixer_path`` is then invoked with that file, ``acc_path``,
    ``dst_path`` and a trailing ``1``.

    Both tools are started from argument lists rather than shell strings, so
    paths containing spaces or shell metacharacters reach them unchanged.

    :param in_path: path of the converted vocal to transcode.
    :param acc_path: second mixer argument (presumably the accompaniment
        track -- confirm against the mixer's usage).
    :param dst_path: third mixer argument (presumably the output file).
    :return: ``-1`` when ffmpeg fails or either tool cannot be launched;
        ``None`` otherwise.
    """
    import subprocess  # local import: not among the file's top-level imports

    # Transcode the SVC output to 44.1 kHz stereo.
    svc_442_file = in_path + "_442.wav"
    st = time.time()
    transcode_cmd = [
        "ffmpeg", "-i", in_path,
        "-ar", "44100", "-ac", "2",
        "-y", svc_442_file,
        "-loglevel", "fatal",
    ]
    try:
        transcode = subprocess.run(transcode_cmd)
    except OSError:
        # ffmpeg is not installed or not executable.
        return -1
    # Check the exit status as well as the file: a leftover output from an
    # earlier run must not hide a failed transcode.
    if transcode.returncode != 0 or not os.path.exists(svc_442_file):
        return -1
    print("transcode,{},sp={}".format(in_path, time.time() - st))

    # Mix.
    st = time.time()
    # NOTE(review): the meaning of the trailing "1" is not visible here.
    mixer_cmd = [gs_simple_mixer_path, svc_442_file, acc_path, dst_path, "1"]
    try:
        # The mixer's exit status is not interpreted: its conventions are not
        # visible from this file.
        subprocess.run(mixer_cmd)
    except OSError:
        # Mixer binary is missing or not executable.
        return -1
    print("mixer,{},sp={}".format(in_path, time.time() - st))
def load_model():
    """Load the models used by the pipeline.

    In order: the embedding model, the HuBERT model, the VC checkpoint at
    ``pth_model_path`` (the return value of ``get_vc`` is not used here), and
    RMVPE, which is stored in the module-level ``f0_method``.

    :return: tuple ``(embed_model, hubert_model)``.
    """
    global f0_method

    print("load embed_model...")
    embed_net = get_embed_model()

    print("load hubert_model...")
    hubert_net = load_hubert()

    print("load vc_model...")
    get_vc(pth_model_path)

    print("load rmvpe...")
    f0_method = get_rmvpe()

    print("load finish")
    return embed_net, hubert_net
embed_model, hubert_model = load_model()  # load the models up front; this runs at import time
def _mean_voiced_pitch(samples):
    """Return the mean f0 of ``samples`` over frames strictly between 50 and 600.

    f0 comes from ``f0_method.infer_from_audio(samples, thred=0.03)``.
    Returns NaN, without a NumPy empty-slice warning, when no frame falls
    inside that range.
    """
    f0 = f0_method.infer_from_audio(samples, thred=0.03)
    f0 = f0[f0 < 600]
    voiced = f0[f0 > 50]
    if len(voiced) == 0:
        return np.nan
    return np.mean(voiced)


def pyin_process_single_rmvpe(input_file):
    """Estimate a mean pitch for an audio file using the loaded RMVPE model.

    The file is loaded at 16 kHz. Recordings longer than 15 s are summarised
    from their first and last 15 s: when the two segment means differ by more
    than 55 the lower one is used, otherwise their average. Shorter
    recordings use the mean over the whole signal.

    If one of the two segments has no usable frames, the other segment's mean
    is used instead of letting NaN propagate into the result.

    :param input_file: path of the audio file to analyse.
    :return: the mean pitch (presumably in Hz -- confirm against RMVPE);
        NaN when no frame lies strictly between 50 and 600.
    """
    rate = 16000
    lim_s = 15  # seconds taken from each end of a long recording

    # Read the audio file.
    y, sr = librosa.load(input_file, sr=rate)

    if len(y) / sr > lim_s:
        edge = sr * lim_s
        mean_pitch1 = _mean_voiced_pitch(y[:edge])
        mean_pitch2 = _mean_voiced_pitch(y[-edge:])
        if np.isnan(mean_pitch1):
            mean_pitch_cur = mean_pitch2
        elif np.isnan(mean_pitch2):
            mean_pitch_cur = mean_pitch1
        elif abs(mean_pitch1 - mean_pitch2) > 55:
            # The two ends disagree strongly: keep the lower estimate.
            mean_pitch_cur = min(mean_pitch1, mean_pitch2)
        else:
            mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2
        print("mean_pitch1:",mean_pitch1,"mean_pitch2:",mean_pitch2)
    else:
        mean_pitch_cur = _mean_voiced_pitch(y)

    print("final mean_pitch:",mean_pitch_cur)
    return mean_pitch_cur
def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras):
    """Run one conversion of ``song_wav`` with ``target_wav`` as the reference.

    Computes the pitch statistic of the target recording, calls ``get_embed``
    with the target audio and ``embed_npy``, adds the time elapsed since
    ``times_st`` to ``times_sum``, then calls ``svc_main``.

    :param song_wav: source vocal passed to ``svc_main``.
    :param target_wav: reference recording used for pitch and embedding.
    :param svc_out_path: output path passed to ``svc_main``.
    :param embed_npy: ``.npy`` path passed to ``get_embed`` and ``svc_main``.
    :param paras: options dict forwarded unchanged to ``svc_main``.
    :return: always ``0`` (similarity scoring is disabled).
    """
    global times_st, times_sum

    # Compute the pitch; the value is handed to svc_main as f0up_key.
    pitch_key = pyin_process_single_rmvpe(target_wav)
    print("@@f0up_key:", pitch_key)

    # Get the embedding.
    get_embed(target_wav, embed_npy, embed_model)
    print("@@get embed")

    # Account for the time spent since the caller stamped times_st.
    elapsed_before_svc = time.time() - times_st
    times_sum = times_sum + elapsed_before_svc

    print("@@@@song_wav:", song_wav)
    print("@@@svc_out_path:", svc_out_path)
    print("@@@@embed_npy:", embed_npy)
    svc_main(
        song_wav,
        svc_out_path,
        pth_model_path,
        embed_npy,
        pitch_key,
        hubert_model,
        paras,
    )
    print("svc main finished!!")

    # Similarity computation (disabled):
    #   svc_embed_npy = embed_npy[:-4] + '_svc.npy'
    #   get_embed(svc_out_path, svc_embed_npy, embed_model)
    #   similar = load_and_cal_distance(embed_npy, svc_embed_npy)
    #   print("target_npy:", embed_npy, "svc_npy:", svc_embed_npy)
    #   print("######similar:", similar.numpy())
    #   return similar.numpy()[0]
    return 0
def process_svc(song_wav, target_wav, svc_out_path, paras):
    """Resolve workspace paths and run a single conversion.

    Only the base names of ``target_wav`` and ``svc_out_path`` are kept; both
    are re-rooted under the workspace directory ``abs_path``. ``song_wav`` is
    passed through unchanged. The embedding is stored beside the target
    recording, with the recording's extension replaced by ``.npy``.

    :param song_wav: source vocal path, forwarded as given.
    :param target_wav: reference recording; only its file name is used.
    :param svc_out_path: output file; only its file name is used.
    :param paras: options dict forwarded to ``meisheng_svc``.
    :return: the value returned by ``meisheng_svc``.
    """
    target_wav = os.path.join(abs_path, os.path.basename(target_wav))
    svc_out_path = os.path.join(abs_path, os.path.basename(svc_out_path))
    # splitext instead of [:-4]: also correct for extensions that are not
    # exactly three characters long, and for names without an extension.
    embed_npy = os.path.splitext(target_wav)[0] + '.npy'  # where the .npy is stored
    return meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras)
def _clear_workspace(workspace):
    """Delete the non-hidden entries of ``workspace``, keeping the directory.

    Mirrors ``rm -rf <workspace>/*``: dot-files are left alone and a failure
    on one entry is reported without stopping the cleanup.
    """
    for name in os.listdir(workspace):
        if name.startswith("."):
            continue
        entry = os.path.join(workspace, name)
        try:
            if os.path.isdir(entry) and not os.path.islink(entry):
                shutil.rmtree(entry)
            else:
                os.remove(entry)
        except OSError as err:
            print("failed to remove {}: {}".format(entry, err))


def get_svc(target_yinse_wav, song_name, paras):
    """Benchmark the conversion pipeline against one target-timbre recording.

    Empties (or creates) the workspace ``gs_work_dir``, copies the target
    recording into it, then runs ``process_svc`` 20 times and prints total
    and mean timings.

    :param target_yinse_wav: path of the target-timbre recording.
    :param song_name: song name used to build the catalogue vocal path
        (that path is currently overridden, see the note in the body).
    :param paras: options dict; ``paras['gender']`` is read here to pick the
        song folder, and the dict is forwarded to ``process_svc``.
    :return: the value returned by the last ``process_svc`` call.
    """
    global times_st, times_sum

    # Empty the temporary workspace, or create it on first use.
    if os.path.exists(gs_work_dir):
        _clear_workspace(gs_work_dir)
    else:
        os.makedirs(gs_work_dir)

    gender = paras['gender']  # used to locate the song

    # Put a copy of the target-timbre recording in the workspace.
    f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
    print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav)
    shutil.copy(target_yinse_wav, f_dst)
    target_yinse_wav = f_dst

    # Song vocal (the path may need adjusting per deployment).
    song_wav = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)
    # NOTE(review): the catalogue path above is immediately overridden by a
    # fixed clip, so song_name and gender currently have no effect. Kept as
    # in the original benchmark -- confirm whether this is intentional.
    song_wav = './xusong_long.wav'
    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # name of the SVC result
    print("svc out: {}".format(svc_out_path))

    st = time.time()
    # Save the vocal into the workspace. The copy is not read by the
    # pipeline (process_svc receives song_wav itself), so a failure is
    # reported but does not abort the run.
    try:
        shutil.copy(song_wav, gs_work_dir)
    except OSError as err:
        print("failed to copy {} into {}: {}".format(song_wav, gs_work_dir, err))

    print("start inference...")
    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)
    test_num = 20
    # Reset the accumulator so a repeated call does not report timings that
    # include earlier runs.
    times_sum = 0
    for _ in range(test_num):
        times_st = time.time()
        similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras)
    print("svc finished!!")
    print("time cost = {}".format(time.time() - st))
    print("mean time cost = {}".format((time.time() - st)/test_num))
    print("process before svc = {}, mean time cost {}".format(times_sum, times_sum/test_num) )
    return similar
if __name__ == '__main__':
    # Entry point: run the benchmark for one target voice.

    # A full path is required for the target recording.
    target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav"

    # Alternative: "Levitating". The containing path is added automatically.
    song_name = "drivers_license"

    # gender = 'female'
    # song_path is reserved for the location of preprocessed files.
    # All time values are in ms.
    paras = {
        'gender': 'female',
        'tst': 0,
        "tnd": None,
        'delay': 0,
        'song_path': None,
    }
    # paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0}

    similar = get_svc(target_yinse_wav, song_name, paras)

File Metadata

Mime Type
text/x-python
Expires
Sun, Jan 12, 01:58 (1 d, 1 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1347132
Default Alt Text
meisheng_svc_final_test.py (7 KB)

Event Timeline