meisheng_svc_final_test.py
No OneTemporary
Actions

Size

7 KB

Subscribers

None

meisheng_svc_final_test.py
View Options



	import os,sys
	import time
	import shutil
	import glob
	import hashlib
	import librosa
	import soundfile
	import gradio as gr
	import pandas as pd
	import numpy as np
	sys.path.append('./RawNet3/')
	from cal_cos_distance_folder import load_and_cal_distance #del
	from infererence_fang_meisheng import get_embed, get_embed_model

	# Timestamp of the start of the current benchmark iteration; set by get_svc
	# and read by meisheng_svc.
	times_st = 0
	# Accumulated seconds spent between the start of an iteration and the call
	# to svc_main (pitch estimation + embedding extraction).
	times_sum = 0
	gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##path of the mixer executable
	tmp_workspace_name = "batch_test_ocean_fi"#workspace name
	song_folder = "./data_meisheng/" ##song folder
	gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" #workspace path (relative to the current working directory)
	#abs_path = f"/data/bingxiao.fang/voice_conversion/SVC_MEISHENG/svc_vits-diff/Retrieval-based-Voice-Conversion-WebUI/data_meisheng/{tmp_workspace_name}/"
	cur_dir = os.path.abspath(os.path.dirname(__file__))
	#par_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
	# Absolute workspace path built from this file's directory, with a trailing
	# slash because process_svc concatenates file names onto it directly.
	abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/'

	# Checkpoint passed to get_vc() and svc_main().
	pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"

	from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe


	def mix(in_path, acc_path, dst_path):
	    """Transcode a vocal to 44.1 kHz stereo and mix it with an accompaniment.

	    The vocal is first converted with ffmpeg into ``in_path + "_442.wav"``;
	    that file is then handed to the external mixer executable
	    (``gs_simple_mixer_path``) together with the accompaniment and the
	    destination path.

	    Both tools are launched with argument lists instead of shell strings,
	    so paths containing spaces or shell metacharacters are passed through
	    verbatim.

	    Args:
	        in_path: Path of the converted vocal to transcode.
	        acc_path: Path of the accompaniment track.
	        dst_path: Path the mixer should write its output to.

	    Returns:
	        -1 if the transcode step fails (ffmpeg cannot be started, exits
	        with a non-zero status, or leaves no output file) or if the mixer
	        executable cannot be started; otherwise None.
	    """
	    # Local import: the file's import header does not provide subprocess.
	    import subprocess

	    # Transcode the SVC output to 44.1 kHz / 2 channels.
	    svc_442_file = in_path + "_442.wav"
	    st = time.time()
	    transcode_cmd = [
	        "ffmpeg", "-i", in_path,
	        "-ar", "44100", "-ac", "2",
	        "-y", svc_442_file,
	        "-loglevel", "fatal",
	    ]
	    try:
	        transcode = subprocess.run(transcode_cmd, check=False)
	    except OSError:
	        # ffmpeg is missing or not executable.
	        return -1
	    # Check the exit status as well as the file: a leftover output from an
	    # earlier run would otherwise hide a failed transcode.
	    if transcode.returncode != 0 or not os.path.exists(svc_442_file):
	        return -1
	    print("transcode,{},sp={}".format(in_path, time.time() - st))

	    # Mix vocal and accompaniment.
	    st = time.time()
	    mixer_cmd = [gs_simple_mixer_path, svc_442_file, acc_path, dst_path, "1"]
	    try:
	        # NOTE(review): the mixer's exit-code convention is not visible
	        # here, so its return code is deliberately not interpreted.
	        subprocess.run(mixer_cmd, check=False)
	    except OSError:
	        return -1
	    print("mixer,{},sp={}".format(in_path, time.time() - st))


	def load_model():
	    """Load every model the pipeline needs and return the two that callers keep.

	    Side effects:
	        * calls ``get_vc(pth_model_path)``, whose result is not captured here;
	        * stores the object returned by ``get_rmvpe()`` in the module-level
	          global ``f0_method``.

	    Returns:
	        A ``(embed_model, hubert_model)`` tuple as returned by
	        ``get_embed_model()`` and ``load_hubert()``.
	    """
	    global f0_method

	    print("load embed_model...")
	    speaker_encoder = get_embed_model()

	    print("load hubert_model...")
	    content_encoder = load_hubert()

	    print("load vc_model...")
	    get_vc(pth_model_path)

	    print("load rmvpe...")
	    f0_method = get_rmvpe()

	    print("load finish")
	    loaded = (speaker_encoder, content_encoder)
	    return loaded

	embed_model, hubert_model = load_model() ##load the models up front, when the module is executed

	def _mean_voiced_pitch(samples):
	    """Return the mean of the f0 values strictly between 50 and 600.

	    ``samples`` is passed to ``f0_method.infer_from_audio`` with
	    ``thred=0.03``. Returns NaN (without numpy's empty-slice warning) when
	    no value falls inside the accepted range.
	    """
	    f0 = f0_method.infer_from_audio(samples, thred=0.03)
	    f0 = f0[f0 < 600]
	    voiced = f0[f0 > 50]
	    if len(voiced) == 0:
	        return np.nan
	    return np.mean(voiced)


	def pyin_process_single_rmvpe(input_file):
	    """Estimate the mean pitch of an audio file.

	    The file is loaded at 16 kHz. Recordings longer than 15 seconds are
	    analysed on their first and last 15 seconds only:

	    * if the two window means differ by more than 55, the lower one is
	      returned;
	    * otherwise their average is returned;
	    * if exactly one window has no usable f0 values, the other window's
	      mean is returned instead of propagating NaN.

	    Shorter recordings are analysed in full.

	    Args:
	        input_file: Path of the audio file to analyse.

	    Returns:
	        The mean pitch (presumably in Hz -- confirm against the f0
	        extractor), or NaN when no f0 value lies in the accepted range.
	    """
	    rate = 16000
	    # Load the audio file.
	    y, sr = librosa.load(input_file, sr=rate)

	    lim_s = 15
	    if len(y) / sr > lim_s:
	        window = sr * lim_s
	        mean_pitch1 = _mean_voiced_pitch(y[:window])
	        mean_pitch2 = _mean_voiced_pitch(y[-window:])

	        if np.isnan(mean_pitch1) or np.isnan(mean_pitch2):
	            # One window is unvoiced: fall back to the other one. If both
	            # are unvoiced the result stays NaN.
	            mean_pitch_cur = mean_pitch2 if np.isnan(mean_pitch1) else mean_pitch1
	        elif abs(mean_pitch1 - mean_pitch2) > 55:
	            mean_pitch_cur = min(mean_pitch1, mean_pitch2)
	        else:
	            mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2

	        print("mean_pitch1:",mean_pitch1,"mean_pitch2:",mean_pitch2)
	    else:
	        mean_pitch_cur = _mean_voiced_pitch(y)

	    print("final mean_pitch:",mean_pitch_cur)

	    return mean_pitch_cur

	def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras):
	    """Run one conversion of ``song_wav`` using ``target_wav`` as reference.

	    Steps, in order:
	        1. estimate the mean pitch of ``target_wav`` with
	           ``pyin_process_single_rmvpe``;
	        2. call ``get_embed(target_wav, embed_npy, embed_model)``;
	        3. add the time elapsed since ``times_st`` to the global
	           ``times_sum``;
	        4. call ``svc_main`` with the song, output path, checkpoint path,
	           embedding path, the estimated pitch, the hubert model and
	           ``paras``.

	    Returns:
	        Always 0. The similarity computation between the target embedding
	        and an embedding of the converted output is currently disabled.
	    """
	    global times_st, times_sum

	    # Pitch of the target voice; forwarded to svc_main as ``f0up_key``.
	    f0up_key = pyin_process_single_rmvpe(target_wav)
	    print("@@f0up_key:",f0up_key)

	    # Extract the embedding of the target voice.
	    get_embed(target_wav, embed_npy, embed_model)
	    print("@@get embed")

	    # Everything before svc_main counts as pre-processing time.
	    elapsed_before_svc = time.time() - times_st
	    times_sum += elapsed_before_svc

	    print("@@@@song_wav:",song_wav)
	    print("@@@svc_out_path:",svc_out_path)
	    print("@@@@embed_npy:",embed_npy)
	    svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model,paras)
	    print("svc main finished!!")

	    # Similarity computation (disabled):
	    #   svc_embed_npy = embed_npy[:-4] + '_svc.npy'
	    #   get_embed(svc_out_path, svc_embed_npy, embed_model)
	    #   similar = load_and_cal_distance(embed_npy, svc_embed_npy)
	    #   return similar.numpy()[0]
	    return 0
	def process_svc(song_wav, target_wav, svc_out_path,paras):
	    """Resolve workspace paths and run ``meisheng_svc``.

	    Only the base names of ``target_wav`` and ``svc_out_path`` are used;
	    both are re-rooted under the absolute workspace directory ``abs_path``.
	    ``song_wav`` is forwarded unchanged. The embedding file is placed next
	    to the target audio, with the audio's extension replaced by ``.npy``.

	    Args:
	        song_wav: Path of the source vocal.
	        target_wav: Path of the reference voice; only its base name is kept.
	        svc_out_path: Path of the conversion output; only its base name is
	            kept.
	        paras: Parameter dict forwarded to ``meisheng_svc``.

	    Returns:
	        Whatever ``meisheng_svc`` returns.
	    """
	    target_wav = os.path.join(abs_path, os.path.basename(target_wav))
	    svc_out_path = os.path.join(abs_path, os.path.basename(svc_out_path))

	    # splitext instead of slicing off four characters, so extensions of any
	    # length (".flac", none at all) produce a sensible .npy name.
	    embed_npy = os.path.splitext(target_wav)[0] + '.npy'

	    similar = meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy,paras)
	    return similar


	def get_svc(target_yinse_wav, song_name, paras, test_num=20):
	    """Benchmark the conversion pipeline on one reference voice.

	    The workspace directory ``gs_work_dir`` is emptied (or created), the
	    reference voice and the source vocal are copied into it, and
	    ``process_svc`` is run ``test_num`` times while timing statistics are
	    printed.

	    Args:
	        target_yinse_wav: Path of the reference (target timbre) audio.
	        song_name: Song name used to build the per-song vocal path.
	        paras: Parameter dict; must contain the key ``'gender'``. It is
	            forwarded to ``process_svc``.
	        test_num: Number of benchmark iterations (default 20, the value
	            previously hard-coded).

	    Returns:
	        The value returned by the last ``process_svc`` call.

	    Raises:
	        ValueError: if ``test_num`` is smaller than 1.
	        KeyError: if ``paras`` has no ``'gender'`` entry.
	        OSError: if the reference voice or the source vocal cannot be
	            copied into the workspace.
	    """
	    global times_st,times_sum

	    if test_num < 1:
	        raise ValueError("test_num must be >= 1")

	    # Empty the temporary workspace without going through a shell, so the
	    # path is never interpreted by one.
	    if os.path.exists(gs_work_dir):
	        for entry in os.scandir(gs_work_dir):
	            if entry.is_dir(follow_symlinks=False):
	                shutil.rmtree(entry.path)
	            else:
	                os.remove(entry.path)
	    else:
	        os.makedirs(gs_work_dir)

	    gender = paras['gender']  # selects the song sub-folder

	    # Copy the reference voice into the workspace.
	    f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
	    print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav)
	    shutil.copy(target_yinse_wav, f_dst)
	    target_yinse_wav = f_dst

	    # Source vocal.
	    song_wav = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)
	    # NOTE(review): the per-song path above is immediately overridden by a
	    # fixed benchmark clip, so song_name/gender currently have no effect on
	    # the audio that is converted. Kept as-is; confirm whether intended.
	    song_wav = './xusong_long.wav'
	    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # conversion output
	    print("svc out: {}".format(svc_out_path))

	    st = time.time()

	    # Keep a copy of the source vocal in the workspace.
	    shutil.copy(song_wav, gs_work_dir)

	    print("start inference...")
	    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)

	    # Start from zero so repeated calls do not report timings accumulated
	    # by earlier runs.
	    times_sum = 0
	    for _ in range(test_num):
	        times_st = time.time()
	        similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras)
	    print("svc finished!!")
	    print("time cost = {}".format(time.time() - st))
	    print("mean time cost = {}".format((time.time() - st)/test_num))
	    print("process before svc = {}, mean time cost {}".format(times_sum, times_sum/test_num) )

	    return similar



	if __name__ == "__main__":
	    # Script entry point: run the benchmark once with a fixed reference
	    # voice and song.
	    song_name = "drivers_license"  # joined into the song path by get_svc
	    target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav"  # full path required

	    # 'song_path' is reserved for the location of pre-processed files.
	    # The original note states that all units are milliseconds.
	    paras = dict(
	        gender="female",
	        tst=0,
	        tnd=None,
	        delay=0,
	        song_path=None,
	    )

	    similar = get_svc(target_yinse_wav, song_name, paras)

File Metadata

Mime Type: text/x-python
Expires: Sun, Jan 12, 01:58 (1 d, 1 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1347132
Default Alt Text: meisheng_svc_final_test.py (7 KB)

meisheng_svc_final_test.pyNo OneTemporaryActions

meisheng_svc_final_test.pyView Options

File Metadata

Event Timeline

meisheng_svc_final_test.py
No OneTemporary
Actions

meisheng_svc_final_test.py
View Options