No OneTemporary
Actions

Size

10 KB

Subscribers

None

View Options

	diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py
	index 9a5d94f..89c57f5 100644
	--- a/AIMeiSheng/meisheng_svc_final.py
	+++ b/AIMeiSheng/meisheng_svc_final.py
	@@ -1,227 +1,227 @@
	import os
	import sys

	sys.path.append(os.path.dirname(__file__))

	import time
	import shutil
	import glob
	import hashlib
	import librosa
	import soundfile
	import gradio as gr
	import pandas as pd
	import numpy as np
	from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
	from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
	from gender_classify import load_gender_model
	from AIMeiSheng.docker_demo.common import gs_svc_model_path, gs_embed_model_path, gs_rmvpe_model_path, gs_err_code_target_silence
	from slicex.slice_set_silence import del_noise

	# Module-level configuration: external tool path, workspace locations and model file.
	gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ## mixer executable
	tmp_workspace_name = "batch_test_ocean_fi" # workspace name
	song_folder = "./data_meisheng/" ##song folder
	gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" # workspace path (relative to the current working directory)
	pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" ## model file

	# Directory containing this source file, and the workspace path built from it (with trailing '/').
	cur_dir = os.path.abspath(os.path.dirname(__file__))
	abs_path = os.path.join(cur_dir, song_folder, tmp_workspace_name) + '/'

	# Pitch extractor; None until load_model() or pyin_process_single_rmvpe() assigns it.
	f0_method = None


	def mix(in_path, acc_path, dst_path):
	    """Transcode the vocal to 44.1 kHz stereo with ffmpeg, then mix it with the accompaniment.

	    Every path is shell-quoted before being placed in a command line, so
	    paths containing spaces or shell metacharacters are passed intact.

	    :param in_path: vocal file to transcode; the transcoded copy is written
	        to ``in_path + "_442.wav"``
	    :param acc_path: accompaniment file handed to the mixer
	    :param dst_path: output path handed to the mixer
	    :return: -1 when the transcoded file does not exist after ffmpeg ran,
	        otherwise None (the mixer's exit status is not checked)
	    """
	    import shlex  # local import: only needed here to quote shell arguments

	    # Transcode the svc output to 44100 Hz / 2 channels.
	    svc_442_file = in_path + "_442.wav"
	    st = time.time()
	    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(
	        shlex.quote(in_path), shlex.quote(svc_442_file))
	    os.system(cmd)
	    if not os.path.exists(svc_442_file):
	        return -1
	    print("transcode,{},sp={}".format(in_path, time.time() - st))

	    # Mix the transcoded vocal with the accompaniment.
	    st = time.time()
	    cmd = "{} {} {} {} 1".format(
	        shlex.quote(gs_simple_mixer_path),
	        shlex.quote(svc_442_file),
	        shlex.quote(acc_path),
	        shlex.quote(dst_path),
	    )
	    os.system(cmd)
	    print("mixer,{},sp={}".format(in_path, time.time() - st))


	def load_model():
	    """Preload the inference models.

	    Side effect: rebinds the module-level ``f0_method`` to the object
	    returned by ``get_rmvpe(gs_rmvpe_model_path)``.

	    :return: tuple ``(embed_model, hubert_model)``
	    """
	    global f0_method

	    speaker_embedder = get_embed_model(gs_embed_model_path)
	    hubert = load_hubert()
	    # get_vc is called for its effect only; its return value is not used here.
	    get_vc(gs_svc_model_path)
	    f0_method = get_rmvpe(gs_rmvpe_model_path)

	    print("model preload finish!!!")
	    return speaker_embedder, hubert


	def meisheng_init():
	    """Load every model used by the pipeline up front.

	    :return: tuple ``(embed_model, hubert_model, gender_model)``
	    """
	    embedder, hubert = load_model()  # preload the models ahead of inference
	    return embedder, hubert, load_gender_model()


	def _mean_voiced_pitch(samples):
	    """Return the mean of the f0 values of ``samples`` that lie strictly between 50 and 600."""
	    f0 = f0_method.infer_from_audio(samples, thred=0.03)
	    f0 = f0[f0 < 600]
	    # np.mean of an empty selection is NaN; the caller checks for NaN.
	    return np.mean(f0[f0 > 50])


	def pyin_process_single_rmvpe(input_file):
	    """Estimate the mean pitch of an audio file with the module-level f0 extractor.

	    The file is loaded at 16 kHz. Clips longer than 15 s are analysed on
	    their first and last 15 s only: when the two means differ by more than
	    55 the lower one is returned, otherwise their average. Shorter clips
	    are analysed in full.

	    :param input_file: path of the audio file to analyse
	    :return: mean pitch; NaN when no frame falls inside the (50, 600) range
	    """
	    global f0_method
	    if f0_method is None:
	        # Use the same model path as load_model() so both entry points
	        # end up with the same extractor.
	        f0_method = get_rmvpe(gs_rmvpe_model_path)

	    rate = 16000
	    # Read the audio file.
	    y, sr = librosa.load(input_file, sr=rate)
	    lim_s = 15
	    if len(y) / sr <= lim_s:
	        return _mean_voiced_pitch(y)

	    window = sr * lim_s
	    mean_pitch1 = _mean_voiced_pitch(y[:window])
	    mean_pitch2 = _mean_voiced_pitch(y[-window:])

	    # NOTE(review): if either window has no in-range frame its mean is NaN
	    # and the NaN propagates to the result - confirm this is intended.
	    if abs(mean_pitch1 - mean_pitch2) > 55:
	        return min(mean_pitch1, mean_pitch2)
	    return (mean_pitch1 + mean_pitch2) / 2


	# Run one voice conversion: estimate the target's pitch, extract its embedding,
	# convert the song vocal, then post-process the result with del_noise.
	# Returns gs_err_code_target_silence when the target pitch is below 40 or NaN,
	# otherwise 0.
	def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras):
	## compute pitch
	f0up_key = pyin_process_single_rmvpe(target_wav)
	if f0up_key < 40 or np.isnan(f0up_key):#unvoice
	return gs_err_code_target_silence
	## get embed (timbre)
	get_embed(target_wav, embed_npy, embed_md)

	print("svc main start...")
	svc_main(song_wav, svc_out_path, embed_npy, f0up_key, hubert_md, paras)
	print("svc main finished!!")
	# The -/+ pair below is the change made by this diff: del_noise now also receives paras.
	- del_noise(song_wav,svc_out_path)
	+ del_noise(song_wav,svc_out_path,paras)
	print("del noise in silence")

	return 0


	def process_svc_online(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
	    """Run meisheng_svc with the embedding file stored next to the target audio.

	    The embedding path is the target path with its extension replaced by
	    ``.npy``; the extension may be of any length.

	    :return: the value returned by meisheng_svc (0 or an error code)
	    """
	    embed_npy = os.path.splitext(target_wav)[0] + '.npy'  # where the embedding npy is stored
	    return meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)


	def process_svc(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
	    """Run meisheng_svc with the target and output files relocated under ``abs_path``.

	    Only the base names of ``target_wav`` and ``svc_out_path`` are kept and
	    prefixed with ``abs_path``; ``song_wav`` is passed through unchanged.
	    The embedding path is the relocated target path with its extension
	    replaced by ``.npy``.

	    :return: the value returned by meisheng_svc (0 or an error code)
	    """
	    target_wav = abs_path + os.path.basename(target_wav)
	    svc_out_path = abs_path + os.path.basename(svc_out_path)
	    embed_npy = os.path.splitext(target_wav)[0] + '.npy'  # where the embedding npy is stored

	    return meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)


	def get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras):
	    '''
	    Convert one song to the target timbre, then add reverb and mix in the accompaniment.

	    :param target_yinse_wav: target timbre audio; it is copied into the work directory
	    :param song_name: song name, used as the folder name under song_folder/<gender>/
	    :param embed_model: embedding model forwarded to process_svc
	    :param hubert_model: hubert model forwarded to process_svc
	    :param paras: other parameters; paras['gender'] selects the song folder
	    :return: path of the raw svc output inside the work directory (not the
	        reverb/mixed file); the file is absent when conversion failed
	    '''
	    import shlex  # local import: only needed here to quote shell arguments

	    # Empty the temporary workspace, or create it on first use.
	    if os.path.exists(gs_work_dir):
	        os.system(f"rm -rf {shlex.quote(gs_work_dir)}/*")
	    else:
	        os.makedirs(gs_work_dir)

	    gender = paras['gender']  # decides which song folder is used

	    # Copy the target timbre into the workspace and continue with the copy.
	    f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
	    shutil.copy(target_yinse_wav, f_dst)
	    target_yinse_wav = f_dst

	    # Song vocal and accompaniment locations.
	    song_wav = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)
	    inf_acc_path = "{}{}/{}/acc.wav".format(song_folder, gender, song_name)
	    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # name of the svc result
	    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)

	    # svc process
	    # NOTE(review): process_svc rebuilds its paths from abs_path while this
	    # function uses the relative gs_work_dir; they presumably coincide only
	    # when the working directory is this file's directory - confirm.
	    st = time.time()
	    print("start inference...")
	    err_code = process_svc(song_wav, target_yinse_wav, svc_out_path, embed_model, hubert_model, paras)
	    if err_code != 0:
	        # Conversion returned an error code, so there is no svc output to post-process.
	        print("svc failed, err_code = {}".format(err_code))
	        return svc_out_path
	    print("svc finished!!")
	    print("time cost = {}".format(time.time() - st))
	    print("out path name {} ".format(svc_out_path))

	    # Add reverb.
	    print("add reverbration...")
	    svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
	    cmd = "/data/gpu_env_common/bin/effect_tool {} {}".format(
	        shlex.quote(svc_out_path), shlex.quote(svc_out_path_effect))
	    print("cmd :", cmd)
	    os.system(cmd)

	    # Merge vocal and accompaniment.
	    print("add acc...")
	    out_path = svc_out_path_effect[:-4] + '_music.wav'
	    mix(svc_out_path_effect, inf_acc_path, out_path)

	    print("time cost = {}".format(time.time() - st))
	    print("out path name {} ".format(out_path))

	    return svc_out_path


	def meisheng_func(target_yinse_wav, song_name, paras):
	    """Load the models, classify the target's gender and run get_svc.

	    The gender model's code 0 maps to 'female' and 1 to 'male'; any other
	    code is decided by ``female_rate > 0.5``. The label is written to
	    ``paras['gender']``, so the caller's dict is modified.

	    :param target_yinse_wav: target timbre audio file
	    :param song_name: song name forwarded to get_svc
	    :param paras: parameter dict forwarded to get_svc
	    """
	    # init
	    embed_model, hubert_model, gender_model = meisheng_init()

	    # gender predict
	    separator = '====================='
	    gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
	    print(separator)
	    print("gender:{}, female_rate:{},is_pure:{}".format(gender, female_rate, is_pure))
	    if gender == 0:
	        label = 'female'
	    elif gender == 1:
	        label = 'male'
	    else:
	        # Any other code: fall back to the reported female rate.
	        label = 'female' if female_rate > 0.5 else 'male'
	    print("modified gender:{} ".format(label))
	    print(separator)

	    # main conversion
	    paras['gender'] = label
	    get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras)


	if __name__ == '__main__':
	    # Target timbre sample (a full path is needed); another sample is
	    # "./raw/meisheng_yinse/female/changying.wav".
	    target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
	    song_name = "lost_stars"  # song name
	    # For a segment-only svc test use e.g. gender='female', tst=0, tnd=30, delay=0.
	    paras = dict(gender=None, tst=0, tnd=None, delay=0, song_path=None)
	    meisheng_func(target_yinse_wav, song_name, paras)
	diff --git a/AIMeiSheng/slicex/slice_set_silence.py b/AIMeiSheng/slicex/slice_set_silence.py
	index f1b51b6..d906be9 100644
	--- a/AIMeiSheng/slicex/slice_set_silence.py
	+++ b/AIMeiSheng/slicex/slice_set_silence.py
	@@ -1,59 +1,65 @@
	# -- coding: utf-8 --


	import librosa # Optional. Use any library you like to read audio files.
	import soundfile # Optional. Use any library you like to write audio files.
	from slicex.slicer_torch import Slicer


	class silce_silence():
	    """Slice audio with ``Slicer`` and blank the matching spans of another signal."""

	    def __init__(self, sr, threshold=-40, min_length=5000, min_interval=300,
	                 hop_size=10, max_sil_kept=500):
	        """Create the underlying slicer.

	        All keyword arguments are forwarded unchanged to ``Slicer``; their
	        defaults are the values this class previously hard-coded.

	        :param sr: sampling rate of the audio that will be passed to ``cut``
	        """
	        self.slicer = Slicer(
	            sr=sr,
	            threshold=threshold,
	            min_length=min_length,
	            min_interval=min_interval,
	            hop_size=hop_size,
	            max_sil_kept=max_sil_kept,
	        )

	    def set_silence(self, chunks, sr, target_audio, target_sr):
	        '''
	        Zero the parts of ``target_audio`` that correspond to flagged chunks.

	        :param chunks: slice result of the song wav; each value holds
	            "slice" (flag) and "split_time" ("start,end" sample positions at rate sr)
	        :param sr: sampling rate of the sliced song
	        :param target_audio: svc output; modified in place
	        :param target_sr: sampling rate of the svc output
	        :return: the same ``target_audio`` object
	        '''
	        for v in chunks.values():
	            tag = v["split_time"].split(",")
	            # Skip empty spans and chunks that are not flagged.
	            if tag[0] == tag[1] or not v["slice"]:
	                continue
	            # Rescale the span from the song's rate to the target's rate,
	            # clamping the end to the target length.
	            st = int(int(tag[0]) * target_sr / sr)
	            en = min(int(int(tag[1]) * target_sr / sr), len(target_audio))
	            target_audio[st:en] = 0
	        return target_audio

	    def cut(self, audio):
	        """Slice ``audio`` and return the slicer's result as a dict."""
	        return dict(self.slicer.slice(audio))

	def del_noise(wav_in, svc_out, paras=None):
	    """Zero, in the svc output file, the spans that the slicer flags in the source vocal.

	    Both files are loaded at their native sampling rate. When ``paras`` is
	    given, the source vocal is first trimmed to the ``tst``..``tnd`` window
	    (milliseconds), converted to samples with the rate the vocal was
	    actually loaded at. The svc output file is overwritten in place.

	    :param wav_in: path of the source vocal
	    :param svc_out: path of the svc output; rewritten with the result
	    :param paras: optional dict; ``tst`` is the window start in ms (missing
	        or None means 0), ``tnd`` the window end in ms (missing or None
	        means the end of the file)
	    :return: None
	    """
	    audio, sr = librosa.load(wav_in, sr=None)  # native rate, no resampling
	    target_audio, target_sr = librosa.load(svc_out, sr=None)

	    if paras is not None:
	        # Convert ms to samples with the real rate of `audio`, not a fixed 16000.
	        st = int((paras.get('tst') or 0) * sr / 1000)
	        en = len(audio)
	        tnd = paras.get('tnd')
	        if tnd is not None:
	            en = min(en, int(tnd * sr / 1000))
	        audio = audio[st:en]

	    slice_sil = silce_silence(sr)
	    chunks = slice_sil.cut(audio)
	    target_audio1 = slice_sil.set_silence(chunks, sr, target_audio, target_sr)
	    soundfile.write(svc_out, target_audio1, target_sr)
	    return

File Metadata

Mime Type: text/x-diff
Expires: Sun, Jan 12, 08:34 (1 d, 15 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1346278
Default Alt Text: (10 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions