No OneTemporary
Actions

Size

59 KB

Subscribers

None

View Options

	diff --git a/AutoCoverTool/online/inference_one.py b/AutoCoverTool/online/inference_one.py
	index 0974c8e..8113e10 100644
	--- a/AutoCoverTool/online/inference_one.py
	+++ b/AutoCoverTool/online/inference_one.py
	@@ -1,684 +1,688 @@
	"""
	单个处理的逻辑
	song_id:
	---src.mp3 // 源数据，需要提前放进去
	---cache
	---vocal.wav // 分离之后产生
	---acc.wav // 分离之后产生
	---vocal_32.wav // 分离之后产生
	---song_id_sp1.wav // 合成之后产生
	---song_id_sp2.wav // 合成之后产生
	---song_id_sp2_d.wav // 降噪之后生成
	---song_id_sp2_dv.wav // 降噪+拉伸之后产生 [占比太高的不产生]
	---song_id_sp2_dve442.wav // 手动调整之后产生
	---song_id_sp2_dve442_replace.wav // 替换之后产生
	---song_id_sp2_dve442_replace_mix.wav // 人声+伴奏混合之后产生
	---song_id
	--acc.mp3 // 44k双声道320k
	--vocal.mp3 // 44k双声道320k
	--src.mp3 // 44k双声道320k
	--song_id_sp2_dv.mp3 // 44k单声道320k
	---song_id_out // 对外输出
	--src.mp3 // 原始音频
	--song_id_sp2_dv_replace_mix.mp3 // 制作完成的音频

	环境安装:
	conda create -n auto_song_cover python=3.9
	# 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt]
	# 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt]
	pip install librosa
	pip install scikit-maad
	pip install praat-parselmouth
	pip install matplotlib
	pip install torchvision
	pip install madmom
	pip install torchstat
	环境设置:
	export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin
	export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame
	"""

	import os
	import time
	import shutil
	import random
	import logging
	import librosa

	# Module-wide logging setup: INFO and above goes to a fixed file under /tmp.
	logging.basicConfig(filename='/tmp/inference.log', level=logging.INFO)

	# Error codes returned by the SongCoverInference pipeline steps; 0 means success.
	gs_err_code_success = 0
	gs_err_code_no_src_mp3 = 1
	gs_err_code_separate = 2
	gs_err_code_trans_32 = 3
	gs_err_code_encode_err = 4
	gs_err_code_replace_err = 5
	gs_err_code_replace_trans_err = 6
	gs_err_code_mix_err = 7
	gs_err_code_mix_transcode_err = 8
	gs_err_code_no_src_dir = 9
	gs_err_code_volume_err = 10
	gs_err_code_trans2_442 = 11
	gs_err_code_reverb = 12
	gs_err_code_no_good_choice = 13
	gs_err_code_preprocess_vocal = 14
	gs_err_code_replace_except_err = 15

	# Absolute paths of the external command-line tools invoked through os.system().
	gs_denoise_exe = "/opt/soft/bin/denoise_exe"
	gs_draw_volume_exe = "/opt/soft/bin/draw_volume"
	gs_simple_mixer_path = "/opt/soft/bin/simple_mixer"
	gs_rever_path = "/opt/soft/bin/dereverbrate"

	from ref.music_remover.separate_interface import SeparateInterface
	from ref.so_vits_svc.inference_main import *
	from ref.split_dirty_frame.script.process_one import ReplaceVocalFrame, construct_power_fragment


	class SongCoverInference:
	def __init__(self):
	self.work_dir = None
	self.cache_dir = None
	self.cid = None
	self.src_mp3 = None
	self.vocal_path = None
	self.vocal_32_path = None
	self.acc_path = None
	self.speakers = [
	10414574138721494,
	10414574140317353,
	1688849864840588,
	3634463651,
	5629499489839033,
	5910973794723621,
	6755399374234747,
	8162774327817435,
	8162774329368194,
	1125899914308640, # 以下为男声,包括这个
	12384898975368914,
	12947848931397021,
	3096224748076687,
	3096224751151928,
	5066549357604730,
	5348024335101054,
	6755399442719465,
	7036874421386111
	]

	self.speakers2gender = {
	10414574138721494: 2,
	10414574140317353: 2,
	1688849864840588: 2,
	3634463651: 2,
	5629499489839033: 2,
	5910973794723621: 2,
	6755399374234747: 2,
	8162774327817435: 2,
	8162774329368194: 2,
	1125899914308640: 1, # 1是男
	12384898975368914: 1,
	12947848931397021: 1,
	3096224748076687: 1,
	3096224751151928: 1,
	5066549357604730: 1,
	5348024335101054: 1,
	6755399442719465: 1,
	7036874421386111: 1
	}
	self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth"
	self.speakers_model_config = "data/train_users/{}/config/config.json"

	st = time.time()
	self.separate_inst = None
	logging.info("post process ... ReplaceVocalFrame init sp={}".format(time.time() - st))
	self.replace_vocal_frame_inst = None
	logging.info("SongCoverInference init sp={}".format(time.time() - st))

	def separate(self, cid, src_mp3, vocal_path, acc_path):
	    """
	    Split the source song into vocal and accompaniment tracks.

	    After the separation, the vocal track is additionally transcoded with
	    ffmpeg to a 32 kHz mono file at self.vocal_32_path.

	    :param cid: song id, forwarded to the separator and used in the log line
	    :param src_mp3: path of the source audio
	    :param vocal_path: where the separated vocal is expected to appear
	    :param acc_path: where the separated accompaniment is expected to appear
	    :return: gs_err_code_success, gs_err_code_separate when the separator
	             fails or an output file is missing, gs_err_code_trans_32 when
	             the 32 kHz file was not produced
	    """
	    st = time.time()
	    # The separator is built lazily on first use.
	    if self.separate_inst is None:
	        self.separate_inst = SeparateInterface()
	    if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path):
	        return gs_err_code_separate
	    if not os.path.exists(vocal_path) or not os.path.exists(acc_path):
	        return gs_err_code_separate

	    # Transcode the vocal to 32 kHz mono.
	    cmd = "ffmpeg -i {} -ar 32000 -ac 1 -y {} -loglevel fatal".format(vocal_path, self.vocal_32_path)
	    os.system(cmd)
	    if not os.path.exists(self.vocal_32_path):
	        return gs_err_code_trans_32
	    # "\|" was an invalid escape sequence; "\\|" emits the same text without the warning.
	    print("separate:cid={}\\|sp={}".format(cid, time.time() - st))
	    return gs_err_code_success

	def get_start_ms(self, vocal_path):
	"""
	Given the original vocal audio, look for the start of a continuous stretch of singing.

	NOTE(review): despite the "_ms" suffix, the returned value is in seconds
	(it comes from frame indices multiplied by 0.01 and from len(audio) / sr).

	:param vocal_path: path of the vocal audio; it is loaded at 16 kHz
	:return: start position of the first qualifying fragment, or -1 when none qualifies
	"""
	audio, sr = librosa.load(vocal_path, sr=16000)
	audio = librosa.util.normalize(audio)
	# Original note: frame length 100 ms, hop 10 ms, compute the energy.
	# NOTE(review): the code averages |x| over 160 samples (10 ms at 16 kHz) per step;
	# only the loop bound uses 1600 samples - confirm which window was intended.
	power_arr = []
	for i in range(0, len(audio) - 1600, 160):
	power_arr.append(np.sum(np.abs(audio[i:i + 160])) / 160)
	# Original note: turn the parts whose energy is <= 10 into segments.
	# construct_power_fragment is defined elsewhere; below, each item is read as
	# (start, duration) in 10 ms units.
	power_arr = construct_power_fragment(power_arr)
	fragments = []
	last_pos = 0
	# Build the complementary fragments [start_sec, duration_sec] lying between the returned segments.
	for idx, line in enumerate(power_arr):
	start = round(float(line[0]) * 0.01, 3)
	duration = round(float(line[1]) * 0.01, 3)
	fragments.append([last_pos, start - last_pos])
	last_pos = start + duration
	if last_pos < len(audio) / sr:
	fragments.append([last_pos, len(audio) / sr - last_pos])

	# Merge neighbouring fragments whose gap is below 50 ms.
	idx = 0
	while idx < len(fragments) - 1:
	if fragments[idx + 1][0] - (fragments[idx][0] + fragments[idx][1]) < 0.05:
	fragments[idx][1] = fragments[idx + 1][0] + fragments[idx + 1][1] - fragments[idx][0]
	del fragments[idx + 1]
	# Step back so the merged fragment is compared with its new neighbour next.
	idx -= 1
	idx += 1

	# out_file = vocal_path + "_power.csv"
	# with open(out_file, "w") as f:
	# f.write("Name\tStart\tDuration\tTime Format\tType\n")
	# for fragment in fragments:
	# start = round(float(fragment[0]), 3)
	# duration = round(float(fragment[1]), 3)
	# strr = "{}\t{}\t{}\t{}\n".format("11", start, duration, "decimal\tCue\t")
	# f.write(strr)

	# Pick the start position.
	# Original note: 1. continuous duration above 10 s, current fragment longer than 3 s; 2. not usable.
	# NOTE(review): no 10 s check is visible below; only the 3 s length and the 0.1 ratio are tested.
	# tot_vocal_duration[idx] is the summed duration of fragments 0..idx inclusive.
	# NOTE(review): fragments[0] raises IndexError when no fragment was built (e.g. empty audio).
	tot_vocal_duration = [fragments[0][1]]
	for i in range(1, len(fragments)):
	tot_vocal_duration.append(tot_vocal_duration[i - 1] + fragments[i][1])

	# For each candidate i (at least 3 s long) and each later fragment j, compute the share of
	# the span from the start of i to the end of j that is covered by fragments.
	for i in range(0, len(fragments)):
	if fragments[i][1] >= 3:
	now_tot = 0
	if i > 0:
	now_tot = tot_vocal_duration[i - 1]
	for j in range(i + 1, len(fragments)):
	cur_rate = tot_vocal_duration[j] - now_tot
	cur_rate = cur_rate / (fragments[j][1] + fragments[j][0] - fragments[i][0])
	if cur_rate > 0.1:
	return fragments[i][0]
	return -1

	def inference_speaker(self):
	"""
	推理生成合成后的音频
	随机取5个干声,选择占比最小的，并且要求占比小于0.3
	:return:
	"""
	st = time.time()
	out_speakers = random.sample(self.speakers, 15)
	out_songs_dict = {}
	for speaker in out_speakers:
	model_path = self.speakers_model_path.format(speaker)
	config_path = self.speakers_model_config.format(speaker)
	song_path = os.path.join(self.cache_dir, "{}_{}.wav".format(self.cid, speaker))
	try:
	inf(model_path, config_path, self.vocal_32_path, song_path, "prod")
	except Exception as ex:
	logging.info("cid={}, inference_speaker err={}".format(self.cid, ex))
	continue
	if os.path.exists(song_path):
	if self.replace_vocal_frame_inst is None:
	self.replace_vocal_frame_inst = ReplaceVocalFrame(
	"data/models/split_dirty_frame_v5_3_epoch3_852.pth")
	rate = self.replace_vocal_frame_inst.get_rate(song_path)
	if rate < 0.3:
	out_songs_dict[song_path] = rate

	# 从内部选择占比最低的
	out_songs = []
	if len(out_songs_dict.keys()) > 0:
	st_sec = self.get_start_ms(self.vocal_path)
	song_msg = sorted(out_songs_dict.items(), key=lambda kv: kv[1])[0]
	out_songs = [song_msg[0]]
	logging.info("GetRate:cid={},song={},rate={},st_tm={}".format(self.cid, song_msg[0], round(song_msg[1], 2),
	round(st_sec, 3)))
	print("GetRate:cid={},song={},rate={},st_tm={}".format(self.cid, song_msg[0], round(song_msg[1], 2),
	round(st_sec, 3)))
	# logging.info("inference_speaker len = {} finish sp = {}".format(len(out_songs), time.time() - st))
	print("inference_speaker len = {} finish sp = {}".format(len(out_songs), time.time() - st))
	return out_songs

	def get_new_vocal_rate(self, songs):
	"""
	获取人声的比率
	:param songs:
	:return:
	"""
	st = time.time()
	need_to_process_song = []
	for song in songs:
	if self.replace_vocal_frame_inst is None:
	self.replace_vocal_frame_inst = ReplaceVocalFrame("data/models/split_dirty_frame_v5_3_epoch3_852.pth")
	rate = self.replace_vocal_frame_inst.get_rate(song)
	logging.info("{} {} replace_rate={}".format(self.cid, song, rate))
	if rate < 1.0:
	need_to_process_song.append(song)
	logging.info(
	"get_new_vocal_rate belen = {} len = {} finish sp = {}".format(len(songs), len(need_to_process_song),
	time.time() - st))
	return need_to_process_song

	def preprocess_vocal(self, songs, vocal_path):
	"""
	1. 降噪
	2. 拉伸
	:param songs:
	:param vocal_path: 参考的音频信号
	:return:
	"""
	st = time.time()
	dv_out_list = []
	for song in songs:
	denoise_path = str(song).replace(".wav", "_d.wav")
	cmd = "{} {} {}".format(gs_denoise_exe, song, denoise_path)
	os.system(cmd)
	if not os.path.exists(denoise_path):
	print("{} {} ERROR denoise".format(self.cid, song))
	continue
	# 拉伸
	volume_path = str(song).replace(".wav", "_dv.wav")
	cmd = "{} {} {} {}".format(gs_draw_volume_exe, denoise_path, vocal_path, volume_path)
	os.system(cmd)
	if not os.path.exists(volume_path):
	print("{} {} ERROR denoise".format(self.cid, volume_path))
	continue
	dv_out_list.append(volume_path)
	print(
	"preprocess_vocal belen = {} len = {} finish sp = {}".format(len(songs), len(dv_out_list),
	time.time() - st))
	return dv_out_list

	def output(self, dv_out_list):
	    """
	    Export the material for manual review into <work_dir>/<cid>.

	    The directory is recreated from scratch and receives a copy of the
	    source mp3, 320k encodings of the accompaniment and the vocal, and a
	    44.1 kHz mono 320k mp3 for every file in dv_out_list.

	    :param dv_out_list: wav files to export for manual annotation
	    :return: gs_err_code_encode_err when the accompaniment or vocal could
	             not be encoded, otherwise gs_err_code_success
	    """
	    st = time.time()
	    out_dir = os.path.join(self.work_dir, self.cid)
	    if os.path.exists(out_dir):
	        shutil.rmtree(out_dir)
	    os.makedirs(out_dir)

	    # Copy / encode the reference material.
	    # The copy was previously named "src_mp3" (underscore), which is not a usable mp3 name.
	    dst_mp3_path = os.path.join(out_dir, "src.mp3")
	    dst_acc_path = os.path.join(out_dir, "acc.mp3")
	    dst_vocal_path = os.path.join(out_dir, "vocal.mp3")
	    shutil.copyfile(self.src_mp3, dst_mp3_path)
	    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.acc_path, dst_acc_path)
	    os.system(cmd)
	    if not os.path.exists(dst_acc_path):
	        return gs_err_code_encode_err
	    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.vocal_path, dst_vocal_path)
	    os.system(cmd)
	    if not os.path.exists(dst_vocal_path):
	        return gs_err_code_encode_err

	    # Put every candidate into out_dir for manual annotation.
	    for dv_wav in dv_out_list:
	        dv_wav_name = str(dv_wav).split("/")[-1].replace(".wav", "_441.mp3")
	        dst_dv_path = os.path.join(out_dir, dv_wav_name)

	        cmd = "ffmpeg -i {} -ar 44100 -ac 1 -ab 320k -y {} -loglevel fatal".format(dv_wav, dst_dv_path)
	        os.system(cmd)
	        if not os.path.exists(dst_dv_path):
	            # Best effort: a failed candidate is reported and skipped.
	            print("{} encode err!".format(cmd))
	            continue
	    logging.info(
	        "preprocess_vocal output sp = {}".format(time.time() - st))
	    # Explicit success code so every path returns an error code.
	    return gs_err_code_success

	def process_one(self, cid, work_dir, enable_output=False):
	    """
	    Run the complete legacy pipeline for one song.

	    The cache directory under work_dir is recreated, the source is
	    separated, a converted vocal is chosen and preprocessed, and then the
	    result is either exported for review (enable_output=True) or
	    post-processed and mixed (default).

	    :param cid: song id
	    :param work_dir: directory that must contain "src.mp3"
	    :param enable_output: export for manual review instead of mixing
	    :return: (err_code, mix_mp3_path, gender); mix_mp3_path is None and
	             gender is -1 when no mix was produced
	    """
	    logging.info("\nstart:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir))
	    self.cid = cid
	    self.work_dir = work_dir

	    # Everything that is not delivered externally lives in the cache directory.
	    self.cache_dir = os.path.join(work_dir, "cache")
	    if os.path.exists(self.cache_dir):
	        shutil.rmtree(self.cache_dir)
	    os.makedirs(self.cache_dir)

	    self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
	    if not os.path.exists(self.src_mp3):
	        # Was a bare int; callers unpack three values like on every other path.
	        return gs_err_code_no_src_mp3, None, None
	    self.vocal_path = os.path.join(self.cache_dir, "vocal.wav")
	    self.vocal_32_path = os.path.join(self.cache_dir, "vocal_32.wav")
	    self.acc_path = os.path.join(self.cache_dir, "acc.wav")

	    # Defined up front so the gender lookup below never reads an unset name.
	    err = gs_err_code_success
	    if not os.path.exists(self.vocal_32_path):
	        logging.info("start separate ... {} {} {}".format(self.src_mp3, self.vocal_path, self.acc_path))
	        err = self.separate(cid, self.src_mp3, self.vocal_path, self.acc_path)
	        if err != gs_err_code_success:
	            return err, None, None
	    logging.info("start inference_speaker ...")
	    out_songs = self.inference_speaker()
	    dv_out_list = self.preprocess_vocal(out_songs, self.vocal_path)
	    if len(dv_out_list) == 0:
	        return gs_err_code_no_good_choice, None, None

	    mix_mp3_path = None
	    gender = -1
	    if enable_output:
	        self.output(dv_out_list)
	    else:
	        # By default every candidate is post-processed.
	        for dv_out_path in dv_out_list:
	            src_path = dv_out_path.replace("_dv.wav", ".wav")
	            err, mix_mp3_path = self.after_process(self.cid, self.work_dir, src_path, dv_out_path, self.vocal_path,
	                                                   self.acc_path,
	                                                   True, False)
	            if err != gs_err_code_success:
	                logging.info("after_process err {}".format(err))

	    # Look up the gender from the speaker id embedded in the mix file name.
	    if err == gs_err_code_success and mix_mp3_path is not None:
	        gender = self.speakers2gender[int(str(os.path.basename(mix_mp3_path)).split("_")[1])]
	    logging.info("finish:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir))
	    # NOTE(review): success is reported even when after_process failed
	    # (mix_mp3_path is then None); kept as-is for existing callers.
	    return gs_err_code_success, mix_mp3_path, gender

	def reverb_by_vocal(self, file):
	    """
	    Run the reverb tool on a converted vocal, using self.vocal_path as the reference.

	    The input is first transcoded to 44.1 kHz stereo ("<name>_442.wav",
	    reused when it already exists); the tool then writes "<name>_442_dr.wav".

	    :param file: wav path of the converted vocal
	    :return: (err_code, path of the "_442_dr.wav" file, or None on failure)
	    """
	    began = time.time()

	    stereo_path = file.replace(".wav", "_442.wav")
	    if not os.path.exists(stereo_path):
	        os.system("ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(file, stereo_path))
	    if not os.path.exists(stereo_path):
	        return gs_err_code_trans2_442, None

	    reverb_path = file.replace(".wav", "_442_dr.wav")
	    os.system("{} {} {} {}".format(gs_rever_path, self.vocal_path, stereo_path, reverb_path))
	    if not os.path.exists(reverb_path):
	        return gs_err_code_reverb, None

	    print("cid = {}, reverb_by_vocal sp={}".format(self.cid, time.time() - began))
	    return gs_err_code_success, reverb_path

	def after_process(self, cid, work_dir, in_file, effect_file, vocal_file, acc_file, need_draw=True,
	                  need_reverb=True):
	    """
	    Post-process a converted vocal: replace frames, mix with the accompaniment, encode.

	    Steps: optional reverb, optional volume matching, frame replacement,
	    transcode to 44.1 kHz stereo, a second volume matching, mixing with
	    the accompaniment and encoding to a 320k mp3 in "<work_dir>/<cid>_out".

	    :param cid: song id
	    :param work_dir: working directory of the song
	    :param in_file: converted vocal before effects
	    :param effect_file: vocal with effects; overwritten by the reverb
	                        result when need_reverb is set
	    :param vocal_file: original vocal used as the reference
	    :param acc_file: accompaniment to mix with
	    :param need_draw: run the volume tool on the effect file first
	    :param need_reverb: run reverb_by_vocal on in_file first
	    :return: (err_code, path of the final mp3, or None on failure)
	    """
	    if need_reverb:
	        # Apply the reverb.
	        err, effect_file = self.reverb_by_vocal(in_file)
	        if err != gs_err_code_success:
	            return err, None

	    if need_draw:
	        # Extra volume-matching step.
	        volume_path = str(effect_file).replace(".wav", "_dv.wav")
	        cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, vocal_file, volume_path)
	        print(cmd)
	        os.system(cmd)
	        if not os.path.exists(volume_path):
	            print("{} {} ERROR draw volume".format(self.cid, volume_path))
	            return gs_err_code_volume_err, None
	        effect_file = volume_path

	    st = time.time()
	    self.cid = cid
	    self.work_dir = work_dir
	    self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
	    if not os.path.exists(self.work_dir):
	        # Was a bare int; every other path returns (err, path).
	        return gs_err_code_no_src_dir, None
	    # Same lazy init as mix(): this method may run before any rating was done.
	    if self.replace_vocal_frame_inst is None:
	        self.replace_vocal_frame_inst = ReplaceVocalFrame("data/models/split_dirty_frame_v5_3_epoch3_852.pth")
	    self.replace_vocal_frame_inst.process(in_file, effect_file, vocal_file)
	    dst_path = effect_file + "_replace.wav"
	    if not os.path.exists(dst_path):
	        return gs_err_code_replace_err, None
	    print("replace_vocal_frame_inst sp = {}".format(time.time() - st))

	    # Transcode to 44.1 kHz stereo.
	    dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
	    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442)
	    os.system(cmd)
	    if not os.path.exists(dst_path_442):
	        return gs_err_code_replace_trans_err, None

	    # Match the volume once more after transcoding to keep the loudness.
	    # Only the trailing suffix is renamed, as in mix(), so the later
	    # replacements still match.
	    volume_path = str(dst_path_442).replace("_replace442.wav", "_replace442_dv.wav")
	    cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, vocal_file, volume_path)
	    print(cmd)
	    os.system(cmd)
	    if not os.path.exists(volume_path):
	        print("{} {} ERROR draw volume".format(self.cid, volume_path))
	        return gs_err_code_volume_err, None
	    dst_path_442 = volume_path

	    # Mix with the accompaniment. The old replacement pattern never
	    # matched, so the mixer output path was equal to its input.
	    mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav")
	    cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, acc_file, mix_path)
	    print("{}".format(cmd))
	    os.system(cmd)
	    if not os.path.exists(mix_path):
	        return gs_err_code_mix_err, None

	    # Encode to mp3 in the output directory.
	    output_dir = os.path.join(self.work_dir, self.cid + "_out")
	    if not os.path.exists(output_dir):
	        os.makedirs(output_dir)
	    name = str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3").split("/")[-1]
	    mix_path_mp3 = os.path.join(output_dir, name)
	    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
	    os.system(cmd)
	    if not os.path.exists(mix_path_mp3):
	        return gs_err_code_mix_transcode_err, None

	    return gs_err_code_success, mix_path_mp3

	####################################新对外接口############################################################
	def prepare_env(self, cid, work_dir, create_dir=False):
	    """
	    Bind this object to one song and derive all of its working paths.

	    :param cid: song id
	    :param work_dir: directory that must contain "src.mp3"
	    :param create_dir: when True, wipe and recreate "<work_dir>/cache"
	    :return: gs_err_code_no_src_mp3 when the source file is missing
	             (the vocal/accompaniment paths are then left untouched),
	             otherwise gs_err_code_success
	    """
	    self.cid, self.work_dir = cid, work_dir

	    # Everything that is not delivered externally lives in the cache directory.
	    cache = os.path.join(work_dir, "cache")
	    self.cache_dir = cache
	    if create_dir:
	        if os.path.exists(cache):
	            shutil.rmtree(cache)
	        os.makedirs(cache)

	    self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
	    if not os.path.exists(self.src_mp3):
	        return gs_err_code_no_src_mp3

	    self.vocal_path = os.path.join(cache, "vocal.wav")
	    self.vocal_32_path = os.path.join(cache, "vocal_32.wav")
	    self.acc_path = os.path.join(cache, "acc.wav")
	    return gs_err_code_success

	def generate_svc_file(self, cid, work_dir):
	"""
	Separate the source song and produce one voice-converted (svc) vocal file.

	:param cid: song id
	:param work_dir: working directory of the song; its cache directory is recreated
	:return: (err_code, path of the generated svc file); the path is None on error
	"""
	err = self.prepare_env(cid, work_dir, create_dir=True)
	if err != gs_err_code_success:
	return err, None

	# Source separation (skipped when the 32 kHz vocal already exists).
	if not os.path.exists(self.vocal_32_path):
	st = time.time()
	err = self.separate(cid, self.src_mp3, self.vocal_path, self.acc_path)
	logging.info("cid={},separate,sp={}".format(self.cid, time.time() - st))
	if err != gs_err_code_success:
	return err, None

	# Generate the svc candidates; only the single best one is kept.
	st = time.time()
	out_songs = self.inference_speaker()
	if len(out_songs) == 0:
	return gs_err_code_no_good_choice, None
	logging.info("cid={},inference_speaker,{},sp={}".format(self.cid, out_songs[0], time.time() - st))
	# The diff lines below remove the vocal preprocessing from this method;
	# the raw svc file is returned instead.
	-
	- # 预处理人声
	- dv_out_list = self.preprocess_vocal(out_songs, self.vocal_path)
	- if len(dv_out_list) == 0:
	- return gs_err_code_preprocess_vocal, None
	- return gs_err_code_success, dv_out_list[0]
	+ return gs_err_code_success, out_songs[0]

	def effect(self, cid, work_dir, svc_file):
	# Preprocess a svc vocal and apply the reverb step to it.
	# Returns (err_code, effect_file); effect_file is None on failure.
	st = time.time()
	err = self.prepare_env(cid, work_dir)
	if err != gs_err_code_success:
	return err, None
	# NOTE(review): despite the "effect_and_mix" label, this only times prepare_env().
	logging.info("cid={},effect_and_mix,{},sp={}".format(self.cid, svc_file, time.time() - st))
	# The diff lines below add the vocal preprocessing here; svc_file is
	# then rebound to the preprocessed "_dv.wav" file.
	+
	+ # 预处理人声
	+ dv_out_list = self.preprocess_vocal([svc_file], self.vocal_path)
	+ if len(dv_out_list) == 0:
	+ return gs_err_code_preprocess_vocal, None
	+ svc_file = dv_out_list[0]
	+
	# Apply the audio effect (reverb).
	st = time.time()
	err, effect_file = self.reverb_by_vocal(svc_file)
	if err != gs_err_code_success:
	return err, None
	logging.info("cid={},reverb_by_vocal,{},sp={}".format(self.cid, svc_file, time.time() - st))
	return err, effect_file

	def mix(self, cid, work_dir, svc_file, effect_file):
	    """
	    Post-process the effected vocal and mix it with the accompaniment.

	    Steps: volume matching, frame replacement, transcode to 44.1 kHz
	    stereo, a second volume matching, mixing with self.acc_path and
	    encoding to a 320k mp3 in "<work_dir>/<cid>_out".

	    :param cid: song id
	    :param work_dir: working directory of the song
	    :param svc_file: converted vocal before effects
	    :param effect_file: converted vocal after effects
	    :return: (err_code, path of the finished mp3, or None on failure)
	    """
	    st = time.time()
	    err = self.prepare_env(cid, work_dir)
	    if err != gs_err_code_success:
	        return err, None
	    logging.info("cid={},effect_and_mix,{},sp={}".format(self.cid, svc_file, time.time() - st))

	    # Volume matching against the original vocal.
	    st = time.time()
	    volume_path = str(effect_file).replace(".wav", "_dv.wav")
	    cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, self.vocal_path, volume_path)
	    os.system(cmd)
	    if not os.path.exists(volume_path):
	        print("{} {} ERROR draw volume".format(self.cid, volume_path))
	        return gs_err_code_volume_err, None
	    effect_file = volume_path
	    logging.info("cid={},draw_volume,{},sp={}".format(self.cid, svc_file, time.time() - st))

	    # Frame replacement.
	    st = time.time()
	    try:
	        # The model is loaded lazily: mix() may run in a fresh process.
	        if self.replace_vocal_frame_inst is None:
	            self.replace_vocal_frame_inst = ReplaceVocalFrame("data/models/split_dirty_frame_v5_3_epoch3_852.pth")
	        self.replace_vocal_frame_inst.process(svc_file, effect_file, self.vocal_path)
	    except Exception as ex:
	        # The message used "{}" placeholders with %-style logging args and
	        # was never formatted; format it explicitly instead.
	        logging.info("{},replace_vocal_frame_inst, {}".format(self.cid, ex))
	        return gs_err_code_replace_except_err, None
	    dst_path = effect_file + "_replace.wav"
	    if not os.path.exists(dst_path):
	        return gs_err_code_replace_err, None
	    logging.info("cid={},replace_vocal_frame_inst,{},sp={}".format(self.cid, svc_file, time.time() - st))

	    # Transcode to 44.1 kHz stereo.
	    st = time.time()
	    dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
	    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442)
	    os.system(cmd)
	    if not os.path.exists(dst_path_442):
	        return gs_err_code_replace_trans_err, None
	    logging.info("cid={},transcode,{},sp={}".format(self.cid, svc_file, time.time() - st))

	    # Match the volume once more after transcoding to keep the loudness.
	    st = time.time()
	    volume_path = str(dst_path_442).replace("_replace442.wav", "_replace442_dv.wav")
	    cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, self.vocal_path, volume_path)
	    os.system(cmd)
	    if not os.path.exists(volume_path):
	        print("{} {} ERROR draw volume".format(self.cid, volume_path))
	        return gs_err_code_volume_err, None
	    dst_path_442 = volume_path
	    logging.info("cid={},draw_volume2,{},sp={}".format(self.cid, svc_file, time.time() - st))

	    # Mix with the accompaniment.
	    st = time.time()
	    mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav")
	    cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, self.acc_path, mix_path)
	    os.system(cmd)
	    if not os.path.exists(mix_path):
	        return gs_err_code_mix_err, None
	    logging.info("cid={},mixer,{},sp={}".format(self.cid, svc_file, time.time() - st))

	    # Encode to mp3 in the output directory.
	    st = time.time()
	    output_dir = os.path.join(self.work_dir, self.cid + "_out")
	    if not os.path.exists(output_dir):
	        os.makedirs(output_dir)
	    name = str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3").split("/")[-1]
	    mix_path_mp3 = os.path.join(output_dir, name)
	    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
	    print(cmd)
	    os.system(cmd)
	    if not os.path.exists(mix_path_mp3):
	        return gs_err_code_mix_transcode_err, None
	    logging.info("cid={},encode,{},sp={}".format(self.cid, svc_file, time.time() - st))
	    return gs_err_code_success, mix_path_mp3

	def get_gender(self, svc_file):
	- return self.speakers2gender[int(os.path.basename(svc_file).split("_")[1])]
	+ return self.speakers2gender[int(os.path.basename(svc_file.replace(".wav", "")).split("_")[1])]

	def process_one_logic(self, cid, work_dir):
	    """
	    Run the staged pipeline for one song.

	    1. generate_svc_file: separation plus inference, keeping the best result.
	    2. effect and mix: apply effects to that result and mix it.

	    :param cid: song id
	    :param work_dir: working directory of the song
	    :return: (err_code, path, gender). The path is the final mp3 after a
	             full run, otherwise whatever svc path the failing stage left
	             behind; gender stays -1 when the first stage fails.
	    """
	    status, svc_file = self.generate_svc_file(cid, work_dir)
	    if status != gs_err_code_success:
	        return status, svc_file, -1

	    gender = self.get_gender(svc_file)

	    status, effect_file = self.effect(cid, work_dir, svc_file)
	    if status != gs_err_code_success:
	        return status, svc_file, gender

	    status, mix_mp3_path = self.mix(cid, work_dir, svc_file, effect_file)
	    return status, mix_mp3_path, gender


	def test():
	    """Manual smoke test: run the staged pipeline on a fixed list of song ids."""
	    song_ids = [
	        # "611752105020343687",
	        # "611752105023532439",
	        # "611752105030419688",
	        # "611752105030485748",
	        "611752105030485685"
	    ]
	    base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test"
	    pipeline = SongCoverInference()
	    for cid in song_ids:
	        began = time.time()
	        song_dir = os.path.join(base_dir, cid)
	        # err, mix_mp3, gender = pipeline.process_one(cid, song_dir, False)
	        err, mix_mp3, gender = pipeline.process_one_logic(cid, song_dir)
	        print(mix_mp3, gender)
	        print("cid={} RealFinish err={} sp={}".format(cid, err, time.time() - began))


	# Run the manual smoke test only when this file is executed as a script.
	if __name__ == '__main__':
	test()
	diff --git a/AutoCoverTool/script/get_song_url.py b/AutoCoverTool/script/get_song_url.py
	index 6327796..1e5314f 100644
	--- a/AutoCoverTool/script/get_song_url.py
	+++ b/AutoCoverTool/script/get_song_url.py
	@@ -1,379 +1,280 @@
	"""
	获取歌曲的地址

	# song_src=2 是来源108和109的歌曲，未被洗过的
	# song_src=1 是曲库给的
	# song_src=3 # 用于轻变调的
	"""
	from script.common import *
	from copy import deepcopy
	from online.common import update_db


	def get_url_by_song_id(song_id):
	    """
	    Look up the task url of a song in the "silence" table.

	    The query runs against the "starmaker_musicbook" database, using a copy
	    of the shared connection settings.

	    :param song_id: value matched against starmaker_songid
	    :return: task_url of the row with the lowest task_id, or None when no row matches
	    """
	    # NOTE(review): song_id is interpolated into the SQL text; confirm it is
	    # always a trusted numeric id, or switch to a parameterized query.
	    query = "select task_url,starmaker_songid from silence where starmaker_songid = {} order by task_id limit 1".format(
	        song_id)
	    # Work on a copy so the shared connection settings are not modified.
	    db_conf = deepcopy(banned_user_map)
	    db_conf["db"] = "starmaker_musicbook"
	    rows = get_data_by_mysql(query, db_conf)
	    return rows[0][0] if len(rows) > 0 else None


	def process():
	arr = [
	- "611752105020332343",
	- "611752105022647065",
	- "611752105022704186",
	- "611752105022729268",
	- "611752105022736024",
	- "611752105022739648",
	- "611752105022739650",
	- "611752105022741712",
	- "611752105022743896",
	- "611752105022746068",
	- "611752105022747108",
	- "611752105022757968",
	- "611752105022763880",
	- "611752105022763884",
	- "611752105022764688",
	- "611752105022764801",
	- "611752105022766341",
	- "611752105022767186",
	- "611752105022770004",
	- "611752105022770306",
	- "611752105022773633",
	- "611752105022773776",
	- "611752105022774127",
	- "611752105022774502",
	- "611752105022775091",
	- "611752105022775486",
	- "611752105022775907",
	- "611752105022776719",
	- "611752105022776721",
	- "611752105022776761",
	- "611752105022776857",
	- "611752105022777051",
	- "611752105022777076",
	- "611752105022777328",
	- "611752105022777573",
	- "611752105022777607",
	- "611752105022777608",
	- "611752105022777611",
	- "611752105022777835",
	- "611752105022780287",
	- "611752105022781374",
	- "611752105022785018",
	- "611752105022785313",
	- "611752105022812895",
	- "611752105022825467",
	- "611752105022837452",
	- "611752105022837464",
	- "611752105022840319",
	- "611752105022840637",
	- "611752105022841089",
	- "611752105022841355",
	- "611752105022842184",
	- "611752105022843089",
	- "611752105022843139",
	- "611752105022843331",
	- "611752105022843710",
	- "611752105022843728",
	- "611752105022876795",
	- "611752105022973113",
	- "611752105023184121",
	- "611752105023234496",
	- "611752105023258864",
	- "611752105023262008",
	- "611752105023301455",
	- "611752105023306231",
	- "611752105023329571",
	- "611752105023411931",
	- "611752105023449798",
	- "611752105023458990",
	- "611752105023610603",
	- "611752105023678577",
	- "611752105023683357",
	- "611752105023841037",
	- "611752105023929521",
	- "611752105024170140",
	- "611752105024466658",
	- "611752105024683212",
	- "611752105024765795",
	- "611752105024766050",
	- "611752105025475926",
	- "611752105025486355",
	- "611752105025503613",
	- "611752105025506533",
	- "611752105025515144",
	- "611752105025521388",
	- "611752105025524664",
	- "611752105025524932",
	- "611752105025526555",
	- "611752105025542775",
	- "611752105025542802",
	- "611752105025543710",
	- "611752105025555350",
	- "611752105025558173",
	- "611752105025565020",
	- "611752105025565029",
	- "611752105025565034",
	- "611752105025578884",
	- "611752105025581305",
	- "611752105026003288",
	- "611752105026090255",
	- "611752105026152320",
	- "611752105026180638",
	- "611752105026180797",
	- "611752105026205984",
	- "611752105026227884",
	- "611752105026343282",
	- "611752105026417620",
	- "611752105026449246",
	- "611752105026462848",
	- "611752105026533657",
	- "611752105026577993",
	- "611752105026614487",
	- "611752105026666894",
	- "611752105026666899",
	- "611752105026666904",
	- "611752105026666918",
	- "611752105026666950",
	- "611752105026666964",
	- "611752105026666995",
	- "611752105026667014",
	- "611752105026667025",
	- "611752105027030955",
	- "611752105027216307",
	- "611752105027228689",
	- "611752105027228702",
	- "611752105027460125",
	- "611752105027802526",
	- "611752105027854263",
	- "611752105028204403",
	- "611752105028408823",
	- "611752105028477541",
	- "611752105028558157",
	- "611752105028593043",
	- "611752105028793344",
	- "611752105028820643",
	- "611752105028820644",
	- "611752105028858622",
	- "611752105028878359",
	- "611752105028916096",
	- "611752105028916098",
	- "611752105028990740",
	- "611752105029006327",
	- "611752105029047058",
	- "611752105029054046",
	- "611752105029059915",
	- "611752105029204262",
	- "611752105029291293",
	- "611752105029306974",
	- "611752105029372452",
	- "611752105029648535",
	- "611752105030146069",
	- "611752105030483301",
	- "611752105030483312",
	- "611752105030499117",
	- "611752105030499185",
	- "611752105030499265",
	- "611752105030499310",
	- "611752105030503847",
	- "611752105030547499",
	- "611752105030547630",
	- "611752105030547632",
	- "611752105030547638",
	- "611752105030557261",
	- "611752105030557355",
	- "611752105030558663",
	- "611752105030559471",
	- "611752105030562192",
	- "611752105030562194",
	- "611752105030562196",
	- "611752105030562197",
	- "611752105030562199",
	- "611752105030562203",
	- "611752105030562205",
	- "611752105030562209",
	- "611752105030562211",
	- "611752105030562213",
	- "611752105030562214",
	- "611752105030562218",
	- "611752105030562221",
	- "611752105030562227",
	- "611752105030562228",
	- "611752105030562231",
	- "611752105030562234",
	- "611752105030562236",
	- "611752105030562239",
	- "611752105030562243",
	- "611752105030562245",
	- "611752105030562248",
	- "611752105030562251",
	- "611752105030562254",
	- "611752105030562255",
	- "611752105030562259",
	- "611752105030562262",
	- "611752105030562263",
	- "611752105030562266",
	- "611752105030562268",
	- "611752105030562271",
	- "611752105030562274",
	- "611752105030562277",
	- "611752105030562286",
	- "611752105030562289",
	- "611752105030562291",
	- "611752105030562296",
	- "611752105030562302",
	- "611752105030562303",
	- "611752105030562306",
	- "611752105030562311",
	- "611752105030562314",
	- "611752105030562316",
	- "611752105030562322",
	- "611752105030562325",
	- "611752105030562327",
	- "611752105030562333",
	- "611752105030562335",
	- "611752105030562337",
	- "611752105030562338",
	- "611752105030562345",
	- "611752105030562351",
	- "611752105030562378",
	- "611752105030562380",
	- "611752105030562383",
	- "611752105030562389",
	- "611752105030562391",
	- "611752105030562392",
	- "611752105030562397",
	- "611752105030562398",
	- "611752105030562399",
	- "611752105030562401",
	- "611752105030562404",
	- "611752105030562405",
	- "611752105030562411",
	- "611752105030562413",
	- "611752105030562414",
	- "611752105030562417",
	- "611752105030562419",
	- "611752105030562424",
	- "611752105030562425",
	- "611752105030562426",
	- "611752105030562428",
	- "611752105030562431",
	- "611752105030562448",
	- "611752105030562457",
	- "611752105030562459",
	- "611752105030562460",
	- "611752105030562463",
	- "611752105030562470",
	- "611752105030562472",
	- "611752105030562473",
	- "611752105030562479",
	- "611752105030562483",
	- "611752105030562489",
	- "611752105030562493",
	- "611752105030562494",
	- "611752105030562499",
	- "611752105030562502",
	- "611752105030562504",
	- "611752105030562507",
	- "611752105030562512",
	- "611752105030562513",
	- "611752105030562517",
	- "611752105030562522",
	- "611752105030562919",
	- "611752105030562921",
	- "611752105030562924",
	- "611752105030562925",
	- "611752105030562929",
	- "611752105030562931",
	- "611752105030562936",
	- "611752105030562938",
	- "611752105030562939",
	- "611752105030562940",
	- "611752105030562943",
	- "611752105030562950",
	- "611752105030562953",
	- "611752105030562954",
	- "611752105030562959",
	- "611752105030562960",
	- "611752105030562962",
	- "611752105030562968",
	- "611752105030562974",
	- "611752105030562978",
	- "611752105030562979",
	- "611752105030562981",
	- "611752105030562983",
	- "611752105030562986",
	- "611752105030562988",
	- "611752105030562999",
	- "611752105030563001",
	- "611752105030563003",
	- "611752105030563005",
	- "611752105030563006",
	- "611752105030563010",
	- "611752105030563014",
	- "611752105030563022",
	- "611752105030563025",
	- "611752105030563028",
	- "611752105030563031",
	- "611752105030563034",
	- "611752105030563035",
	- "611752105030563043"
	+ "611752105030484885",
	+ "611752105029543722",
	+ "611752105030556608",
	+ "611752105030585154",
	+ "611752105030556609",
	+ "611752105029054060",
	+ "611752105028975148",
	+ "611752105030558172",
	+ "611752105028778344",
	+ "611752105030556613",
	+ "611752105029290698",
	+ "611752105030556605",
	+ "611752105027484924",
	+ "611752105030559472",
	+ "611752105030534293",
	+ "611752105027148644",
	+ "611752105029292630",
	+ "611752105026900917",
	+ "611752105027103140",
	+ "611752105030589795",
	+ "611752105026915170",
	+ "611752105030534289",
	+ "611752105026751742",
	+ "611752105026452638",
	+ "611752105025979421",
	+ "611752105025817810",
	+ "611752105026536899",
	+ "611752105030534282",
	+ "611752105030534285",
	+ "611752105030559474",
	+ "611752105025219762",
	+ "611752105025034426",
	+ "611752105024938926",
	+ "611752105029648740",
	+ "611752105029675859",
	+ "611752105024598727",
	+ "611752105030548412",
	+ "611752105030487271",
	+ "611752105029648743",
	+ "611752105023692976",
	+ "611752105024135802",
	+ "611752105023616288",
	+ "611752105023255629",
	+ "611752105022728286",
	+ "611752105023206033",
	+ "611752105023091102",
	+ "611752105029792918",
	+ "611752105022729259",
	+ "611752105030487512",
	+ "611752105022842120",
	+ "611752105022842054",
	+ "611752105022785621",
	+ "611752105022840550",
	+ "611752105022838205",
	+ "611752105022839189",
	+ "611752105022835751",
	+ "611752105022818025",
	+ "611752105022797521",
	+ "611752105022784390",
	+ "611752105028820609",
	+ "611752105030488595",
	+ "611752105030517536",
	+ "611752105030501857",
	+ "611752105030478339",
	+ "611752105025957389",
	+ "611752105027484925",
	+ "611752105027484915",
	+ "611752105024415490",
	+ "611752105027854244",
	+ "611752105029527187",
	+ "611752105028870536",
	+ "611752105028444597",
	+ "611752105028778353",
	+ "611752105027877846",
	+ "611752105028906605",
	+ "611752105027781526",
	+ "611752105027877887",
	+ "611752105027795229",
	+ "611752105027734187",
	+ "611752105028820612",
	+ "611752105027626964",
	+ "611752105027460080",
	+ "611752105027507932",
	+ "611752105027611342",
	+ "611752105027435127",
	+ "611752105027307631",
	+ "611752105029648514",
	+ "611752105026874730",
	+ "611752105030591117",
	+ "611752105026437853",
	+ "611752105025541483",
	+ "611752105026536913",
	+ "611752105022647044",
	+ "611752105023440333",
	+ "611752105023460357",
	+ "611752105023604729",
	+ "611752105023510939",
	+ "611752105022842387",
	+ "611752105024230229",
	+ "611752105023674599",
	+ "611752105023160140",
	+ "611752105022647074",
	+ "611752105022615220",
	+ "611752105028408822",
	+ "611752105022816170",
	+ "611752105022772279",
	+ "611752105022614618",
	+ "611752105020417684",
	+ "611752105020382477",
	+ "611752105022780345",
	+ "611752105022780961",
	+ "611752105022837186",
	+ "611752105022778042",
	+ "611752105022775939",
	+ "611752105022764224",
	+ "611752105022781267",
	+ "611752105022839030",
	+ "611752105022767294",
	+ "611752105022784996",
	+ "611752105022775600",
	+ "611752105022780284",
	+ "611752105022768837",
	+ "611752105030590847",
	+ "611752105022780965",
	+ "611752105022779020",
	+ "611752105022777496",
	+ "611752105022781268",
	+ "611752105022785681",
	+ "611752105022779294",
	+ "611752105022823781",
	+ "611752105022780210",
	+ "611752105022774220",
	+ "611752105022768419",
	+ "611752105030590845",
	+ "611752105022835406",
	+ "611752105022774040",
	+ "611752105022783776",
	+ "611752105022781193",
	+ "611752105020390942",
	+ "611752105022783967",
	+ "611752105022763051",
	+ "611752105022780818",
	+ "611752105022835415",
	+ "611752105022782935",
	+ "611752105020402448",
	+ "611752105022781011",
	+ "611752105020384960",
	+ "611752105022779784",
	+ "611752105022781387",
	+ "611752105025580424",
	+ "611752105022765022",
	+ "611752105025492732",
	+ "611752105023683356",
	+ "611752105022842241",
	+ "611752105024231227",
	+ "611752105029291290",
	+ "611752105023104185",
	+ "611752105025565044",
	+ "611752105025458749",
	+ "611752105025458753",
	+ "611752105025090763",
	+ "611752105030590839",
	+ "611752105030534180",
	+ "611752105023908922",
	+ "611752105027326105",
	+ "611752105023725727",
	+ "611752105022647079",
	+ "611752105024082232",
	+ "611752105029648891",
	+ "611752105025504662",
	+ "611752105025496983",
	+ "611752105026716551",
	+ "611752105029648872",
	+ "611752105022614531",
	+ "611752105029041707",
	+ "611752105030483313",
	+ "611752105023219237",
	+ "611752105022842989",
	+ "611752105022746733",
	+ "611752105023162802",
	+ "611752105022729263",
	+ "611752105022777120",
	+ "611752105025584544",
	+ "611752105025458809",
	+ "611752105027648113",
	+ "611752105030590840",
	+ "611752105024183682",
	+ "611752105023086347",
	+ "611752105022839975",
	+ "611752105025348359",
	+ "611752105022781144",
	+ "611752105022647060",
	+ "611752105022728482",
	+ "611752105025840622",
	+ "611752105022836470",
	+ "611752105023246015",
	+ "611752105022838206",
	+ "611752105022780355",
	+ "611752105022768062",
	+ "611752105022777600"
	]

	ban = deepcopy(banned_user_map)
	ban["db"] = "av_db"

	for sid in arr:
	url = get_url_by_song_id(sid)
	if url is not None:
	print("out,{},{}".format(url, sid))
	# 不在数据库中
	sql = "select song_id from svc_queue_table where song_id={}".format(sid)
	data = get_data_by_mysql(sql, ban)
	if len(data) == 0:
	- sql = "insert INTO svc_queue_table (song_id, url, create_time, update_time, song_src) VALUES ({}, \"{}\", NOW(), NOW(), 1)" \
	- .format(sid, url)
	+ tm = int(time.time())
	+ sql = "insert INTO svc_queue_table (song_id, url, create_time, update_time, song_src) VALUES ({}, \"{}\",{}, {}, 1)" \
	+ .format(sid, url, tm, tm)
	update_db(sql, ban)


	def get_data_from_song():
	    """Queue popular songs that are not yet present in ``svc_queue_table``.

	    Reads up to 400 rows from ``starmaker.song`` (``song_src`` 108 or 109,
	    ``song_status`` 2) that have no matching ``song_id`` in
	    ``av_db.svc_queue_table``, ordered by ``recording_count`` descending.
	    For every song whose URL lookup returns a value, the pair is printed
	    and written to ``svc_queue_table`` with ``replace INTO``.
	    """
	    select_sql = """
	    select tb1.song_id, tb1.recording_count
	    from (
	    select song_id,recording_count
	    from starmaker.song
	    where song_src in (108,109) and song_status = 2
	    order by recording_count desc
	    ) as tb1
	    left join
	    (
	    select song_id
	    from av_db.svc_queue_table
	    ) as tb2
	    on tb1.song_id = tb2.song_id
	    where tb2.song_id is null
	    order by tb1.recording_count desc limit 400
	    """
	    # Two copies of the shared connection settings: one per target database.
	    read_conf = deepcopy(banned_user_map)
	    read_conf["db"] = "starmaker_musicbook"
	    write_conf = deepcopy(banned_user_map)
	    write_conf["db"] = "av_db"

	    for row in get_data_by_mysql(select_sql, read_conf):
	        song_id = row[0]
	        song_url = get_url_by_song_id(song_id)
	        if song_url is None:
	            # No URL for this song: nothing to queue.
	            continue
	        print("out,{},{}".format(song_url, song_id))
	        upsert_sql = "replace INTO svc_queue_table (song_id, url, create_time, update_time) VALUES ({}, \"{}\", NOW(), NOW())" \
	            .format(song_id, song_url)
	        update_db(upsert_sql, write_conf)


	# Script entry point: only process() is run; the get_data_from_song() call
	# below is left disabled.
	if __name__ == '__main__':
	# get_data_from_song()
	process()
	diff --git a/AutoCoverTool/script/shuffle_music.py b/AutoCoverTool/script/shuffle_music.py
	index 0f80872..81b3e60 100644
	--- a/AutoCoverTool/script/shuffle_music.py
	+++ b/AutoCoverTool/script/shuffle_music.py
	@@ -1,225 +1,263 @@
	"""
	载入人声，将人声的频谱进行向上平移
	"""
	import librosa
	import soundfile
	import numpy as np
	from copy import deepcopy


	def local_maxium(x):
	    """Find the local maxima of a 1-D sequence.

	    A sample counts as a maximum when the first difference is strictly
	    positive going into it and non-positive going out of it. The first
	    sample of a flat-topped peak is therefore reported, and the two end
	    points of the sequence never are.

	    :param x: 1-D sequence of numbers (list or numpy array).
	    :return: ``(maxium, loc)`` -- a list with the peak values and a list with
	        their integer indices into ``x``. Both are empty when ``x`` has
	        fewer than three samples or contains no peak.
	    """
	    d = np.diff(x)
	    # One vectorised comparison of neighbouring differences replaces the
	    # per-sample Python loop: d[i] > 0 (rising) followed by d[i + 1] <= 0.
	    is_peak = (d[:-1] > 0) & (d[1:] <= 0)
	    loc = (np.flatnonzero(is_peak) + 1).tolist()
	    maxium = [x[i] for i in loc]
	    return maxium, loc


	def Formant_Cepst(u, cepstL):
	    """Estimate formants of one signal frame with the cepstrum method.

	    Source: https://github.com/taw19960426/-Speech-signal-processing-experiment-tutorial-_python/blob/master/%E5%85%B1%E6%8C%AF%E5%B3%B0%E4%BC%B0%E8%AE%A1%E5%87%BD%E6%95%B0.py

	    The log-magnitude of the first half of the FFT is taken to the cepstral
	    domain, only the ``cepstL`` lowest coefficients and their mirrored
	    counterparts at the end are kept, and the result is transformed back
	    into a smooth spectral envelope whose local maxima are returned.

	    :param u: input signal frame (1-D).
	    :param cepstL: width of the window applied on the quefrency axis.
	    :return: ``(val, loc, spec)`` -- envelope values at the peaks, indices of
	        the peaks, and the envelope itself.
	    """
	    wlen2 = len(u) // 2
	    u_fft = np.fft.fft(u)  # Eq. (2-1) of the referenced tutorial
	    magnitude = np.abs(u_fft[:wlen2])
	    # Floor exact zeros so log() stays finite; a single -inf would turn the
	    # whole envelope into NaN after the inverse transform.
	    U = np.log(np.maximum(magnitude, np.finfo(float).tiny))
	    Cepst = np.fft.ifft(U)  # Eq. (2-2)
	    # The np.complex alias was removed in NumPy 1.24; the builtin ``complex``
	    # selects the same complex128 dtype on every NumPy version.
	    cepst = np.zeros(wlen2, dtype=complex)
	    cepst[:cepstL] = Cepst[:cepstL]  # Eq. (2-3)
	    if cepstL > 1:
	        # Mirrored tail of the window. Skipped for cepstL <= 1, where the
	        # slice start would be >= 0 and copy (almost) the whole cepstrum
	        # instead of a tail, disabling the smoothing.
	        cepst[-cepstL + 1:] = Cepst[-cepstL + 1:]
	    spec = np.real(np.fft.fft(cepst))
	    val, loc = local_maxium(spec)  # peaks of the envelope
	    return val, loc, spec


	def get_ref_stft(
	        ref_path="/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_ref.wav",
	        sr=44100,
	        n_fft=2048):
	    """Return the peak-normalised mean magnitude spectrum of a reference vocal.

	    :param ref_path: audio file to analyse; defaults to the reference file
	        this function was originally hard-wired to.
	    :param sr: sample rate the file is resampled to when loading.
	    :param n_fft: FFT size passed to ``librosa.stft``.
	    :return: 1-D array with one value per STFT frequency bin: the magnitude
	        averaged over all frames and divided by its maximum. An all-zero
	        spectrum is returned as zeros instead of NaN.
	    """
	    audio, sr = librosa.load(ref_path, sr=sr, mono=True)
	    # Transposed so that axis 0 indexes frames and axis 1 frequency bins.
	    stft = librosa.stft(audio, n_fft=n_fft).transpose()
	    print(stft.shape)
	    data = np.mean(np.abs(stft), axis=0)
	    peak = np.max(data)
	    if peak > 0:
	        # Skip the division for silent input, where it would be 0 / 0.
	        data = data / peak
	    return data
	+
	+
	def test(in_vocal):
	    """Re-weight a vocal so its average spectrum follows the reference one.

	    The peak-normalised mean magnitude spectrum of ``in_vocal`` is compared
	    with the one returned by ``get_ref_stft()``; every STFT frame is then
	    multiplied by the per-bin ratio ``reference / input`` and the result is
	    written next to the input as ``<name>_out.wav``.

	    Earlier experiments that lived here as commented-out code (shifting all
	    bins up by 3, stretching the frequency axis by 1.12, moving only the
	    bins above the first formant) have been dropped from the body.

	    :param in_vocal: path of the vocal audio file to process.
	    """
	    import os

	    sr = 44100
	    audio, sr = librosa.load(in_vocal, sr=sr, mono=True)

	    # Transposed so that axis 0 indexes frames and axis 1 frequency bins.
	    stft = librosa.stft(audio, n_fft=2048).transpose()

	    w1 = get_ref_stft()
	    data = np.mean(np.abs(stft), axis=0)
	    peak = np.max(data)
	    if peak > 0:
	        data = data / peak
	    # Floor the denominator: bins that are empty in the input would give an
	    # infinite gain, and inf * 0 turns the whole output into NaN.
	    w = w1 / np.maximum(data, 1e-8)

	    # Same gain curve for every frame; cast back to the STFT dtype, as the
	    # former per-frame assignment into a zeros_like(stft) buffer did.
	    new_stft = (stft * w).astype(stft.dtype)

	    istft = librosa.istft(new_stft.transpose())
	    # splitext instead of str.replace so an input without a ".wav" suffix is
	    # never overwritten by the output.
	    root, _ = os.path.splitext(str(in_vocal))
	    soundfile.write(root + "_out.wav", istft, sr, format="wav")


	def test_v5(vocal, vocal_ref, vocal_ref2):
	    """Give ``vocal`` the STFT magnitudes of ``vocal_ref2``, keeping its phase.

	    For each of the first ``n`` frames, where ``n`` is the shortest frame
	    count of the three inputs, the output bin is ``|ref2| * exp(j * phase)``
	    with the phase taken from ``vocal``. Remaining frames stay zero. The
	    result is written next to ``vocal`` as ``<name>_out5.wav``.

	    :param vocal: path of the vocal that supplies the phase.
	    :param vocal_ref: path of a second file; only its frame count is used,
	        as an upper bound on the number of processed frames.
	    :param vocal_ref2: path of the file that supplies the magnitudes.
	    """
	    import os

	    sr = 44100
	    audio, sr = librosa.load(vocal, sr=sr, mono=True)
	    # Transposed so that axis 0 indexes frames and axis 1 frequency bins.
	    stft = librosa.stft(audio, n_fft=2048).transpose()

	    audio_ref, sr = librosa.load(vocal_ref, sr=sr, mono=True)
	    stft_ref = librosa.stft(audio_ref, n_fft=2048).transpose()

	    audio_ref2, sr = librosa.load(vocal_ref2, sr=sr, mono=True)
	    stft_ref2 = librosa.stft(audio_ref2, n_fft=2048).transpose()

	    n_frames = min(len(stft), len(stft_ref2), len(stft_ref))
	    new_stft = np.zeros_like(stft)
	    # Algebraically the same as (|ref2| / |stft|) * stft, but without the
	    # division, so bins where the vocal is exactly zero no longer yield NaN.
	    phase = np.exp(1j * np.angle(stft[:n_frames]))
	    new_stft[:n_frames] = np.abs(stft_ref2[:n_frames]) * phase

	    istft = librosa.istft(new_stft.transpose())
	    # splitext instead of str.replace so an input without a ".wav" suffix is
	    # never overwritten by the output.
	    root, _ = os.path.splitext(str(vocal))
	    soundfile.write(root + "_out5.wav", istft, sr, format="wav")
	+
	+
	def ttt(path):
	    """Plot the log spectrum of a recording and its cepstral formant envelope.

	    The file is loaded at 44.1 kHz mono and pre-emphasised. The upper plot
	    shows the log-magnitude spectrum of the Hamming-windowed signal; the
	    lower plot shows the envelope returned by ``Formant_Cepst`` with every
	    detected peak marked and labelled with its frequency.

	    :param path: audio file to analyse; the whole file is treated as one frame.
	    """
	    from scipy.signal import lfilter
	    import matplotlib.pyplot as plt

	    samples, fs = librosa.load(path, sr=44100, mono=True)
	    # Pre-emphasis filter.
	    u = lfilter([1, -0.99], [1], samples)

	    cepstL = 7
	    frame_len = len(u)
	    half_len = frame_len // 2
	    print("帧长={}".format(frame_len))
	    print("帧移={}".format(half_len))

	    # Window, FFT, keep the first half of the spectrum and take the log.
	    windowed = u * np.hamming(frame_len)
	    log_spectrum = np.log(np.abs(np.fft.fft(windowed))[:half_len])
	    freq = [k * fs / frame_len for k in range(half_len)]

	    # val: peak amplitudes, loc: peak positions, spec: envelope.
	    val, loc, spec = Formant_Cepst(u, cepstL)

	    plt.subplot(2, 1, 1)
	    plt.plot(freq, log_spectrum, 'k')
	    plt.xlabel('频率/Hz')
	    plt.ylabel('幅值')
	    plt.title('男性a的发音频谱')

	    plt.subplot(2, 1, 2)
	    plt.plot(freq, spec, 'k')
	    plt.xlabel('频率/Hz')
	    plt.ylabel('幅值')
	    plt.title('倒谱法共振峰估计')

	    envelope_floor = np.min(spec)
	    for peak in loc:
	        peak_hz = freq[peak]
	        plt.subplot(2, 1, 2)
	        # Vertical marker from the bottom of the envelope up to the peak.
	        plt.plot([peak_hz, peak_hz], [envelope_floor, spec[peak]], '-.k')
	        plt.text(peak_hz, spec[peak], 'Freq={}'.format(int(peak_hz)))

	    plt.show()
	    plt.close()


	def main(path):
	    """Analyse a vocal with WORLD (pyworld) and resynthesise it.

	    Idea: change the pitch first, then slightly adjust the formants during
	    synthesis. With the constants currently set in the body (``pitch = 0``,
	    ``cur_rate = 1``) neither adjustment changes anything. The result is
	    written next to the input as ``<name>_out2.wav``.

	    :param path: path of the vocal audio file to process.
	    """
	    import math
	    import os

	    import pyworld as pw

	    base_rate = 1.05946  # approximately 2 ** (1 / 12)
	    pitch = 0  # exponent applied to base_rate below

	    fs = 44100
	    x, sr = librosa.load(path, sr=fs, mono=True)
	    # The np.float alias was removed in NumPy 1.24; np.float64 is the dtype
	    # it stood for.
	    x = x.reshape(-1).astype(np.float64)
	    f0, t = pw.dio(x, fs)
	    f0 = pw.stonemask(x, f0, t, fs)
	    sp = pw.cheaptrick(x, f0, t, fs)

	    # Resample the spectral envelope along axis 1 by 1 / cur_rate, clamping
	    # at the last column.
	    sp2 = np.zeros_like(sp)
	    cur_rate = 1
	    last_col = sp.shape[1] - 1
	    for i in range(sp.shape[1]):
	        sp2[:, i] = sp[:, min(int(i * 1 / cur_rate), last_col)]

	    ap = pw.d4c(x, f0, t, fs)
	    rate = math.pow(base_rate, pitch)
	    out = pw.synthesize(f0 * rate, sp2, ap, fs).reshape(-1, 1)
	    # splitext instead of str.replace so an input without a ".wav" suffix is
	    # never overwritten by the output.
	    root, _ = os.path.splitext(path)
	    soundfile.write(root + "_out2.wav", out, fs)


	if __name__ == '__main__':
	    # Disabled experiments, kept for reference:
	    # vc = VoiceChanger()
	    # vc.process("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal.wav",
	    # "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_out1.wav")

	    # test(
	    # "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal.wav")

	    # Active experiment: test_v5 on three renderings of the same vocal.
	    first_vocal = "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_p1.wav"
	    second_vocal = "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal.wav"
	    third_vocal = "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_p2.wav"
	    test_v5(first_vocal, second_vocal, third_vocal)

	    # main("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_p2.wav")
	    # ttt("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_02_01.wav")

File Metadata

Mime Type: text/x-diff
Expires: Sun, Jan 12, 08:29 (1 d, 15 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1347152
Default Alt Text: (59 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions