diff --git a/AutoCoverTool/online/inference_one.py b/AutoCoverTool/online/inference_one.py index 22f3b71..e347f92 100644 --- a/AutoCoverTool/online/inference_one.py +++ b/AutoCoverTool/online/inference_one.py @@ -1,713 +1,713 @@ """ 单个处理的逻辑 song_id: ---src.mp3 // 源数据,需要提前放进去 ---cache ---vocal.wav // 分离之后产生 ---acc.wav // 分离之后产生 ---vocal_32.wav // 分离之后产生 ---song_id_sp1.wav // 合成之后产生 ---song_id_sp2.wav // 合成之后产生 ---song_id_sp2_d.wav // 降噪之后生成 ---song_id_sp2_dv.wav // 降噪+拉伸之后产生 [占比太高的不产生] ---song_id_sp2_dve442.wav // 手动调整之后产生 ---song_id_sp2_dve442_replace.wav // 替换之后产生 ---song_id_sp2_dve442_replace_mix.wav // 人声+伴奏混合之后产生 ---song_id --acc.mp3 // 44k双声道320k --vocal.mp3 // 44k双声道320k --src.mp3 // 44k双声道320k --song_id_sp2_dv.mp3 // 44k单声道320k ---song_id_out // 对外输出 --src.mp3 // 原始音频 --song_id_sp2_dv_replace_mix.mp3 // 制作完成的音频 环境安装: conda create -n auto_song_cover python=3.9 # 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt] # 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt] pip install librosa pip install scikit-maad pip install praat-parselmouth pip install matplotlib pip install torchvision pip install madmom pip install torchstat 环境设置: export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame """ import os import time import shutil import random import logging import librosa logging.basicConfig(filename='/tmp/inference.log', level=logging.INFO) gs_err_code_success = 0 gs_err_code_no_src_mp3 = 1 gs_err_code_separate = 2 gs_err_code_trans_32 = 3 gs_err_code_encode_err = 4 gs_err_code_replace_err = 5 gs_err_code_replace_trans_err = 6 gs_err_code_mix_err = 7 gs_err_code_mix_transcode_err = 8 gs_err_code_no_src_dir = 9 gs_err_code_volume_err = 10 gs_err_code_trans2_442 = 11 gs_err_code_reverb = 12 gs_err_code_no_good_choice = 13 gs_err_code_preprocess_vocal = 14 gs_err_code_replace_except_err = 15 gs_denoise_exe = 
"/opt/soft/bin/denoise_exe" gs_draw_volume_exe = "/opt/soft/bin/draw_volume" gs_simple_mixer_path = "/opt/soft/bin/simple_mixer" gs_rever_path = "/opt/soft/bin/dereverbrate" from ref.music_remover.separate_interface import SeparateInterface from ref.so_vits_svc.inference_main import * from ref.split_dirty_frame.script.process_one import ReplaceVocalFrame, construct_power_fragment class SongCoverInference: def __init__(self): self.work_dir = None self.cache_dir = None self.cid = None self.src_mp3 = None self.vocal_path = None self.vocal_32_path = None self.acc_path = None self.speakers = [ 10414574138721494, 10414574140317353, 1688849864840588, 3634463651, 5629499489839033, 5910973794723621, 6755399374234747, 8162774327817435, 8162774329368194, 1125899914308640, # 以下为男声,包括这个 12384898975368914, 12947848931397021, 3096224748076687, 3096224751151928, 5066549357604730, 5348024335101054, 6755399442719465, 7036874421386111 ] self.speakers2gender = { 10414574138721494: 2, 10414574140317353: 2, 1688849864840588: 2, 3634463651: 2, 5629499489839033: 2, 5910973794723621: 2, 6755399374234747: 2, 8162774327817435: 2, 8162774329368194: 2, 1125899914308640: 1, # 1是男 12384898975368914: 1, 12947848931397021: 1, 3096224748076687: 1, 3096224751151928: 1, 5066549357604730: 1, 5348024335101054: 1, 6755399442719465: 1, 7036874421386111: 1 } self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth" self.speakers_model_config = "data/train_users/{}/config/config.json" st = time.time() self.separate_inst = None logging.info("post process ... 
def separate(self, cid, src_mp3, vocal_path, acc_path):
    """
    Split the source track into vocal and accompaniment, then resample the vocal.

    The separator is created lazily on first use. After a successful
    separation the vocal stem is transcoded with ffmpeg to a 32 kHz mono
    file at ``self.vocal_32_path``.

    :param cid: song id, forwarded to the separator and used in the timing message
    :param src_mp3: path of the source audio
    :param vocal_path: path the separator writes the vocal stem to
    :param acc_path: path the separator writes the accompaniment stem to
    :return: gs_err_code_success on success; gs_err_code_separate when the
             separator reports failure or a stem is missing;
             gs_err_code_trans_32 when the 32 kHz file was not produced
    """
    import subprocess  # local import: the module header does not import it

    st = time.time()
    if self.separate_inst is None:
        self.separate_inst = SeparateInterface()
    if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path):
        return gs_err_code_separate
    if not (os.path.exists(vocal_path) and os.path.exists(acc_path)):
        return gs_err_code_separate

    # Transcode to 32 kHz mono. The arguments are passed as a list so that
    # paths containing spaces or shell metacharacters are handed to ffmpeg
    # verbatim instead of being re-parsed by a shell.
    cmd = [
        "ffmpeg", "-i", str(vocal_path),
        "-ar", "32000", "-ac", "1",
        "-y", str(self.vocal_32_path),
        "-loglevel", "fatal",
    ]
    try:
        subprocess.run(cmd, check=False)
    except OSError as ex:
        # A missing ffmpeg binary must not crash the worker; the existence
        # check below turns it into the usual error code.
        logging.error("cid=%s, cannot run ffmpeg: %s", cid, ex)
    if not os.path.exists(self.vocal_32_path):
        return gs_err_code_trans_32

    print("separate:cid={}|sp={}".format(cid, time.time() - st))
    return gs_err_code_success
def inference_speaker(self):
    """
    Run voice conversion for a random set of speakers and keep the best output.

    Up to 15 speakers are drawn at random from ``self.speakers``. Each one
    is rendered with ``inf`` into ``self.cache_dir``. Every output that
    exists is scored with ``ReplaceVocalFrame.get_rate`` and kept as a
    candidate only when its rate is below 0.3. The candidate with the
    lowest rate wins.

    :return: list holding the single best output path, or an empty list
             when no output qualified
    """
    st = time.time()

    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the request at the number of speakers.
    sample_size = min(15, len(self.speakers))
    out_speakers = random.sample(self.speakers, sample_size)

    out_songs_dict = {}
    for speaker in out_speakers:
        model_path = self.speakers_model_path.format(speaker)
        config_path = self.speakers_model_config.format(speaker)
        song_path = os.path.join(self.cache_dir, "{}_{}.wav".format(self.cid, speaker))
        try:
            inf(model_path, config_path, self.vocal_32_path, song_path, "prod")
        except Exception as ex:
            # One failing speaker must not abort the whole batch.
            logging.info("cid={}, inference_speaker err={}".format(self.cid, ex))
            continue
        if not os.path.exists(song_path):
            continue
        if self.replace_vocal_frame_inst is None:
            self.replace_vocal_frame_inst = ReplaceVocalFrame(
                "data/models/split_dirty_frame_v5_3_epoch3_852.pth")
        rate = self.replace_vocal_frame_inst.get_rate(song_path)
        if rate < 0.3:
            out_songs_dict[song_path] = rate

    # Pick the candidate with the lowest rate.
    out_songs = []
    if out_songs_dict:
        # The start time is only reported in the log line below.
        st_sec = self.get_start_ms(self.vocal_path)
        best_song, best_rate = min(out_songs_dict.items(), key=lambda kv: kv[1])
        out_songs = [best_song]
        summary = "GetRate:cid={},song={},rate={},st_tm={}".format(
            self.cid, best_song, round(best_rate, 2), round(st_sec, 3))
        logging.info(summary)
        print(summary)

    print("inference_speaker len = {} finish sp = {}".format(len(out_songs), time.time() - st))
    return out_songs
"acc.mp3") dst_vocal_path = os.path.join(out_dir, "vocal.mp3") shutil.copyfile(self.src_mp3, dst_mp3_path) cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.acc_path, dst_acc_path) os.system(cmd) if not os.path.exists(dst_acc_path): return gs_err_code_encode_err cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.vocal_path, dst_vocal_path) os.system(cmd) if not os.path.exists(dst_vocal_path): return gs_err_code_encode_err # 将所有数据放到out_dir中,用于给人工标注 for dv_wav in dv_out_list: dv_wav_name = str(dv_wav).split("/")[-1].replace(".wav", "_441.mp3") dst_dv_path = os.path.join(out_dir, dv_wav_name) cmd = "ffmpeg -i {} -ar 44100 -ac 1 -ab 320k -y {} -loglevel fatal".format(dv_wav, dst_dv_path) os.system(cmd) if not os.path.exists(dst_dv_path): print("{} encode err!".format(cmd)) continue logging.info( "preprocess_vocal output sp = {}".format(time.time() - st)) def process_one(self, cid, work_dir, enable_output=False): logging.info("\nstart:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir)) self.cid = cid self.work_dir = work_dir # 所有不对外交付的,全部放到这里 self.cache_dir = os.path.join(work_dir, "cache") if os.path.exists(self.cache_dir): shutil.rmtree(self.cache_dir) os.makedirs(self.cache_dir) self.src_mp3 = os.path.join(self.work_dir, "src.mp3") if not os.path.exists(self.src_mp3): return gs_err_code_no_src_mp3 self.vocal_path = os.path.join(self.cache_dir, "vocal.wav") self.vocal_32_path = os.path.join(self.cache_dir, "vocal_32.wav") self.acc_path = os.path.join(self.cache_dir, "acc.wav") if not os.path.exists(self.vocal_32_path): logging.info("start separate ... 
def after_process(self, cid, work_dir, in_file, effect_file, vocal_file, acc_file, need_draw=True,
                  need_reverb=True):
    """
    Post-process a converted vocal: replace frames, mix with the accompaniment, encode to mp3.

    Steps, in order: optional reverb matching (``reverb_by_vocal``),
    optional volume drawing, frame replacement
    (``ReplaceVocalFrame.process``), transcode to 44.1 kHz stereo, a second
    volume-drawing pass, mixing with ``acc_file`` and mp3 encoding into
    ``<work_dir>/<cid>_out``.

    :param cid: song id; stored on the instance
    :param work_dir: working directory of the song; stored on the instance
    :param in_file: converted vocal handed to the reverb and replace steps
    :param effect_file: processed vocal; replaced by the reverb output when
                        need_reverb is set
    :param vocal_file: original vocal, passed as reference to the external tools
    :param acc_file: accompaniment to mix with
    :param need_draw: run the first volume-drawing pass
    :param need_reverb: derive effect_file from in_file via reverb_by_vocal
    :return: (err_code, mp3_path); mp3_path is None unless err_code is
             gs_err_code_success
    """
    if need_reverb:
        # Match the reverb of the original vocal.
        err, effect_file = self.reverb_by_vocal(in_file)
        if err != gs_err_code_success:
            return err, None

    if need_draw:
        # Extra volume-drawing pass before the replacement.
        volume_path = str(effect_file).replace(".wav", "_dv.wav")
        cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, vocal_file, volume_path)
        print(cmd)
        os.system(cmd)
        if not os.path.exists(volume_path):
            print("{} {} ERROR draw volume".format(self.cid, volume_path))
            return gs_err_code_volume_err, None
        effect_file = volume_path

    st = time.time()
    self.cid = cid
    self.work_dir = work_dir
    self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
    if not os.path.exists(self.work_dir):
        # Every exit returns a pair; the caller unpacks two values.
        return gs_err_code_no_src_dir, None

    # The replacer is created lazily elsewhere in this class; do the same
    # here so this method also works when it is the first one to need it.
    if self.replace_vocal_frame_inst is None:
        self.replace_vocal_frame_inst = ReplaceVocalFrame(
            "data/models/split_dirty_frame_v5_3_epoch3_852.pth")
    self.replace_vocal_frame_inst.process(in_file, effect_file, vocal_file)
    dst_path = effect_file + "_replace.wav"
    if not os.path.exists(dst_path):
        return gs_err_code_replace_err, None
    print("replace_vocal_frame_inst sp = {}".format(time.time() - st))

    # Transcode to 44.1 kHz stereo.
    dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442)
    os.system(cmd)
    if not os.path.exists(dst_path_442):
        return gs_err_code_replace_trans_err, None

    # Second volume pass after the transcode to keep the loudness.
    volume_path = dst_path_442.replace("_replace442.wav", "_replace442_dv.wav")
    cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, vocal_file, volume_path)
    print(cmd)
    os.system(cmd)
    if not os.path.exists(volume_path):
        print("{} {} ERROR draw volume".format(self.cid, volume_path))
        return gs_err_code_volume_err, None
    dst_path_442 = volume_path

    # Mix with the accompaniment. The file now ends in "_replace442_dv.wav",
    # so the suffix must be derived from that name; matching the older
    # "_replace442.wav" suffix would leave the path unchanged and make the
    # mixer write onto its own input.
    mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav")
    cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, acc_file, mix_path)
    print("{}".format(cmd))
    os.system(cmd)
    if not os.path.exists(mix_path):
        return gs_err_code_mix_err, None

    # Encode to mp3 in the delivery directory.
    output_dir = os.path.join(self.work_dir, self.cid + "_out")
    os.makedirs(output_dir, exist_ok=True)
    name = os.path.basename(
        str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3"))
    mix_path_mp3 = os.path.join(output_dir, name)
    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
    os.system(cmd)
    if not os.path.exists(mix_path_mp3):
        return gs_err_code_mix_transcode_err, None

    return gs_err_code_success, mix_path_mp3
def mix(self, cid, work_dir, svc_file, effect_file):
    """
    Replace frames in the effected vocal, mix it with the accompaniment and encode to mp3.

    Steps, in order: volume drawing on ``effect_file``, frame replacement
    (``ReplaceVocalFrame.process``), transcode to 44.1 kHz stereo, a second
    volume-drawing pass, mixing with ``self.acc_path`` and mp3 encoding
    into ``<work_dir>/<cid>_out``. Each step is timed and logged.

    :param cid: song id
    :param work_dir: working directory of the song
    :param svc_file: converted vocal, handed to the replace step
    :param effect_file: vocal with effects applied
    :return: (err_code, mp3_path); mp3_path is None unless err_code is
             gs_err_code_success
    """
    st = time.time()
    err = self.prepare_env(cid, work_dir)
    if err != gs_err_code_success:
        return err, None
    logging.info("cid={},effect_and_mix,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Volume drawing.
    st = time.time()
    volume_path = str(effect_file).replace(".wav", "_dv.wav")
    cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, self.vocal_path, volume_path)
    os.system(cmd)
    if not os.path.exists(volume_path):
        print("{} {} ERROR draw volume".format(self.cid, volume_path))
        return gs_err_code_volume_err, None
    effect_file = volume_path
    logging.info("cid={},draw_volume,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Frame replacement.
    st = time.time()
    try:
        if self.replace_vocal_frame_inst is None:
            self.replace_vocal_frame_inst = ReplaceVocalFrame(
                "data/models/split_dirty_frame_v5_3_epoch3_852.pth")
        self.replace_vocal_frame_inst.process(svc_file, effect_file, self.vocal_path)
    except Exception as ex:
        # logging formats with %-style arguments; "{}" placeholders combined
        # with positional args make the record fail to render, so the error
        # would never reach the log.
        logging.exception("%s,replace_vocal_frame_inst, %s", self.cid, ex)
        return gs_err_code_replace_except_err, None
    dst_path = effect_file + "_replace.wav"
    if not os.path.exists(dst_path):
        return gs_err_code_replace_err, None
    logging.info("cid={},replace_vocal_frame_inst,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Transcode to 44.1 kHz stereo.
    st = time.time()
    dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442)
    os.system(cmd)
    if not os.path.exists(dst_path_442):
        return gs_err_code_replace_trans_err, None
    logging.info("cid={},transcode,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Second volume pass after the transcode to keep the loudness.
    st = time.time()
    volume_path = str(dst_path_442).replace("_replace442.wav", "_replace442_dv.wav")
    cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, self.vocal_path, volume_path)
    os.system(cmd)
    if not os.path.exists(volume_path):
        print("{} {} ERROR draw volume".format(self.cid, volume_path))
        return gs_err_code_volume_err, None
    dst_path_442 = volume_path
    logging.info("cid={},draw_volume2,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Mix with the accompaniment.
    st = time.time()
    mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav")
    cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, self.acc_path, mix_path)
    os.system(cmd)
    if not os.path.exists(mix_path):
        return gs_err_code_mix_err, None
    logging.info("cid={},mixer,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Encode to mp3 in the delivery directory.
    st = time.time()
    output_dir = os.path.join(self.work_dir, self.cid + "_out")
    os.makedirs(output_dir, exist_ok=True)
    name = str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3").split("/")[-1]
    mix_path_mp3 = os.path.join(output_dir, name)
    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
    print(cmd)
    os.system(cmd)
    if not os.path.exists(mix_path_mp3):
        return gs_err_code_mix_transcode_err, None
    logging.info("cid={},encode,{},sp={}".format(self.cid, svc_file, time.time() - st))
    return gs_err_code_success, mix_path_mp3
利用最佳结果做音效以及合并 :return: """ err, svc_file = self.generate_svc_file(cid, work_dir) gender = -1 if err != gs_err_code_success: return err, svc_file, gender, gender = self.get_gender(svc_file) err, effect_file = self.effect(cid, work_dir, svc_file) if err != gs_err_code_success: return err, svc_file, gender err, mix_mp3_path = self.mix(cid, work_dir, svc_file, effect_file) return err, mix_mp3_path, gender def test(): arr = [ # "611752105020343687", # "611752105023532439", # "611752105030419688", # "611752105030485748", # "611752105030485685", "dzq", ] base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test" s_inst = SongCoverInference() for cid in arr: st = time.time() # err, mix_mp3, gender = s_inst.process_one(cid, os.path.join(base_dir, cid), False) err, mix_mp3, gender = s_inst.process_one_logic(cid, os.path.join(base_dir, cid)) print(mix_mp3, gender) print("cid={} RealFinish err={} sp={}".format(cid, err, time.time() - st)) def test_gene_svc(): base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test" # cid = "clean_yibo" cid = "dzq" work_dir = os.path.join(base_dir, cid) st = time.time() - speaker = "train_sing_base" - speakers_model_path = "data/train_users/{}/logs/32k/G_6000.pth" + speaker = "train_sing_base_v1" + speakers_model_path = "data/train_users/{}/logs/32k/G_157000.pth" speakers_model_config = "data/train_users/{}/config/config.json" model_path = speakers_model_path.format(speaker) config_path = speakers_model_config.format(speaker) # 缓存目录: cache_dir = os.path.join(work_dir, "cache") if os.path.exists(cache_dir): shutil.rmtree(cache_dir) os.makedirs(cache_dir) song_path = os.path.join(cache_dir, "{}_{}.wav".format(cid, speaker)) - # vocal_path = os.path.join(work_dir, "vocal_32.wav") - vocal_path = os.path.join(work_dir, "test_silce.wav") + vocal_path = os.path.join(work_dir, "vocal_32.wav") + # vocal_path = os.path.join(work_dir, "test_silce.wav") inf(model_path, config_path, vocal_path, song_path, "prod") print("finish....") if __name__ == 
def exec_cmd_and_result(cmd):
    """
    Run a shell command and return everything it wrote to stdout.

    :param cmd: command line, executed through the shell by os.popen
    :return: captured standard output as text
    """
    # The context manager closes the pipe even when read() raises, so the
    # child's file descriptor is not leaked.
    with os.popen(cmd) as pipe:
        return pipe.read()
def post_process(self, msg):
    """
    Finish one song: mix, upload the result and record the outcome in the database.

    :param msg: sequence ``[song_id, err, svc_file, effect_file, gender]``
                as produced by the effect worker
    :return: None; the outcome is written to ``svc_queue_table``. Failures
             are stored as a negative state, success as gs_state_finish
             together with the uploaded key and the gender.
    """
    song_id, err, svc_file, effect_file, gender = msg
    work_dir = os.path.join(self.base_dir, str(song_id))
    if err != gs_err_code_success:
        self.update_state(song_id, -err)
        return

    # Replace and mix.
    err, mix_path_mp3 = self.inst.mix(song_id, work_dir, svc_file, effect_file)
    logging.info(
        "post_process:song_id={},work_dir={},svc_file={},gender={}".format(song_id, work_dir, svc_file, gender))
    if err != gs_err_code_success:
        self.update_state(song_id, -err)
        return

    # Upload to COS.
    mix_name = os.path.basename(mix_path_mp3)
    key = "av_res/svc_res/{}".format(mix_name)
    uploaded = upload_file2cos(key, mix_path_mp3)
    logging.info("upload_file2cos:song_id={},key={},mix_path_mp3={}".format(song_id, key, mix_path_mp3))
    if not uploaded:
        # err is gs_err_code_success (0) at this point, so "-err" would
        # store state 0, i.e. the default state, and the row would be
        # picked up again forever. Record the dedicated upload error.
        self.update_state(song_id, -gs_actw_err_code_upload_err)
        return

    # Record the finished state with the uploaded key.
    sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\",gender={} where song_id = {}". \
        format(gs_state_finish, int(time.time()), key, gender, song_id)
    logging.info("post_process:song_id={},sql={}".format(song_id, sql))
    banned_user_map['db'] = "av_db"
    update_db(sql, banned_user_map)
os.path.exists(work_dir): shutil.rmtree(work_dir) os.makedirs(work_dir) logging.info("song_id={},work_dir={},finish".format(song_id, work_dir)) # 预处理 err = self.pre_process(work_dir, song_url) if err != gs_err_code_success: self.update_state(song_id, -err) shutil.rmtree(work_dir) continue logging.info("song_id={},work_dir={},pre_process".format(song_id, work_dir)) # 获取svc数据 err, svc_file = self.inst.generate_svc_file(song_id, work_dir) if err != gs_err_code_success: self.update_state(song_id, -err) shutil.rmtree(work_dir) continue logging.info("song_id={},work_dir={},generate_svc_file".format(song_id, work_dir)) # 做音效处理的异步代码 gender = self.inst.get_gender(svc_file) worker_queue.put([song_id, work_dir, svc_file, gender]) logging.info("song_id={},work_dir={},svc_file={},gender={}".format(song_id, work_dir, svc_file, gender)) pool.close() pool.join() if __name__ == '__main__': actw = AutoCoverToolWorker() actw.process() diff --git a/AutoCoverTool/online/tone_shift_one.py b/AutoCoverTool/online/tone_shift_one.py index d4e2b7c..a74bbab 100644 --- a/AutoCoverTool/online/tone_shift_one.py +++ b/AutoCoverTool/online/tone_shift_one.py @@ -1,369 +1,368 @@ """ 变调的方式做处理 1. 下载 2. 分离 3. 针对于人声变调+2,伴奏+1 4. 
def mix(cid, vocal_path, acc_path, tp):
    """
    Pitch-shift vocal and accompaniment, mix them, encode to AAC and upload.

    The vocal is shifted by +2 when ``tp`` is 1 and by -2 otherwise; the
    accompaniment is passed through the shifter with a pitch of 0.

    :param cid: song id, used in the upload key
    :param vocal_path: path of the vocal wav
    :param acc_path: path of the accompaniment wav
    :param tp: shift type; 1 selects the upward shift
    :return: ``(err_code, key, shifted_vocal_path, tp)``; ``key`` and
             ``shifted_vocal_path`` are None unless err_code is
             gs_err_code_success
    """
    vocal_pitch = 2 if tp == 1 else -2
    acc_pitch = 0

    vocal_path_2 = vocal_path.replace(".wav", "_{}.wav".format(vocal_pitch))
    acc_path_2 = acc_path.replace(".wav", "_{}.wav".format(acc_pitch))
    # tone_shift_one returns True on success.
    if not tone_shift_one(vocal_path, vocal_path_2, vocal_pitch):
        return gs_err_code_tone_shift, None, None, tp
    if not tone_shift_one(acc_path, acc_path_2, acc_pitch):
        return gs_err_code_tone_shift, None, None, tp

    base_dir = os.path.dirname(vocal_path)
    mix_path = "{}/mix_{}_{}.wav".format(base_dir, vocal_pitch, acc_pitch)
    cmd = "{} {} {} {}".format(gs_simple_mixer_path, vocal_path_2, acc_path_2, mix_path)
    print("exec_cmd={}".format(cmd))
    os.system(cmd)
    if not os.path.exists(mix_path):
        return gs_err_code_mix, None, None, tp

    # Transcode to AAC in an mp4 container.
    mix_path_mp4 = mix_path.replace(".wav", ".mp4")
    cmd = "ffmpeg -i {} -b:a 128k -c:a aac -ar 44100 -ac 2 -y {} -loglevel fatal".format(mix_path, mix_path_mp4)
    os.system(cmd)
    if not os.path.exists(mix_path_mp4):
        return gs_err_code_transcode, None, None, tp

    # Upload to COS.
    mix_name = os.path.basename(mix_path_mp4)
    key = "av_res/svc_res_tone_shift/{}/{}".format(str(cid), mix_name)
    if not upload_file2cos(key, mix_path_mp4):
        # Same four-element shape as every other exit, so callers that
        # unpack four values do not fail on this path.
        return gs_err_code_upload, None, None, tp
    return gs_err_code_success, key, vocal_path_2, tp
\ format(state, int(time.time()), song_id) banned_user_map['db'] = "av_db" update_db(sql, banned_user_map) def get_url_by_id(self, song_id): sql = "select song_id, url from svc_queue_table where song_id={}".format(song_id) banned_user_map["db"] = "av_db" data = get_data_by_mysql(sql) if len(data) == 0: return None, None return str(data[0][0]), data[0][1] def get_one_data_logic(self): """ 按照5,4,3的优先级进行获取 :return: """ song_src_arr = [5, 4, 3] for song_src in song_src_arr: song_id, song_url = self.get_one_data(song_src=song_src) if song_id is not None: return song_id, song_url return None, None def get_one_data(self, song_src=3): sql = "select song_id, url from svc_queue_table where state = 0 and song_src={} order by create_time asc limit 1".format( song_src) banned_user_map["db"] = "av_db" data = get_data_by_mysql(sql, banned_user_map) if len(data) == 0: return None, None song_id, song_url = data[0] if song_id != "": self.update_state(song_id, gs_state_use) return str(song_id), song_url def pre_process(self, work_dir, song_url): """ 创建文件夹,下载数据 :return: """ if "?sign=" in song_url: return gs_err_code_download ext = str(song_url).split(".")[-1] dst_file = "{}/src_origin.{}".format(work_dir, ext) cmd = "wget {} -O {}".format(song_url, dst_file) os.system(cmd) if not os.path.exists(dst_file): return gs_err_code_download duration = get_d(dst_file) if duration < 0: return gs_err_code_duration_err print("Duration:", dst_file, duration) if duration > 20 * 60: return gs_err_code_duration_too_long dst_mp3_file = "{}/src.wav".format(work_dir) cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} ".format(dst_file, dst_mp3_file) os.system(cmd) if not os.path.exists(dst_mp3_file): return gs_err_code_trans_to_mp3 return gs_err_code_success def upload_acc(self, cid, acc_path): # 转码 mix_path_aac = acc_path.replace(".wav", ".m4a") cmd = "ffmpeg -i {} -b:a 128k -c:a aac -ar 44100 -ac 2 -y {} -loglevel fatal".format(acc_path, mix_path_aac) os.system(cmd) if not os.path.exists(mix_path_aac): return 
gs_err_code_transcode_acc, None # 上传 mix_name = os.path.basename(mix_path_aac) key = "av_res/svc_res_tone_shift/{}/{}".format(str(cid), mix_name) if not upload_file2cos(key, mix_path_aac): return gs_err_code_upload_acc, None return gs_err_code_success, key def async_mix(self, cid, vocal_path, acc_path): pool = mp.Pool(processes=2) res = [] for i in range(1, 3): ret = pool.apply_async(mix, args=(cid, vocal_path, acc_path, i), error_callback=post_process_err_callback) res.append(ret) pool.close() pool.join() real_res = [] for i in res: real_res.append(i.get(timeout=10 * 60)) return real_res def process_one(self, cid, work_dir): """ :param cid: :param work_dir: :return: """ src_mp3 = os.path.join(work_dir, "src.wav") vocal_path = os.path.join(work_dir, "vocal.wav") acc_path = os.path.join(work_dir, "acc.wav") if not (os.path.exists(vocal_path) and os.path.exists(acc_path)): if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path): return gs_err_code_separate, [] if not os.path.exists(vocal_path) or not os.path.exists(acc_path): return gs_err_code_separate, [] # 当人声的平均能量小于一定值时,则认为无人声(0.01是经验值判定,样本分析来看) # 无人声的样本[0.0056, 0.0003], 有人声的样本(目前最小)[0.046, 0.049] print("power:{},{}".format(cid, get_mean_power(vocal_path))) if get_mean_power(vocal_path) < 0.02: return gs_err_code_duration_no_vocal, [] rets = self.async_mix(cid, vocal_path, acc_path) out_mix_mp3 = ["", ""] out_vocal_path = ["", ""] for ret in rets: err, mix_mp3, vocal_path, tp = ret if err != gs_err_code_success: return err, [] out_mix_mp3[tp - 1] = mix_mp3 out_vocal_path[tp - 1] = vocal_path out_gender = [] for i in range(len(out_vocal_path)): gender, female_rate = self.voice_class.process_one(out_vocal_path[i]) # 性别映射,由0:女 1:男 2:未知 映射为 1:男 2:女 3: 未知 # GENDER_FEMALE = 0,GENDER_MALE = 1,GENDER_OTHER = 2 mmap = [2, 1, 3] gender = mmap[gender] out_gender.append(str(gender)) # 音频1,音频2,性别1,性别2 real_msg = [out_mix_mp3[0], out_mix_mp3[1], out_gender[0], out_gender[1]] return gs_err_code_success, real_msg 
def download_and_transcode(self, url, local_path, local_path_wav): cmd = "wget {} -O {}".format(url, local_path) os.system(cmd) if not os.path.exists(local_path): return -1 cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(local_path, local_path_wav) os.system(cmd) if not os.path.exists(local_path_wav): return -2 return 0 def get_data_from_mysql(self, cid, work_dir): sql = "select starmaker_songid,task_url,complete_url,voice_url from starmaker_musicbook.silence where starmaker_songid={} order by task_id desc limit 1".format( cid) data = get_data_by_mysql(sql, banned_user_map) if len(data) == 0: return gs_err_code_silence_no_data song_id, task_url, complete_url, voice_url = data[0] if complete_url != "" and voice_url != "": """ 将人声与伴奏下载下来 """ ext = str(complete_url).split(".")[-1] acc_dst_file = os.path.join(work_dir, "acc.{}".format(ext)) acc_wav_dst_file = os.path.join(work_dir, "acc.wav") err = self.download_and_transcode(complete_url, acc_dst_file, acc_wav_dst_file) os.unlink(acc_dst_file) if err == -1: return gs_err_code_download_acc if err == -2: return gs_err_code_transcode_acc_v1 ext = str(voice_url).split(".")[-1] vocal_dst_file = os.path.join(work_dir, "vocal.{}".format(ext)) vocal_wav_dst_file = os.path.join(work_dir, "vocal.wav") err = self.download_and_transcode(voice_url, vocal_dst_file, vocal_wav_dst_file) os.unlink(vocal_dst_file) if err == -1: return gs_err_code_download_vocal if err == -2: return gs_err_code_transcode_vocal_v1 return gs_err_code_success return gs_err_code_silence_no_process def process_worker(self): logging.info("start process_worker .....") base_dir = "/tmp/tone_shift_one" if not os.path.exists(base_dir): os.makedirs(base_dir) while True: worker_st = time.time() cid, song_url = self.get_one_data_logic() # cid, song_url = self.get_url_by_id('611752105030548048') if cid is None: time.sleep(5) logging.info("get one data is None ...") continue work_dir = os.path.join(base_dir, str(cid)) if os.path.exists(work_dir): 
shutil.rmtree(work_dir) os.makedirs(work_dir) # 先查看消音数据库中是否已经完成了该项目,已经有的话,就直接下载即可 err = self.get_data_from_mysql(cid, work_dir) if err != gs_err_code_success: # 清空磁盘 shutil.rmtree(work_dir) os.makedirs(work_dir) err = self.pre_process(work_dir, song_url) if err != gs_err_code_success: self.update_state(str(cid), -err) continue - st = time.time() - err, data = self.process_one(str(cid), work_dir) - logging.info("process_finish,{},{}".format(cid, time.time() - st)) - if err == gs_err_code_success and len(data) != 0: - sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\" where song_id = {}". \ - format(gs_state_finish, int(time.time()), ",".join(data), str(cid)) - banned_user_map['db'] = "av_db" - update_db(sql, banned_user_map) - else: - self.update_state(str(cid), -err) - shutil.rmtree(work_dir) - logging.info("process_finish,{},{}".format(cid, time.time() - worker_st)) - + st = time.time() + err, data = self.process_one(str(cid), work_dir) + logging.info("process_finish,{},{}".format(cid, time.time() - st)) + if err == gs_err_code_success and len(data) != 0: + sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\" where song_id = {}". 
\ + format(gs_state_finish, int(time.time()), ",".join(data), str(cid)) + banned_user_map['db'] = "av_db" + update_db(sql, banned_user_map) + else: + self.update_state(str(cid), -err) + shutil.rmtree(work_dir) + logging.info("process_finish,{},{}".format(cid, time.time() - worker_st)) if __name__ == '__main__': ts = ToneShift() ts.process_worker() diff --git a/AutoCoverTool/ref/so_vits_svc/inference_main.py b/AutoCoverTool/ref/so_vits_svc/inference_main.py index 57bfdd2..2efecfe 100644 --- a/AutoCoverTool/ref/so_vits_svc/inference_main.py +++ b/AutoCoverTool/ref/so_vits_svc/inference_main.py @@ -1,85 +1,85 @@ import io import os import sys import logging import time from pathlib import Path from copy import deepcopy import torch import librosa import numpy as np import soundfile from inference import infer_tool from inference import slicer from inference.infer_tool import Svc logging.getLogger('numba').setLevel(logging.WARNING) chunks_dict = infer_tool.read_temp("ref/so_vits_svc/inference/chunks_temp.json") def inf(model_path, config_path, raw_audio_path, dst_path, dev): # model_path = "logs/32k/G_174000-Copy1.pth" # config_path = "configs/config.json" svc_model = Svc(model_path, config_path) out_dir = os.path.dirname(dst_path) print(dst_path) os.makedirs(out_dir, exist_ok=True) # 支持多个wav文件,放在raw文件夹下 tran = 0 - spk_list = ['speaker6'] # 每次同时合成多语者音色 + spk_list = ['speaker1'] # 每次同时合成多语者音色 slice_db = -40 # 默认-40,嘈杂的音频可以-30,干声保留呼吸可以-50 wav_format = 'wav' # 音频输出格式 # infer_tool.fill_a_to_b(trans, clean_names) # for clean_name, tran in zip(clean_names, trans): # raw_audio_path = f"raw/{clean_name}" # if "." 
not in raw_audio_path: # raw_audio_path += ".wav" infer_tool.format_wav(raw_audio_path) wav_path = Path(raw_audio_path).with_suffix('.wav') chunks = slicer.cut(wav_path, db_thresh=slice_db) audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks) for spk in spk_list: audio = [] for (slice_tag, data) in audio_data: print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======') length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample)) raw_path = io.BytesIO() soundfile.write(raw_path, data, audio_sr, format="wav") raw_path.seek(0) if slice_tag: print('jump empty segment') _audio = np.zeros(length) else: out_audio, out_sr = svc_model.infer(spk, tran, raw_path, dev == "test") _audio = out_audio.cpu().numpy() audio.extend(list(_audio)) soundfile.write(dst_path, audio, svc_model.target_sample, format=wav_format) if __name__ == '__main__': g_model = sys.argv[1] # 模型地址 g_config = sys.argv[2] # 配置文件地址 g_audio_path = sys.argv[3] # 输入的音频文件地址,wav g_dst_path = sys.argv[4] # 输出的音频文件地址 if os.path.exists(g_dst_path): print("{} success ...".format(g_dst_path)) exit(0) g_dev = "prod" if len(sys.argv) > 5: g_dev = sys.argv[5] g_aa, g_sr = librosa.load(g_audio_path) d = librosa.get_duration(g_aa, g_sr) # if g_dev != "test": # if d > 250: # print("{} too long".format(g_audio_path)) # exit(0) st = time.time() inf(g_model, g_config, g_audio_path, g_dst_path, g_dev) print("{}, inference sp={}".format(g_audio_path, time.time() - st)) diff --git a/AutoCoverTool/script/get_song_url.py b/AutoCoverTool/script/get_song_url.py index ecd5f05..2635982 100644 --- a/AutoCoverTool/script/get_song_url.py +++ b/AutoCoverTool/script/get_song_url.py @@ -1,162 +1,546 @@ """ 获取歌曲的地址 # song_src=2 是来源108和109的歌曲,未被洗过的 # song_src=1 是曲库给的 # song_src=3 # 用于轻变调的 """ from script.common import * from copy import deepcopy from online.common import update_db def get_url_by_song_id(song_id): sql = "select task_url,starmaker_songid from silence where starmaker_songid = {} order by task_id desc 
limit 1".format( song_id) ban = deepcopy(banned_user_map) ban["db"] = "starmaker_musicbook" data = get_data_by_mysql(sql, ban) if len(data) > 0: return data[0][0] return None def process(): arr = [ - "611752105030770437", - "611752105022704186", - "611752105030770446", - "611752105030660018", - "611752105023142842", - "611752105025980261", - "611752105030770459", - "611752105030770462", - "611752105030770468", - "611752105025957813", - "611752105030595983", - "611752105030770482", - "611752105030770497", - "611752105030659982", - "611752105030770510", - "611752105030770516", - "611752105030770523", - "611752105030770545", - "611752105030770549", - "611752105030770552", - "611752105030770557", - "611752105030770569", - "611752105030770577", - "611752105030770581", - "611752105030770585", - "611752105030770588", - "611752105030770591", - "611752105030770660", - "611752105022647066", - "611752105030770665", - "611752105030770669", - "611752105030770675", - "611752105030770681", - "611752105030563555", - "611752105030770688", - "611752105030770694", - "611752105030770697", - "611752105030770701", - "611752105030770706", - "611752105030776532", - "611752105030776552", - "611752105030776562", - "611752105030776580", - "611752105030563422", - "611752105030776597", - "611752105030776600", - "611752105030776606", - "611752105030776616", - "611752105030776624", - "611752105030776627", - "611752105030780547", - "611752105030780562", - "611752105030780604", - "611752105030780617", - "611752105030780621", - "611752105030780630", - "611752105030780636", - "611752105030780640", - "611752105030780645", - "611752105030780648", - "611752105030780650", - "611752105030780655", - "611752105030780657", - "611752105030780664", - "611752105030780667", - "611752105030780676", - "611752105030780750", - "611752105030780760", - "611752105030780766", - "611752105030780787", - "611752105030780795", - "611752105030780801", - "611752105030780805", - "611752105030780806", - "611752105030780814", - 
"611752105030780833", - "611752105030780840", - "611752105030596072", - "611752105030780846", - "611752105030596128", + "611752105030534669", + "611752105030534671", + "611752105029291860", + "611752105030534665", + "611752105030534667", + "611752105027734182", + "611752105030532701", + "611752105027626957", + "611752105030532696", + "611752105030517855", + "611752105025184103", + "611752105030517856", + "611752105024164143", + "611752105030517852", + "611752105026681421", + "611752105024571437", + "611752105022779865", + "611752105030517845", + "611752105030493464", + "611752105030517843", + "611752105030517018", + "611752105030517014", + "611752105022838003", + "611752105024118499", + "611752105030517015", + "611752105030517012", + "611752105025587378", + "611752105023644389", + "611752105023616289", + "611752105025502433", + "611752105030517008", + "611752105024199100", + "611752105030517003", + "611752105026614996", + "611752105029087255", + "611752105030517518", + "611752105030517840", + "611752105030517844", + "611752105030517859", + "611752105022777749", + "611752105030532705", + "611752105026265975", + "611752105030532703", + "611752105030487368", + "611752105030483712", + "611752105030517850", + "611752105022782535", + "611752105030517021", + "611752105030517854", + "611752105023541359", + "611752105029792918", + "611752105024199117", + "611752105029673594", + "611752105023674624", + "611752105028990732", + "611752105028487811", + "611752105024194923", + "611752105023763921", + "611752105023434554", + "611752105022838184", + "611752105030553757", + "611752105027326100", + "611752105023977089", + "611752105023674617", + "611752105023620980", + "611752105023536537", + "611752105023301456", + "611752105022842491", + "611752105022841266", + "611752105030548398", + "611752105022839768", + "611752105022838572", + "611752105022781228", + "611752105029598755", + "611752105030517531", + "611752105023541369", + "611752105023678576", + "611752105023346238", + 
"611752105027648094", + "611752105027326090", + "611752105027832575", + "611752105030478339", + "611752105027795229", + "611752105027734187", + "611752105022614618", + "611752105023329571", + "611752105023234496", + "611752105026134338", + "611752105030554109", + "611752105030517489", + "611752105029836690", + "611752105030555726", + "611752105030555723", + "611752105030553598", + "611752105030555722", + "611752105029656131", + "611752105030555720", + "611752105023674607", + "611752105023478557", + "611752105030555716", + "611752105030555717", + "611752105022784360", + "611752105022836164", + "611752105029711726", + "611752105022783553", + "611752105030555714", + "611752105022824550", + "611752105022838413", + "611752105022783797", + "611752105022781620", + "611752105022775924", + "611752105022774759", + "611752105022759877", + "611752105022785234", + "611752105030555710", + "611752105030555712", + "611752105022768451", + "611752105022767499", + "611752105030555707", + "611752105022755443", + "611752105022777511", + "611752105022775921", + "611752105022841864", + "611752105022766831", + "611752105022768795", + "611752105022779062", + "611752105030555708", + "611752105030555700", + "611752105030517183", + "611752105030555703", + "611752105030555697", + "611752105022768285", + "611752105020419088", + "611752105022747795", + "611752105022774521", + "611752105030555698", + "611752105030555694", + "611752105030555696", + "611752105030555689", + "611752105022749162", + "611752105022785417", + "611752105022784953", + "611752105022616389", + "611752105030555690", + "611752105030555687", + "611752105022838918", + "611752105022775551", + "611752105022728634", + "611752105022773679", + "611752105030555686", + "611752105022758328", + "611752105030555684", + "611752105030555681", + "611752105030555683", + "611752105022754068", + "611752105030516960", + "611752105022764207", + "611752105022743905", + "611752105022757953", + "611752105030516957", + "611752105030516959", + 
"611752105020332899", + "611752105022841838", + "611752105029665047", + "611752105030516953", + "611752105030490291", + "611752105022762732", + "611752105030516955", + "611752105022762600", + "611752105022774589", + "611752105022754286", + "611752105030516949", + "611752105022774560", + "611752105030516946", + "611752105030487061", + "611752105030516947", + "611752105030516933", + "611752105030516940", + "611752105030555748", + "611752105030555742", + "611752105030555740", + "611752105030555741", + "611752105030555737", + "611752105030553605", + "611752105030555739", + "611752105030555733", + "611752105030555729", + "611752105030544017", + "611752105030555727", + "611752105030486334", + "611752105030544028", + "611752105022647043", + "611752105024402503", + "611752105022704184", + "611752105027532732", + "611752105028858105", + "611752105027532750", + "611752105022778279", + "611752105030517318", + "611752105026752362", + "611752105023636284", + "611752105030517315", + "611752105022742205", + "611752105022754485", + "611752105030517314", + "611752105023462684", + "611752105022728585", + "611752105023751007", + "611752105030484779", + "611752105030550189", + "611752105022729202", + "611752105030486059", + "611752105029292588", + "611752105022647103", + "611752105027273004", + "611752105027460081", + "611752105026900908", + "611752105022647046", + "611752105029648513", + "611752105028032107", + "611752105026452639", + "611752105024996266", + "611752105024728131", + "611752105027832575", + "611752105030483919", + "611752105030484774", + "611752105027228696", + "611752105022728126", + "611752105022840114", + "611752105026982813", + "611752105022741687", + "611752105030486054", + "611752105020352152", + "611752105020352156", + "611752105020336897", + "611752105020256286", + "611752105030765622", + "611752105028820609", + "611752105030488595", + "611752105030517536", + "611752105030501857", + "611752105030478339", + "611752105025957389", + "611752105024415490", + 
"611752105027854244", + "611752105029527187", + "611752105028444597", + "611752105027903168", + "611752105028778353", + "611752105028906605", + "611752105027781526", + "611752105027877887", + "611752105027795229", + "611752105027734187", + "611752105028820612", + "611752105027626964", + "611752105027460080", + "611752105027507932", + "611752105027611342", + "611752105027435127", + "611752105029648514", + "611752105026874730", + "611752105030591117", + "611752105025541483", + "611752105026536913", + "611752105022647044", + "611752105023434557", + "611752105023440333", + "611752105023460357", + "611752105023510939", + "611752105022842387", + "611752105023674599", + "611752105023160140", + "611752105022647074", + "611752105022615220", + "611752105028408822", + "611752105022772279", + "611752105022614618", + "611752105020417684", + "611752105024608150", + "611752105030499232", + "611752105030485430", + "611752105023683357", + "611752105023301455", + "611752105023458990", + "611752105027228689", + "611752105026437878", + "611752105027460089", + "611752105029570157", + "611752105022700847", + "611752105029006303", + "611752105028820629", + "611752105023134539", + "611752105022647087", + "611752105027326104", + "611752105022652047", + "611752105022839468", + "611752105028944645", + "611752105022911042", + "611752105020348944", + "611752105020348945", + "611752105020332345", + "611752105027484925", + "611752105027484915", + "611752105028870536", + "611752105027877846", + "611752105027307631", + "611752105026437853", + "611752105023604729", + "611752105024230229", + "611752105022816170", + "611752105028523417", + "611752105022652046", + "611752105022782720", + "611752105024380150", + "611752105022839949", + "611752105022761851", + "611752105022741054", + "611752105022756250", + "611752105030533676", + "611752105022756563", + "611752105022728598", + "611752105022760225", + "611752105025034424", + "611752105022776389", + "611752105022767205", + "611752105030534929", + 
"611752105022775308", + "611752105030502738", + "611752105022615626", + "611752105030502742", + "611752105022741814", + "611752105029648552", + "611752105022742136", + "611752105030486351", + "611752105022770959", + "611752105022765477", + "611752105022751650", + "611752105022742577", + "611752105030534505", + "611752105022973044", + "611752105029649153", + "611752105030549651", + "611752105025494340", + "611752105020343697", + "611752105020283852", + "611752105020373961", + "611752105022729294", + "611752105020409111", + "611752105024938882", + "611752105029443802", + "611752105027903154", + "611752105022614626", + "611752105020308424", + "611752105030629613", + "611752105030534740", + "611752105030534855", + "611752105022782724", + "611752105027781516", + "611752105029648513", + "611752105020417686", + "611752105024996266", + "611752105024728131", + "611752105027832575", + "611752105029646791", + "611752105027228696", + "611752105022614749", + "611752105020336084", + "611752105026982813", + "611752105022783586", + "611752105022741687", + "611752105023870020", + "611752105022729203", + "611752105028143469", + "611752105030486054", + "611752105022729411", + "611752105020336950", + "611752105020256284", + "611752105030548045", + "611752105030503007", + "611752105029047774", + "611752105026792339", + "611752105026449363", + "611752105026736869", + "611752105022614727", + "611752105022615372", + "611752105022780524", + "611752105022769594", + "611752105022758407", + "611752105022746664", + "611752105022763120", + "611752105022745603", + "611752105030487351", + "611752105022747491", + "611752105022728209", + "611752105022884087", + "611752105022890433", + "611752105022741836", + "611752105022728574", + "611752105022728612", + "611752105022739185", + "611752105030532703", + "611752105024118493", + "611752105030535938", + "611752105030487366", + "611752105030487368", + "611752105030487365", + "611752105022843075", + "611752105022790159", + "611752105022778099", + 
"611752105022776703", + "611752105022776364", + "611752105022774641", + "611752105022770768", + "611752105022770226", + "611752105022769617", + "611752105022769056", + "611752105029780685", + "611752105030477448", + "611752105022767219", + "611752105022754490", + "611752105022760812", + "611752105030487360", + "611752105022749100", + "611752105022728481", + "611752105022769181", + "611752105030487358", + "611752105022739209", + "611752105022774610", + "611752105022728721", + "611752105022741064", + "611752105022775968", + "611752105030487354", + "611752105022771053", + "611752105022779825", + "611752105022744563", + "611752105022744436", + "611752105030487355", + "611752105022771161", + "611752105022748598", + "611752105022766486", + "611752105022814952", + "611752105022728118", + "611752105022778616", + "611752105022778275", + "611752105022614337", + "611752105022774253", + "611752105022762324", + "611752105026299314", + "611752105022784079", + "611752105022774583", + "611752105022770293", + "611752105030487359", + "611752105022765790", + "611752105030535451", + "611752105030517752", + "611752105030487362", + "611752105022775916", + "611752105022776159", + "611752105029292588", + "611752105022838005", + "611752105023541359", + "611752105020336965", + "611752105020293286" ] ban = deepcopy(banned_user_map) ban["db"] = "av_db" for sid in arr: url = get_url_by_song_id(sid) if url is not None: print("out,{},{}".format(url, sid)) # 只要没有对外输出过,均可以向其中填充 sql = "select song_id from svc_queue_table where song_id={} and (song_src in (3, 4, 5) and state=2)".format(sid) data = get_data_by_mysql(sql, ban) if len(data) == 0: tm = int(time.time()) - sql = "replace INTO svc_queue_table (song_id, url, create_time, update_time, song_src) VALUES ({}, \"{}\",{}, {}, 5)" \ + sql = "replace INTO svc_queue_table (song_id, url, create_time, update_time, song_src) VALUES ({}, \"{}\",{}, {}, 4)" \ .format(sid, url, tm, tm) update_db(sql, ban) def get_data_from_song(): sql = """ select 
tb1.song_id, tb1.recording_count from ( select song_id,recording_count from starmaker.song where song_src in (108,109) and song_status = 2 order by recording_count desc ) as tb1 left join ( select song_id from av_db.svc_queue_table ) as tb2 on tb1.song_id = tb2.song_id where tb2.song_id is null order by tb1.recording_count desc limit 5000 """ ban = deepcopy(banned_user_map) ban_v1 = deepcopy(banned_user_map) ban["db"] = "starmaker_musicbook" ban_v1["db"] = "av_db" data = get_data_by_mysql(sql, ban) for dt in data: sid = dt[0] url = get_url_by_song_id(sid) if url is not None: print("out,{},{}".format(url, sid)) tm = int(time.time()) sql = "insert INTO svc_queue_table (song_id, url, create_time, update_time, song_src) VALUES ({}, \"{}\", {}, {}, 3)" \ .format(sid, url, tm, tm) update_db(sql, ban_v1) if __name__ == '__main__': # get_diff_song() # get_data_from_song() process()