diff --git a/AutoCoverTool/online/inference_one.py b/AutoCoverTool/online/inference_one.py index 9893c1a..0974c8e 100644 --- a/AutoCoverTool/online/inference_one.py +++ b/AutoCoverTool/online/inference_one.py @@ -1,683 +1,684 @@ """ 单个处理的逻辑 song_id: ---src.mp3 // 源数据,需要提前放进去 ---cache ---vocal.wav // 分离之后产生 ---acc.wav // 分离之后产生 ---vocal_32.wav // 分离之后产生 ---song_id_sp1.wav // 合成之后产生 ---song_id_sp2.wav // 合成之后产生 ---song_id_sp2_d.wav // 降噪之后生成 ---song_id_sp2_dv.wav // 降噪+拉伸之后产生 [占比太高的不产生] ---song_id_sp2_dve442.wav // 手动调整之后产生 ---song_id_sp2_dve442_replace.wav // 替换之后产生 ---song_id_sp2_dve442_replace_mix.wav // 人声+伴奏混合之后产生 ---song_id --acc.mp3 // 44k双声道320k --vocal.mp3 // 44k双声道320k --src.mp3 // 44k双声道320k --song_id_sp2_dv.mp3 // 44k单声道320k ---song_id_out // 对外输出 --src.mp3 // 原始音频 --song_id_sp2_dv_replace_mix.mp3 // 制作完成的音频 环境安装: conda create -n auto_song_cover python=3.9 # 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt] # 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt] pip install librosa pip install scikit-maad pip install praat-parselmouth pip install matplotlib pip install torchvision pip install madmom pip install torchstat 环境设置: export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame """ import os import time import shutil import random import logging import librosa logging.basicConfig(filename='/tmp/inference.log', level=logging.INFO) gs_err_code_success = 0 gs_err_code_no_src_mp3 = 1 gs_err_code_separate = 2 gs_err_code_trans_32 = 3 gs_err_code_encode_err = 4 gs_err_code_replace_err = 5 gs_err_code_replace_trans_err = 6 gs_err_code_mix_err = 7 gs_err_code_mix_transcode_err = 8 gs_err_code_no_src_dir = 9 gs_err_code_volume_err = 10 gs_err_code_trans2_442 = 11 gs_err_code_reverb = 12 gs_err_code_no_good_choice = 13 gs_err_code_preprocess_vocal = 14 gs_err_code_replace_except_err = 15 gs_denoise_exe = 
"/opt/soft/bin/denoise_exe" gs_draw_volume_exe = "/opt/soft/bin/draw_volume" gs_simple_mixer_path = "/opt/soft/bin/simple_mixer" gs_rever_path = "/opt/soft/bin/dereverbrate" from ref.music_remover.separate_interface import SeparateInterface from ref.so_vits_svc.inference_main import * from ref.split_dirty_frame.script.process_one import ReplaceVocalFrame, construct_power_fragment class SongCoverInference: def __init__(self): self.work_dir = None self.cache_dir = None self.cid = None self.src_mp3 = None self.vocal_path = None self.vocal_32_path = None self.acc_path = None self.speakers = [ 10414574138721494, 10414574140317353, 1688849864840588, 3634463651, 5629499489839033, 5910973794723621, 6755399374234747, 8162774327817435, 8162774329368194, 1125899914308640, # 以下为男声,包括这个 12384898975368914, 12947848931397021, 3096224748076687, 3096224751151928, 5066549357604730, 5348024335101054, 6755399442719465, 7036874421386111 ] self.speakers2gender = { 10414574138721494: 2, 10414574140317353: 2, 1688849864840588: 2, 3634463651: 2, 5629499489839033: 2, 5910973794723621: 2, 6755399374234747: 2, 8162774327817435: 2, 8162774329368194: 2, 1125899914308640: 1, # 1是男 12384898975368914: 1, 12947848931397021: 1, 3096224748076687: 1, 3096224751151928: 1, 5066549357604730: 1, 5348024335101054: 1, 6755399442719465: 1, 7036874421386111: 1 } self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth" self.speakers_model_config = "data/train_users/{}/config/config.json" st = time.time() self.separate_inst = None logging.info("post process ... 
def get_start_ms(self, vocal_path):
    """
    Find the start of the first long voiced fragment in a vocal track.

    The track is loaded at 16 kHz, normalised, and reduced to one mean
    absolute amplitude value per 10 ms hop. ``construct_power_fragment``
    converts that curve into ``[start_frame, frame_count]`` pairs
    (presumably the low-energy stretches -- TODO confirm against its
    implementation). The gaps between those pairs become fragments of the
    form ``[start_sec, duration_sec]``, and fragments separated by less than
    50 ms are merged.

    NOTE(review): despite the ``_ms`` suffix the returned value is in seconds.
    NOTE(review): the original comment mentioned a "10 s continuous" rule;
    the code only checks the 3 s minimum length and the 0.1 ratio below.

    :param vocal_path: path of the vocal audio file to analyse
    :return: start time (seconds) of the first fragment that lasts at least
             3 s and for which some later fragment ``j`` gives a fragment
             duration / elapsed time ratio above 0.1; ``-1`` if none qualifies
    """
    from itertools import accumulate

    audio, sr = librosa.load(vocal_path, sr=16000)
    audio = librosa.util.normalize(audio)
    total_sec = len(audio) / sr

    # One value per 10 ms hop (160 samples at 16 kHz); each value is the mean
    # absolute amplitude of the 160 samples starting at that hop.
    power_arr = [
        np.sum(np.abs(audio[i:i + 160])) / 160
        for i in range(0, len(audio) - 1600, 160)
    ]
    power_arr = construct_power_fragment(power_arr)

    # Frame indices are converted to seconds with the 10 ms hop (x 0.01).
    fragments = []
    last_pos = 0
    for line in power_arr:
        start = round(float(line[0]) * 0.01, 3)
        duration = round(float(line[1]) * 0.01, 3)
        fragments.append([last_pos, start - last_pos])
        last_pos = start + duration
    if last_pos < total_sec:
        fragments.append([last_pos, total_sec - last_pos])

    # Nothing to search in (e.g. an empty recording): the original code
    # crashed with IndexError on fragments[0] here.
    if not fragments:
        return -1

    # Merge neighbours whose gap is shorter than 50 ms.
    idx = 0
    while idx < len(fragments) - 1:
        cur, nxt = fragments[idx], fragments[idx + 1]
        if nxt[0] - (cur[0] + cur[1]) < 0.05:
            cur[1] = nxt[0] + nxt[1] - cur[0]
            del fragments[idx + 1]
        else:
            idx += 1

    # tot_vocal_duration[k] = summed duration of fragments[0..k].
    tot_vocal_duration = list(accumulate(frag[1] for frag in fragments))

    for i, (frag_start, frag_len) in enumerate(fragments):
        if frag_len < 3:
            continue
        before = tot_vocal_duration[i - 1] if i > 0 else 0
        for j in range(i + 1, len(fragments)):
            covered = tot_vocal_duration[j] - before
            span = fragments[j][1] + fragments[j][0] - frag_start
            if covered / span > 0.1:
                return frag_start
    return -1
def preprocess_vocal(self, songs, vocal_path):
    """
    Denoise each candidate and then run the draw-volume tool on it.

    For every input ``X.wav`` the denoiser writes ``X_d.wav`` and the
    draw-volume tool (given the denoised file and ``vocal_path``) writes
    ``X_dv.wav``. A candidate is dropped when either output file is missing
    after its tool has run.

    :param songs: paths of the synthesized wav files to process
    :param vocal_path: reference vocal passed to the draw-volume tool
    :return: list of the ``*_dv.wav`` paths that were produced
    """

    def _with_suffix(path, suffix):
        # Rewrite only the trailing ".wav". The previous str.replace() touched
        # every ".wav" in the path and, for a name without ".wav", produced an
        # output path identical to the input.
        path = str(path)
        root = path[:-len(".wav")] if path.endswith(".wav") else path
        return root + suffix

    st = time.time()
    dv_out_list = []
    for song in songs:
        denoise_path = _with_suffix(song, "_d.wav")
        os.system("{} {} {}".format(gs_denoise_exe, song, denoise_path))
        if not os.path.exists(denoise_path):
            print("{} {} ERROR denoise".format(self.cid, song))
            continue

        volume_path = _with_suffix(song, "_dv.wav")
        os.system("{} {} {} {}".format(gs_draw_volume_exe, denoise_path, vocal_path, volume_path))
        if not os.path.exists(volume_path):
            # This branch used to print "ERROR denoise", hiding which step failed.
            print("{} {} ERROR draw volume".format(self.cid, volume_path))
            continue
        dv_out_list.append(volume_path)

    print("preprocess_vocal belen = {} len = {} finish sp = {}".format(
        len(songs), len(dv_out_list), time.time() - st))
    return dv_out_list
"acc.mp3") dst_vocal_path = os.path.join(out_dir, "vocal.mp3") shutil.copyfile(self.src_mp3, dst_mp3_path) cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.acc_path, dst_acc_path) os.system(cmd) if not os.path.exists(dst_acc_path): return gs_err_code_encode_err cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.vocal_path, dst_vocal_path) os.system(cmd) if not os.path.exists(dst_vocal_path): return gs_err_code_encode_err # 将所有数据放到out_dir中,用于给人工标注 for dv_wav in dv_out_list: dv_wav_name = str(dv_wav).split("/")[-1].replace(".wav", "_441.mp3") dst_dv_path = os.path.join(out_dir, dv_wav_name) cmd = "ffmpeg -i {} -ar 44100 -ac 1 -ab 320k -y {} -loglevel fatal".format(dv_wav, dst_dv_path) os.system(cmd) if not os.path.exists(dst_dv_path): print("{} encode err!".format(cmd)) continue logging.info( "preprocess_vocal output sp = {}".format(time.time() - st)) def process_one(self, cid, work_dir, enable_output=False): logging.info("\nstart:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir)) self.cid = cid self.work_dir = work_dir # 所有不对外交付的,全部放到这里 self.cache_dir = os.path.join(work_dir, "cache") if os.path.exists(self.cache_dir): shutil.rmtree(self.cache_dir) os.makedirs(self.cache_dir) self.src_mp3 = os.path.join(self.work_dir, "src.mp3") if not os.path.exists(self.src_mp3): return gs_err_code_no_src_mp3 self.vocal_path = os.path.join(self.cache_dir, "vocal.wav") self.vocal_32_path = os.path.join(self.cache_dir, "vocal_32.wav") self.acc_path = os.path.join(self.cache_dir, "acc.wav") if not os.path.exists(self.vocal_32_path): logging.info("start separate ... 
def after_process(self, cid, work_dir, in_file, effect_file, vocal_file, acc_file, need_draw=True,
                  need_reverb=True):
    """
    Post-process one synthesized vocal: replace frames, mix with the accompaniment, encode to mp3.

    Steps: optional reverb transfer, optional draw-volume pass, frame
    replacement, transcode to 44.1 kHz stereo, a second draw-volume pass,
    mixing with ``acc_file``, and a 320k mp3 encode into ``<work_dir>/<cid>_out``.

    Fixes compared with the previous version:
    * every exit returns ``(err, path)``; the missing-work-dir branch used to
      return a bare int, which broke callers that unpack two values;
    * file names after the second draw-volume pass follow the
      ``_replace442_dv`` / ``_replace442_dv_mix`` scheme used by ``mix``.
      Before, the ``_replace442.wav`` pattern no longer matched, so the mixer
      output path was identical to its input and the final "mp3" kept a
      ``.wav`` name;
    * the frame-replacement model is created on demand, as the other methods do.

    :param cid: song id; stored on ``self.cid``
    :param work_dir: working directory of the song; stored on ``self.work_dir``
    :param in_file: synthesized vocal passed to the frame replacer (and to the reverb step)
    :param effect_file: processed vocal; overwritten by the reverb output when ``need_reverb``
    :param vocal_file: original separated vocal used as reference
    :param acc_file: accompaniment track to mix with
    :param need_draw: run the draw-volume tool on ``effect_file`` first
    :param need_reverb: derive ``effect_file`` from ``in_file`` via ``reverb_by_vocal``
    :return: ``(err_code, mp3_path)``; ``mp3_path`` is ``None`` on failure
    """
    if need_reverb:
        err, effect_file = self.reverb_by_vocal(in_file)
        if err != gs_err_code_success:
            return err, None

    if need_draw:
        volume_path = str(effect_file).replace(".wav", "_dv.wav")
        cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, vocal_file, volume_path)
        print(cmd)
        os.system(cmd)
        if not os.path.exists(volume_path):
            print("{} {} ERROR draw volume".format(self.cid, volume_path))
            return gs_err_code_volume_err, None
        effect_file = volume_path

    st = time.time()
    self.cid = cid
    self.work_dir = work_dir
    self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
    if not os.path.exists(self.work_dir):
        return gs_err_code_no_src_dir, None

    # Frame replacement; its output is expected at "<effect_file>_replace.wav".
    if self.replace_vocal_frame_inst is None:
        self.replace_vocal_frame_inst = ReplaceVocalFrame("data/models/split_dirty_frame_v5_3_epoch3_852.pth")
    self.replace_vocal_frame_inst.process(in_file, effect_file, vocal_file)
    dst_path = effect_file + "_replace.wav"
    if not os.path.exists(dst_path):
        return gs_err_code_replace_err, None
    print("replace_vocal_frame_inst sp = {}".format(time.time() - st))

    # Transcode to 44.1 kHz stereo.
    dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442)
    os.system(cmd)
    if not os.path.exists(dst_path_442):
        return gs_err_code_replace_trans_err, None

    # Second draw-volume pass after transcoding, to keep the loudness.
    volume_path = str(dst_path_442).replace("_replace442.wav", "_replace442_dv.wav")
    cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, vocal_file, volume_path)
    print(cmd)
    os.system(cmd)
    if not os.path.exists(volume_path):
        print("{} {} ERROR draw volume".format(self.cid, volume_path))
        return gs_err_code_volume_err, None
    dst_path_442 = volume_path

    # Mix vocal and accompaniment into a file distinct from the mixer input.
    mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav")
    cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, acc_file, mix_path)
    print("{}".format(cmd))
    os.system(cmd)
    if not os.path.exists(mix_path):
        return gs_err_code_mix_err, None

    # Encode the mix to mp3 in the delivery directory.
    output_dir = os.path.join(self.work_dir, self.cid + "_out")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    name = str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3").split("/")[-1]
    mix_path_mp3 = os.path.join(output_dir, name)
    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
    os.system(cmd)
    if not os.path.exists(mix_path_mp3):
        return gs_err_code_mix_transcode_err, None
    return gs_err_code_success, mix_path_mp3
def mix(self, cid, work_dir, svc_file, effect_file):
    """
    Replace frames in the processed vocal, mix it with the accompaniment and encode an mp3.

    Steps: draw-volume on ``effect_file``, frame replacement, transcode to
    44.1 kHz stereo, second draw-volume pass, mix with ``self.acc_path``,
    320k mp3 encode into ``<work_dir>/<cid>_out``. Each step is timed and logged.

    Fixes compared with the previous version:
    * the exception log passed ``{}`` placeholders together with positional
      logging arguments; logging formats with ``%``, so the handler reported
      a formatting error instead of the message;
    * the frame-replacement model is created on demand, so calling this
      method on an instance that has not run inference no longer depends on
      an ``AttributeError`` being swallowed.

    :param cid: song id
    :param work_dir: working directory of the song
    :param svc_file: synthesized vocal selected by inference
    :param effect_file: ``svc_file`` after the effect (reverb) step
    :return: ``(err_code, mp3_path)``; ``mp3_path`` is ``None`` on failure
    """
    st = time.time()
    err = self.prepare_env(cid, work_dir)
    if err != gs_err_code_success:
        return err, None
    logging.info("cid={},effect_and_mix,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Draw-volume pass on the effect file.
    st = time.time()
    volume_path = str(effect_file).replace(".wav", "_dv.wav")
    cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, self.vocal_path, volume_path)
    os.system(cmd)
    if not os.path.exists(volume_path):
        print("{} {} ERROR draw volume".format(self.cid, volume_path))
        return gs_err_code_volume_err, None
    effect_file = volume_path
    logging.info("cid={},draw_volume,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Frame replacement; its output is expected at "<effect_file>_replace.wav".
    st = time.time()
    try:
        if self.replace_vocal_frame_inst is None:
            self.replace_vocal_frame_inst = ReplaceVocalFrame(
                "data/models/split_dirty_frame_v5_3_epoch3_852.pth")
        self.replace_vocal_frame_inst.process(svc_file, effect_file, self.vocal_path)
    except Exception as ex:
        # logging.exception also records the traceback of the failure.
        logging.exception("{},replace_vocal_frame_inst, {}".format(self.cid, ex))
        return gs_err_code_replace_except_err, None
    dst_path = effect_file + "_replace.wav"
    if not os.path.exists(dst_path):
        return gs_err_code_replace_err, None
    logging.info("cid={},replace_vocal_frame_inst,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Transcode to 44.1 kHz stereo.
    st = time.time()
    dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442)
    os.system(cmd)
    if not os.path.exists(dst_path_442):
        return gs_err_code_replace_trans_err, None
    logging.info("cid={},transcode,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Second draw-volume pass after transcoding, to keep the loudness.
    st = time.time()
    volume_path = str(dst_path_442).replace("_replace442.wav", "_replace442_dv.wav")
    cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, self.vocal_path, volume_path)
    os.system(cmd)
    if not os.path.exists(volume_path):
        print("{} {} ERROR draw volume".format(self.cid, volume_path))
        return gs_err_code_volume_err, None
    dst_path_442 = volume_path
    logging.info("cid={},draw_volume2,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Mix vocal and accompaniment.
    st = time.time()
    mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav")
    cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, self.acc_path, mix_path)
    os.system(cmd)
    if not os.path.exists(mix_path):
        return gs_err_code_mix_err, None
    logging.info("cid={},mixer,{},sp={}".format(self.cid, svc_file, time.time() - st))

    # Encode the mix to mp3 in the delivery directory.
    st = time.time()
    output_dir = os.path.join(self.work_dir, self.cid + "_out")
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    name = str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3").split("/")[-1]
    mix_path_mp3 = os.path.join(output_dir, name)
    cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
    print(cmd)
    os.system(cmd)
    if not os.path.exists(mix_path_mp3):
        return gs_err_code_mix_transcode_err, None
    logging.info("cid={},encode,{},sp={}".format(self.cid, svc_file, time.time() - st))
    return gs_err_code_success, mix_path_mp3
利用最佳结果做音效以及合并 :return: """ err, svc_file = self.generate_svc_file(cid, work_dir) gender = -1 if err != gs_err_code_success: return err, svc_file, gender, gender = self.get_gender(svc_file) err, effect_file = self.effect(cid, work_dir, svc_file) if err != gs_err_code_success: return err, svc_file, gender err, mix_mp3_path = self.mix(cid, work_dir, svc_file, effect_file) return err, mix_mp3_path, gender def test(): arr = [ # "611752105020343687", # "611752105023532439", # "611752105030419688", - "611752105030485748", + # "611752105030485748", + "611752105030485685" ] base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test" s_inst = SongCoverInference() for cid in arr: st = time.time() # err, mix_mp3, gender = s_inst.process_one(cid, os.path.join(base_dir, cid), False) err, mix_mp3, gender = s_inst.process_one_logic(cid, os.path.join(base_dir, cid)) print(mix_mp3, gender) print("cid={} RealFinish err={} sp={}".format(cid, err, time.time() - st)) if __name__ == '__main__': test() diff --git a/AutoCoverTool/online/inference_worker.py b/AutoCoverTool/online/inference_worker.py index 3ad3882..f6e66e0 100644 --- a/AutoCoverTool/online/inference_worker.py +++ b/AutoCoverTool/online/inference_worker.py @@ -1,239 +1,240 @@ """ 离线worker 数据库字段要求: // 其中state的状态 // 0:默认,1:被取走,<0异常情况,2完成 // 超时到一定程度也会被重新放回来 数据库格式: id,song_id,url,state,svc_url,create_time,update_time,gender 启动时的环境要求: export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame """ import os import shutil import logging import multiprocessing as mp from online.inference_one import * from online.common import * gs_actw_err_code_download_err = 10001 gs_actw_err_code_trans_err = 10002 gs_actw_err_code_upload_err = 10003 gs_state_default = 0 gs_state_use = 1 gs_state_finish = 2 GS_REGION = "ap-singapore" GS_BUCKET_NAME = "starmaker-sg-1256122840" # GS_COSCMD = "/bin/coscmd" GS_COSCMD = "/opt/soft/anaconda3/bin/coscmd" 
def upload_file2cos(key, file_path, region=GS_REGION, bucket_name=GS_BUCKET_NAME):
    """
    Upload a local file to COS and confirm it by reading back its size.

    After a successful ``upload`` command the object's ``Content-Length`` is
    queried with the ``info`` command; the upload counts as successful only
    when that value parses as a number greater than zero.

    The size lookup used to call ``float()`` directly on the command output,
    which raised ``ValueError`` whenever the ``info`` pipeline printed
    nothing. That case now returns ``False``.

    :param key: object key inside the bucket
    :param file_path: path of the local file
    :param region: COS region
    :param bucket_name: COS bucket name
    :return: ``True`` when the object exists remotely with a positive size, else ``False``
    """
    cmd = "{} -c {} -r {} -b {} upload {} {}".format(GS_COSCMD, GS_CONFIG_PATH, region, bucket_name, file_path, key)
    print(cmd)
    if not exec_cmd(cmd):
        return False

    cmd = "{} -c {} -r {} -b {} info {}".format(GS_COSCMD, GS_CONFIG_PATH, region, bucket_name, key) \
          + "| grep Content-Length |awk '{print $2}'"
    res_str = exec_cmd_and_result(cmd)
    logging.info("{},res={}".format(key, res_str))
    try:
        size = float(res_str.strip())
    except ValueError:
        # Empty or non-numeric output: the object could not be confirmed.
        return False
    return size > 0
def post_process(self, msg):
    """
    Finish one job returned by an effect worker: mix, upload, update the queue row.

    ``msg`` is ``[song_id, err, svc_file, effect_file, gender]`` as produced
    by the module-level ``effect`` worker. A failed step is stored in the
    ``state`` column as the negated error code; on success the row is set to
    ``gs_state_finish`` together with ``svc_url`` and ``gender``.

    A failed upload used to store ``-err`` while ``err`` was still
    ``gs_err_code_success`` (0). That wrote state 0, the "default" state, so
    the job was picked up again on every poll. It now records
    ``-gs_actw_err_code_upload_err``.

    :param msg: result message taken from the finish queue
    :return: None
    """
    song_id, err, svc_file, effect_file, gender = msg
    work_dir = os.path.join(self.base_dir, str(song_id))
    if err != gs_err_code_success:
        self.update_state(song_id, -err)
        return

    # Frame replacement and mixing.
    err, mix_path_mp3 = self.inst.mix(song_id, work_dir, svc_file, effect_file)
    logging.info(
        "post_process:song_id={},work_dir={},svc_file={},gender={}".format(song_id, work_dir, svc_file, gender))

    svc_url = None
    if err != gs_err_code_success:
        state = -err
    else:
        # Upload the finished mp3 to COS.
        mix_name = os.path.basename(mix_path_mp3)
        key = "av_res/svc_res/{}".format(mix_name)
        if upload_file2cos(key, mix_path_mp3):
            state = gs_state_finish
            svc_url = key
        else:
            state = -gs_actw_err_code_upload_err
        logging.info("upload_file2cos:song_id={},key={},mix_path_mp3={}".format(song_id, key, mix_path_mp3))

    # Update the database.
    if state != gs_state_finish:
        self.update_state(song_id, state)
        return
    sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\",gender={} where song_id = {}". \
        format(gs_state_finish, int(time.time()), svc_url, gender, song_id)
    logging.info("post_process:song_id={},sql={}".format(song_id, sql))
    banned_user_map['db'] = "av_db"
    update_db(sql, banned_user_map)
svc_file, gender)) pool.close() pool.join() if __name__ == '__main__': actw = AutoCoverToolWorker() actw.process() diff --git a/AutoCoverTool/online/tone_shift_one.py b/AutoCoverTool/online/tone_shift_one.py new file mode 100644 index 0000000..2abe3e0 --- /dev/null +++ b/AutoCoverTool/online/tone_shift_one.py @@ -0,0 +1,173 @@ +""" +变调的方式做处理 +1. 下载 +2. 分离 +3. 针对于人声变调+2,伴奏+1 +4. 合成 +""" + +import os +import shutil +import logging +from ref.music_remover.separate_interface import SeparateInterface +from online.inference_worker import upload_file2cos, gs_state_use, gs_state_finish, gs_state_default +from online.common import * + +logging.basicConfig(filename='/tmp/tone_shift_one.log', level=logging.INFO) + +gs_tone_shift_exe = "/opt/soft/bin/tone_shift_exe" +gs_simple_mixer_path = "/opt/soft/bin/simple_mixer" + +gs_err_code_success = 0 +gs_err_code_tone_shift = 1 +gs_err_code_mix = 2 +gs_err_code_transcode = 3 +gs_err_code_upload = 4 +gs_err_code_download = 5 +gs_err_code_trans_to_mp3 = 6 +gs_err_code_separate = 7 + + +class ToneShift: + def __init__(self): + self.separate_inst = SeparateInterface() + + def update_state(self, song_id, state): + sql = "update svc_queue_table set state={},update_time={} where song_id = {}". 
def pre_process(self, work_dir, song_url):
    """
    Download the source audio and transcode it to ``<work_dir>/src.mp3``.

    Fixes compared with the previous version:
    * ``wget -O`` leaves the output file behind when the download fails, so
      checking only that the file exists never detected a failure. The exit
      status and a non-empty file are now required;
    * the URL and paths are shell-quoted, because a URL containing ``&`` or
      ``?`` was split by the shell;
    * the extension is taken from the URL path only, so a query string or a
      dot in the host name no longer ends up in the local file name.

    :param work_dir: existing directory that receives the downloaded and transcoded files
    :param song_url: URL of the source audio
    :return: ``gs_err_code_success``, ``gs_err_code_download`` or ``gs_err_code_trans_to_mp3``
    """
    import shlex
    from urllib.parse import urlparse

    url = str(song_url)
    # A URL without an extension gets "dat" as a placeholder.
    # NOTE(review): assumes ffmpeg identifies the format from content - confirm.
    ext = os.path.splitext(urlparse(url).path)[1].lstrip(".") or "dat"
    dst_file = "{}/src_origin.{}".format(work_dir, ext)
    cmd = "wget {} -O {}".format(shlex.quote(url), shlex.quote(dst_file))
    print(cmd)
    ret = os.system(cmd)
    if ret != 0 or not os.path.isfile(dst_file) or os.path.getsize(dst_file) == 0:
        return gs_err_code_download

    dst_mp3_file = "{}/src.mp3".format(work_dir)
    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} ".format(shlex.quote(dst_file), shlex.quote(dst_mp3_file))
    ret = os.system(cmd)
    if ret != 0 or not os.path.exists(dst_mp3_file):
        return gs_err_code_trans_to_mp3
    return gs_err_code_success
def process_worker(self):
    """
    Poll the queue forever and run the tone-shift pipeline on each job.

    For every job: create ``/tmp/tone_shift_one/<cid>``, download and
    transcode the source, separate and tone-shift it via ``process_one``,
    then write the result to ``svc_queue_table``. On success the two uploaded
    keys are stored comma-joined in ``svc_url``; on failure the negated error
    code is stored as the state.

    Fixes compared with the previous version:
    * an empty queue (``get_one_data`` returns ``(None, None)``) is handled by
      sleeping and polling again, instead of trying to download ``None``;
    * a failed download or transcode marks the job as failed, removes its
      work dir and moves on. It used to return from the method, which
      stopped the worker and left the row in the "taken" state.

    :return: does not return under normal operation
    """
    logging.info("start process_worker .....")
    base_dir = "/tmp/tone_shift_one"
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    while True:
        worker_st = time.time()
        cid, song_url = self.get_one_data()
        if cid is None:
            # Nothing queued; wait before polling again.
            time.sleep(5)
            continue

        work_dir = os.path.join(base_dir, str(cid))
        if os.path.exists(work_dir):
            shutil.rmtree(work_dir)
        os.makedirs(work_dir)

        err = self.pre_process(work_dir, song_url)
        if err != gs_err_code_success:
            self.update_state(str(cid), -err)
            shutil.rmtree(work_dir)
            continue

        st = time.time()
        err, data = self.process_one(str(cid), work_dir)
        logging.info("process_finish,{},{}".format(cid, time.time() - st))
        if err == gs_err_code_success and len(data) == 2:
            sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\" where song_id = {}". \
                format(gs_state_finish, int(time.time()), ",".join(data), str(cid))
            banned_user_map['db'] = "av_db"
            update_db(sql, banned_user_map)
        else:
            self.update_state(str(cid), -err)
        shutil.rmtree(work_dir)
        logging.info("process_finish,{},{}".format(cid, time.time() - worker_st))
include_directories(audio_effects_lib/ref/saudio_effects/src) include_directories(audio_effects_lib/ref/slow_flanging/inc) include_directories(audio_effects_lib/ref/slow_flanging/src) include_directories(audio_effects_lib/ref/tone_shift/inc) include_directories(audio_effects_lib/ref/tone_shift/src) include_directories(audio_effects_lib/ref/common) include_directories(audio_effects_lib/ref/al_reverb/src/biquad_filters) include_directories(audio_effects_lib/ref/al_reverb/src/fast_delay) include_directories(audio_effects_lib/ref/al_reverb/src/filter) include_directories(audio_effects_lib/ref/al_reverb/src/AlReverbApi.cpp) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_biquad) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_common) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_echo) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb) include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_modulation) include_directories(audio_effects_lib/ref/iir_eq/src/audacious_arma) include_directories(audio_effects_lib/ref/iir_eq/src/audacious_eq) include_directories(audio_effects_lib/ref/saudio_effects/src/all_plat audio_effects_lib/ref/saudio_effects/src/audio_effect audio_effects_lib/ref/saudio_effects/src/biquad audio_effects_lib/ref/saudio_effects/src/buffer audio_effects_lib/ref/saudio_effects/src/damper audio_effects_lib/ref/saudio_effects/src/delay audio_effects_lib/ref/saudio_effects/src/delayi audio_effects_lib/ref/saudio_effects/src/envelope_follower audio_effects_lib/ref/saudio_effects/src/equalizer audio_effects_lib/ref/saudio_effects/src/reverb 
audio_effects_lib/ref/saudio_effects/src/simple_delay_effect audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect) include_directories(audio_effects_lib/ref/tone_shift/src/aa_filter) include_directories(audio_effects_lib/ref/tone_shift/src/bpm_detect) include_directories(audio_effects_lib/ref/tone_shift/src/cpu_detect) include_directories(audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer) include_directories(audio_effects_lib/ref/tone_shift/src/fir_filter) include_directories(audio_effects_lib/ref/tone_shift/src/peak_finder) include_directories(audio_effects_lib/ref/tone_shift/src/rate_transposer) include_directories(audio_effects_lib/ref/tone_shift/src/sound_touch) include_directories(audio_effects_lib/ref/tone_shift/src/td_stretch) include_directories(audio_effects_lib/ref/supersound/inc) include_directories(audio_effects_lib/ref/supersound/src) include_directories(audio_effects_lib/ref/supersound/src/common) include_directories(audio_effects_lib/ref/supersound/src/impulse) include_directories(audio_effects_lib/ref/supersound/ref) include_directories(audio_effects_lib/ref/supersound/ref/kiss_fft) AUX_SOURCE_DIRECTORY(audio_effects_lib/common AE_SRC_COMMON_DIR) file(GLOB_RECURSE AE_CPP_SRC_DIR audio_effects_lib/src/*cpp) file(GLOB_RECURSE AE_CPP_REF_DIR audio_effects_lib/ref/*cpp) file(GLOB_RECURSE AE_C_REF_DIR audio_effects_lib/ref/*c) include_directories(audio_effects_lib/ref/waves/inc) list(REMOVE_ITEM AE_CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/audio_effects_lib/ref/audio_resample/src/FfmpegResampler.cpp") AUX_SOURCE_DIRECTORY(alimter/src DIR_ALIMTER_SRCS) AUX_SOURCE_DIRECTORY(waves/src DIR_WAVES_SRCS) AUX_SOURCE_DIRECTORY(ebur128/src DIR_EBUR128_SRCS) AUX_SOURCE_DIRECTORY(audio_mixer/src DIR_AUDIO_MIXER_SRCS) #add_executable(mixer main.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS} # ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR} # 
audio_effects_lib/example/ae_server/CAeServer.cpp) #target_link_libraries(mixer ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) # 音量拉伸 add_executable(draw_volume draw_volume.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS} ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR} audio_effects_lib/example/ae_server/CAeServer.cpp) target_link_libraries(draw_volume ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) # 降噪 add_executable(denoise_exe denoise.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS} ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR} audio_effects_lib/example/ae_server/CAeServer.cpp) target_link_libraries(denoise_exe ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) # 简单的混合 add_executable(simple_mixer simple_mixer.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS} ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR} audio_effects_lib/example/ae_server/CAeServer.cpp) target_link_libraries(simple_mixer ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) # 音效 add_executable(im_effect_exe im_effect.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS} ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR} audio_effects_lib/example/ae_server/CAeServer.cpp) -target_link_libraries(im_effect_exe ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) \ No newline at end of file +target_link_libraries(im_effect_exe ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) + +# 变调 +add_executable(tone_shift_exe tone_shift.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS} + ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR} + audio_effects_lib/example/ae_server/CAeServer.cpp) +target_link_libraries(tone_shift_exe ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread) \ No newline at end of file diff --git 
a/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp b/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp index fdf1673..036d772 100644 --- a/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp +++ b/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp @@ -1,236 +1,238 @@ // // Created by yangjianli on 2019-09-09. // /** * 输入一个音频和伴奏自动进行混合 * gated_loudness 当前音量 * gain 预期增益 */ #include "iostream" #include "WaveFile.h" #include "math.h" #include "ebur128.h" #include "AudioMixer.h" #include "alimiter.h" #include "waves/inc/WaveFile.h" #include "CAudioEffectsChainApi.h" #include "string" #include "ae_server/CAeServer.h" #include #include #include #include #include #include "denoise/webrtc/include/WebrtcDenoise.h" #define PROC_LEN 1024 #define DEFAULT_BASELINE_DB (float)-14.57f int short2float(short *pInBuf, int nLen, float *pOutBuf) { for (int i = 0; i < nLen; i++) { pOutBuf[i] = pInBuf[i] * 1.0 / 32768; } return 0; } int float2short(float *pInBuf, int nLen, short *pOutBuf) { for (int i = 0; i < nLen; i++) { pOutBuf[i] = int(pInBuf[i] * 32768); } return 0; } /** * 获取增益 * @param nChannel * @param nSampleRate * @param pData * @param nLength * @param gain * @return */ int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness, double &gain) { printf("ebur128_init start .. 
%d\n", nLength); ebur128_state *st = NULL; st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I); if (NULL == st) { return -1; } int nPos = 0; int nTmpLength = 0; int nRet; printf("process start ..\n"); while (nPos < nLength) { nTmpLength = PROC_LEN; if (nLength - nPos < PROC_LEN) { nTmpLength = nLength - nPos; } nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel); if (nRet != 0) { return -2; } nPos += nTmpLength; } printf("process ok..\n"); gated_loudness = -1; ebur128_loudness_global(st, &gated_loudness); float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f; gain = pow(10, db); printf("gated_loudness = %f db = %f gain = %f\n", gated_loudness, db, gain); ebur128_destroy(&st); return 0; } /** * 混合音频和伴奏 * @param pVocalIn * @param pAccIn * @param nLength * @param gainVocal * @param gainAcc * @param pOutBuf * @return */ int mix(float *pVocalIn, float *pAccIn, int nLength, double gainVocal, double gainAcc, float *pOutBuf, int nSampleRate, int nChannel, int nDelay) { CAudioMixer *cAudioMixer = new CAudioMixer(); cAudioMixer->init(nSampleRate, nChannel); cAudioMixer->set_acc_delay(nDelay); cAudioMixer->set_vocal_volume(int(gainVocal * 50)); cAudioMixer->set_acc_volume(int(gainAcc * 50)); int nPos = 0; int nStep = 1024; float *fTmp = new float[nStep]; cAudioMixer->reset(); nPos = 0; nStep = 1024; int cnt = 0; while (nPos < nLength) { if (nLength - nPos < nStep) { nStep = nLength - nPos; } cnt++; cAudioMixer->process(pVocalIn + nPos, pAccIn + nPos, pOutBuf + nPos, nStep); nPos += nStep; } delete cAudioMixer; delete[] fTmp; return 0; } int denoise_webrtc(short *pInBuf, int nLength, int nChannel, int nSampleRate) { CWebrtcDenoise cWebrtcDenoise; cWebrtcDenoise.init(nSampleRate, nChannel); float *pTmp = new float[nLength]; for (int i = 0; i < nLength; i++) { pTmp[i] = pInBuf[i] * 1.0 / 32768; } cWebrtcDenoise.set_level(kHigh); int nStep = 512 * nChannel; for (int i = 0; i < nStep; i++) { pTmp[i] = pTmp[i] * i * 1.0 / nStep; } for (int i = 0, cnt = 
0; i < nLength; i += nStep, cnt++) { if (nLength - i < nStep) continue; cWebrtcDenoise.process(pTmp + i, nStep); } for (int i = 0; i < nLength; i++) { pInBuf[i] = short(pTmp[i] * 32768); } delete[] pTmp; return 0; } double calc_power_rate(float *in_data, int32_t in_len, float *ref_data, int32_t ref_len) { double in_power = 0; double ref_power = 0; int32_t min_len = in_len > ref_len ? ref_len : in_len; for (int i = 0; i < min_len; i++) { in_power += (in_data[i]) * (in_data[i]); ref_power += (ref_data[i]) * (ref_data[i]); } return ref_power / in_power; } int main(int argc, char *argv[]) { if (argc != 4) { printf("input error! example: ./main vocal_path acc_path mix_path\n"); return -1; } std::string sVocal = argv[1]; std::string sAcc = argv[2]; std::string sMix = argv[3]; // 读取人声 CWaveFile *oWaveFile = new CWaveFile(sVocal.c_str(), false); float *pfVocalBuf = new float[oWaveFile->GetTotalFrames() * oWaveFile->GetChannels()]; oWaveFile->ReadFrameAsfloat(pfVocalBuf, oWaveFile->GetTotalFrames()); //读取伴奏 CWaveFile *oWaveFile1 = new CWaveFile(sAcc.c_str(), false); float *pfAccBuf = new float[oWaveFile1->GetTotalFrames() * oWaveFile1->GetChannels()]; oWaveFile1->ReadFrameAsfloat(pfAccBuf, oWaveFile1->GetTotalFrames()); if (oWaveFile->GetChannels() != oWaveFile1->GetChannels()) { printf("channel not equal!\n"); return -1; } // 混合音频和伴奏 - printf("mix wav:%s and acc:%s!\n", sVocal.c_str(), sAcc.c_str()); + printf("mix wav:%s and acc:%s! %d,%d\n", sVocal.c_str(), sAcc.c_str(), oWaveFile->GetTotalFrames(), oWaveFile1->GetTotalFrames()); int nOutLen = oWaveFile->GetTotalFrames() < oWaveFile1->GetTotalFrames() ? 
oWaveFile->GetTotalFrames() : oWaveFile1->GetTotalFrames(); + printf("XXXXXXX, %d,%d\n", nOutLen, oWaveFile->GetChannels()); nOutLen = nOutLen * oWaveFile->GetChannels(); + float *pOutBuf = new float[nOutLen]; mix(pfVocalBuf, pfAccBuf, nOutLen, 1.0, 1.0, pOutBuf, oWaveFile->GetSampleRate(), oWaveFile->GetChannels(), 0); //写入文件 printf("write2file nLength:%d path:%s!\n", nOutLen, sMix.c_str()); CWaveFile *oWaveFile2 = new CWaveFile(sMix.c_str(), true); oWaveFile2->SetSampleFormat(SF_IEEE_FLOAT); oWaveFile2->SetSampleRate(oWaveFile->GetSampleRate()); oWaveFile2->SetChannels(oWaveFile->GetChannels()); oWaveFile2->SetupDone(); oWaveFile2->WriteFrame(pOutBuf, nOutLen / oWaveFile->GetChannels()); delete oWaveFile; delete oWaveFile1; delete oWaveFile2; delete[] pfVocalBuf; delete[] pfAccBuf; delete[] pOutBuf; return 0; } \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp b/AutoCoverTool/ref/tools/mixer/tone_shift.cpp similarity index 69% copy from AutoCoverTool/ref/tools/mixer/simple_mixer.cpp copy to AutoCoverTool/ref/tools/mixer/tone_shift.cpp index fdf1673..7e4e410 100644 --- a/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp +++ b/AutoCoverTool/ref/tools/mixer/tone_shift.cpp @@ -1,236 +1,245 @@ // // Created by yangjianli on 2019-09-09. 
// /** * 输入一个音频和伴奏自动进行混合 * gated_loudness 当前音量 * gain 预期增益 */ #include "iostream" #include "WaveFile.h" #include "math.h" #include "ebur128.h" #include "AudioMixer.h" #include "alimiter.h" #include "waves/inc/WaveFile.h" #include "CAudioEffectsChainApi.h" #include "string" #include "ae_server/CAeServer.h" #include #include #include #include #include #include "denoise/webrtc/include/WebrtcDenoise.h" #define PROC_LEN 1024 #define DEFAULT_BASELINE_DB (float)-14.57f int short2float(short *pInBuf, int nLen, float *pOutBuf) { for (int i = 0; i < nLen; i++) { pOutBuf[i] = pInBuf[i] * 1.0 / 32768; } return 0; } int float2short(float *pInBuf, int nLen, short *pOutBuf) { for (int i = 0; i < nLen; i++) { pOutBuf[i] = int(pInBuf[i] * 32768); } return 0; } /** * 获取增益 * @param nChannel * @param nSampleRate * @param pData * @param nLength * @param gain * @return */ int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness, double &gain) { printf("ebur128_init start .. 
%d\n", nLength); ebur128_state *st = NULL; st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I); if (NULL == st) { return -1; } int nPos = 0; int nTmpLength = 0; int nRet; printf("process start ..\n"); while (nPos < nLength) { nTmpLength = PROC_LEN; if (nLength - nPos < PROC_LEN) { nTmpLength = nLength - nPos; } nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel); if (nRet != 0) { return -2; } nPos += nTmpLength; } printf("process ok..\n"); gated_loudness = -1; ebur128_loudness_global(st, &gated_loudness); float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f; gain = pow(10, db); printf("gated_loudness = %f db = %f gain = %f\n", gated_loudness, db, gain); ebur128_destroy(&st); return 0; } /** * 混合音频和伴奏 * @param pVocalIn * @param pAccIn * @param nLength * @param gainVocal * @param gainAcc * @param pOutBuf * @return */ int mix(float *pVocalIn, float *pAccIn, int nLength, double gainVocal, double gainAcc, float *pOutBuf, - int nSampleRate, int nChannel, int nDelay) + int nSampleRate, int nChannel, int nDelay, std::string effect_file) { CAudioMixer *cAudioMixer = new CAudioMixer(); cAudioMixer->init(nSampleRate, nChannel); cAudioMixer->set_acc_delay(nDelay); cAudioMixer->set_vocal_volume(int(gainVocal * 50)); cAudioMixer->set_acc_volume(int(gainAcc * 50)); int nPos = 0; int nStep = 1024; float *fTmp = new float[nStep]; cAudioMixer->reset(); nPos = 0; nStep = 1024; int cnt = 0; + CAeServer cAeServer; + cAeServer.init(nSampleRate, nChannel, nStep / nChannel); + AE_PARAMS_IM_EFFECT im_params = { + .effect_path = effect_file, + }; + cAeServer.set_params(AE_TYPE_IM_EFFECT, (void *) &im_params); + while (nPos < nLength) { if (nLength - nPos < nStep) { nStep = nLength - nPos; } cnt++; + cAeServer.process(pVocalIn + nPos, pVocalIn + nPos, nStep); cAudioMixer->process(pVocalIn + nPos, pAccIn + nPos, pOutBuf + nPos, nStep); nPos += nStep; } + cAeServer.uninit(); delete cAudioMixer; delete[] fTmp; return 0; } int denoise_webrtc(short *pInBuf, int 
nLength, int nChannel, int nSampleRate) { CWebrtcDenoise cWebrtcDenoise; cWebrtcDenoise.init(nSampleRate, nChannel); float *pTmp = new float[nLength]; for (int i = 0; i < nLength; i++) { pTmp[i] = pInBuf[i] * 1.0 / 32768; } cWebrtcDenoise.set_level(kHigh); int nStep = 512 * nChannel; for (int i = 0; i < nStep; i++) { pTmp[i] = pTmp[i] * i * 1.0 / nStep; } for (int i = 0, cnt = 0; i < nLength; i += nStep, cnt++) { if (nLength - i < nStep) continue; cWebrtcDenoise.process(pTmp + i, nStep); } for (int i = 0; i < nLength; i++) { pInBuf[i] = short(pTmp[i] * 32768); } delete[] pTmp; return 0; } double calc_power_rate(float *in_data, int32_t in_len, float *ref_data, int32_t ref_len) { double in_power = 0; double ref_power = 0; int32_t min_len = in_len > ref_len ? ref_len : in_len; for (int i = 0; i < min_len; i++) { in_power += (in_data[i]) * (in_data[i]); ref_power += (ref_data[i]) * (ref_data[i]); } return ref_power / in_power; } int main(int argc, char *argv[]) { if (argc != 4) { - printf("input error! example: ./main vocal_path acc_path mix_path\n"); + printf("input error! 
example: ./main vocal_path dst_path pitch!\n"); return -1; } - std::string sVocal = argv[1]; - std::string sAcc = argv[2]; - std::string sMix = argv[3]; + std::string vocal_path = argv[1]; + std::string dst_path = argv[2]; + float pitch = strtod(argv[3], NULL); // 读取人声 - CWaveFile *oWaveFile = new CWaveFile(sVocal.c_str(), false); - float *pfVocalBuf = new float[oWaveFile->GetTotalFrames() * oWaveFile->GetChannels()]; - oWaveFile->ReadFrameAsfloat(pfVocalBuf, oWaveFile->GetTotalFrames()); + CWaveFile *oWaveFile = new CWaveFile(vocal_path.c_str(), false); + float *pVocalBuf = new float[oWaveFile->GetTotalFrames() * oWaveFile->GetChannels()]; + oWaveFile->ReadFrameAsfloat(pVocalBuf, oWaveFile->GetTotalFrames()); - //读取伴奏 - CWaveFile *oWaveFile1 = new CWaveFile(sAcc.c_str(), false); - float *pfAccBuf = new float[oWaveFile1->GetTotalFrames() * oWaveFile1->GetChannels()]; - oWaveFile1->ReadFrameAsfloat(pfAccBuf, oWaveFile1->GetTotalFrames()); - if (oWaveFile->GetChannels() != oWaveFile1->GetChannels()) + int nStep = 1024; + int nLength = oWaveFile->GetTotalFrames() * oWaveFile->GetChannels(); + CAeServer cAeServer; + cAeServer.init(oWaveFile->GetSampleRate(), oWaveFile->GetChannels(), nStep / oWaveFile->GetChannels()); + AEToneShiftParam ae_param; + ae_param.max_shift = 12; + ae_param.min_shift = -12; + ae_param.tone_shift = pitch; + + cAeServer.set_params(AE_TYPE_TONE_SHIFT, &ae_param); + int nPos = 0; + while (nPos < nLength) { - printf("channel not equal!\n"); - return -1; + if (nLength - nPos < nStep) + { + nStep = nLength - nPos; + } + cAeServer.process(pVocalBuf + nPos, pVocalBuf + nPos, nStep); + nPos += nStep; } - - // 混合音频和伴奏 - printf("mix wav:%s and acc:%s!\n", sVocal.c_str(), sAcc.c_str()); - int nOutLen = oWaveFile->GetTotalFrames() < oWaveFile1->GetTotalFrames() ? 
oWaveFile->GetTotalFrames() - : oWaveFile1->GetTotalFrames(); - nOutLen = nOutLen * oWaveFile->GetChannels(); - float *pOutBuf = new float[nOutLen]; - - mix(pfVocalBuf, pfAccBuf, nOutLen, 1.0, 1.0, pOutBuf, oWaveFile->GetSampleRate(), oWaveFile->GetChannels(), 0); - + cAeServer.uninit(); //写入文件 - printf("write2file nLength:%d path:%s!\n", nOutLen, sMix.c_str()); - CWaveFile *oWaveFile2 = new CWaveFile(sMix.c_str(), true); - oWaveFile2->SetSampleFormat(SF_IEEE_FLOAT); - oWaveFile2->SetSampleRate(oWaveFile->GetSampleRate()); - oWaveFile2->SetChannels(oWaveFile->GetChannels()); - oWaveFile2->SetupDone(); - oWaveFile2->WriteFrame(pOutBuf, nOutLen / oWaveFile->GetChannels()); + printf("write2file nLength:%d path:%s!\n", oWaveFile->GetTotalFrames() * oWaveFile->GetChannels(), + dst_path.c_str()); + + CWaveFile out_wav = CWaveFile(dst_path.c_str(), true); + out_wav.SetChannels(oWaveFile->GetChannels()); + out_wav.SetSampleRate(oWaveFile->GetSampleRate()); + out_wav.SetSampleFormat(SF_IEEE_FLOAT); + out_wav.SetupDone(); + out_wav.WriteFrame(pVocalBuf, oWaveFile->GetTotalFrames()); delete oWaveFile; - delete oWaveFile1; - delete oWaveFile2; - delete[] pfVocalBuf; - delete[] pfAccBuf; - delete[] pOutBuf; + delete[] pVocalBuf; return 0; } \ No newline at end of file diff --git a/AutoCoverTool/script/get_song_url.py b/AutoCoverTool/script/get_song_url.py index 8775278..6327796 100644 --- a/AutoCoverTool/script/get_song_url.py +++ b/AutoCoverTool/script/get_song_url.py @@ -1,347 +1,379 @@ """ 获取歌曲的地址 + +# song_src=2 是来源108和109的歌曲,未被洗过的 +# song_src=1 是曲库给的 +# song_src=3 # 用于轻变调的 """ from script.common import * from copy import deepcopy from online.common import update_db def get_url_by_song_id(song_id): sql = "select task_url,starmaker_songid from silence where starmaker_songid = {} order by task_id limit 1".format( song_id) ban = deepcopy(banned_user_map) ban["db"] = "starmaker_musicbook" data = get_data_by_mysql(sql, ban) if len(data) > 0: return data[0][0] return None def 
process(): arr = [ - "611752105016642206", - "611752105016665828", - "611752105020332340", - "611752105020332347", - "611752105020390931", - "611752105020417679", - "611752105021442334", - "611752105021459528", - "611752105021810110", - "611752105021916843", - "611752105022312180", - "611752105022614734", - "611752105022615541", - "611752105022615665", - "611752105022616931", - "611752105022647066", - "611752105022647087", - "611752105022652047", - "611752105022700847", - "611752105022728649", - "611752105022728653", - "611752105022729255", - "611752105022733605", - "611752105022736029", - "611752105022740011", - "611752105022742166", - "611752105022743986", - "611752105022746848", - "611752105022748944", - "611752105022749272", - "611752105022749768", - "611752105022751902", - "611752105022752248", - "611752105022754267", - "611752105022755405", - "611752105022757577", - "611752105022758309", - "611752105022758395", - "611752105022761145", - "611752105022761151", - "611752105022764435", - "611752105022764965", - "611752105022766106", - "611752105022766675", - "611752105022774258", - "611752105022776257", - "611752105022776285", - "611752105022776577", - "611752105022776846", - "611752105022777151", - "611752105022777306", - "611752105022778977", - "611752105022779055", - "611752105022779169", - "611752105022779962", - "611752105022780648", - "611752105022784727", - "611752105022785048", - "611752105022785179", - "611752105022785551", - "611752105022811718", - "611752105022814368", - "611752105022815931", - "611752105022819519", - "611752105022824948", - "611752105022828998", - "611752105022833822", - "611752105022835250", - "611752105022838357", - "611752105022838544", - "611752105022838589", - "611752105022838666", - "611752105022839015", - "611752105022839300", - "611752105022839468", - "611752105022839559", - "611752105022892354", - "611752105022911042", - "611752105023134539", - "611752105023142842", - "611752105023588294", - "611752105024204862", - 
"611752105024546859", - "611752105024598735", - "611752105024938931", - "611752105025327479", - "611752105025565027", - "611752105025720331", - "611752105025741447", - "611752105025817802", - "611752105026281560", - "611752105026388268", - "611752105026421148", - "611752105026536897", - "611752105026536911", - "611752105026580839", - "611752105026648945", - "611752105026663363", - "611752105026736866", - "611752105027067863", - "611752105027112518", - "611752105027186556", - "611752105027189208", - "611752105027189301", - "611752105027326104", - "611752105027460089", - "611752105027484913", - "611752105027588072", - "611752105027611383", - "611752105027690075", - "611752105028470803", - "611752105028507652", - "611752105028528335", - "611752105028815367", - "611752105028820629", - "611752105028820633", - "611752105028878340", - "611752105028906600", - "611752105028944645", - "611752105028958744", - "611752105029006303", - "611752105029006319", - "611752105029059923", - "611752105029078388", - "611752105029090034", - "611752105029209546", - "611752105029243449", - "611752105029272970", - "611752105029291291", - "611752105029291294", - "611752105029291295", - "611752105029291297", - "611752105029291298", - "611752105029291304", - "611752105029395411", - "611752105029432787", - "611752105029570149", - "611752105029570153", - "611752105029570157", - "611752105029953987", - "611752105029954853", - "611752105029955024", - "611752105029955258", - "611752105029956379", - "611752105029956615", - "611752105029990162", - "611752105029990590", - "611752105029991249", - "611752105030103635", - "611752105030119229", - "611752105030124600", - "611752105030485000", - "611752105030485417", - "611752105030485428", - "611752105030485533", - "611752105030485561", - "611752105030485562", - "611752105030485565", - "611752105030485566", - "611752105030485569", - "611752105030485570", - "611752105030485572", - "611752105030485591", - "611752105030485592", - "611752105030485594", - 
"611752105030485595", - "611752105030485597", - "611752105030485598", - "611752105030485601", - "611752105030485602", - "611752105030485607", - "611752105030485608", - "611752105030485610", - "611752105030485612", - "611752105030485613", - "611752105030485616", - "611752105030485618", - "611752105030485620", - "611752105030485621", - "611752105030485626", - "611752105030485627", - "611752105030485631", - "611752105030485634", - "611752105030485637", - "611752105030485639", - "611752105030485642", - "611752105030485646", - "611752105030485650", - "611752105030485653", - "611752105030485655", - "611752105030485656", - "611752105030485662", - "611752105030485663", - "611752105030485666", - "611752105030485667", - "611752105030485669", - "611752105030485671", - "611752105030485672", - "611752105030485673", - "611752105030485676", - "611752105030485677", - "611752105030485679", - "611752105030485681", - "611752105030485682", - "611752105030485685", - "611752105030485687", - "611752105030485688", - "611752105030485691", - "611752105030485692", - "611752105030485693", - "611752105030485696", - "611752105030485697", - "611752105030485700", - "611752105030485702", - "611752105030485703", - "611752105030485707", - "611752105030485710", - "611752105030485711", - "611752105030485715", - "611752105030485716", - "611752105030485717", - "611752105030485721", - "611752105030485722", - "611752105030485724", - "611752105030485726", - "611752105030485727", - "611752105030485729", - "611752105030485731", - "611752105030485733", - "611752105030485734", - "611752105030485736", - "611752105030485737", - "611752105030485738", - "611752105030485739", - "611752105030485741", - "611752105030485742", - "611752105030485744", - "611752105030485745", - "611752105030485748", - "611752105030485749", - "611752105030485750", - "611752105030485755", - "611752105030485758", - "611752105030485759", - "611752105030485761", - "611752105030485763", - "611752105030485766", - "611752105030485768", - 
"611752105030485769", - "611752105030485772", - "611752105030485778", - "611752105030485779", - "611752105030485787", - "611752105030485790", - "611752105030485791", - "611752105030485794", - "611752105030485797", - "611752105030485799", - "611752105030488510", - "611752105030488594", - "611752105030488665", - "611752105030488713", - "611752105030488727", - "611752105030488744", - "611752105030488814", - "611752105030488836", - "611752105030488852", - "611752105030488864", - "611752105030488880", - "611752105030488962", - "611752105030488997", - "611752105030489153", - "611752105030489354", - "611752105030489380", - "611752105030489394", - "611752105030489403", - "611752105030489415" + "611752105020332343", + "611752105022647065", + "611752105022704186", + "611752105022729268", + "611752105022736024", + "611752105022739648", + "611752105022739650", + "611752105022741712", + "611752105022743896", + "611752105022746068", + "611752105022747108", + "611752105022757968", + "611752105022763880", + "611752105022763884", + "611752105022764688", + "611752105022764801", + "611752105022766341", + "611752105022767186", + "611752105022770004", + "611752105022770306", + "611752105022773633", + "611752105022773776", + "611752105022774127", + "611752105022774502", + "611752105022775091", + "611752105022775486", + "611752105022775907", + "611752105022776719", + "611752105022776721", + "611752105022776761", + "611752105022776857", + "611752105022777051", + "611752105022777076", + "611752105022777328", + "611752105022777573", + "611752105022777607", + "611752105022777608", + "611752105022777611", + "611752105022777835", + "611752105022780287", + "611752105022781374", + "611752105022785018", + "611752105022785313", + "611752105022812895", + "611752105022825467", + "611752105022837452", + "611752105022837464", + "611752105022840319", + "611752105022840637", + "611752105022841089", + "611752105022841355", + "611752105022842184", + "611752105022843089", + "611752105022843139", + 
"611752105022843331", + "611752105022843710", + "611752105022843728", + "611752105022876795", + "611752105022973113", + "611752105023184121", + "611752105023234496", + "611752105023258864", + "611752105023262008", + "611752105023301455", + "611752105023306231", + "611752105023329571", + "611752105023411931", + "611752105023449798", + "611752105023458990", + "611752105023610603", + "611752105023678577", + "611752105023683357", + "611752105023841037", + "611752105023929521", + "611752105024170140", + "611752105024466658", + "611752105024683212", + "611752105024765795", + "611752105024766050", + "611752105025475926", + "611752105025486355", + "611752105025503613", + "611752105025506533", + "611752105025515144", + "611752105025521388", + "611752105025524664", + "611752105025524932", + "611752105025526555", + "611752105025542775", + "611752105025542802", + "611752105025543710", + "611752105025555350", + "611752105025558173", + "611752105025565020", + "611752105025565029", + "611752105025565034", + "611752105025578884", + "611752105025581305", + "611752105026003288", + "611752105026090255", + "611752105026152320", + "611752105026180638", + "611752105026180797", + "611752105026205984", + "611752105026227884", + "611752105026343282", + "611752105026417620", + "611752105026449246", + "611752105026462848", + "611752105026533657", + "611752105026577993", + "611752105026614487", + "611752105026666894", + "611752105026666899", + "611752105026666904", + "611752105026666918", + "611752105026666950", + "611752105026666964", + "611752105026666995", + "611752105026667014", + "611752105026667025", + "611752105027030955", + "611752105027216307", + "611752105027228689", + "611752105027228702", + "611752105027460125", + "611752105027802526", + "611752105027854263", + "611752105028204403", + "611752105028408823", + "611752105028477541", + "611752105028558157", + "611752105028593043", + "611752105028793344", + "611752105028820643", + "611752105028820644", + "611752105028858622", + 
"611752105028878359", + "611752105028916096", + "611752105028916098", + "611752105028990740", + "611752105029006327", + "611752105029047058", + "611752105029054046", + "611752105029059915", + "611752105029204262", + "611752105029291293", + "611752105029306974", + "611752105029372452", + "611752105029648535", + "611752105030146069", + "611752105030483301", + "611752105030483312", + "611752105030499117", + "611752105030499185", + "611752105030499265", + "611752105030499310", + "611752105030503847", + "611752105030547499", + "611752105030547630", + "611752105030547632", + "611752105030547638", + "611752105030557261", + "611752105030557355", + "611752105030558663", + "611752105030559471", + "611752105030562192", + "611752105030562194", + "611752105030562196", + "611752105030562197", + "611752105030562199", + "611752105030562203", + "611752105030562205", + "611752105030562209", + "611752105030562211", + "611752105030562213", + "611752105030562214", + "611752105030562218", + "611752105030562221", + "611752105030562227", + "611752105030562228", + "611752105030562231", + "611752105030562234", + "611752105030562236", + "611752105030562239", + "611752105030562243", + "611752105030562245", + "611752105030562248", + "611752105030562251", + "611752105030562254", + "611752105030562255", + "611752105030562259", + "611752105030562262", + "611752105030562263", + "611752105030562266", + "611752105030562268", + "611752105030562271", + "611752105030562274", + "611752105030562277", + "611752105030562286", + "611752105030562289", + "611752105030562291", + "611752105030562296", + "611752105030562302", + "611752105030562303", + "611752105030562306", + "611752105030562311", + "611752105030562314", + "611752105030562316", + "611752105030562322", + "611752105030562325", + "611752105030562327", + "611752105030562333", + "611752105030562335", + "611752105030562337", + "611752105030562338", + "611752105030562345", + "611752105030562351", + "611752105030562378", + "611752105030562380", + 
"611752105030562383", + "611752105030562389", + "611752105030562391", + "611752105030562392", + "611752105030562397", + "611752105030562398", + "611752105030562399", + "611752105030562401", + "611752105030562404", + "611752105030562405", + "611752105030562411", + "611752105030562413", + "611752105030562414", + "611752105030562417", + "611752105030562419", + "611752105030562424", + "611752105030562425", + "611752105030562426", + "611752105030562428", + "611752105030562431", + "611752105030562448", + "611752105030562457", + "611752105030562459", + "611752105030562460", + "611752105030562463", + "611752105030562470", + "611752105030562472", + "611752105030562473", + "611752105030562479", + "611752105030562483", + "611752105030562489", + "611752105030562493", + "611752105030562494", + "611752105030562499", + "611752105030562502", + "611752105030562504", + "611752105030562507", + "611752105030562512", + "611752105030562513", + "611752105030562517", + "611752105030562522", + "611752105030562919", + "611752105030562921", + "611752105030562924", + "611752105030562925", + "611752105030562929", + "611752105030562931", + "611752105030562936", + "611752105030562938", + "611752105030562939", + "611752105030562940", + "611752105030562943", + "611752105030562950", + "611752105030562953", + "611752105030562954", + "611752105030562959", + "611752105030562960", + "611752105030562962", + "611752105030562968", + "611752105030562974", + "611752105030562978", + "611752105030562979", + "611752105030562981", + "611752105030562983", + "611752105030562986", + "611752105030562988", + "611752105030562999", + "611752105030563001", + "611752105030563003", + "611752105030563005", + "611752105030563006", + "611752105030563010", + "611752105030563014", + "611752105030563022", + "611752105030563025", + "611752105030563028", + "611752105030563031", + "611752105030563034", + "611752105030563035", + "611752105030563043" ] ban = deepcopy(banned_user_map) ban["db"] = "av_db" for sid in arr: url = 
get_url_by_song_id(sid) if url is not None: print("out,{},{}".format(url, sid)) - sql = "replace INTO svc_queue_table (song_id, url, create_time, update_time) VALUES ({}, \"{}\", NOW(), NOW())" \ - .format(sid, url) - update_db(sql, ban) + # 不在数据库中 + sql = "select song_id from svc_queue_table where song_id={}".format(sid) + data = get_data_by_mysql(sql, ban) + if len(data) == 0: + sql = "insert INTO svc_queue_table (song_id, url, create_time, update_time, song_src) VALUES ({}, \"{}\", NOW(), NOW(), 1)" \ + .format(sid, url) + update_db(sql, ban) def get_data_from_song(): sql = """ select tb1.song_id, tb1.recording_count from ( select song_id,recording_count from starmaker.song where song_src in (108,109) and song_status = 2 order by recording_count desc ) as tb1 left join ( select song_id from av_db.svc_queue_table ) as tb2 on tb1.song_id = tb2.song_id where tb2.song_id is null order by tb1.recording_count desc limit 400 """ ban = deepcopy(banned_user_map) ban_v1 = deepcopy(banned_user_map) ban["db"] = "starmaker_musicbook" ban_v1["db"] = "av_db" data = get_data_by_mysql(sql, ban) for dt in data: sid = dt[0] url = get_url_by_song_id(sid) if url is not None: print("out,{},{}".format(url, sid)) sql = "replace INTO svc_queue_table (song_id, url, create_time, update_time) VALUES ({}, \"{}\", NOW(), NOW())" \ .format(sid, url) update_db(sql, ban_v1) if __name__ == '__main__': - get_data_from_song() + # get_data_from_song() + process() diff --git a/AutoCoverTool/script/shuffle_music.py b/AutoCoverTool/script/shuffle_music.py index 0473944..0f80872 100644 --- a/AutoCoverTool/script/shuffle_music.py +++ b/AutoCoverTool/script/shuffle_music.py @@ -1,80 +1,225 @@ """ 载入人声,将人声的频谱进行向上平移 """ import librosa import soundfile import numpy as np from copy import deepcopy def local_maxium(x): """ 求序列的极大值 :param x: :return: """ d = np.diff(x) l_d = len(d) maxium = [] loc = [] for i in range(l_d - 1): if d[i] > 0 and d[i + 1] <= 0: maxium.append(x[i + 1]) loc.append(i + 1) return 
maxium, loc def Formant_Cepst(u, cepstL): """ 来源: https://github.com/taw19960426/-Speech-signal-processing-experiment-tutorial-_python/blob/master/%E5%85%B1%E6%8C%AF%E5%B3%B0%E4%BC%B0%E8%AE%A1%E5%87%BD%E6%95%B0.py 倒谱法共振峰估计函数 :param u:输入信号 :param cepstL:🔪频率上窗函数的宽度 :return: val共振峰幅值 :return: loc共振峰位置 :return: spec包络线 """ wlen2 = len(u) // 2 u_fft = np.fft.fft(u) # 按式(2-1)计算 U = np.log(np.abs(u_fft[:wlen2])) Cepst = np.fft.ifft(U) # 按式(2-2)计算 cepst = np.zeros(wlen2, dtype=np.complex) cepst[:cepstL] = Cepst[:cepstL] # 按式(2-3)计算 cepst[-cepstL + 1:] = Cepst[-cepstL + 1:] # 取第二个式子的相反 spec = np.real(np.fft.fft(cepst)) val, loc = local_maxium(spec) # 在包络线上寻找极大值 return val, loc, spec def test(in_vocal): + import matplotlib.pyplot as plt + sr = 44100 audio, sr = librosa.load(in_vocal, sr=sr, mono=True) - stft = librosa.stft(audio) + stft = librosa.stft(audio, n_fft=2048) stft = stft.transpose() - new_stft = deepcopy(stft) + new_stft = np.zeros_like(stft) for ii in range(0, len(stft)): - # 寻找峰值点 - max_pos = [] - for i in range(1, len(stft[ii]) - 1, ): - if stft[ii][i + 1] > stft[ii][i] > stft[ii][i - 1]: - max_pos.append(i) - # 只调整1kHz以上的峰值点, 从48的位置开始调整 - for i in range(len(max_pos) - 1): - if max_pos[i] < 48: + + power = np.abs(stft[ii]) + power = power / (np.max(power)) + + x = np.array(list(range(0, len(stft[ii])))) + y = power + + new_x = [] + new_y = [] + for i in range(1, len(x) - 1, 1): + if y[i - 1] < y[i] > y[i + 1] and y[i] > 0.01: + new_x.append(x[i]) + new_y.append(y[i]) + + # 前后100hz的合并 + x = new_x + y = new_y + new_x = [] + new_y = [] + for i in range(1, len(x) - 1, 1): + if y[i - 1] < y[i] > y[i + 1]: + if x[i] - x[i - 1] > 5: + new_x.append(x[i - 1]) + new_y.append(y[i - 1]) + new_x.append(x[i]) + new_y.append(y[i]) + if x[i + 1] - x[i] > 5: + new_x.append(x[i + 1]) + new_y.append(y[i + 1]) + + if len(new_x) <= 1: + new_stft[ii] = deepcopy(stft[ii]) + continue + # 从第一共振峰开始向上加 + st_freq_idx = 1 + for i in range(st_freq_idx, len(stft[ii])): + dst_i = int(i * 
1.12) + if dst_i >= len(stft[ii]): continue - # 平移到两峰之间 - cur_i = max_pos[i] - next_i = max_pos[i + 1] - print(cur_i, next_i) - # 将cur_i移动到cur_i和next_i中间 - dst_i = (cur_i + next_i) // 2 - new_stft[ii][dst_i] = stft[ii][cur_i] - - print(stft.shape) + new_stft[ii][dst_i] = stft[ii][i] + new_stft[ii][0] = stft[ii][0] + # for i in range(0, len(stft[ii])): + # new_stft[ii][i] = stft[ii][i] + + # new_stft[ii] = deepcopy(stft[ii]) + + # # # 从0.01开始向后走 + # st_freq_idx = new_x[1] + # if len(new_x) >= 3: + # st_freq_idx = new_x[2] + # music_idx = int(4000 / (sr / 2048)) + # # 当前频率翻1.19倍 + # kk = -0.19 / (music_idx - st_freq_idx) + # bb = 1 - music_idx * kk + # for i in range(st_freq_idx, len(stft[ii])): + # cur_rate = i * kk + bb + # if i >= music_idx: + # cur_rate = 1.0 + # dst_idx = int(i * cur_rate + 0.5) + # if dst_idx >= len(stft[ii]): + # break + # new_stft[ii][dst_idx] += stft[ii][i] + # + # # 加平滑 + # st_freq_1 = new_x[1] + # # 当前频率从1倍翻到1.19倍 + # kk = 0.19 / (st_freq_idx - st_freq_1) + # bb = 1 - st_freq_1 * kk + # for i in range(st_freq_1, st_freq_idx): + # cur_rate = i * kk + bb + # dst_idx = int(i * cur_rate + 0.5) + # if dst_idx >= len(stft[ii]): + # break + # new_stft[ii][dst_idx] += stft[ii][i] + # for i in range(0, st_freq_1): + # new_stft[ii][i] += stft[ii][i] + new_stft = new_stft.transpose() istft = librosa.istft(new_stft) soundfile.write(str(in_vocal).replace(".wav", "_out.wav"), istft, 44100, format="wav") +def ttt(path): + from scipy.signal import lfilter + import matplotlib.pyplot as plt + # path="C4_3_y.wav" + # data, fs = soundBase('C4_3_y.wav').audioread() + data, fs = librosa.load(path, sr=44100, mono=True) # sr=None声音保持原采样频率, mono=False声音保持原通道数 + # 预处理-预加重 + u = lfilter([1, -0.99], [1], data) + + cepstL = 7 + wlen = len(u) + wlen2 = wlen // 2 + print("帧长={}".format(wlen)) + print("帧移={}".format(wlen2)) + # wlen = 256 + # wlen2 = 256//2 + # 预处理-加窗 + u2 = np.multiply(u, np.hamming(wlen)) + # 预处理-FFT,取对数 获得频域图像 取一半 + U_abs = 
np.log(np.abs(np.fft.fft(u2))[:wlen2]) + # 4.3.1 + freq = [i * fs / wlen for i in range(wlen2)] + # print(freq) + # val共振峰幅值 loc共振峰位置 spec包络线 + val, loc, spec = Formant_Cepst(u, cepstL) + plt.subplot(2, 1, 1) + plt.plot(freq, U_abs, 'k') + plt.xlabel('频率/Hz') # 设置x,y轴的标签 + plt.ylabel('幅值') + plt.title('男性a的发音频谱') + plt.subplot(2, 1, 2) + plt.plot(freq, spec, 'k') + plt.xlabel('频率/Hz') # 设置x,y轴的标签 + plt.ylabel('幅值') + plt.title('倒谱法共振峰估计') + for i in range(len(loc)): + plt.subplot(2, 1, 2) + plt.plot([freq[loc[i]], freq[loc[i]]], [np.min(spec), spec[loc[i]]], '-.k') + plt.text(freq[loc[i]], spec[loc[i]], 'Freq={}'.format(int(freq[loc[i]]))) + + # plt.savefig('images/共振峰估计.png') + plt.show() + plt.close() + + +def main(path): + import numpy as np + import pyworld as pw + from scipy.signal import freqz + import librosa + import math + + """ + 思路: + 先变调,再轻微调整共振峰进行合成 + """ + + base_rate = 1.05946 + pitch = 0 + + fs = 44100 + x, sr = librosa.load(path, sr=fs, mono=True) + x = x.reshape(-1).astype(np.float) + f0, t = pw.dio(x, fs) + f0 = pw.stonemask(x, f0, t, fs) + sp = pw.cheaptrick(x, f0, t, fs) + sp2 = np.zeros_like(sp) + cur_rate = 1 + for i in range(sp.shape[1]): + sp2[:, i] = sp[:, min(int(i * 1 / cur_rate), sp.shape[1] - 1)] + ap = pw.d4c(x, f0, t, fs) + rate = math.pow(base_rate, pitch) + out = pw.synthesize(f0 * rate, sp2, ap, fs).reshape(-1, 1) + soundfile.write(path.replace(".wav", "_out2.wav"), out, fs) + + if __name__ == '__main__': - test("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_out_01.wav") + # vc = VoiceChanger() + # vc.process("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal.wav", + # "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_out1.wav") + + # test( + # "/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal.wav") + + 
main("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_p2.wav") + # ttt("/Users/yangjianli/starmaker-work/research/tmp_code/消音相关/test_out/ins_main_out/test2/tot/3/vocal_02_01.wav")