diff --git a/AutoCoverTool/script/inference_one.py b/AutoCoverTool/script/inference_one.py index 8aace4c..3565bec 100644 --- a/AutoCoverTool/script/inference_one.py +++ b/AutoCoverTool/script/inference_one.py @@ -1,1529 +1,1529 @@
"""
单个处理的逻辑
song_id:
    ---src.mp3 // 源数据,需要提前放进去
    ---cache
        ---vocal.wav // 分离之后产生
        ---acc.wav // 分离之后产生
        ---vocal_32.wav // 分离之后产生
        ---song_id_sp1.wav // 合成之后产生
        ---song_id_sp2.wav // 合成之后产生
        ---song_id_sp2_d.wav // 降噪之后生成
        ---song_id_sp2_dv.wav // 降噪+拉伸之后产生 [占比太高的不产生]
        ---song_id_sp2_dve442.wav // 手动调整之后产生
        ---song_id_sp2_dve442_replace.wav // 替换之后产生
        ---song_id_sp2_dve442_replace_mix.wav // 人声+伴奏混合之后产生
    ---song_id
        --acc.mp3 // 44k双声道320k
        --vocal.mp3 // 44k双声道320k
        --src.mp3 // 44k双声道320k
        --song_id_sp2_dv.mp3 // 44k单声道320k
    ---song_id_out // 对外输出
        --src.mp3 // 原始音频
        --song_id_sp2_dv_replace_mix.mp3 // 制作完成的音频

环境安装:
conda create -n auto_song_cover python=3.9
# 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt]
# 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt]
pip install librosa
pip install scikit-maad
pip install praat-parselmouth
pip install matplotlib
pip install torchvision
pip install madmom
pip install torchstat
环境设置:
export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin
export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame
"""

import logging
import os
import random
import shutil
import subprocess
import time

import librosa
import numpy as np

gs_err_code_success = 0
gs_err_code_no_src_mp3 = 1
gs_err_code_separate = 2
gs_err_code_trans_32 = 3
gs_err_code_encode_err = 4
gs_err_code_replace_err = 5
gs_err_code_replace_trans_err = 6
gs_err_code_mix_err = 7
gs_err_code_mix_transcode_err = 8
gs_err_code_no_src_dir = 9
gs_err_code_volume_err = 10
gs_err_code_trans2_442 = 11
gs_err_code_reverb = 12

gs_denoise_exe = "/opt/soft/bin/denoise_exe"
gs_draw_volume_exe = "/opt/soft/bin/draw_volume_v1"
gs_simple_mixer_path = "/opt/soft/bin/simple_mixer"
gs_rever_path = "/data/rsync/jianli.yang/dereverbrate/build/dereverbrate_test"

from ref.music_remover.separate_interface import SeparateInterface
from ref.so_vits_svc.inference_main import *
from ref.split_dirty_frame.script.process_one import ReplaceVocalFrame, construct_power_fragment
from ref.split_dirty_frame.dataset.dataset import file2mfcc
class SongCoverInference:
    """Drive the cover-song pipeline for one song.

    The stages implemented here are: vocal/accompaniment separation, voice
    conversion with a random sample of speaker models, denoising and volume
    matching, dirty-frame replacement, mixing with the accompaniment and MP3
    encoding.

    Failures are reported through the module-level ``gs_err_code_*`` integers
    rather than exceptions. External tools (ffmpeg, denoiser, volume matcher,
    mixer, de-reverb) are started through :meth:`_run`; success of a step is
    judged by whether its output file exists afterwards.
    """

    def __init__(self):
        """Set up per-song state, the speaker table and the two model wrappers."""
        # Per-song state, filled in by process_one() / after_process().
        self.work_dir = None
        self.cache_dir = None
        self.cid = None
        self.src_mp3 = None
        self.vocal_path = None
        self.vocal_32_path = None
        self.acc_path = None
        # Speaker ids; each one selects a model/config pair via the templates below.
        self.speakers = [
            10414574138721494,
            10414574140317353,
            1688849864840588,
            3634463651,
            5629499489839033,
            5910973794723621,
            6755399374234747,
            8162774327817435,
            8162774329368194,
            1125899914308640,  # male voices from this entry onwards (inclusive)
            12384898975368914,
            12947848931397021,
            3096224748076687,
            3096224751151928,
            5066549357604730,
            5348024335101054,
            6755399442719465,
            7036874421386111,
        ]
        self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth"
        self.speakers_model_config = "data/train_users/{}/config/config.json"

        st = time.time()
        self.separate_inst = SeparateInterface()
        self.replace_vocal_frame_inst = ReplaceVocalFrame("data/models/split_dirty_frame_v5_3_epoch3_852.pth")
        logging.info("SongCoverInference init sp={}".format(time.time() - st))

    @staticmethod
    def _run(argv):
        """Start an external tool without a shell and return its exit status.

        An argument list is used instead of a formatted shell string so that
        paths containing spaces or shell metacharacters reach the tool intact.
        A tool that cannot be started (for example a missing executable) is
        logged and reported as ``-1`` instead of raising, so callers can keep
        relying on their "does the output file exist" checks.

        :param argv: executable followed by its arguments
        :return: the process exit status, or -1 if the process could not start
        """
        argv = [str(arg) for arg in argv]
        try:
            return subprocess.run(argv, check=False).returncode
        except OSError as ex:
            logging.error("failed to launch %s: %s", argv[0], ex)
            return -1

    @staticmethod
    def _merge_close_fragments(fragments, max_gap=0.05):
        """Merge consecutive ``[start, duration]`` fragments separated by < ``max_gap``.

        :param fragments: ``[start, duration]`` pairs ordered by start
        :param max_gap: largest gap (same unit as the fragments) that is bridged
        :return: a new list of ``[start, duration]`` lists
        """
        merged = []
        for start, duration in fragments:
            if merged and start - (merged[-1][0] + merged[-1][1]) < max_gap:
                # Extend the previous fragment up to the end of this one.
                merged[-1][1] = start + duration - merged[-1][0]
            else:
                merged.append([start, duration])
        return merged

    @staticmethod
    def _select_start(fragments):
        """Pick the start of the first usable fragment, or -1 if there is none.

        A fragment qualifies when it lasts at least 3 and, for some later
        fragment ``j``, the summed fragment durations from it through ``j``
        exceed 10% of the span from its start to the end of ``j``.

        NOTE(review): the original comments ask for "more than 10 s of
        continuous audio" and talk about the *non*-vocal share; the check
        below is the one the code actually performed and is kept unchanged.
        Confirm the intended rule before relying on the result.

        :param fragments: ``[start, duration]`` pairs ordered by start
        :return: start of the selected fragment, or -1
        """
        if not fragments:
            # Empty input used to raise IndexError on fragments[0].
            return -1
        cumulative = []
        running = 0
        for _, duration in fragments:
            running += duration
            cumulative.append(running)
        for i, (start, duration) in enumerate(fragments):
            if duration < 3:
                continue
            before = cumulative[i - 1] if i > 0 else 0
            for j in range(i + 1, len(fragments)):
                span = fragments[j][1] + fragments[j][0] - start
                if (cumulative[j] - before) / span > 0.1:
                    return start
        return -1

    @staticmethod
    def _after_process_paths(effect_file):
        """Derive the files produced by the tail of :meth:`after_process`.

        Every name is built from ``effect_file`` directly. The previous chain
        of ``str.replace`` calls stopped matching once the extra volume step
        was added, which made the mixer output path identical to its input.

        :param effect_file: the processed vocal handed to the frame replacer
        :return: ``(replaced, replaced_442, leveled, mix_wav, mix_mp3_name)``
        """
        replaced = effect_file + "_replace.wav"
        replaced_442 = effect_file + "_replace442.wav"
        leveled = effect_file + "_replace442_dv.wav"
        mix_wav = effect_file + "_replace442_mix.wav"
        mix_mp3_name = os.path.basename(effect_file) + "_replace442_mix.mp3"
        return replaced, replaced_442, leveled, mix_wav, mix_mp3_name

    def separate(self, cid, src_mp3, vocal_path, acc_path):
        """Split the source into vocal and accompaniment and make a 32 kHz mono vocal.

        ``self.vocal_32_path`` must already be set (``process_one`` does so).

        :param cid: song id, passed to the separator and used in the timing print
        :param src_mp3: source audio path
        :param vocal_path: where the separated vocal is expected
        :param acc_path: where the separated accompaniment is expected
        :return: gs_err_code_success, gs_err_code_separate or gs_err_code_trans_32
        """
        st = time.time()
        if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path):
            return gs_err_code_separate
        if not os.path.exists(vocal_path) or not os.path.exists(acc_path):
            return gs_err_code_separate

        # Transcode the vocal to 32 kHz mono.
        self._run(["ffmpeg", "-i", vocal_path, "-ar", "32000", "-ac", "1", "-y", self.vocal_32_path,
                   "-loglevel", "fatal"])
        if not os.path.exists(self.vocal_32_path):
            return gs_err_code_trans_32
        print("separate:cid={}|sp={}".format(cid, time.time() - st))
        return gs_err_code_success

    def get_start_ms(self, vocal_path):
        """Find a start position inside the vocal track.

        Despite the name, the value is in seconds (fragment positions are
        frame indices scaled by 0.01, and the track length is samples / sr).

        :param vocal_path: vocal audio to analyse
        :return: start position in seconds, or -1 if nothing qualifies
        """
        audio, sr = librosa.load(vocal_path, sr=16000)
        audio = librosa.util.normalize(audio)

        # Mean absolute amplitude of consecutive 160-sample windows (10 ms at
        # 16 kHz); the final 1600 samples are not scanned.
        power_arr = [
            np.sum(np.abs(audio[i:i + 160])) / 160
            for i in range(0, len(audio) - 1600, 160)
        ]
        # presumably returns (start_frame, frame_count) runs of low energy -
        # only items 0 and 1 of each entry are read here; verify against the helper
        power_arr = construct_power_fragment(power_arr)

        # The stretches between those runs become [start, duration] fragments.
        fragments = []
        last_pos = 0
        for line in power_arr:
            start = round(float(line[0]) * 0.01, 3)
            duration = round(float(line[1]) * 0.01, 3)
            fragments.append([last_pos, start - last_pos])
            last_pos = start + duration
        total_sec = len(audio) / sr
        if last_pos < total_sec:
            fragments.append([last_pos, total_sec - last_pos])

        # Fragments less than 50 ms apart are treated as one.
        fragments = self._merge_close_fragments(fragments, 0.05)
        return self._select_start(fragments)

    def inference_speaker(self):
        """Convert the vocal with up to five random speakers and keep the best one.

        Only conversions whose ``get_rate`` value is below 0.3 are candidates;
        the one with the smallest value wins.

        :return: a list holding the winning wav path, or an empty list
        """
        st = time.time()
        out_speakers = random.sample(self.speakers, min(5, len(self.speakers)))
        out_songs_dict = {}
        for speaker in out_speakers:
            model_path = self.speakers_model_path.format(speaker)
            config_path = self.speakers_model_config.format(speaker)
            song_path = os.path.join(self.cache_dir, "{}_{}.wav".format(self.cid, speaker))
            try:
                inf(model_path, config_path, self.vocal_32_path, song_path, "prod")
            except Exception as ex:
                # Best effort: one failing speaker must not abort the others.
                logging.exception("cid={}, inference_speaker err={}".format(self.cid, ex))
                continue
            if os.path.exists(song_path):
                rate = self.replace_vocal_frame_inst.get_rate(song_path)
                if rate < 0.3:
                    out_songs_dict[song_path] = rate

        # Keep the candidate with the lowest rate.
        out_songs = []
        if out_songs_dict:
            st_sec = self.get_start_ms(self.vocal_path)
            best_song, best_rate = min(out_songs_dict.items(), key=lambda kv: kv[1])
            out_songs = [best_song]
            msg = "GetRate:cid={},song={},rate={},st_tm={}".format(self.cid, best_song, round(best_rate, 2),
                                                                   round(st_sec, 3))
            logging.info(msg)
            print(msg)
        print("inference_speaker len = {} finish sp = {}".format(len(out_songs), time.time() - st))
        return out_songs

    def get_new_vocal_rate(self, songs):
        """Keep the songs whose ``get_rate`` value is below 1.0.

        :param songs: wav paths to check
        :return: the subset of ``songs`` that still needs processing
        """
        st = time.time()
        need_to_process_song = []
        for song in songs:
            rate = self.replace_vocal_frame_inst.get_rate(song)
            logging.info("{} {} replace_rate={}".format(self.cid, song, rate))
            if rate < 1.0:
                need_to_process_song.append(song)
        logging.info(
            "get_new_vocal_rate belen = {} len = {} finish sp = {}".format(len(songs), len(need_to_process_song),
                                                                           time.time() - st))
        return need_to_process_song

    def preprocess_vocal(self, songs, vocal_path):
        """Denoise each song, then match its volume to the reference vocal.

        :param songs: converted wav paths
        :param vocal_path: reference vocal handed to the volume tool
        :return: paths of the ``*_dv.wav`` files that were produced
        """
        st = time.time()
        dv_out_list = []
        for song in songs:
            denoise_path = str(song).replace(".wav", "_d.wav")
            self._run([gs_denoise_exe, song, denoise_path])
            if not os.path.exists(denoise_path):
                print("{} {} ERROR denoise".format(self.cid, song))
                continue

            # Volume matching against the reference vocal.
            volume_path = str(song).replace(".wav", "_dv.wav")
            self._run([gs_draw_volume_exe, denoise_path, vocal_path, volume_path])
            if not os.path.exists(volume_path):
                # This used to be reported as a denoise error.
                print("{} {} ERROR draw volume".format(self.cid, volume_path))
                continue
            dv_out_list.append(volume_path)
        print(
            "preprocess_vocal belen = {} len = {} finish sp = {}".format(len(songs), len(dv_out_list),
                                                                         time.time() - st))
        return dv_out_list

    def output(self, dv_out_list):
        """Export source, accompaniment, vocal and candidates for manual labelling.

        The target directory ``<work_dir>/<cid>`` is recreated from scratch.

        :param dv_out_list: processed wav paths to encode as 44.1 kHz mono MP3
        :return: gs_err_code_success, or gs_err_code_encode_err when the
                 accompaniment or vocal could not be encoded
        """
        st = time.time()
        out_dir = os.path.join(self.work_dir, self.cid)
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)

        # The copy was previously named "src_mp3"; the module docstring and
        # the sibling files use "src.mp3".
        dst_mp3_path = os.path.join(out_dir, "src.mp3")
        dst_acc_path = os.path.join(out_dir, "acc.mp3")
        dst_vocal_path = os.path.join(out_dir, "vocal.mp3")
        shutil.copyfile(self.src_mp3, dst_mp3_path)

        for wav_path, mp3_path in ((self.acc_path, dst_acc_path), (self.vocal_path, dst_vocal_path)):
            self._run(["ffmpeg", "-i", wav_path, "-ab", "320k", "-y", mp3_path, "-loglevel", "fatal"])
            if not os.path.exists(mp3_path):
                return gs_err_code_encode_err

        # Everything goes into out_dir for the human annotators.
        for dv_wav in dv_out_list:
            dv_wav_name = str(dv_wav).split("/")[-1].replace(".wav", "_441.mp3")
            dst_dv_path = os.path.join(out_dir, dv_wav_name)
            argv = ["ffmpeg", "-i", dv_wav, "-ar", "44100", "-ac", "1", "-ab", "320k", "-y", dst_dv_path,
                    "-loglevel", "fatal"]
            self._run(argv)
            if not os.path.exists(dst_dv_path):
                print("{} encode err!".format(" ".join(str(arg) for arg in argv)))
                continue
        logging.info(
            "preprocess_vocal output sp = {}".format(time.time() - st))
        return gs_err_code_success

    def process_one(self, cid, work_dir, enable_output=False):
        """Run the whole pipeline for the song stored in ``work_dir``.

        ``work_dir`` must contain ``src.mp3``. Intermediate files live in
        ``work_dir/cache``, which is recreated on every call.

        :param cid: song id
        :param work_dir: directory holding ``src.mp3``
        :param enable_output: export candidates for manual labelling instead
                              of running ``after_process`` on them
        :return: gs_err_code_success, gs_err_code_no_src_mp3 or a separation
                 error code; ``after_process`` / ``output`` failures are only logged
        """
        logging.info("\nstart:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir))
        self.cid = cid
        self.work_dir = work_dir
        # Everything that is not delivered externally lives in the cache directory.
        self.cache_dir = os.path.join(work_dir, "cache")

        # Validate the input before touching the file system.
        self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
        if not os.path.exists(self.src_mp3):
            return gs_err_code_no_src_mp3

        if os.path.exists(self.cache_dir):
            shutil.rmtree(self.cache_dir)
        os.makedirs(self.cache_dir)

        self.vocal_path = os.path.join(self.cache_dir, "vocal.wav")
        self.vocal_32_path = os.path.join(self.cache_dir, "vocal_32.wav")
        self.acc_path = os.path.join(self.cache_dir, "acc.wav")
        # NOTE(review): the cache was just recreated, so this is always true.
        if not os.path.exists(self.vocal_32_path):
            logging.info("start separate ... {} {} {}".format(self.src_mp3, self.vocal_path, self.acc_path))
            err = self.separate(cid, self.src_mp3, self.vocal_path, self.acc_path)
            if err != gs_err_code_success:
                return err

        logging.info("start inference_speaker ...")
        out_songs = self.inference_speaker()
        logging.info("start get_new_vocal_rate ...")
        dv_out_list = self.preprocess_vocal(out_songs, self.vocal_path)

        if enable_output:
            err = self.output(dv_out_list)
            if err != gs_err_code_success:
                logging.error("output err %s", err)
        else:
            # Default: post-process every candidate.
            for dv_out_path in dv_out_list:
                src_path = dv_out_path.replace("_dv.wav", ".wav")
                err = self.after_process(self.cid, self.work_dir, src_path, dv_out_path, self.vocal_path,
                                         self.acc_path, True, True)
                if err != gs_err_code_success:
                    logging.info("after_process err {}".format(err))
        logging.info("finish:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir))
        return gs_err_code_success

    def reverb_by_vocal(self, file, vocal_path=None):
        """Transcode ``file`` to 44.1 kHz stereo and run the de-reverb tool on it.

        :param file: wav to process
        :param vocal_path: reference vocal for the tool; defaults to
                           ``self.vocal_path`` (the previous behaviour)
        :return: ``(output_path, gs_err_code_success)`` or ``(None, error_code)``
        """
        st = time.time()
        if vocal_path is None:
            vocal_path = self.vocal_path

        file_442 = file.replace(".wav", "_442.wav")
        if not os.path.exists(file_442):
            self._run(["ffmpeg", "-i", file, "-ar", "44100", "-ac", "2", "-y", file_442])
        if not os.path.exists(file_442):
            return None, gs_err_code_trans2_442

        file_dst = file.replace(".wav", "_442_dr.wav")
        self._run([gs_rever_path, vocal_path, file_442, file_dst])
        if not os.path.exists(file_dst):
            return None, gs_err_code_reverb
        print("cid = {}, reverb_by_vocal sp={}".format(self.cid, time.time() - st))
        return file_dst, gs_err_code_success

    def after_process(self, cid, work_dir, in_file, effect_file, vocal_file, acc_file, need_draw=True,
                      need_reverb=True):
        """Replace dirty frames, mix with the accompaniment and encode to MP3.

        The final file is written to ``<work_dir>/<cid>_out``.

        :param cid: song id
        :param work_dir: existing working directory of the song
        :param in_file: converted vocal before any effect processing
        :param effect_file: processed vocal; replaced by the de-reverb result
                            when ``need_reverb`` is true
        :param vocal_file: original separated vocal used as reference
        :param acc_file: separated accompaniment
        :param need_draw: run the volume tool on ``effect_file`` first
        :param need_reverb: derive ``effect_file`` from ``in_file`` via de-reverb
        :return: gs_err_code_success or the error code of the failing step
        """
        # Record the per-song state and validate the directory before any
        # external tool runs, so logs carry the right cid.
        self.cid = cid
        self.work_dir = work_dir
        self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
        if not os.path.exists(self.work_dir):
            return gs_err_code_no_src_dir

        if need_reverb:
            # Use the caller's reference vocal; self.vocal_path may be unset.
            effect_file, err = self.reverb_by_vocal(in_file, vocal_file)
            if err != gs_err_code_success:
                return err
        if need_draw:
            # Extra volume-matching pass.
            volume_path = str(effect_file).replace(".wav", "_dv.wav")
            argv = [gs_draw_volume_exe, effect_file, vocal_file, volume_path]
            print(" ".join(str(arg) for arg in argv))
            self._run(argv)
            if not os.path.exists(volume_path):
                print("{} {} ERROR draw volume".format(self.cid, volume_path))
                return gs_err_code_volume_err
            effect_file = volume_path

        dst_path, dst_path_442, leveled_path, mix_path, mix_name = self._after_process_paths(effect_file)

        st = time.time()
        self.replace_vocal_frame_inst.process(in_file, effect_file, vocal_file)
        if not os.path.exists(dst_path):
            return gs_err_code_replace_err
        print("replace_vocal_frame_inst sp = {}".format(time.time() - st))

        # Transcode to 44.1 kHz stereo.
        self._run(["ffmpeg", "-i", dst_path, "-ar", "44100", "-ac", "2", "-y", dst_path_442,
                   "-loglevel", "fatal"])
        if not os.path.exists(dst_path_442):
            return gs_err_code_replace_trans_err

        # Match the volume once more after transcoding to keep loudness stable.
        argv = [gs_draw_volume_exe, dst_path_442, vocal_file, leveled_path]
        print(" ".join(str(arg) for arg in argv))
        self._run(argv)
        if not os.path.exists(leveled_path):
            print("{} {} ERROR draw volume".format(self.cid, leveled_path))
            return gs_err_code_volume_err

        # Mix vocal and accompaniment into a file distinct from the inputs.
        argv = [gs_simple_mixer_path, leveled_path, acc_file, mix_path]
        print("{}".format(" ".join(str(arg) for arg in argv)))
        self._run(argv)
        if not os.path.exists(mix_path):
            return gs_err_code_mix_err

        # Encode the mix as MP3 in the delivery directory.
        output_dir = os.path.join(self.work_dir, self.cid + "_out")
        os.makedirs(output_dir, exist_ok=True)
        mix_path_mp3 = os.path.join(output_dir, mix_name)
        self._run(["ffmpeg", "-i", mix_path, "-ab", "320k", "-y", mix_path_mp3, "-loglevel", "fatal"])
        if not os.path.exists(mix_path_mp3):
            return gs_err_code_mix_transcode_err
        return gs_err_code_success
def test_volume_dir():
    """Re-run ``SongCoverInference.after_process`` over a fixed batch of cached conversions.

    Each entry names a converted vocal below a hard-coded data root. The
    matching ``*_442_dr_v2.wav`` file is used as the effect input, and
    ``vocal.wav`` / ``acc.wav`` are taken from the same directory. The volume
    step is enabled and the de-reverb step is disabled. Error code and elapsed
    time are printed per entry.

    NOTE(review): the directory of each input (``.../<song_id>/cache``) is
    passed as the working directory and its last component (``cache``) as the
    cid, so results land in ``.../cache/cache_out``. Confirm this is intended.
    """
    root = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_3_w4"
    # Current batch; earlier batches were only kept as commented-out lists.
    relative_inputs = [
        "611752105020256284/cache/611752105020256284_8162774329368194.wav",
        "611752105020286433/cache/611752105020286433_1125899914308640.wav",
        "611752105020286443/cache/611752105020286443_12384898975368914.wav",
        "611752105020286446/cache/611752105020286446_5629499489839033.wav",
        "611752105020290639/cache/611752105020290639_3634463651.wav",
        "611752105020290695/cache/611752105020290695_1125899914308640.wav",
        "611752105020315328/cache/611752105020315328_8162774329368194.wav",
        "611752105020315368/cache/611752105020315368_1688849864840588.wav",
        "611752105020336950/cache/611752105020336950_3634463651.wav",
        "611752105020343687/cache/611752105020343687_8162774327817435.wav"
    ]

    pipeline = SongCoverInference()
    for rel_path in relative_inputs:
        began = time.time()
        converted = os.path.join(root, rel_path)
        # Directory part of the input, split on "/" as before.
        folder = converted.rsplit("/", 1)[0]
        effect = os.path.join(root, rel_path.replace(".wav", "_442_dr_v2.wav"))
        vocal = os.path.join(folder, "vocal.wav")
        accompaniment = os.path.join(folder, "acc.wav")
        folder_name = folder.rsplit("/", 1)[-1]
        err = pipeline.after_process(folder_name, folder, converted, effect, vocal, accompaniment, True, False)
        print("err={}, sp={}".format(err, time.time() - began))
"611752105030249034", "611752105030249403", "611752105030249104", "611752105030249105", "611752105030249359", "611752105030250728", "611752105030249338", "611752105030249216", "611752105030249334", "611752105030249037", "611752105030249264", "611752105030249284", "611752105030249267", "611752105030249010", "611752105030249431", "611752105030249364", "611752105030249243", "611752105030249397", "611752105030249041", "611752105030249118", "611752105030249283", "611752105030249340", "611752105030249250", "611752105030249048", "611752105030249336", "611752105030249371", "611752105030249372", "611752105030249273", "611752105030249366", "611752105030249352", "611752105030249049", "611752105030249278", "611752105030249401", "611752105030249258", "611752105030249160", "611752105030249348", "611752105030249071", "611752105030249175", "611752105030249053", "611752105030249035", "611752105030249375", "611752105030249417", "611752105030249055", "611752105030249275", "611752105030249177", "611752105028480653", "611752105030249385", "611752105030249406", "611752105030249383", "611752105030249295", "611752105030250699", "611752105030249289", "611752105030248965", "611752105030249128", "611752105030249173", "611752105030249019", "611752105030249333", "611752105030249361", "611752105030250733", "611752105030249112", "611752105030249293", "611752105030249391", "611752105030249195", "611752105030249324", "611752105030249388", "611752105030249134", "611752105030249073", "611752105030249174", "611752105030249353", "611752105030249287", "611752105030249113", "611752105030249227" ] all = [ "611752105026649069", "611752105027201163", "611752105027601574", "611752105027602999", "611752105028392007", "611752105028480056", "611752105028480075", "611752105028480653", "611752105029330944", "611752105029790637", "611752105029951597", "611752105029951604", "611752105029951624", "611752105029956352", "611752105030248965", "611752105030248971", "611752105030248972", "611752105030248973", 
"611752105030248974", "611752105030248975", "611752105030248976", "611752105030248977", "611752105030248978", "611752105030248979", "611752105030248980", "611752105030248981", "611752105030248982", "611752105030248983", "611752105030248985", "611752105030248986", "611752105030248987", "611752105030248988", "611752105030248989", "611752105030248990", "611752105030248991", "611752105030248992", "611752105030248993", "611752105030248994", "611752105030248995", "611752105030248996", "611752105030248997", "611752105030248998", "611752105030248999", "611752105030249000", "611752105030249001", "611752105030249002", "611752105030249003", "611752105030249004", "611752105030249005", "611752105030249006", "611752105030249007", "611752105030249008", "611752105030249009", "611752105030249010", "611752105030249011", "611752105030249012", "611752105030249013", "611752105030249014", "611752105030249015", "611752105030249016", "611752105030249017", "611752105030249018", "611752105030249019", "611752105030249020", "611752105030249021", "611752105030249022", "611752105030249023", "611752105030249024", "611752105030249025", "611752105030249026", "611752105030249027", "611752105030249028", "611752105030249029", "611752105030249030", "611752105030249031", "611752105030249032", "611752105030249033", "611752105030249034", "611752105030249035", "611752105030249036", "611752105030249037", "611752105030249038", "611752105030249039", "611752105030249040", "611752105030249041", "611752105030249042", "611752105030249043", "611752105030249044", "611752105030249045", "611752105030249046", "611752105030249047", "611752105030249048", "611752105030249049", "611752105030249050", "611752105030249051", "611752105030249052", "611752105030249053", "611752105030249054", "611752105030249055", "611752105030249056", "611752105030249057", "611752105030249058", "611752105030249059", "611752105030249060", "611752105030249062", "611752105030249063", "611752105030249064", "611752105030249065", 
"611752105030249067", "611752105030249068", "611752105030249070", "611752105030249071", "611752105030249072", "611752105030249073", "611752105030249074", "611752105030249075", "611752105030249076", "611752105030249077", "611752105030249078", "611752105030249079", "611752105030249080", "611752105030249081", "611752105030249082", "611752105030249083", "611752105030249084", "611752105030249085", "611752105030249086", "611752105030249087", "611752105030249088", "611752105030249089", "611752105030249090", "611752105030249091", "611752105030249092", "611752105030249093", "611752105030249094", "611752105030249095", "611752105030249096", "611752105030249098", "611752105030249099", "611752105030249100", "611752105030249101", "611752105030249102", "611752105030249103", "611752105030249104", "611752105030249105", "611752105030249106", "611752105030249107", "611752105030249108", "611752105030249109", "611752105030249110", "611752105030249111", "611752105030249112", "611752105030249113", "611752105030249114", "611752105030249115", "611752105030249116", "611752105030249117", "611752105030249118", "611752105030249119", "611752105030249120", "611752105030249121", "611752105030249122", "611752105030249123", "611752105030249124", "611752105030249125", "611752105030249126", "611752105030249127", "611752105030249128", "611752105030249129", "611752105030249130", "611752105030249131", "611752105030249132", "611752105030249133", "611752105030249134", "611752105030249135", "611752105030249136", "611752105030249137", "611752105030249138", "611752105030249139", "611752105030249140", "611752105030249141", "611752105030249142", "611752105030249143", "611752105030249144", "611752105030249145", "611752105030249146", "611752105030249147", "611752105030249148", "611752105030249150", "611752105030249151", "611752105030249152", "611752105030249153", "611752105030249154", "611752105030249155", "611752105030249157", "611752105030249158", "611752105030249159", "611752105030249160", 
"611752105030249161", "611752105030249162", "611752105030249163", "611752105030249165", "611752105030249166", "611752105030249167", "611752105030249168", "611752105030249170", "611752105030249171", "611752105030249172", "611752105030249173", "611752105030249174", "611752105030249175", "611752105030249176", "611752105030249177", "611752105030249178", "611752105030249179", "611752105030249180", "611752105030249181", "611752105030249182", "611752105030249183", "611752105030249185", "611752105030249186", "611752105030249187", "611752105030249188", "611752105030249189", "611752105030249190", "611752105030249191", "611752105030249192", "611752105030249193", "611752105030249194", "611752105030249195", "611752105030249196", "611752105030249197", "611752105030249198", "611752105030249199", "611752105030249200", "611752105030249201", "611752105030249202", "611752105030249203", "611752105030249204", "611752105030249205", "611752105030249206", "611752105030249207", "611752105030249208", "611752105030249209", "611752105030249210", "611752105030249211", "611752105030249212", "611752105030249213", "611752105030249214", "611752105030249216", "611752105030249217", "611752105030249218", "611752105030249219", "611752105030249220", "611752105030249221", "611752105030249223", "611752105030249224", "611752105030249225", "611752105030249226", "611752105030249227", "611752105030249228", "611752105030249229", "611752105030249230", "611752105030249231", "611752105030249232", "611752105030249233", "611752105030249234", "611752105030249235", "611752105030249236", "611752105030249237", "611752105030249238", "611752105030249239", "611752105030249240", "611752105030249241", "611752105030249242", "611752105030249243", "611752105030249244", "611752105030249245", "611752105030249247", "611752105030249248", "611752105030249249", "611752105030249250", "611752105030249251", "611752105030249252", "611752105030249253", "611752105030249255", "611752105030249256", "611752105030249257", 
"611752105030249258", "611752105030249259", "611752105030249260", "611752105030249261", "611752105030249262", "611752105030249264", "611752105030249265", "611752105030249266", "611752105030249267", "611752105030249269", "611752105030249270", "611752105030249271", "611752105030249273", "611752105030249274", "611752105030249275", "611752105030249277", "611752105030249278", "611752105030249279", "611752105030249280", "611752105030249281", "611752105030249282", "611752105030249283", "611752105030249284", "611752105030249287", "611752105030249288", "611752105030249289", "611752105030249290", "611752105030249292", "611752105030249293", "611752105030249294", "611752105030249295", "611752105030249296", "611752105030249297", "611752105030249298", "611752105030249299", "611752105030249300", "611752105030249301", "611752105030249302", "611752105030249303", "611752105030249307", "611752105030249308", "611752105030249309", "611752105030249310", "611752105030249313", "611752105030249314", "611752105030249315", "611752105030249316", "611752105030249317", "611752105030249318", "611752105030249319", "611752105030249320", "611752105030249321", "611752105030249322", "611752105030249323", "611752105030249324", "611752105030249325", "611752105030249327", "611752105030249328", "611752105030249329", "611752105030249330", "611752105030249331", "611752105030249332", "611752105030249333", "611752105030249334", "611752105030249336", "611752105030249337", "611752105030249338", "611752105030249339", "611752105030249340", "611752105030249341", "611752105030249342", "611752105030249343", "611752105030249344", "611752105030249345", "611752105030249346", "611752105030249347", "611752105030249348", "611752105030249349", "611752105030249350", "611752105030249351", "611752105030249352", "611752105030249353", "611752105030249354", "611752105030249355", "611752105030249356", "611752105030249357", "611752105030249358", "611752105030249359", "611752105030249360", "611752105030249361", 
"611752105030249362", "611752105030249363", "611752105030249364", "611752105030249365", "611752105030249366", "611752105030249367", "611752105030249368", "611752105030249369", "611752105030249370", "611752105030249371", "611752105030249372", "611752105030249373", "611752105030249374", "611752105030249375", "611752105030249376", "611752105030249377", "611752105030249378", "611752105030249379", "611752105030249380", "611752105030249381", "611752105030249383", "611752105030249384", "611752105030249385", "611752105030249386", "611752105030249387", "611752105030249388", "611752105030249389", "611752105030249390", "611752105030249391", "611752105030249392", "611752105030249393", "611752105030249394", "611752105030249395", "611752105030249396", "611752105030249397", "611752105030249398", "611752105030249399", "611752105030249401", "611752105030249402", "611752105030249403", "611752105030249404", "611752105030249405", "611752105030249406", "611752105030249407", "611752105030249408", "611752105030249409", "611752105030249410", "611752105030249412", "611752105030249413", "611752105030249414", "611752105030249415", "611752105030249416", "611752105030249417", "611752105030249418", "611752105030249419", "611752105030249420", "611752105030249421", "611752105030249431", "611752105030249624", "611752105030250688", "611752105030250689", "611752105030250690", "611752105030250691", "611752105030250692", "611752105030250693", "611752105030250695", "611752105030250697", "611752105030250698", "611752105030250699", "611752105030250700", "611752105030250701", "611752105030250702", "611752105030250704", "611752105030250707", "611752105030250711", "611752105030250712", "611752105030250713", "611752105030250714", "611752105030250715", "611752105030250716", "611752105030250717", "611752105030250718", "611752105030250719", "611752105030250720", "611752105030250721", "611752105030250723", "611752105030250725", "611752105030250726", "611752105030250728", "611752105030250729", 
def get_me_3_w4_zy():
    """Return the hard-coded song ids of the ``me_3_w4_zy`` batch.

    :return: a new list of 99 song-id strings, in the fixed order below.
    """
    return [
        "611752105015523266", "611752105016527562", "611752105017233541",
        "611752105019423720", "611752105030113709", "611752105030414513",
        "611752105030414549", "611752105030414557", "611752105030414568",
        "611752105030414576", "611752105030414580", "611752105030414584",
        "611752105030414588", "611752105030414590", "611752105030414597",
        "611752105030414600", "611752105030414608", "611752105030414613",
        "611752105030414615", "611752105030414619", "611752105030414633",
        "611752105030414638", "611752105030414644", "611752105030414647",
        "611752105030414655", "611752105030414660", "611752105030414663",
        "611752105030414669", "611752105030414674", "611752105030414678",
        "611752105030414680", "611752105030414682", "611752105030414686",
        "611752105030414689", "611752105030414696", "611752105030414702",
        "611752105030414706", "611752105030414707", "611752105030414711",
        "611752105030414717", "611752105030414729", "611752105030414742",
        "611752105030414752", "611752105030414757", "611752105030414761",
        "611752105030414763", "611752105030414766", "611752105030414773",
        "611752105030414776", "611752105030414777", "611752105030414779",
        "611752105030414784", "611752105030414890", "611752105030414907",
        "611752105030414915", "611752105030414919", "611752105030414925",
        "611752105030414929", "611752105030414932", "611752105030414935",
        "611752105030414937", "611752105030414943", "611752105030414948",
        "611752105030414949", "611752105030414957", "611752105030414962",
        "611752105030414963", "611752105030414968", "611752105030414973",
        "611752105030414976", "611752105030414981", "611752105030414986",
        "611752105030414988", "611752105030414990", "611752105030414993",
        "611752105030414995", "611752105030415003", "611752105030415007",
        "611752105030415009", "611752105030415014", "611752105030415018",
        "611752105030415032", "611752105030415044", "611752105030415050",
        "611752105030415052", "611752105030415056", "611752105030415058",
        "611752105030415062", "611752105030415067", "611752105030415071",
        "611752105030415074", "611752105030415078", "611752105030415083",
        "611752105030415087", "611752105030415094", "611752105030415100",
        "611752105030415103", "611752105030425986", "611752105030426004",
    ]


def generate_arr():
    """Run ``SongCoverInference.process_one`` for every id of the ``me_3_w4_zy`` batch.

    Each song is processed in its own directory under the batch root and the
    returned error code plus the elapsed seconds are printed per song.
    Nothing is returned.
    """
    # An earlier revision iterated a hand-written id list or get_metop500()
    # with the ".../data/inf_users/me_top500" root instead of this batch.
    sids = get_me_3_w4_zy()
    batch_root = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_3_w4_zy"
    s_inst = SongCoverInference()
    for sid in sids:
        sstime = time.time()
        # Named work_dir (not ``dir``) so the builtin is not shadowed.
        work_dir = os.path.join(batch_root, sid)
        # NOTE(review): meaning of the third positional flag (True) is defined
        # by process_one, which is outside this chunk -- confirm before changing.
        err = s_inst.process_one(sid, work_dir, True)
        print("sid={}, err={}, sp={}".format(sid, err, time.time() - sstime))


def test_rate():
    """Print the result of ``get_start_ms`` for a fixed list of separated vocals.

    For each id the path ``data/inf_users/me_3_w4/<id>/cache/vocal.wav``
    (relative to the current working directory) is passed to
    ``SongCoverInference.get_start_ms`` and one ``res,<path>,<value>`` line is
    printed. Presumably the value is a start offset in milliseconds -- confirm
    against ``get_start_ms``.
    """
    arr = [
        "611752105020256284", "611752105020286433", "611752105020286443",
        "611752105020286446", "611752105020290639", "611752105020290695",
        "611752105020315328", "611752105020315368", "611752105020336950",
        "611752105020343687", "611752105020343699", "611752105020351134",
        "611752105020357112", "611752105020378620", "611752105020387015",
        "611752105020394121", "611752105020394297", "611752105020411654",
        "611752105020417688", "611752105020548211", "611752105020563523",
        "611752105021285282", "611752105021332759", "611752105022446809",
        "611752105022647082", "611752105022667231", "611752105022735101",
        "611752105022736204", "611752105022745595", "611752105022770952",
        "611752105022842004", "611752105022842477", "611752105023434557",
        "611752105023532439", "611752105023623965", "611752105024250202",
        "611752105024628047", "611752105024676794", "611752105024678976",
        "611752105024679221", "611752105024953316", "611752105025104181",
        "611752105026189342", "611752105026523547", "611752105026707760",
        "611752105026771723", "611752105026946178", "611752105027047993",
        "611752105027188746", "611752105027189453", "611752105027302268",
        "611752105027557408", "611752105028650636", "611752105028683824",
        "611752105029990849", "611752105029993297", "611752105030077711",
        "611752105030104548", "611752105030419624", "611752105030419633",
        "611752105030419688", "611752105030433779",
    ]
    s_inst = SongCoverInference()
    for sid in arr:
        vocal_path = "data/inf_users/me_3_w4/{}/cache/vocal.wav".format(sid)
        tm = s_inst.get_start_ms(vocal_path)
        print("res,{},{}".format(vocal_path, tm))


def test():
    """Run ``process_one`` on the single-song smoke-test set and print timing.

    The song directory is ``<base_dir>/<cid>``; the error code returned by
    ``process_one`` and the elapsed seconds are printed for each id.
    """
    # Ids "611752105020343687" and "611752105023532439" were used in earlier
    # runs and are currently disabled.
    arr = [
        "611752105030419688",
    ]
    base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test"
    s_inst = SongCoverInference()
    for cid in arr:
        st = time.time()
        err = s_inst.process_one(cid, os.path.join(base_dir, cid), False)
        print("cid={} RealFinish err={} sp={}".format(cid, err, time.time() - st))


if __name__ == '__main__':
    # Only the smoke test is enabled; test_rate(), generate_arr() and
    # test_volume_dir() are the other manual entry points used previously.
    test()
"""
Mix a converted vocal ("effect") track with its accompaniment.

Pipeline implemented by :func:`process`:
1. transcode effect.wav to 44.1 kHz stereo with ffmpeg
2. run the draw_volume tool on the transcoded track and vocal.wav
3. run the simple_mixer tool on the result and acc.wav
"""
import os
import subprocess
import time

gs_draw_volume = "/Users/yangjianli/linux/opt/soft/bin/draw_volume"
gs_simple_mixer = "/Users/yangjianli/linux/opt/soft/bin/simple_mixer"
gs_ffmpeg = "/usr/local/bin/ffmpeg"


def _run_step(cmd, output_path):
    """Run one external tool and report whether it produced ``output_path``.

    Any file left at ``output_path`` by an earlier run is removed first, so
    the existence check reflects this invocation only. The command is passed
    as an argument list (no shell), so paths containing spaces or shell
    metacharacters are handled verbatim.

    :param cmd: argument list, executable first.
    :param output_path: file the tool is expected to create.
    :return: True if ``output_path`` exists after the tool ran, else False
        (including when the executable cannot be started).
    """
    try:
        if os.path.exists(output_path):
            os.remove(output_path)
        # The exit status is deliberately not used as the success criterion:
        # the original contract is "the output file exists".
        subprocess.run(cmd, check=False)
    except OSError:
        return False
    return os.path.exists(output_path)


def process(dir, ffmpeg=None, draw_volume=None, simple_mixer=None):
    """Build ``mix.wav`` from the tracks found in one song directory.

    Required inputs in ``dir``: ``effect.wav``, ``vocal.wav``, ``acc.wav``.
    Intermediate files: ``effect_442.wav``, ``effect_442_dv.wav``.
    Final output: ``mix.wav``.

    :param dir: song directory (the name shadows the builtin but is kept
        because it is part of the existing call signature).
    :param ffmpeg: ffmpeg executable; defaults to ``gs_ffmpeg``.
    :param draw_volume: draw_volume executable; defaults to ``gs_draw_volume``.
    :param simple_mixer: simple_mixer executable; defaults to ``gs_simple_mixer``.
    :return: 0 on success, -1 if an input is missing or the draw_volume /
        mixing step produced no output, -2 if the ffmpeg transcode produced
        no output.
    """
    st = time.time()
    ffmpeg = gs_ffmpeg if ffmpeg is None else ffmpeg
    draw_volume = gs_draw_volume if draw_volume is None else draw_volume
    simple_mixer = gs_simple_mixer if simple_mixer is None else simple_mixer

    effect_wav = os.path.join(dir, "effect.wav")
    effect442_wav = os.path.join(dir, "effect_442.wav")
    effect442dv_wav = os.path.join(dir, "effect_442_dv.wav")
    mix_wav = os.path.join(dir, "mix.wav")
    vocal_wav = os.path.join(dir, "vocal.wav")
    acc_wav = os.path.join(dir, "acc.wav")

    # Report the first missing input, in the same order as before.
    for required in (effect_wav, vocal_wav, acc_wav):
        if not os.path.exists(required):
            print("no {}".format(required))
            return -1

    # (command, expected output, error code returned when the output is missing)
    steps = (
        # transcode to 44.1 kHz stereo
        ([ffmpeg, "-i", effect_wav, "-ar", "44100", "-ac", "2", "-y", effect442_wav],
         effect442_wav, -2),
        # "stretch" step; presumably aligns loudness with vocal.wav -- confirm
        # against the draw_volume tool
        ([draw_volume, effect442_wav, vocal_wav, effect442dv_wav],
         effect442dv_wav, -1),
        # mix the processed vocal with the accompaniment
        ([simple_mixer, effect442dv_wav, acc_wav, mix_wav],
         mix_wav, -1),
    )
    for cmd, output_path, err_code in steps:
        if not _run_step(cmd, output_path):
            print("err! {}".format(" ".join(cmd)))
            return err_code

    print("{} success! sp={}".format(dir, time.time() - st))
    return 0


if __name__ == '__main__':
    process(
        "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/out_0327/test/me_3_w4_10_compare_v3_src/611752105020336950")