diff --git a/AutoCoverTool/online/common.py b/AutoCoverTool/online/common.py new file mode 100644 index 0000000..982a1f0 --- /dev/null +++ b/AutoCoverTool/online/common.py @@ -0,0 +1,163 @@ +# -*-encoding=utf8-*- +import time +import pymysql + +banned_user_map = { + "host": "sg-songbook00.db.starmaker.co", + "user": "worker", + "passwd": "gRYppQtdTpP3nFzH", + "db": "starmaker" +} + +gs_songbook_test_banned_user_map = { + "host": "sg-test-server-goapi-1", + "user": "root", + "passwd": "solo2018", + "db": "av_db" +} + +banned_user_map_v1 = { + "host": "sg-starmaker-device-r2.db.starmaker.co", + "user": "worker", + "passwd": "gRYppQtdTpP3nFzH", + "db": "mis" +} + +banned_user_map_v2 = { + "host": "sg-sm-img-r1.starmaker.co", + "user": "worker", + "passwd": "gRYppQtdTpP3nFzH", + "db": "sm" +} + +# 做一下shared库的查询依赖 +shard_map = { + "shard_sm_12": "sg-shard02-r2.db.starmaker.co", + "shard_sm_13": "sg-shard02-r2.db.starmaker.co", + "shard_sm_14": "sg-shard02-r2.db.starmaker.co", + "shard_sm_15": "sg-shard02-r2.db.starmaker.co", + "shard_sm_30": "sg-shard02-r2.db.starmaker.co", + "shard_sm_31": "sg-shard02-r2.db.starmaker.co", + "shard_sm_20": "sg-shard02-r2.db.starmaker.co", + "shard_sm_21": "sg-shard02-r2.db.starmaker.co", + "shard_sm_22": "sg-shard03-r2.db.starmaker.co", + "shard_sm_23": "sg-shard03-r2.db.starmaker.co", + "shard_sm_24": "sg-shard03-r2.db.starmaker.co", + "shard_sm_25": "sg-shard03-r2.db.starmaker.co", + "shard_sm_26": "sg-shard03-r2.db.starmaker.co", + "shard_sm_27": "sg-shard03-r2.db.starmaker.co", + "shard_sm_28": "sg-shard03-r2.db.starmaker.co", + "shard_sm_29": "sg-shard03-r2.db.starmaker.co", + "shard_sm_0": "sg-shard00-r2.db.starmaker.co", + "shard_sm_1": "sg-shard00-r2.db.starmaker.co", + "shard_sm_2": "sg-shard00-r2.db.starmaker.co", + "shard_sm_3": "sg-shard00-r2.db.starmaker.co", + "shard_sm_4": "sg-shard00-r2.db.starmaker.co", + "shard_sm_5": "sg-shard00-r2.db.starmaker.co", + "shard_sm_16": "sg-shard00-r2.db.starmaker.co", + "shard_sm_17": 
"sg-shard00-r2.db.starmaker.co", + "shard_sm_6": "sg-shard01-r2.db.starmaker.co", + "shard_sm_7": "sg-shard01-r2.db.starmaker.co", + "shard_sm_8": "sg-shard01-r2.db.starmaker.co", + "shard_sm_9": "sg-shard01-r2.db.starmaker.co", + "shard_sm_10": "sg-shard01-r2.db.starmaker.co", + "shard_sm_11": "sg-shard01-r2.db.starmaker.co", + "shard_sm_18": "sg-shard01-r2.db.starmaker.co", + "shard_sm_19": "sg-shard01-r2.db.starmaker.co", + "shard_sm_32": "sg-shard04-r2.db.starmaker.co", + "shard_sm_33": "sg-shard04-r2.db.starmaker.co", + "shard_sm_34": "sg-shard04-r2.db.starmaker.co", + "shard_sm_35": "sg-shard04-r2.db.starmaker.co", + "shard_sm_36": "sg-shard04-r2.db.starmaker.co", + "shard_sm_37": "sg-shard04-r2.db.starmaker.co", + "shard_sm_38": "sg-shard04-r2.db.starmaker.co", + "shard_sm_39": "sg-shard04-r2.db.starmaker.co", + "shard_sm_40": "sg-shard05-r2.db.starmaker.co", + "shard_sm_41": "sg-shard05-r2.db.starmaker.co", + "shard_sm_42": "sg-shard05-r2.db.starmaker.co", + "shard_sm_43": "sg-shard05-r2.db.starmaker.co", + "shard_sm_44": "sg-shard05-r2.db.starmaker.co", + "shard_sm_45": "sg-shard05-r2.db.starmaker.co", + "shard_sm_46": "sg-shard05-r2.db.starmaker.co", + "shard_sm_47": "sg-shard05-r2.db.starmaker.co", + "shard_sm_48": "sg-shard05-r2.db.starmaker.co", + "shard_sm_49": "sg-shard05-r2.db.starmaker.co", + "shard_sm_50": "sg-shard05-r2.db.starmaker.co", + "name": "shard_sm_{}", + "port": 3306, + "user": "readonly", + "passwd": "JKw6woZgRXsveegL" +} + + +def connect_db(host="research-db-r1.starmaker.co", port=3306, user="root", passwd="Qrdl1130", db=""): + print("connect mysql host={} port={} user={} passwd={} db={}".format(host, port, user, passwd, db)) + return pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db) + + +def get_data_by_mysql(sql, ban=banned_user_map): + db = connect_db(host=ban["host"], passwd=ban["passwd"], user=ban["user"], + db=ban["db"]) + db_cursor = db.cursor() + if len(sql) < 100: + print("execute = {}".format(sql)) + 
def get_shard_db(user_id):
    """
    Return the shard index encoded in a user id (the bits above bit 47).

    The id is converted with ``int()`` first so that integers and integer
    strings keep full precision. Going through ``float`` rounds ids above
    2**53 to the nearest representable double, which can push an id that
    sits just below a shard boundary into the next shard.

    :param user_id: user id as an int, a numeric string (``"123"``,
        ``"123.0"``, ``"1e3"``) or a float
    :return: ``user_id >> 48`` as an int
    :raises ValueError: if ``user_id`` is not numeric
    :raises TypeError: if ``user_id`` is of an unsupported type (e.g. ``None``)
    """
    try:
        uid = int(user_id)
    except (TypeError, ValueError):
        # Not directly convertible (e.g. "123.0" or "1e3"): fall back to
        # the float route that was previously used for every input.
        uid = int(float(user_id))
    return uid >> 48
---song_id + --acc.mp3 // 44k双声道320k + --vocal.mp3 // 44k双声道320k + --src.mp3 // 44k双声道320k + --song_id_sp2_dv.mp3 // 44k单声道320k + ---song_id_out // 对外输出 + --src.mp3 // 原始音频 + --song_id_sp2_dv_replace_mix.mp3 // 制作完成的音频 + +环境安装: +conda create -n auto_song_cover python=3.9 +# 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt] +# 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt] +pip install librosa +pip install scikit-maad +pip install praat-parselmouth +pip install matplotlib +pip install torchvision +pip install madmom +pip install torchstat +环境设置: +export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin +export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame +""" + +import os +import time +import shutil +import random +import logging +import librosa + +logging.basicConfig(filename='/tmp/inference.log', level=logging.INFO) + +gs_err_code_success = 0 +gs_err_code_no_src_mp3 = 1 +gs_err_code_separate = 2 +gs_err_code_trans_32 = 3 +gs_err_code_encode_err = 4 +gs_err_code_replace_err = 5 +gs_err_code_replace_trans_err = 6 +gs_err_code_mix_err = 7 +gs_err_code_mix_transcode_err = 8 +gs_err_code_no_src_dir = 9 +gs_err_code_volume_err = 10 +gs_err_code_trans2_442 = 11 +gs_err_code_reverb = 12 +gs_err_code_no_good_choice = 13 +gs_err_code_preprocess_vocal = 14 + +gs_denoise_exe = "/opt/soft/bin/denoise_exe" +gs_draw_volume_exe = "/opt/soft/bin/draw_volume" +gs_simple_mixer_path = "/opt/soft/bin/simple_mixer" +gs_rever_path = "/opt/soft/bin/dereverbrate" + +from ref.music_remover.separate_interface import SeparateInterface +from ref.so_vits_svc.inference_main import * +from ref.split_dirty_frame.script.process_one import ReplaceVocalFrame, construct_power_fragment + + +class SongCoverInference: + def __init__(self): + self.work_dir = None + self.cache_dir = None + self.cid = None + self.src_mp3 = None + self.vocal_path = None + self.vocal_32_path = None + self.acc_path = None + self.speakers 
= [ + 10414574138721494, + 10414574140317353, + 1688849864840588, + 3634463651, + 5629499489839033, + 5910973794723621, + 6755399374234747, + 8162774327817435, + 8162774329368194, + 1125899914308640, # 以下为男声,包括这个 + 12384898975368914, + 12947848931397021, + 3096224748076687, + 3096224751151928, + 5066549357604730, + 5348024335101054, + 6755399442719465, + 7036874421386111 + ] + + self.speakers2gender = { + 10414574138721494: 1, + 10414574140317353: 1, + 1688849864840588: 1, + 3634463651: 1, + 5629499489839033: 1, + 5910973794723621: 1, + 6755399374234747: 1, + 8162774327817435: 1, + 8162774329368194: 1, + 1125899914308640: 0, # 0是男 + 12384898975368914: 0, + 12947848931397021: 0, + 3096224748076687: 0, + 3096224751151928: 0, + 5066549357604730: 0, + 5348024335101054: 0, + 6755399442719465: 0, + 7036874421386111: 0 + } + self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth" + self.speakers_model_config = "data/train_users/{}/config/config.json" + + st = time.time() + self.separate_inst = None + logging.info("post process ... 
def get_start_ms(self, vocal_path):
    """
    Pick a start position inside the separated vocal track.

    The audio is loaded at 16 kHz, normalised and reduced to a mean
    absolute amplitude curve. ``construct_power_fragment`` turns that curve
    into spans; the regions between those spans become the candidate
    fragments ``[start_sec, duration_sec]``. Fragments closer than 50 ms
    are merged, then the first fragment of at least 3 s is returned as soon
    as some later fragment ``j`` makes the covered ratio
    ``sum(durations[i..j]) / (end_j - start_i)`` exceed 0.1.

    :param vocal_path: path of the vocal audio file
    :return: start position in seconds (despite the ``_ms`` suffix), or -1
        when no fragment qualifies, including when there are no fragments
    """
    audio, sr = librosa.load(vocal_path, sr=16000)
    audio = librosa.util.normalize(audio)
    total_sec = len(audio) / sr

    # NOTE(review): the original comment said "100 ms frame, 10 ms hop",
    # but each value averages only 160 samples (10 ms at 16 kHz) while the
    # loop stops 1600 samples before the end -- confirm which is intended.
    power_arr = []
    for i in range(0, len(audio) - 1600, 160):
        power_arr.append(np.sum(np.abs(audio[i:i + 160])) / 160)

    # presumably each entry is (start_frame, frame_count) in 10 ms frames
    # covering the low-energy parts -- verify against construct_power_fragment
    spans = construct_power_fragment(power_arr)

    # The candidate fragments are the gaps between consecutive spans.
    fragments = []
    last_pos = 0
    for line in spans:
        start = round(float(line[0]) * 0.01, 3)
        duration = round(float(line[1]) * 0.01, 3)
        fragments.append([last_pos, start - last_pos])
        last_pos = start + duration
    if last_pos < total_sec:
        fragments.append([last_pos, total_sec - last_pos])

    # Nothing to choose from (e.g. empty audio); indexing fragments[0]
    # below would otherwise raise IndexError.
    if not fragments:
        return -1

    # Merge neighbours separated by less than 50 ms. After a merge the same
    # index is re-examined against its new neighbour.
    idx = 0
    while idx < len(fragments) - 1:
        cur, nxt = fragments[idx], fragments[idx + 1]
        if nxt[0] - (cur[0] + cur[1]) < 0.05:
            cur[1] = nxt[0] + nxt[1] - cur[0]
            del fragments[idx + 1]
        else:
            idx += 1

    # cumulative[k] = total duration of fragments[0..k]
    cumulative = []
    running = 0
    for _, duration in fragments:
        running += duration
        cumulative.append(running)

    for i, (start_i, duration_i) in enumerate(fragments):
        if duration_i < 3:
            continue
        before = cumulative[i - 1] if i > 0 else 0
        for j in range(i + 1, len(fragments)):
            covered = cumulative[j] - before
            span = fragments[j][1] + fragments[j][0] - start_i
            if covered / span > 0.1:
                return start_i
    return -1
def preprocess_vocal(self, songs, vocal_path):
    """
    Denoise each synthesised vocal, then draw its volume towards a reference.

    For every input ``<stem>.wav`` two files are produced next to it:
    ``<stem>_d.wav`` (output of the denoise executable) and ``<stem>_dv.wav``
    (output of the draw-volume executable, which also receives
    ``vocal_path``). A song is skipped when either output file is missing
    after its command has run.

    :param songs: paths of the synthesised vocal wav files
    :param vocal_path: reference vocal signal passed to the draw-volume tool
    :return: list of the ``_dv.wav`` paths that were produced
    """
    st = time.time()
    dv_out_list = []
    for song in songs:
        # Build the output names from the stem so that only the extension is
        # rewritten; str.replace(".wav", ...) would also touch a ".wav" that
        # appears in a directory name.
        stem, _ = os.path.splitext(str(song))
        denoise_path = stem + "_d.wav"
        volume_path = stem + "_dv.wav"

        # Step 1: denoise
        cmd = "{} {} {}".format(gs_denoise_exe, song, denoise_path)
        os.system(cmd)
        if not os.path.exists(denoise_path):
            print("{} {} ERROR denoise".format(self.cid, song))
            continue

        # Step 2: draw volume against the reference vocal
        cmd = "{} {} {} {}".format(gs_draw_volume_exe, denoise_path, vocal_path, volume_path)
        os.system(cmd)
        if not os.path.exists(volume_path):
            # This step used to be reported as "ERROR denoise" as well.
            print("{} {} ERROR draw volume".format(self.cid, volume_path))
            continue
        dv_out_list.append(volume_path)
    print(
        "preprocess_vocal belen = {} len = {} finish sp = {}".format(len(songs), len(dv_out_list),
                                                                     time.time() - st))
    return dv_out_list
def process_one(self, cid, work_dir, enable_output=False):
    """
    Run the one-shot pipeline: separate, infer, preprocess, then either
    export the candidates or replace/mix them into the final mp3.

    :param cid: song id, stored on the instance and used in file names
    :param work_dir: working directory; must already contain ``src.mp3``.
        Its ``cache`` sub-directory is wiped and recreated.
    :param enable_output: when True only ``self.output`` is called on the
        preprocessed vocals and no mix is produced
    :return: always a 3-tuple ``(err_code, mix_mp3_path, gender)``.
        ``mix_mp3_path`` is None unless a mix was produced; ``gender`` is
        looked up in ``self.speakers2gender`` on success, -1 when no mix
        exists, and None on the early failure paths.
    """
    logging.info("\nstart:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir))
    self.cid = cid
    self.work_dir = work_dir

    # Everything that is not delivered externally lives in the cache dir.
    self.cache_dir = os.path.join(work_dir, "cache")
    if os.path.exists(self.cache_dir):
        shutil.rmtree(self.cache_dir)
    os.makedirs(self.cache_dir)

    self.src_mp3 = os.path.join(self.work_dir, "src.mp3")
    if not os.path.exists(self.src_mp3):
        # Was a bare int; every other path returns a 3-tuple and callers
        # unpack three values.
        return gs_err_code_no_src_mp3, None, None
    self.vocal_path = os.path.join(self.cache_dir, "vocal.wav")
    self.vocal_32_path = os.path.join(self.cache_dir, "vocal_32.wav")
    self.acc_path = os.path.join(self.cache_dir, "acc.wav")

    if not os.path.exists(self.vocal_32_path):
        logging.info("start separate ... {} {} {}".format(self.src_mp3, self.vocal_path, self.acc_path))
        err = self.separate(cid, self.src_mp3, self.vocal_path, self.acc_path)
        if err != gs_err_code_success:
            return err, None, None
    logging.info("start inference_speaker ...")
    out_songs = self.inference_speaker()
    dv_out_list = self.preprocess_vocal(out_songs, self.vocal_path)
    if len(dv_out_list) == 0:
        return gs_err_code_no_good_choice, None, None

    err = gs_err_code_success
    mix_mp3_path = None
    gender = -1
    if enable_output:
        # output() returns an error code only when an encode step failed.
        out_err = self.output(dv_out_list)
        if out_err is not None and out_err != gs_err_code_success:
            err = out_err
    else:
        # Process every candidate; the result of the last one is reported.
        for dv_out_path in dv_out_list:
            src_path = dv_out_path.replace("_dv.wav", ".wav")
            err, mix_mp3_path = self.after_process(self.cid, self.work_dir, src_path, dv_out_path,
                                                   self.vocal_path, self.acc_path,
                                                   True, False)
            if err != gs_err_code_success:
                logging.info("after_process err {}".format(err))

    # The speaker id is the second "_"-separated field of the mix file name.
    if err == gs_err_code_success and mix_mp3_path is not None:
        gender = self.speakers2gender[int(str(os.path.basename(mix_mp3_path)).split("_")[1])]
    logging.info("finish:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir))
    # Report the real outcome instead of unconditionally returning success.
    return err, mix_mp3_path, gender
os.path.exists(dst_path_442): + return gs_err_code_replace_trans_err, None + + # 合并转码后再做一次拉伸,保证响度 + volume_path = str(dst_path_442).replace(".wav", "_dv.wav") + cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, vocal_file, volume_path) + print(cmd) + os.system(cmd) + if not os.path.exists(volume_path): + print("{} {} ERROR draw volume".format(self.cid, volume_path)) + return gs_err_code_volume_err, None + dst_path_442 = volume_path + + # 混合 + mix_path = dst_path_442.replace("_replace442.wav", "_replace442_mix.wav") + cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, acc_file, mix_path) + print("{}".format(cmd)) + os.system(cmd) + if not os.path.exists(mix_path): + return gs_err_code_mix_err, None + + # 编码为mp3 + output_dir = os.path.join(self.work_dir, self.cid + "_out") + if not os.path.exists(output_dir): + os.makedirs(output_dir) + name = str(mix_path).replace("_replace442_mix.wav", "_replace442_mix.mp3").split("/")[-1] + mix_path_mp3 = os.path.join(output_dir, name) + cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3) + os.system(cmd) + if not os.path.exists(mix_path_mp3): + return gs_err_code_mix_transcode_err, None + + # 拷贝src到output_dir + # shutil.copyfile(self.src_mp3, os.path.join(output_dir, "src.mp3")) + # logging.info("after_process sp = {}".format(time.time() - st)) + return gs_err_code_success, mix_path_mp3 + + ####################################新对外接口############################################################ + def prepare_env(self, cid, work_dir, create_dir=False): + self.cid = cid + self.work_dir = work_dir + + # 所有不对外交付的,全部放到这里 + self.cache_dir = os.path.join(work_dir, "cache") + if create_dir: + if os.path.exists(self.cache_dir): + shutil.rmtree(self.cache_dir) + os.makedirs(self.cache_dir) + + self.src_mp3 = os.path.join(self.work_dir, "src.mp3") + if not os.path.exists(self.src_mp3): + return gs_err_code_no_src_mp3 + self.vocal_path = os.path.join(self.cache_dir, "vocal.wav") + 
def generate_svc_file(self, cid, work_dir):
    """
    Separate the source song, run speaker inference and preprocess the
    best candidate.

    :param cid: song id
    :param work_dir: working directory containing ``src.mp3``; its cache
        directory is recreated by ``prepare_env``
    :return: always a 2-tuple ``(err_code, svc_file)`` where ``svc_file`` is
        the first preprocessed vocal path, or None on any failure
    """
    err = self.prepare_env(cid, work_dir, create_dir=True)
    if err != gs_err_code_success:
        return err, None

    # Source separation
    if not os.path.exists(self.vocal_32_path):
        st = time.time()
        err = self.separate(cid, self.src_mp3, self.vocal_path, self.acc_path)
        logging.info("cid={},separate,sp={}".format(self.cid, time.time() - st))
        if err != gs_err_code_success:
            return err, None

    # Generate the svc candidates; inference_speaker keeps at most one.
    st = time.time()
    out_songs = self.inference_speaker()
    if len(out_songs) == 0:
        # Was a 3-tuple, which made the callers' two-value unpacking raise
        # ValueError exactly when no usable candidate was found.
        return gs_err_code_no_good_choice, None
    logging.info("cid={},inference_speaker,{},sp={}".format(self.cid, out_songs[0], time.time() - st))

    # Preprocess the vocal (denoise + draw volume)
    dv_out_list = self.preprocess_vocal(out_songs, self.vocal_path)
    if len(dv_out_list) == 0:
        return gs_err_code_preprocess_vocal, None
    return gs_err_code_success, dv_out_list[0]
st)) + + # 拉伸 + st = time.time() + volume_path = str(effect_file).replace(".wav", "_dv.wav") + cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, self.vocal_path, volume_path) + os.system(cmd) + if not os.path.exists(volume_path): + print("{} {} ERROR draw volume".format(self.cid, volume_path)) + return gs_err_code_volume_err, None + effect_file = volume_path + logging.info("cid={},draw_volume,{},sp={}".format(self.cid, svc_file, time.time() - st)) + + # 替换 + st = time.time() + self.replace_vocal_frame_inst.process(svc_file, effect_file, self.vocal_path) + dst_path = effect_file + "_replace.wav" + if not os.path.exists(dst_path): + return gs_err_code_replace_err, None + logging.info("cid={},replace_vocal_frame_inst,{},sp={}".format(self.cid, svc_file, time.time() - st)) + + # 转码 + st = time.time() + dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav") + cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442) + os.system(cmd) + if not os.path.exists(dst_path_442): + return gs_err_code_replace_trans_err, None + logging.info("cid={},transcode,{},sp={}".format(self.cid, svc_file, time.time() - st)) + + # 合并转码后再做一次拉伸,保证响度 + st = time.time() + volume_path = str(dst_path_442).replace("_replace442.wav", "_replace442_dv.wav") + cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, self.vocal_path, volume_path) + os.system(cmd) + if not os.path.exists(volume_path): + print("{} {} ERROR draw volume".format(self.cid, volume_path)) + return gs_err_code_volume_err, None + dst_path_442 = volume_path + logging.info("cid={},draw_volume2,{},sp={}".format(self.cid, svc_file, time.time() - st)) + + # 混合 + st = time.time() + mix_path = dst_path_442.replace("_replace442_dv.wav", "_replace442_dv_mix.wav") + cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, self.acc_path, mix_path) + os.system(cmd) + if not os.path.exists(mix_path): + return gs_err_code_mix_err, None + 
logging.info("cid={},mixer,{},sp={}".format(self.cid, svc_file, time.time() - st)) + + # 编码为mp3 + st = time.time() + output_dir = os.path.join(self.work_dir, self.cid + "_out") + if not os.path.exists(output_dir): + os.makedirs(output_dir) + name = str(mix_path).replace("_replace442_dv_mix.wav", "_replace442_dv_mix.mp3").split("/")[-1] + mix_path_mp3 = os.path.join(output_dir, name) + cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3) + print(cmd) + os.system(cmd) + if not os.path.exists(mix_path_mp3): + return gs_err_code_mix_transcode_err, None + logging.info("cid={},encode,{},sp={}".format(self.cid, svc_file, time.time() - st)) + return gs_err_code_success, mix_path_mp3 + + def get_gender(self, svc_file): + return self.speakers2gender[int(os.path.basename(svc_file).split("_")[1])] + + def process_one_logic(self, cid, work_dir): + """ + 搞成两部分: + 1. 分离数据+5次推理,获取最佳结果,并保存 + 2. 利用最佳结果做音效以及合并 + :return: + """ + err, svc_file = self.generate_svc_file(cid, work_dir) + gender = -1 + if err != gs_err_code_success: + return err, svc_file, gender, + gender = self.get_gender(svc_file) + err, effect_file = self.effect(cid, work_dir, svc_file) + if err != gs_err_code_success: + return err, svc_file, gender + err, mix_mp3_path = self.mix(cid, work_dir, svc_file, effect_file) + return err, mix_mp3_path, gender + + +def test(): + arr = [ + # "611752105020343687", + # "611752105023532439", + "611752105030419688", + ] + base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test" + s_inst = SongCoverInference() + for cid in arr: + st = time.time() + # err, mix_mp3, gender = s_inst.process_one(cid, os.path.join(base_dir, cid), False) + err, mix_mp3, gender = s_inst.process_one_logic(cid, os.path.join(base_dir, cid)) + print(mix_mp3, gender) + print("cid={} RealFinish err={} sp={}".format(cid, err, time.time() - st)) + + +if __name__ == '__main__': + test() diff --git a/AutoCoverTool/online/inference_worker.py b/AutoCoverTool/online/inference_worker.py 
def upload_file2cos(key, file_path, region=GS_REGION, bucket_name=GS_BUCKET_NAME):
    """
    Upload a local file to COS and confirm that the object is non-empty.

    After the upload command succeeds, the object's ``Content-Length`` is
    read back through ``coscmd info`` and must parse to a number above 0.

    :param key: object key inside the bucket
    :param file_path: local file path
    :param region: COS region
    :param bucket_name: COS bucket name
    :return: True when the upload succeeded and the reported size is
        positive, False otherwise (including when the size cannot be read)
    """
    cmd = "{} -c {} -r {} -b {} upload {} {}".format(GS_COSCMD, GS_CONFIG_PATH, region, bucket_name, file_path, key)
    if not exec_cmd(cmd):
        return False

    cmd = "{} -c {} -r {} -b {} info {}".format(GS_COSCMD, GS_CONFIG_PATH, region, bucket_name, key) \
          + "| grep Content-Length |awk '{print $2}'"
    res_str = exec_cmd_and_result(cmd)
    logging.info("{},res={}".format(key, res_str))
    try:
        size = float(res_str)
    except (TypeError, ValueError):
        # The pipeline printed nothing, or something that is not a single
        # number; treat it as a failed verification instead of letting the
        # exception take down the worker loop.
        return False
    return size > 0
def post_process(self, msg):
    """
    Finish one task coming back from the effect workers: mix, upload the
    result to COS and record the outcome in ``svc_queue_table``.

    :param msg: ``[song_id, err, svc_file, effect_file, gender]`` as put on
        the finish queue by ``effect``
    :return: None; the result is written to the database. Failures are
        stored as the negated error code in the ``state`` column.
    """
    song_id, err, svc_file, effect_file, gender = msg
    work_dir = os.path.join(self.base_dir, str(song_id))
    if err != gs_err_code_success:
        self.update_state(song_id, -err)
        return

    # Replace dirty frames and mix with the accompaniment
    err, mix_path_mp3 = self.inst.mix(song_id, work_dir, svc_file, effect_file)
    logging.info(
        "post_process:song_id={},work_dir={},svc_file={},gender={}".format(song_id, work_dir, svc_file, gender))
    if err != gs_err_code_success:
        self.update_state(song_id, -err)
        return

    # Upload to COS
    mix_name = os.path.basename(mix_path_mp3)
    key = "av_res/svc_res/{}".format(mix_name)
    uploaded = upload_file2cos(key, mix_path_mp3)
    logging.info("upload_file2cos:song_id={},key={},mix_path_mp3={}".format(song_id, key, mix_path_mp3))
    if not uploaded:
        # err is 0 (success) here, so the former "-err" stored state 0 and
        # put the task straight back into the queue. Record the dedicated
        # upload error instead.
        self.update_state(song_id, -gs_actw_err_code_upload_err)
        return

    # Update the database with the finished result.
    # NOTE(review): the statement is built by string formatting because
    # update_db only accepts a finished SQL string; the values here come
    # from this worker and the queue table.
    sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\",gender={} where song_id = {}". \
        format(gs_state_finish, int(time.time()), key, gender, song_id)
    logging.info("post_process:song_id={},sql={}".format(song_id, sql))
    # Use a copy so the module-level connection settings are not mutated.
    update_db(sql, dict(banned_user_map, db="av_db"))
{},{}".format(song_id, song_url)) + if song_id is None: + time.sleep(5) + continue + + # 创建空间 + work_dir = os.path.join(self.base_dir, str(song_id)) + if os.path.exists(work_dir): + shutil.rmtree(work_dir) + os.makedirs(work_dir) + logging.info("song_id={},work_dir={},finish".format(song_id, work_dir)) + + # 预处理 + err = self.pre_process(work_dir, song_url) + if err != gs_err_code_success: + self.update_state(song_id, -err) + continue + logging.info("song_id={},work_dir={},pre_process".format(song_id, work_dir)) + + # 获取svc数据 + err, svc_file = self.inst.generate_svc_file(song_id, work_dir) + if err != gs_err_code_success: + self.update_state(song_id, -err) + continue + logging.info("song_id={},work_dir={},generate_svc_file".format(song_id, work_dir)) + + # 做音效处理的异步代码 + gender = self.inst.get_gender(svc_file) + worker_queue.put([song_id, work_dir, svc_file, gender]) + logging.info("song_id={},work_dir={},svc_file={},gender={}".format(song_id, work_dir, svc_file, gender)) + pool.close() + pool.join() + + +if __name__ == '__main__': + actw = AutoCoverToolWorker() + actw.process() diff --git a/AutoCoverTool/online/readme.txt b/AutoCoverTool/online/readme.txt new file mode 100644 index 0000000..5d82d70 --- /dev/null +++ b/AutoCoverTool/online/readme.txt @@ -0,0 +1,44 @@ +自动翻唱工具 +功能: 输入原唱的音频文件,输出一个翻唱的作品 +结构介绍: +---online + ---common.py // 共用代码 + ---inference_one.py // 给定一个音频,输出一个翻唱作品 + ---inference_worker.py // 从数据库中不断读取获取音频,然后下载制作好翻唱作品后,上传cos,并将信息保存到数据库 + +环境安装: +1. 
python环境安装: + 环境安装: + conda create -n auto_song_cover python=3.9 + # 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt] + # 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt] + pip install librosa + pip install scikit-maad + pip install praat-parselmouth + pip install matplotlib + pip install torchvision + pip install madmom + pip install torchstat + 环境设置: + export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin + export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame + +2. 可执行文件安装: + 直接从sg-prod-ipaint-gpu-1的对应位置拷贝即可,如果需要编译,则进入到ref/tools/mixer中编译前三个,在本仓库的上一级目录中编译dereverbrate + gs_denoise_exe = "/opt/soft/bin/denoise_exe" + gs_draw_volume_exe = "/opt/soft/bin/draw_volume" + gs_simple_mixer_path = "/opt/soft/bin/simple_mixer" + gs_rever_path = "/opt/soft/bin/dereverbrate" + +3. 模型资源相关 + 从sg-prod-ipaint-gpu-1的data目录拷贝train_users数据 + data/models/split_dirty_frame_v5_3_epoch3_852.pth + self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth" + self.speakers_model_config = "data/train_users/{}/config/config.json" + 将上面的三个都拷贝到代码的data目录即可 + +快速使用: +1. 线上使用: python3 inference_worker.py +2. 
"""
Public entry point for vocal / accompaniment separation.
"""
import os
import sys
import time
import shutil
import logging
from demucs.pretrained import get_model
from demucs.separate import *

# Third-party binaries
gs_standard_audio_exe = "/opt/soft/bin/standard_audio"
gs_ffmpeg_exe = "ffmpeg"

# Global configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Error codes
ERR_CODE_SUCCESS = 0
ERR_CODE_INPUT_FILE_NOT_EXISTS = 1


class SeparateInterface:
    """
    Public interface of the separator: splits a track into a vocal stem and an
    accompaniment stem (the sum of all non-vocal sources).
    """

    def __init__(self):
        sp_start = time.time()
        # After evaluation this model gave the best quality at an acceptable cost.
        # It was compared against mdx_extra_q and htdemucs_ft:
        # mdx_extra_q is as fast as mdx_extra but keeps backing harmonies worse;
        # htdemucs_ft takes 1.6x as long as mdx_extra and removes vocals more
        # thoroughly on some songs, but keeps harmonies less intact than mdx_extra
        # (see the opening harmony of "xishuashua").
        self.model = get_model('mdx_extra')
        self.tmp_dir = "/tmp/separate"
        os.makedirs(self.tmp_dir, exist_ok=True)
        logging.info("SeparateInterface: load model spent = {}".format(time.time() - sp_start))

    def process_logic(self, cid, cache_dir, in_file, vocal_out_file, acc_out_file, dev='cuda'):
        """
        Run the model on one file and write the requested stems.

        :param cid: id used in log messages
        :param cache_dir: scratch directory created by ``process`` (not used here)
        :param in_file: input audio file
        :param vocal_out_file: where to write the vocal stem, or None to skip it
        :param acc_out_file: where to write the accompaniment stem, or None to skip it
        :param dev: device passed to ``apply_model`` ('cuda' or 'cpu')
        :return: True when every requested output file exists afterwards
        """
        model = self.model
        sp_start = time.time()
        wav = load_track(in_file, model.audio_channels, model.samplerate)
        logging.info("--------load_track:cid={},sp={}".format(cid, time.time() - sp_start))

        # Model inference
        sp_start = time.time()
        ref = wav.mean(0)
        wav = (wav - ref.mean()) / ref.std()
        # wav[None] adds a leading dimension: [2, xxx] -> [1, 2, xxx]
        sources = apply_model(model, wav[None], device=dev, shifts=1, split=True, overlap=0.25, progress=True,
                              num_workers=0)[0]
        sources = sources * ref.std() + ref.mean()
        logging.info("--------apply_model:cid={},sp={}".format(cid, time.time() - sp_start))

        # Split into the vocal stem and everything else
        sources = list(sources)
        vocals = sources.pop(model.sources.index("vocals"))
        # Guard on the output path: the tensor itself is never None, and saving
        # to a None path crashes when only the accompaniment is requested.
        if vocal_out_file is not None:
            save_audio(vocals, vocal_out_file, samplerate=model.samplerate)
        if acc_out_file is not None:
            other_stem = th.zeros_like(sources[0])
            for sc in sources:
                other_stem += sc
            save_audio(other_stem, acc_out_file, samplerate=model.samplerate)

        for out_file in (vocal_out_file, acc_out_file):
            if out_file is not None and not os.path.exists(out_file):
                return False
        return True

    def process(self, cid, in_file, vocal_out_file, acc_out_file, dev='cuda'):
        """
        Separate one file.

        :param cid: id used for the scratch directory name and in log messages
        :param in_file: input audio file
        :param vocal_out_file: where to write the vocal stem, or None to skip it
        :param acc_out_file: where to write the accompaniment stem, or None to skip it
        :param dev: device passed to the model ('cuda' or 'cpu')
        :return: True on success, False on failure (including a missing input file)
        """
        if not os.path.exists(in_file):
            # This used to return ERR_CODE_INPUT_FILE_NOT_EXISTS (1), which is
            # truthy and equal to True, so callers testing the result saw a
            # missing input as success.
            logging.error("--------process:cid={},input file not exists:{}".format(cid, in_file))
            return False
        st_time = time.time()
        logging.info("--------process:cid={},{},{},{}".format(cid, in_file, vocal_out_file, acc_out_file))

        cache_dir = os.path.join(self.tmp_dir, str(cid))
        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir)
        os.makedirs(cache_dir)

        # Core processing
        try:
            ret = self.process_logic(cid, cache_dir, in_file, vocal_out_file, acc_out_file, dev)
        finally:
            # Remove the scratch directory even when inference raises.
            shutil.rmtree(cache_dir, ignore_errors=True)
        # One placeholder per argument: the result used to be printed as "sp="
        # and the elapsed time was silently dropped.
        logging.info(
            "--------finish:cid={},{},{},{}|{}|ret={}|sp={}\n\n".format(cid, in_file, vocal_out_file, acc_out_file,
                                                                        dev, ret, time.time() - st_time))
        return ret


# if __name__ == '__main__':
#     si = SeparateInterface()
#     in_f = sys.argv[1]
#     out_f = sys.argv[2]
#     dev = sys.argv[3]  # cuda or cpu
#     for i in range(0, 3):
#         si.process(str(1), in_f, out_f, dev)
--acc.mp3 // 44k双声道320k - --vocal.mp3 // 44k双声道320k - --src.mp3 // 44k双声道320k - --song_id_sp2_dv.mp3 // 44k单声道320k - ---song_id_out // 对外输出 - --src.mp3 // 原始音频 - --song_id_sp2_dv_replace_mix.mp3 // 制作完成的音频 - -环境安装: -conda create -n auto_song_cover python=3.9 -# 安装demucs环境[进入到ref.music_remover 执行pip install -r requirements.txt] -# 安装so_vits_svc环境[进入到ref.so_vits_svc 执行pip install -r requirements.txt] -pip install librosa -pip install scikit-maad -pip install praat-parselmouth -pip install matplotlib -pip install torchvision -pip install madmom -pip install torchstat -环境设置: -export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin -export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame -""" - -import os -import time -import shutil -import random -import logging -import librosa - -gs_err_code_success = 0 -gs_err_code_no_src_mp3 = 1 -gs_err_code_separate = 2 -gs_err_code_trans_32 = 3 -gs_err_code_encode_err = 4 -gs_err_code_replace_err = 5 -gs_err_code_replace_trans_err = 6 -gs_err_code_mix_err = 7 -gs_err_code_mix_transcode_err = 8 -gs_err_code_no_src_dir = 9 -gs_err_code_volume_err = 10 -gs_err_code_trans2_442 = 11 -gs_err_code_reverb = 12 - -gs_denoise_exe = "/opt/soft/bin/denoise_exe" -gs_draw_volume_exe = "/opt/soft/bin/draw_volume_v1" -gs_simple_mixer_path = "/opt/soft/bin/simple_mixer" -gs_rever_path = "/data/rsync/jianli.yang/dereverbrate/build/dereverbrate_test" - -from ref.music_remover.separate_interface import SeparateInterface -from ref.so_vits_svc.inference_main import * -from ref.split_dirty_frame.script.process_one import ReplaceVocalFrame, construct_power_fragment -from ref.split_dirty_frame.dataset.dataset import file2mfcc - - -class SongCoverInference: - def __init__(self): - self.work_dir = None - self.cache_dir = None - self.cid = None - self.src_mp3 = None - self.vocal_path = None - self.vocal_32_path = None - self.acc_path = None - self.speakers = [ - 10414574138721494, - 10414574140317353, - 
1688849864840588, - 3634463651, - 5629499489839033, - 5910973794723621, - 6755399374234747, - 8162774327817435, - 8162774329368194, - 1125899914308640, # 以下为男声,包括这个 - 12384898975368914, - 12947848931397021, - 3096224748076687, - 3096224751151928, - 5066549357604730, - 5348024335101054, - 6755399442719465, - 7036874421386111 - ] - self.speakers_model_path = "data/train_users/{}/logs/32k/G_2000.pth" - self.speakers_model_config = "data/train_users/{}/config/config.json" - - st = time.time() - self.separate_inst = SeparateInterface() - self.replace_vocal_frame_inst = ReplaceVocalFrame("data/models/split_dirty_frame_v5_3_epoch3_852.pth") - logging.info("SongCoverInference init sp={}".format(time.time() - st)) - - def separate(self, cid, src_mp3, vocal_path, acc_path): - """ - 人声伴奏分离 - :param cid: - :param src_mp3: - :param vocal_path: - :param acc_path: - :return: - """ - st = time.time() - if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path): - return gs_err_code_separate - if not os.path.exists(vocal_path) or not os.path.exists(acc_path): - return gs_err_code_separate - - # 转码出一个32k单声道的数据 - cmd = "ffmpeg -i {} -ar 32000 -ac 1 -y {} -loglevel fatal".format(vocal_path, self.vocal_32_path) - os.system(cmd) - if not os.path.exists(self.vocal_32_path): - return gs_err_code_trans_32 - print("separate:cid={}|sp={}".format(cid, time.time() - st)) - return gs_err_code_success - - def get_start_ms(self, vocal_path): - """ - 给定原始音频,找一段连续10s的音频 - :param vocal_path: - :return: - """ - audio, sr = librosa.load(vocal_path, sr=16000) - audio = librosa.util.normalize(audio) - # 帧长100ms,帧移10ms,计算能量 - power_arr = [] - for i in range(0, len(audio) - 1600, 160): - power_arr.append(np.sum(np.abs(audio[i:i + 160])) / 160) - # 将能量小于等于10的部分做成段 - power_arr = construct_power_fragment(power_arr) - fragments = [] - last_pos = 0 - for idx, line in enumerate(power_arr): - start = round(float(line[0]) * 0.01, 3) - duration = round(float(line[1]) * 0.01, 3) - 
fragments.append([last_pos, start - last_pos]) - last_pos = start + duration - if last_pos < len(audio) / sr: - fragments.append([last_pos, len(audio) / sr - last_pos]) - - # 合并数据,两者间隔在50ms以内的合并起来 - idx = 0 - while idx < len(fragments) - 1: - if fragments[idx + 1][0] - (fragments[idx][0] + fragments[idx][1]) < 0.05: - fragments[idx][1] = fragments[idx + 1][0] + fragments[idx + 1][1] - fragments[idx][0] - del fragments[idx + 1] - idx -= 1 - idx += 1 - - # out_file = vocal_path + "_power.csv" - # with open(out_file, "w") as f: - # f.write("Name\tStart\tDuration\tTime Format\tType\n") - # for fragment in fragments: - # start = round(float(fragment[0]), 3) - # duration = round(float(fragment[1]), 3) - # strr = "{}\t{}\t{}\t{}\n".format("11", start, duration, "decimal\tCue\t") - # f.write(strr) - - # 筛选出开始的位置 - # 1. 连续时长大于10s,当前段长度大于3s - # 2. 不可用 - # 从0到fragments[idx], 包含idx其中人声段的总和 - tot_vocal_duration = [fragments[0][1]] - for i in range(1, len(fragments)): - tot_vocal_duration.append(tot_vocal_duration[i - 1] + fragments[i][1]) - - # 计算出任意两段之间非人声占比 - for i in range(0, len(fragments)): - if fragments[i][1] >= 3: - now_tot = 0 - if i > 0: - now_tot = tot_vocal_duration[i - 1] - for j in range(i + 1, len(fragments)): - cur_rate = tot_vocal_duration[j] - now_tot - cur_rate = cur_rate / (fragments[j][1] + fragments[j][0] - fragments[i][0]) - if cur_rate > 0.1: - return fragments[i][0] - return -1 - - def inference_speaker(self): - """ - 推理生成合成后的音频 - 随机取5个干声,选择占比最小的,并且要求占比小于0.3 - :return: - """ - st = time.time() - out_speakers = random.sample(self.speakers, 5) - out_songs_dict = {} - for speaker in out_speakers: - model_path = self.speakers_model_path.format(speaker) - config_path = self.speakers_model_config.format(speaker) - song_path = os.path.join(self.cache_dir, "{}_{}.wav".format(self.cid, speaker)) - try: - inf(model_path, config_path, self.vocal_32_path, song_path, "prod") - except Exception as ex: - logging.info("cid={}, inference_speaker err={}".format(self.cid, 
ex)) - continue - if os.path.exists(song_path): - rate = self.replace_vocal_frame_inst.get_rate(song_path) - if rate < 0.3: - out_songs_dict[song_path] = rate - - # 从内部选择占比最低的 - out_songs = [] - if len(out_songs_dict.keys()) > 0: - st_sec = self.get_start_ms(self.vocal_path) - song_msg = sorted(out_songs_dict.items(), key=lambda kv: kv[1])[0] - out_songs = [song_msg[0]] - logging.info("GetRate:cid={},song={},rate={},st_tm={}".format(self.cid, song_msg[0], round(song_msg[1], 2), - round(st_sec, 3))) - print("GetRate:cid={},song={},rate={},st_tm={}".format(self.cid, song_msg[0], round(song_msg[1], 2), - round(st_sec, 3))) - # logging.info("inference_speaker len = {} finish sp = {}".format(len(out_songs), time.time() - st)) - print("inference_speaker len = {} finish sp = {}".format(len(out_songs), time.time() - st)) - return out_songs - - def get_new_vocal_rate(self, songs): - """ - 获取人声的比率 - :param songs: - :return: - """ - st = time.time() - need_to_process_song = [] - for song in songs: - rate = self.replace_vocal_frame_inst.get_rate(song) - logging.info("{} {} replace_rate={}".format(self.cid, song, rate)) - if rate < 1.0: - need_to_process_song.append(song) - logging.info( - "get_new_vocal_rate belen = {} len = {} finish sp = {}".format(len(songs), len(need_to_process_song), - time.time() - st)) - return need_to_process_song - - def preprocess_vocal(self, songs, vocal_path): - """ - 1. 降噪 - 2. 
拉伸 - :param songs: - :param vocal_path: 参考的音频信号 - :return: - """ - st = time.time() - dv_out_list = [] - for song in songs: - denoise_path = str(song).replace(".wav", "_d.wav") - cmd = "{} {} {}".format(gs_denoise_exe, song, denoise_path) - os.system(cmd) - if not os.path.exists(denoise_path): - print("{} {} ERROR denoise".format(self.cid, song)) - continue - # 拉伸 - volume_path = str(song).replace(".wav", "_dv.wav") - cmd = "{} {} {} {}".format(gs_draw_volume_exe, denoise_path, vocal_path, volume_path) - os.system(cmd) - if not os.path.exists(volume_path): - print("{} {} ERROR denoise".format(self.cid, volume_path)) - continue - dv_out_list.append(volume_path) - print( - "preprocess_vocal belen = {} len = {} finish sp = {}".format(len(songs), len(dv_out_list), - time.time() - st)) - return dv_out_list - - def output(self, dv_out_list): - """ - 对外输出数据 - :param dv_out_list: - :return: - """ - st = time.time() - out_dir = os.path.join(self.work_dir, self.cid) - if os.path.exists(out_dir): - shutil.rmtree(out_dir) - os.makedirs(out_dir) - - # 拷贝数据 - dst_mp3_path = os.path.join(out_dir, "src_mp3") - dst_acc_path = os.path.join(out_dir, "acc.mp3") - dst_vocal_path = os.path.join(out_dir, "vocal.mp3") - shutil.copyfile(self.src_mp3, dst_mp3_path) - cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.acc_path, dst_acc_path) - os.system(cmd) - if not os.path.exists(dst_acc_path): - return gs_err_code_encode_err - cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(self.vocal_path, dst_vocal_path) - os.system(cmd) - if not os.path.exists(dst_vocal_path): - return gs_err_code_encode_err - - # 将所有数据放到out_dir中,用于给人工标注 - for dv_wav in dv_out_list: - dv_wav_name = str(dv_wav).split("/")[-1].replace(".wav", "_441.mp3") - dst_dv_path = os.path.join(out_dir, dv_wav_name) - - cmd = "ffmpeg -i {} -ar 44100 -ac 1 -ab 320k -y {} -loglevel fatal".format(dv_wav, dst_dv_path) - os.system(cmd) - if not os.path.exists(dst_dv_path): - print("{} encode err!".format(cmd)) - 
continue - logging.info( - "preprocess_vocal output sp = {}".format(time.time() - st)) - - def process_one(self, cid, work_dir, enable_output=False): - logging.info("\nstart:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir)) - self.cid = cid - self.work_dir = work_dir - - # 所有不对外交付的,全部放到这里 - self.cache_dir = os.path.join(work_dir, "cache") - if os.path.exists(self.cache_dir): - shutil.rmtree(self.cache_dir) - os.makedirs(self.cache_dir) - - self.src_mp3 = os.path.join(self.work_dir, "src.mp3") - if not os.path.exists(self.src_mp3): - return gs_err_code_no_src_mp3 - self.vocal_path = os.path.join(self.cache_dir, "vocal.wav") - self.vocal_32_path = os.path.join(self.cache_dir, "vocal_32.wav") - self.acc_path = os.path.join(self.cache_dir, "acc.wav") - - if not os.path.exists(self.vocal_32_path): - logging.info("start separate ... {} {} {}".format(self.src_mp3, self.vocal_path, self.acc_path)) - err = self.separate(cid, self.src_mp3, self.vocal_path, self.acc_path) - if err != gs_err_code_success: - return err - logging.info("start inference_speaker ...") - out_songs = self.inference_speaker() - logging.info("start get_new_vocal_rate ...") - # out_songs = self.get_new_vocal_rate(out_songs) - dv_out_list = self.preprocess_vocal(out_songs, self.vocal_path) - if enable_output: - self.output(dv_out_list) - else: - # 默认全部处理一遍 - for dv_out_path in dv_out_list: - src_path = dv_out_path.replace("_dv.wav", ".wav") - err = self.after_process(self.cid, self.work_dir, src_path, dv_out_path, self.vocal_path, self.acc_path, - True, True) - if err != 0: - logging.info("after_process err {}".format(err)) - logging.info("finish:cid={},work_dir={}----------------------->>>>>>>>".format(cid, work_dir)) - return gs_err_code_success - - def reverb_by_vocal(self, file): - st = time.time() - file_442 = file.replace(".wav", "_442.wav") - if not os.path.exists(file_442): - cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(file, file_442) - os.system(cmd) - if not 
os.path.exists(file_442): - return None, gs_err_code_trans2_442 - - file_dst = file.replace(".wav", "_442_dr.wav") - cmd = "{} {} {} {}".format(gs_rever_path, self.vocal_path, file_442, file_dst) - os.system(cmd) - if not os.path.exists(file_dst): - return None, gs_err_code_reverb - print("cid = {}, reverb_by_vocal sp={}".format(self.cid, time.time() - st)) - return file_dst, gs_err_code_success - - def after_process(self, cid, work_dir, in_file, effect_file, vocal_file, acc_file, need_draw=True, - need_reverb=True): - """ - 后处理逻辑 - 将处理好的音频进行替换,然后和伴奏进行混合,最后进行编码 - :return: - """ - if need_reverb: - # 抓取混响 - effect_file, err = self.reverb_by_vocal(in_file) - if err != gs_err_code_success: - return err - - if need_draw: - # 增加一个拉伸的步骤 - volume_path = str(effect_file).replace(".wav", "_dv.wav") - cmd = "{} {} {} {}".format(gs_draw_volume_exe, effect_file, vocal_file, volume_path) - print(cmd) - os.system(cmd) - if not os.path.exists(volume_path): - print("{} {} ERROR draw volume".format(self.cid, volume_path)) - return gs_err_code_volume_err - effect_file = volume_path - - st = time.time() - self.cid = cid - self.work_dir = work_dir - self.src_mp3 = os.path.join(self.work_dir, "src.mp3") - if not os.path.exists(self.work_dir): - return gs_err_code_no_src_dir - self.replace_vocal_frame_inst.process(in_file, effect_file, vocal_file) - dst_path = effect_file + "_replace.wav" - if not os.path.exists(dst_path): - return gs_err_code_replace_err - print("replace_vocal_frame_inst sp = {}".format(time.time() - st)) - - # 转码 - dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav") - cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(dst_path, dst_path_442) - os.system(cmd) - if not os.path.exists(dst_path_442): - return gs_err_code_replace_trans_err - - # 合并转码后再做一次拉伸,保证响度 - volume_path = str(dst_path_442).replace(".wav", "_dv.wav") - cmd = "{} {} {} {}".format(gs_draw_volume_exe, dst_path_442, vocal_file, volume_path) - print(cmd) - os.system(cmd) - if 
not os.path.exists(volume_path): - print("{} {} ERROR draw volume".format(self.cid, volume_path)) - return gs_err_code_volume_err - dst_path_442 = volume_path - - # 混合 - mix_path = dst_path_442.replace("_replace442.wav", "_replace442_mix.wav") - cmd = "{} {} {} {}".format(gs_simple_mixer_path, dst_path_442, acc_file, mix_path) - print("{}".format(cmd)) - os.system(cmd) - if not os.path.exists(mix_path): - return gs_err_code_mix_err - - # 编码为mp3 - output_dir = os.path.join(self.work_dir, self.cid + "_out") - if not os.path.exists(output_dir): - os.makedirs(output_dir) - name = str(mix_path).replace("_replace442_mix.wav", "_replace442_mix.mp3").split("/")[-1] - mix_path_mp3 = os.path.join(output_dir, name) - cmd = "ffmpeg -i {} -ab 320k -y {} -loglevel fatal".format(mix_path, mix_path_mp3) - os.system(cmd) - if not os.path.exists(mix_path_mp3): - return gs_err_code_mix_transcode_err - - # 拷贝src到output_dir - # shutil.copyfile(self.src_mp3, os.path.join(output_dir, "src.mp3")) - # logging.info("after_process sp = {}".format(time.time() - st)) - return gs_err_code_success - - -def test_volume_dir(): - base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_3_w4" - # arr = [ - # "611752105015523266/cache/611752105015523266_5066549357604730.wav", - # "611752105017233541/cache/611752105017233541_6755399442719465.wav", - # "611752105030414513/cache/611752105030414513_1125899914308640.wav", - # "611752105030414549/cache/611752105030414549_5066549357604730.wav", - # "611752105030414557/cache/611752105030414557_8162774327817435.wav", - # "611752105030414588/cache/611752105030414588_1125899914308640.wav", - # "611752105030414597/cache/611752105030414597_6755399374234747.wav", - # "611752105030414613/cache/611752105030414613_5066549357604730.wav", - # "611752105030414615/cache/611752105030414615_1125899914308640.wav", - # "611752105030414619/cache/611752105030414619_5066549357604730.wav", - # "611752105030414633/cache/611752105030414633_8162774327817435.wav", - # 
"611752105030414638/cache/611752105030414638_8162774329368194.wav", - # "611752105030414689/cache/611752105030414689_8162774327817435.wav", - # "611752105030414702/cache/611752105030414702_6755399374234747.wav", - # "611752105030414742/cache/611752105030414742_5066549357604730.wav", - # "611752105030414763/cache/611752105030414763_1125899914308640.wav", - # "611752105030414773/cache/611752105030414773_8162774329368194.wav", - # "611752105030414777/cache/611752105030414777_8162774329368194.wav", - # "611752105030414779/cache/611752105030414779_1125899914308640.wav", - # "611752105030414784/cache/611752105030414784_6755399442719465.wav", - # "611752105030414890/cache/611752105030414890_5066549357604730.wav", - # "611752105030414915/cache/611752105030414915_5066549357604730.wav", - # "611752105030414925/cache/611752105030414925_1125899914308640.wav", - # "611752105030414929/cache/611752105030414929_1125899914308640.wav", - # "611752105030414935/cache/611752105030414935_3634463651.wav", - # "611752105030414943/cache/611752105030414943_6755399374234747.wav", - # "611752105030414957/cache/611752105030414957_12384898975368914.wav", - # "611752105030414962/cache/611752105030414962_8162774327817435.wav", - # "611752105030414976/cache/611752105030414976_10414574138721494.wav", - # "611752105030414993/cache/611752105030414993_12947848931397021.wav", - # "611752105030414995/cache/611752105030414995_5066549357604730.wav", - # "611752105030415003/cache/611752105030415003_12947848931397021.wav", - # "611752105030415014/cache/611752105030415014_10414574138721494.wav", - # "611752105030415018/cache/611752105030415018_8162774329368194.wav", - # "611752105030415032/cache/611752105030415032_6755399442719465.wav", - # "611752105030415056/cache/611752105030415056_3096224748076687.wav", - # "611752105030415067/cache/611752105030415067_1125899914308640.wav", - # "611752105030415071/cache/611752105030415071_5910973794723621.wav", - # 
"611752105030415074/cache/611752105030415074_1125899914308640.wav", - # "611752105030415083/cache/611752105030415083_1125899914308640.wav", - # "611752105030415087/cache/611752105030415087_5910973794723621.wav", - # "611752105030415100/cache/611752105030415100_10414574138721494.wav", - # "611752105030415103/cache/611752105030415103_8162774329368194.wav" - # ] - # arr = [ - # "611752105020256284/cache/611752105020256284_8162774329368194.wav", - # "611752105020286433/cache/611752105020286433_1125899914308640.wav", - # "611752105020286443/cache/611752105020286443_12384898975368914.wav", - # "611752105020286446/cache/611752105020286446_5629499489839033.wav", - # "611752105020290639/cache/611752105020290639_3634463651.wav", - # "611752105020290695/cache/611752105020290695_1125899914308640.wav", - # "611752105020315328/cache/611752105020315328_8162774329368194.wav", - # "611752105020315368/cache/611752105020315368_1688849864840588.wav", - # "611752105020336950/cache/611752105020336950_3634463651.wav", - # "611752105020343687/cache/611752105020343687_8162774327817435.wav", - # "611752105020343699/cache/611752105020343699_1125899914308640.wav", - # "611752105020351134/cache/611752105020351134_10414574138721494.wav", - # "611752105020357112/cache/611752105020357112_8162774327817435.wav", - # "611752105020378620/cache/611752105020378620_8162774327817435.wav", - # "611752105020387015/cache/611752105020387015_1125899914308640.wav", - # "611752105020394121/cache/611752105020394121_1125899914308640.wav", - # "611752105020394297/cache/611752105020394297_3634463651.wav", - # "611752105020411654/cache/611752105020411654_3096224751151928.wav", - # "611752105020417688/cache/611752105020417688_12947848931397021.wav", - # "611752105020563523/cache/611752105020563523_8162774327817435.wav", - # "611752105021332759/cache/611752105021332759_3634463651.wav", - # "611752105022446809/cache/611752105022446809_8162774327817435.wav", - # 
"611752105022647082/cache/611752105022647082_8162774327817435.wav", - # "611752105022667231/cache/611752105022667231_8162774327817435.wav", - # "611752105022735101/cache/611752105022735101_5066549357604730.wav", - # "611752105022736204/cache/611752105022736204_1125899914308640.wav", - # "611752105022745595/cache/611752105022745595_10414574138721494.wav", - # "611752105022770952/cache/611752105022770952_1125899914308640.wav", - # "611752105022842004/cache/611752105022842004_3634463651.wav", - # "611752105022842477/cache/611752105022842477_1125899914308640.wav", - # "611752105023434557/cache/611752105023434557_6755399374234747.wav", - # "611752105023532439/cache/611752105023532439_8162774329368194.wav", - # "611752105023623965/cache/611752105023623965_3096224748076687.wav", - # "611752105024250202/cache/611752105024250202_8162774327817435.wav", - # "611752105024628047/cache/611752105024628047_5066549357604730.wav", - # "611752105024676794/cache/611752105024676794_6755399442719465.wav", - # "611752105024678976/cache/611752105024678976_6755399442719465.wav", - # "611752105024679221/cache/611752105024679221_8162774329368194.wav", - # "611752105024953316/cache/611752105024953316_1688849864840588.wav", - # "611752105025104181/cache/611752105025104181_6755399374234747.wav", - # "611752105026189342/cache/611752105026189342_5910973794723621.wav", - # "611752105026523547/cache/611752105026523547_1125899914308640.wav", - # "611752105026707760/cache/611752105026707760_3096224748076687.wav", - # "611752105026771723/cache/611752105026771723_8162774327817435.wav", - # "611752105026946178/cache/611752105026946178_10414574140317353.wav", - # "611752105027047993/cache/611752105027047993_5066549357604730.wav", - # "611752105027188746/cache/611752105027188746_5066549357604730.wav", - # "611752105027189453/cache/611752105027189453_8162774329368194.wav", - # "611752105027302268/cache/611752105027302268_5066549357604730.wav", - # 
"611752105027557408/cache/611752105027557408_1125899914308640.wav", - # "611752105028650636/cache/611752105028650636_8162774327817435.wav", - # "611752105028683824/cache/611752105028683824_1125899914308640.wav", - # "611752105029990849/cache/611752105029990849_7036874421386111.wav", - # "611752105029993297/cache/611752105029993297_6755399374234747.wav", - # "611752105030077711/cache/611752105030077711_3096224748076687.wav", - # "611752105030104548/cache/611752105030104548_5629499489839033.wav", - # "611752105030419624/cache/611752105030419624_8162774327817435.wav", - # "611752105030419633/cache/611752105030419633_1125899914308640.wav", - # "611752105030419688/cache/611752105030419688_1125899914308640.wav", - # "611752105030433779/cache/611752105030433779_3634463651.wav" - # ] - - arr = [ - "611752105020256284/cache/611752105020256284_8162774329368194.wav", - "611752105020286433/cache/611752105020286433_1125899914308640.wav", - "611752105020286443/cache/611752105020286443_12384898975368914.wav", - "611752105020286446/cache/611752105020286446_5629499489839033.wav", - "611752105020290639/cache/611752105020290639_3634463651.wav", - "611752105020290695/cache/611752105020290695_1125899914308640.wav", - "611752105020315328/cache/611752105020315328_8162774329368194.wav", - "611752105020315368/cache/611752105020315368_1688849864840588.wav", - "611752105020336950/cache/611752105020336950_3634463651.wav", - "611752105020343687/cache/611752105020343687_8162774327817435.wav" - ] - s_inst = SongCoverInference() - for vocal_file in arr: - sstime = time.time() - i_file = os.path.join(base_dir, vocal_file) - cur_dir = "/".join(i_file.split("/")[:-1]) - # e_file = os.path.join(base_dir, vocal_file.replace(".wav", "_dev_441.wav")) - # e_file = os.path.join(base_dir, vocal_file.replace(".wav", "_442_dr.wav")) - e_file = os.path.join(base_dir, vocal_file.replace(".wav", "_442_dr_v2.wav")) - v_file = os.path.join(cur_dir, "vocal.wav") - a_file = os.path.join(cur_dir, "acc.wav") - cur_id 
= cur_dir.split("/")[-1] - err = s_inst.after_process(cur_id, cur_dir, i_file, e_file, v_file, a_file, True, False) - print("err={}, sp={}".format(err, time.time() - sstime)) - - -def get_metop500(): - arr = [ - "611752105030249067", - "611752105030248972", - "611752105030249414", - "611752105030249374", - "611752105030249030", - "611752105030249127", - "611752105030249091", - "611752105030249233", - "611752105030249036", - "611752105030249281", - "611752105030249040", - "611752105030249052", - "611752105030249394", - "611752105030249347", - "611752105030249342", - "611752105030249282", - "611752105030249292", - "611752105030249356", - "611752105030249302", - "611752105030249377", - "611752105030248973", - "611752105030249393", - "611752105030249398", - "611752105030250695", - "611752105030249213", - "611752105030250739", - "611752105030249206", - "611752105030249074", - "611752105030249387", - "611752105030250702", - "611752105030249365", - "611752105030249011", - "611752105030249319", - "611752105030249016", - "611752105030249176", - "611752105030250690", - "611752105030250691", - "611752105030249032", - "611752105030249370", - "611752105030249410", - "611752105030249355", - "611752105030250730", - "611752105030249022", - "611752105030249240", - "611752105030249296", - "611752105030249070", - "611752105030249322", - "611752105030249402", - "611752105030249386", - "611752105030249280", - "611752105030249038", - "611752105030250743", - "611752105030249136", - "611752105030249034", - "611752105030249403", - "611752105030249104", - "611752105030249105", - "611752105030249359", - "611752105030250728", - "611752105030249338", - "611752105030249216", - "611752105030249334", - "611752105030249037", - "611752105030249264", - "611752105030249284", - "611752105030249267", - "611752105030249010", - "611752105030249431", - "611752105030249364", - "611752105030249243", - "611752105030249397", - "611752105030249041", - "611752105030249118", - "611752105030249283", - 
"611752105030249340", - "611752105030249250", - "611752105030249048", - "611752105030249336", - "611752105030249371", - "611752105030249372", - "611752105030249273", - "611752105030249366", - "611752105030249352", - "611752105030249049", - "611752105030249278", - "611752105030249401", - "611752105030249258", - "611752105030249160", - "611752105030249348", - "611752105030249071", - "611752105030249175", - "611752105030249053", - "611752105030249035", - "611752105030249375", - "611752105030249417", - "611752105030249055", - "611752105030249275", - "611752105030249177", - "611752105028480653", - "611752105030249385", - "611752105030249406", - "611752105030249383", - "611752105030249295", - "611752105030250699", - "611752105030249289", - "611752105030248965", - "611752105030249128", - "611752105030249173", - "611752105030249019", - "611752105030249333", - "611752105030249361", - "611752105030250733", - "611752105030249112", - "611752105030249293", - "611752105030249391", - "611752105030249195", - "611752105030249324", - "611752105030249388", - "611752105030249134", - "611752105030249073", - "611752105030249174", - "611752105030249353", - "611752105030249287", - "611752105030249113", - "611752105030249227" - ] - all = [ - "611752105026649069", - "611752105027201163", - "611752105027601574", - "611752105027602999", - "611752105028392007", - "611752105028480056", - "611752105028480075", - "611752105028480653", - "611752105029330944", - "611752105029790637", - "611752105029951597", - "611752105029951604", - "611752105029951624", - "611752105029956352", - "611752105030248965", - "611752105030248971", - "611752105030248972", - "611752105030248973", - "611752105030248974", - "611752105030248975", - "611752105030248976", - "611752105030248977", - "611752105030248978", - "611752105030248979", - "611752105030248980", - "611752105030248981", - "611752105030248982", - "611752105030248983", - "611752105030248985", - "611752105030248986", - "611752105030248987", - 
"611752105030248988", - "611752105030248989", - "611752105030248990", - "611752105030248991", - "611752105030248992", - "611752105030248993", - "611752105030248994", - "611752105030248995", - "611752105030248996", - "611752105030248997", - "611752105030248998", - "611752105030248999", - "611752105030249000", - "611752105030249001", - "611752105030249002", - "611752105030249003", - "611752105030249004", - "611752105030249005", - "611752105030249006", - "611752105030249007", - "611752105030249008", - "611752105030249009", - "611752105030249010", - "611752105030249011", - "611752105030249012", - "611752105030249013", - "611752105030249014", - "611752105030249015", - "611752105030249016", - "611752105030249017", - "611752105030249018", - "611752105030249019", - "611752105030249020", - "611752105030249021", - "611752105030249022", - "611752105030249023", - "611752105030249024", - "611752105030249025", - "611752105030249026", - "611752105030249027", - "611752105030249028", - "611752105030249029", - "611752105030249030", - "611752105030249031", - "611752105030249032", - "611752105030249033", - "611752105030249034", - "611752105030249035", - "611752105030249036", - "611752105030249037", - "611752105030249038", - "611752105030249039", - "611752105030249040", - "611752105030249041", - "611752105030249042", - "611752105030249043", - "611752105030249044", - "611752105030249045", - "611752105030249046", - "611752105030249047", - "611752105030249048", - "611752105030249049", - "611752105030249050", - "611752105030249051", - "611752105030249052", - "611752105030249053", - "611752105030249054", - "611752105030249055", - "611752105030249056", - "611752105030249057", - "611752105030249058", - "611752105030249059", - "611752105030249060", - "611752105030249062", - "611752105030249063", - "611752105030249064", - "611752105030249065", - "611752105030249067", - "611752105030249068", - "611752105030249070", - "611752105030249071", - "611752105030249072", - "611752105030249073", - 
"611752105030249074", - "611752105030249075", - "611752105030249076", - "611752105030249077", - "611752105030249078", - "611752105030249079", - "611752105030249080", - "611752105030249081", - "611752105030249082", - "611752105030249083", - "611752105030249084", - "611752105030249085", - "611752105030249086", - "611752105030249087", - "611752105030249088", - "611752105030249089", - "611752105030249090", - "611752105030249091", - "611752105030249092", - "611752105030249093", - "611752105030249094", - "611752105030249095", - "611752105030249096", - "611752105030249098", - "611752105030249099", - "611752105030249100", - "611752105030249101", - "611752105030249102", - "611752105030249103", - "611752105030249104", - "611752105030249105", - "611752105030249106", - "611752105030249107", - "611752105030249108", - "611752105030249109", - "611752105030249110", - "611752105030249111", - "611752105030249112", - "611752105030249113", - "611752105030249114", - "611752105030249115", - "611752105030249116", - "611752105030249117", - "611752105030249118", - "611752105030249119", - "611752105030249120", - "611752105030249121", - "611752105030249122", - "611752105030249123", - "611752105030249124", - "611752105030249125", - "611752105030249126", - "611752105030249127", - "611752105030249128", - "611752105030249129", - "611752105030249130", - "611752105030249131", - "611752105030249132", - "611752105030249133", - "611752105030249134", - "611752105030249135", - "611752105030249136", - "611752105030249137", - "611752105030249138", - "611752105030249139", - "611752105030249140", - "611752105030249141", - "611752105030249142", - "611752105030249143", - "611752105030249144", - "611752105030249145", - "611752105030249146", - "611752105030249147", - "611752105030249148", - "611752105030249150", - "611752105030249151", - "611752105030249152", - "611752105030249153", - "611752105030249154", - "611752105030249155", - "611752105030249157", - "611752105030249158", - "611752105030249159", - 
"611752105030249160", - "611752105030249161", - "611752105030249162", - "611752105030249163", - "611752105030249165", - "611752105030249166", - "611752105030249167", - "611752105030249168", - "611752105030249170", - "611752105030249171", - "611752105030249172", - "611752105030249173", - "611752105030249174", - "611752105030249175", - "611752105030249176", - "611752105030249177", - "611752105030249178", - "611752105030249179", - "611752105030249180", - "611752105030249181", - "611752105030249182", - "611752105030249183", - "611752105030249185", - "611752105030249186", - "611752105030249187", - "611752105030249188", - "611752105030249189", - "611752105030249190", - "611752105030249191", - "611752105030249192", - "611752105030249193", - "611752105030249194", - "611752105030249195", - "611752105030249196", - "611752105030249197", - "611752105030249198", - "611752105030249199", - "611752105030249200", - "611752105030249201", - "611752105030249202", - "611752105030249203", - "611752105030249204", - "611752105030249205", - "611752105030249206", - "611752105030249207", - "611752105030249208", - "611752105030249209", - "611752105030249210", - "611752105030249211", - "611752105030249212", - "611752105030249213", - "611752105030249214", - "611752105030249216", - "611752105030249217", - "611752105030249218", - "611752105030249219", - "611752105030249220", - "611752105030249221", - "611752105030249223", - "611752105030249224", - "611752105030249225", - "611752105030249226", - "611752105030249227", - "611752105030249228", - "611752105030249229", - "611752105030249230", - "611752105030249231", - "611752105030249232", - "611752105030249233", - "611752105030249234", - "611752105030249235", - "611752105030249236", - "611752105030249237", - "611752105030249238", - "611752105030249239", - "611752105030249240", - "611752105030249241", - "611752105030249242", - "611752105030249243", - "611752105030249244", - "611752105030249245", - "611752105030249247", - "611752105030249248", - 
"611752105030249249", - "611752105030249250", - "611752105030249251", - "611752105030249252", - "611752105030249253", - "611752105030249255", - "611752105030249256", - "611752105030249257", - "611752105030249258", - "611752105030249259", - "611752105030249260", - "611752105030249261", - "611752105030249262", - "611752105030249264", - "611752105030249265", - "611752105030249266", - "611752105030249267", - "611752105030249269", - "611752105030249270", - "611752105030249271", - "611752105030249273", - "611752105030249274", - "611752105030249275", - "611752105030249277", - "611752105030249278", - "611752105030249279", - "611752105030249280", - "611752105030249281", - "611752105030249282", - "611752105030249283", - "611752105030249284", - "611752105030249287", - "611752105030249288", - "611752105030249289", - "611752105030249290", - "611752105030249292", - "611752105030249293", - "611752105030249294", - "611752105030249295", - "611752105030249296", - "611752105030249297", - "611752105030249298", - "611752105030249299", - "611752105030249300", - "611752105030249301", - "611752105030249302", - "611752105030249303", - "611752105030249307", - "611752105030249308", - "611752105030249309", - "611752105030249310", - "611752105030249313", - "611752105030249314", - "611752105030249315", - "611752105030249316", - "611752105030249317", - "611752105030249318", - "611752105030249319", - "611752105030249320", - "611752105030249321", - "611752105030249322", - "611752105030249323", - "611752105030249324", - "611752105030249325", - "611752105030249327", - "611752105030249328", - "611752105030249329", - "611752105030249330", - "611752105030249331", - "611752105030249332", - "611752105030249333", - "611752105030249334", - "611752105030249336", - "611752105030249337", - "611752105030249338", - "611752105030249339", - "611752105030249340", - "611752105030249341", - "611752105030249342", - "611752105030249343", - "611752105030249344", - "611752105030249345", - "611752105030249346", - 
"611752105030249347", - "611752105030249348", - "611752105030249349", - "611752105030249350", - "611752105030249351", - "611752105030249352", - "611752105030249353", - "611752105030249354", - "611752105030249355", - "611752105030249356", - "611752105030249357", - "611752105030249358", - "611752105030249359", - "611752105030249360", - "611752105030249361", - "611752105030249362", - "611752105030249363", - "611752105030249364", - "611752105030249365", - "611752105030249366", - "611752105030249367", - "611752105030249368", - "611752105030249369", - "611752105030249370", - "611752105030249371", - "611752105030249372", - "611752105030249373", - "611752105030249374", - "611752105030249375", - "611752105030249376", - "611752105030249377", - "611752105030249378", - "611752105030249379", - "611752105030249380", - "611752105030249381", - "611752105030249383", - "611752105030249384", - "611752105030249385", - "611752105030249386", - "611752105030249387", - "611752105030249388", - "611752105030249389", - "611752105030249390", - "611752105030249391", - "611752105030249392", - "611752105030249393", - "611752105030249394", - "611752105030249395", - "611752105030249396", - "611752105030249397", - "611752105030249398", - "611752105030249399", - "611752105030249401", - "611752105030249402", - "611752105030249403", - "611752105030249404", - "611752105030249405", - "611752105030249406", - "611752105030249407", - "611752105030249408", - "611752105030249409", - "611752105030249410", - "611752105030249412", - "611752105030249413", - "611752105030249414", - "611752105030249415", - "611752105030249416", - "611752105030249417", - "611752105030249418", - "611752105030249419", - "611752105030249420", - "611752105030249421", - "611752105030249431", - "611752105030249624", - "611752105030250688", - "611752105030250689", - "611752105030250690", - "611752105030250691", - "611752105030250692", - "611752105030250693", - "611752105030250695", - "611752105030250697", - "611752105030250698", - 
"611752105030250699", - "611752105030250700", - "611752105030250701", - "611752105030250702", - "611752105030250704", - "611752105030250707", - "611752105030250711", - "611752105030250712", - "611752105030250713", - "611752105030250714", - "611752105030250715", - "611752105030250716", - "611752105030250717", - "611752105030250718", - "611752105030250719", - "611752105030250720", - "611752105030250721", - "611752105030250723", - "611752105030250725", - "611752105030250726", - "611752105030250728", - "611752105030250729", - "611752105030250730", - "611752105030250731", - "611752105030250732", - "611752105030250733", - "611752105030250735", - "611752105030250736", - "611752105030250738", - "611752105030250739", - "611752105030250740", - "611752105030250741", - "611752105030250742", - "611752105030250743" - ] - - new_arr = [] - for sid in all: - if sid in arr: - continue - new_arr.append(sid) - print("len={}".format(len(new_arr))) - return new_arr - - -def get_me_3_w4_zy(): - arr = [ - "611752105015523266", - "611752105016527562", - "611752105017233541", - "611752105019423720", - "611752105030113709", - "611752105030414513", - "611752105030414549", - "611752105030414557", - "611752105030414568", - "611752105030414576", - "611752105030414580", - "611752105030414584", - "611752105030414588", - "611752105030414590", - "611752105030414597", - "611752105030414600", - "611752105030414608", - "611752105030414613", - "611752105030414615", - "611752105030414619", - "611752105030414633", - "611752105030414638", - "611752105030414644", - "611752105030414647", - "611752105030414655", - "611752105030414660", - "611752105030414663", - "611752105030414669", - "611752105030414674", - "611752105030414678", - "611752105030414680", - "611752105030414682", - "611752105030414686", - "611752105030414689", - "611752105030414696", - "611752105030414702", - "611752105030414706", - "611752105030414707", - "611752105030414711", - "611752105030414717", - "611752105030414729", - 
"611752105030414742", - "611752105030414752", - "611752105030414757", - "611752105030414761", - "611752105030414763", - "611752105030414766", - "611752105030414773", - "611752105030414776", - "611752105030414777", - "611752105030414779", - "611752105030414784", - "611752105030414890", - "611752105030414907", - "611752105030414915", - "611752105030414919", - "611752105030414925", - "611752105030414929", - "611752105030414932", - "611752105030414935", - "611752105030414937", - "611752105030414943", - "611752105030414948", - "611752105030414949", - "611752105030414957", - "611752105030414962", - "611752105030414963", - "611752105030414968", - "611752105030414973", - "611752105030414976", - "611752105030414981", - "611752105030414986", - "611752105030414988", - "611752105030414990", - "611752105030414993", - "611752105030414995", - "611752105030415003", - "611752105030415007", - "611752105030415009", - "611752105030415014", - "611752105030415018", - "611752105030415032", - "611752105030415044", - "611752105030415050", - "611752105030415052", - "611752105030415056", - "611752105030415058", - "611752105030415062", - "611752105030415067", - "611752105030415071", - "611752105030415074", - "611752105030415078", - "611752105030415083", - "611752105030415087", - "611752105030415094", - "611752105030415100", - "611752105030415103", - "611752105030425986", - "611752105030426004" - ] - return arr - - -def generate_arr(): - # arr = [ - # "611752105020256284", - # "611752105020282612", - # "611752105020282613", - # "611752105020286433", - # "611752105020286443", - # "611752105020286446", - # "611752105020286501", - # "611752105020290639", - # "611752105020290695", - # "611752105020315328", - # "611752105020315368", - # "611752105020325137", - # "611752105020336946", - # "611752105020336950", - # "611752105020343687", - # "611752105020343699", - # "611752105020350988", - # "611752105020350990", - # "611752105020351134", - # "611752105020357112", - # "611752105020376320", - # 
"611752105020378620", - # "611752105020382559", - # "611752105020387015", - # "611752105020390950", - # "611752105020394121", - # "611752105020394297", - # "611752105020411654", - # "611752105020417488", - # "611752105020417688", - # "611752105020548211", - # "611752105020563523", - # "611752105021273980", - # "611752105021285282", - # "611752105021330812", - # "611752105021332759", - # "611752105021375100", - # "611752105021442406", - # "611752105021442417", - # "611752105021453011", - # "611752105022345104", - # "611752105022389596", - # "611752105022446809", - # "611752105022647082", - # "611752105022667231", - # "611752105022735101", - # "611752105022736204", - # "611752105022745595", - # "611752105022770952", - # "611752105022842004", - # "611752105022842477", - # "611752105023434557", - # "611752105023532439", - # "611752105023623965", - # "611752105023811083", - # "611752105024250202", - # "611752105024429936", - # "611752105024628047", - # "611752105024676794", - # "611752105024678976", - # "611752105024679221", - # "611752105024714646", - # "611752105024786030", - # "611752105024953316", - # "611752105025104181", - # "611752105025231610", - # "611752105025510149", - # "611752105026189342", - # "611752105026523547", - # "611752105026707760", - # "611752105026771723", - # "611752105026946178", - # "611752105027047993", - # "611752105027188746", - # "611752105027189453", - # "611752105027302268", - # "611752105027557408", - # "611752105027588072", - # "611752105028650636", - # "611752105028683824", - # "611752105029689090", - # "611752105029954089", - # "611752105029954168", - # "611752105029955214", - # "611752105029990849", - # "611752105029993297", - # "611752105030047424", - # "611752105030077711", - # "611752105030104548", - # "611752105030419624", - # "611752105030419633", - # "611752105030419688", - # "611752105030433779" - # ] - - # arr = get_metop500() - arr = get_me_3_w4_zy() - s_inst = SongCoverInference() - for sid in arr: - sstime = time.time() - 
dir = os.path.join("/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_3_w4_zy", sid) - # dir = os.path.join("/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500", sid) - err = s_inst.process_one(sid, dir, True) - print("sid={}, err={}, sp={}".format(sid, err, time.time() - sstime)) - - -def test_rate(): - arr = [ - "611752105020256284", - "611752105020286433", - "611752105020286443", - "611752105020286446", - "611752105020290639", - "611752105020290695", - "611752105020315328", - "611752105020315368", - "611752105020336950", - "611752105020343687", - "611752105020343699", - "611752105020351134", - "611752105020357112", - "611752105020378620", - "611752105020387015", - "611752105020394121", - "611752105020394297", - "611752105020411654", - "611752105020417688", - "611752105020548211", - "611752105020563523", - "611752105021285282", - "611752105021332759", - "611752105022446809", - "611752105022647082", - "611752105022667231", - "611752105022735101", - "611752105022736204", - "611752105022745595", - "611752105022770952", - "611752105022842004", - "611752105022842477", - "611752105023434557", - "611752105023532439", - "611752105023623965", - "611752105024250202", - "611752105024628047", - "611752105024676794", - "611752105024678976", - "611752105024679221", - "611752105024953316", - "611752105025104181", - "611752105026189342", - "611752105026523547", - "611752105026707760", - "611752105026771723", - "611752105026946178", - "611752105027047993", - "611752105027188746", - "611752105027189453", - "611752105027302268", - "611752105027557408", - "611752105028650636", - "611752105028683824", - "611752105029990849", - "611752105029993297", - "611752105030077711", - "611752105030104548", - "611752105030419624", - "611752105030419633", - "611752105030419688", - "611752105030433779" - ] - s_inst = SongCoverInference() - for sid in arr: - vocal_path = "data/inf_users/me_3_w4/{}/cache/vocal.wav".format(sid) - tm = s_inst.get_start_ms(vocal_path) - 
print("res,{},{}".format(vocal_path, tm)) - - -def test(): - arr = [ - # "611752105020343687", - # "611752105023532439", - "611752105030419688", - ] - base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test" - s_inst = SongCoverInference() - for cid in arr: - st = time.time() - err = s_inst.process_one(cid, os.path.join(base_dir, cid), False) - print("cid={} RealFinish err={} sp={}".format(cid, err, time.time() - st)) - - -if __name__ == '__main__': - test() - # test_rate() - # test_volume_dir() - # generate_arr() - # test_volume_dir() - # s_inst = SongCoverInference() - # sstime = time.time() - # err = s_inst.process_one("611752105030249038", - # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500/611752105030249038", False) - # # i_file = "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249121/611752105030249121_5629499489839033.wav" - # # e_file = "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249121/611752105030249121_5629499489839033.wav" - # # v_file = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500/611752105030249121/vocal.wav" - # # a_file = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500/611752105030249121/acc.wav" - # # w_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500/611752105030249121" - # # - # # err = s_inst.after_process("611752105030248965", w_dir, i_file, e_file, v_file, a_file) - # print("err={}, sp={}".format(err, time.time() - sstime))