Review notes
============
Everything above the first "diff --git" header is free-form commentary.

Purpose of the patch (as far as the hunks show):
  * Dockerfile: pin pip to 24.0 and fairseq to 0.12.2.
  * svc_online.py: point gs_volume_bin_url at the versioned ".../ebur128_tool/v1/ebur128_tool"
    object; whitespace fix in init_svc_model's return.
  * ebur128_tool.cpp: fall back to unity gain when the measured loudness is NaN and clamp the
    linear gain to [0.01, 100].

Fixes applied to added ('+') lines in this revision (line counts per hunk are unchanged):
  1. Dockerfile: "pip3 install pip3==24.0" -> "pip3 install pip==24.0". "pip3" is the launcher
     name; the distribution being pinned is "pip". Presumably the pin exists so that
     fairseq==0.12.2 and its older dependencies still resolve -- confirm with the author.
  2. ebur128_tool.cpp: the lower-clamp log message said "little than 100" while the bound that is
     tested and applied is 0.01; the message now says "less than 0.01".

NOTE(review) -- issues on context lines, left untouched so the patch still matches its base:
  * Dockerfile: the "coscmd config -a ... -s ..." line bakes a cloud access key pair into an image
    layer. Rotate the key and inject it at build/run time (build secret or environment) instead.
  * svc_online.py: download_volume_adjustment() only downloads when gs_model_dir/ebur128_tool does
    not exist, so a host that still has the previous binary on disk will not pick up the new v1
    URL. Consider versioning the local file name as well.
  * svc_online.py: svc_offline_logger.fatal(...) only logs; execution continues into
    os.system(...) / VoiceClass(...) after a failed download or unzip.
  * svc_online.py: "female_rate == None" should be "female_rate is None".
  * svc_online.py: os.system() is called with f-string-built shell commands (paths are not quoted).
  * ebur128_tool.cpp: ebur128_whole() returns -2 without ebur128_destroy(&st) when
    ebur128_add_frames_short fails, and main() ignores ebur128_whole()'s return value.
  * ebur128_tool.cpp: the new check only handles NaN. If the library reports negative infinity for
    silent input (verify against the libebur128 header), the gain is simply clamped to 100.

NOTE(review) -- reconstruction caveats:
  * This patch was recovered from a copy whose line breaks and indentation had been collapsed.
    Blank-line placement and Python block nesting below are a best-effort reconstruction; the
    "@@" line counts and "index" hashes are kept verbatim from the original and may not match.
    Regenerate with "git diff" before applying.
  * The four system header names after "#include" were lost in extraction. They are restored as
    <cstdio>, <cstdlib>, <cmath>, <string> because the file uses printf, atof, pow/std::isnan and
    std::string -- TODO confirm against the repository.

diff --git a/AIMeiSheng/docker_demo/Dockerfile b/AIMeiSheng/docker_demo/Dockerfile
index 8a6fc25..94fb28a 100644
--- a/AIMeiSheng/docker_demo/Dockerfile
+++ b/AIMeiSheng/docker_demo/Dockerfile
@@ -1,28 +1,29 @@
 # 系统版本 CUDA Version 11.8.0
 # NAME="CentOS Linux" VERSION="7 (Core)"
 # FROM starmaker.tencentcloudcr.com/starmaker/av/av:1.1
 
 # 基础镜像, python3.9,cuda118,centos7,外加ffmpeg
 #FROM starmaker.tencentcloudcr.com/starmaker/av/av_base:1.0
 FROM registry.ushow.media/av/av_base:1.0
 #FROM av_base_test:1.0
 
 RUN source /etc/profile && sed -i 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-Base.repo && sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Base.repo && yum clean all && yum install -y unzip && yum install -y libsndfile && yum install -y libsamplerate libsamplerate-devel
 RUN source /etc/profile && pip3 install librosa==0.9.1 && pip3 install gradio && pip3 install torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
 RUN source /etc/profile && pip3 install urllib3==1.26.15 && pip3 install coscmd && coscmd config -a AKIDoQmshFWXGitnQmrfCTYNwEExPaU6RVHm -s F9n9E2ZonWy93f04qMaYFfogHadPt62h -b log-sg-1256122840 -r ap-singapore
 RUN source /etc/profile && pip3 install asteroid-filterbanks
 RUN source /etc/profile && pip3 install praat-parselmouth==0.4.3
 RUN source /etc/profile && pip3 install pyworld
 RUN source /etc/profile && pip3 install faiss-cpu
 RUN source /etc/profile && pip3 install torchcrepe
 RUN source /etc/profile && pip3 install thop
 RUN source /etc/profile && pip3 install ffmpeg-python
-RUN source /etc/profile && pip3 install fairseq
+RUN source /etc/profile && pip3 install pip==24.0
+RUN source /etc/profile && pip3 install fairseq==0.12.2
 RUN source /etc/profile && pip3 install redis==4.5.0
 RUN source /etc/profile && pip3 install numpy==1.26.4
 
 COPY ./ /data/code/
 WORKDIR /data/code
 
 CMD ["/bin/bash", "-c", "source /etc/profile; export PYTHONPATH=/data/code; cd /data/code/AIMeiSheng/docker_demo; python3 offline_server.py"]
 #CMD ["/bin/bash", "-c", "source /etc/profile; export PYTHONPATH=/data/code; cd /data/code/AIMeiSheng/docker_demo; python3 tmp.py"]
\ No newline at end of file
diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
index f12143f..3efdb58 100644
--- a/AIMeiSheng/docker_demo/svc_online.py
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -1,194 +1,194 @@
 # -*- coding: UTF-8 -*-
 """
 SVC的核心处理逻辑
 """
 import os
 import time
 import socket
 import shutil
 import hashlib
 
 from AIMeiSheng.meisheng_svc_final import load_model, process_svc_online
 from AIMeiSheng.cos_similar_ui_zoom import cos_similar
 from AIMeiSheng.meisheng_env_preparex import meisheng_env_prepare
 from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass, download_volume_balanced
 from AIMeiSheng.docker_demo.common import *
 import logging
 
 hostname = socket.gethostname()
 log_file_name = f"{os.path.dirname(os.path.abspath(__file__))}/av_meisheng_{hostname}.log"
 
 # 设置logger
 svc_offline_logger = logging.getLogger("svc_offline")
 file_handler = logging.FileHandler(log_file_name)
 file_handler.setLevel(logging.INFO)
 formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S')
 file_handler.setFormatter(formatter)
 if gs_prod:
     svc_offline_logger.addHandler(file_handler)
 
 if os.path.exists(gs_tmp_dir):
     shutil.rmtree(gs_tmp_dir)
 os.makedirs(gs_model_dir, exist_ok=True)
 os.makedirs(gs_resource_cache_dir, exist_ok=True)
 
 # 预设参数
 gs_gender_models_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/hub/voice_classification/models.zip"
-gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"
+gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool/v1/ebur128_tool"
 
 
 class GSWorkerAttr:
 
     def __init__(self, input_data):
         # 取出输入资源
         vocal_url = input_data["record_song_url"]
         target_url = input_data["target_url"]
         start = input_data["start"]  # 单位是ms
         end = input_data["end"]  # 单位是ms
         vocal_loudness = input_data["vocal_loudness"]
         female_recording_url = input_data["female_recording_url"]
         male_recording_url = input_data["male_recording_url"]
 
         self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
         self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
         if os.path.exists(self.tmp_dir):
             shutil.rmtree(self.tmp_dir)
         os.makedirs(self.tmp_dir)
 
         self.vocal_url = vocal_url
         self.target_url = target_url
 
         ext = vocal_url.split(".")[-1]
         self.vocal_path = os.path.join(self.tmp_dir, self.distinct_id + f"_in.{ext}")
         self.target_wav_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.wav")
         self.target_wav_ad_path = os.path.join(self.tmp_dir, self.distinct_id + "_out_ad.wav")
         self.target_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.m4a")
 
         self.female_svc_source_url = female_recording_url
         self.male_svc_source_url = male_recording_url
 
         ext = female_recording_url.split(".")[-1]
         self.female_svc_source_path = os.path.join(gs_resource_cache_dir, hashlib.md5(female_recording_url.encode()).hexdigest() + "." + ext)
         ext = male_recording_url.split(".")[-1]
         self.male_svc_source_path = os.path.join(gs_resource_cache_dir, hashlib.md5(male_recording_url.encode()).hexdigest() + "." + ext)
         self.st_tm = start
         self.ed_tm = end
         self.target_loudness = vocal_loudness
 
     def log_info_name(self):
         return f"d_id={self.distinct_id}, vocal_url={self.vocal_url}"
 
     def rm_cache(self):
         if os.path.exists(self.tmp_dir):
             shutil.rmtree(self.tmp_dir)
 
 
 def init_gender_model():
     """
     下载模型
     :return:
     """
     dst_model_dir = os.path.join(gs_model_dir, "voice_classification")
     if not os.path.exists(dst_model_dir):
         dst_zip_path = os.path.join(gs_model_dir, "models.zip")
         if not download2disk(gs_gender_models_url, dst_zip_path):
             svc_offline_logger.fatal(f"download gender_model err={gs_gender_models_url}")
         cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}"
         os.system(cmd)
         if not os.path.exists(dst_model_dir):
             svc_offline_logger.fatal(f"unzip {dst_zip_path} err")
 
     music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth")
     music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth")
     gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth")
     gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth")
     vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
     return vc
 
 
 def init_svc_model():
     meisheng_env_prepare(logging, gs_model_dir)
     embed_model, hubert_model = load_model()
     cs_sim = cos_similar()
-    return embed_model, hubert_model,cs_sim
+    return embed_model, hubert_model, cs_sim
 
 
 def download_volume_adjustment():
     """
     下载音量调整工具
     :return:
     """
     volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
     if not os.path.exists(volume_bin_path):
         if not download2disk(gs_volume_bin_url, volume_bin_path):
             svc_offline_logger.fatal(f"download volume_bin err={gs_volume_bin_url}")
         os.system(f"chmod +x {volume_bin_path}")
 
 
 def volume_adjustment(wav_path, target_loudness, out_path):
     """
     音量调整
     :param wav_path:
     :param target_loudness:
     :param out_path:
     :return:
     """
     volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
     cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}"
     os.system(cmd)
 
 
 class SVCOnline:
 
     def __init__(self):
         st = time.time()
         self.gender_model = init_gender_model()
         self.embed_model, self.hubert_model, self.cs_sim = init_svc_model()
         download_volume_adjustment()
         download_volume_balanced()
         svc_offline_logger.info(f"svc init finished, sp = {time.time() - st}")
 
     def gender_process(self, worker_attr):
         st = time.time()
         gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
         svc_offline_logger.info(
             f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
             f"gender_process sp = {time.time() - st}")
         if gender == 0:
             gender = 'female'
         elif gender == 1:
             gender = 'male'
         elif female_rate == None:
             gender = 'male'
             return gender, gs_err_code_gender_classify
         elif female_rate > 0.5:
             gender = 'female'
         else:
             gender = 'male'
 
         svc_offline_logger.info(f"{worker_attr.vocal_url}, modified gender={gender}")
 
         # err = gs_err_code_success
         # if female_rate == -1:
         #     err = gs_err_code_target_silence
         return gender, gs_err_code_success
 
     def process(self, worker_attr):
         gender, err = self.gender_process(worker_attr)
         if err != gs_err_code_success:
             return gender, err
         song_path = worker_attr.female_svc_source_path
         if gender == "male":
             song_path = worker_attr.male_svc_source_path
         params = {'gender': gender, 'tst': worker_attr.st_tm, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None}
         st = time.time()
         err_code = process_svc_online(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, self.embed_model, self.hubert_model, self.cs_sim, params)
         svc_offline_logger.info(f"{worker_attr.vocal_url}, err_code={err_code} process svc sp = {time.time() - st}")
         return gender, err_code
diff --git a/tools/ebur128_tool/ebur128_tool.cpp b/tools/ebur128_tool/ebur128_tool.cpp
index c3d171c..ca42875 100644
--- a/tools/ebur128_tool/ebur128_tool.cpp
+++ b/tools/ebur128_tool/ebur128_tool.cpp
@@ -1,107 +1,118 @@
 //
 // Created by Administrator on 2024/7/8.
 //
 
 #include <cstdio>
 #include <cstdlib>
 #include <cmath>
 #include <string>
 #include "alimiter.h"
 #include "ebur128.h"
 #include "WaveFile.h"
 
 #define PROC_LEN 1024
 
 /**
  * 获取增益
  * @param nChannel
  * @param nSampleRate
  * @param pData
  * @param nLength
  * @param gain
  * @return
  */
 int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness)
 {
     ebur128_state *st = NULL;
     st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
     if (NULL == st)
     {
         return -1;
     }
     int nPos = 0;
     int nTmpLength = 0;
     int nRet;
     while (nPos < nLength)
     {
         nTmpLength = PROC_LEN;
         if (nLength - nPos < PROC_LEN)
         {
             nTmpLength = nLength - nPos;
         }
         nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
         if (nRet != 0)
         {
             return -2;
         }
         nPos += nTmpLength;
     }
 
     gated_loudness = -1;
     ebur128_loudness_global(st, &gated_loudness);
     ebur128_destroy(&st);
     return 0;
 }
 
 int main(int argc, char* argv[])
 {
     if (argc < 4)
     {
         printf("input error! example: ./main input_wav target_loudness dst_wav\n");
         return -1;
     }
     std::string vocal_path = argv[1];
     double target_loudness = atof(argv[2]);
     std::string out_vocal_path = argv[3];
 
     // 读取数据
     CWaveFile vocal_wav = CWaveFile(vocal_path.c_str(), false);
     if (!vocal_wav.GetStatus())
     {
         printf("%s not ok!\n", vocal_path.c_str());
         return -2;
     }
 
     int vocal_buf_len = vocal_wav.GetChannels() * vocal_wav.GetTotalFrames();
     float *vocal_buf = new float[vocal_buf_len];
     short *short_vocal_buf = new short[vocal_buf_len];
     vocal_wav.ReadFrameAsfloat(vocal_buf, vocal_wav.GetTotalFrames());
     for(int i = 0; i < vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(); i++)
     {
         short_vocal_buf[i] = float(vocal_buf[i]) * 32767.f;
     }
 
     double vocal_gated_loudness = 0;
     ebur128_whole(vocal_wav.GetChannels(), vocal_wav.GetSampleRate(), short_vocal_buf, vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(), vocal_gated_loudness);
+    if (std::isnan(vocal_gated_loudness))
+    {
+        printf("vocal_gated_loudness is nan\n");
+        vocal_gated_loudness = target_loudness;
+    }
     float db = (target_loudness - vocal_gated_loudness) / 20.f;
     float ebur128_rate = pow(10, db);
-
+    if (ebur128_rate > 100) {
+        printf("ebur128_rate=%f bigger than 100\n", ebur128_rate);
+        ebur128_rate = 100;
+    } else if (ebur128_rate < 0.01) {
+        printf("ebur128_rate=%f less than 0.01\n", ebur128_rate);
+        ebur128_rate = 0.01;
+    }
     printf("vocal_gated_loudness = %f, db = %f, gain = %f\n", vocal_gated_loudness, db, ebur128_rate);
     SUPERSOUND::Alimiter limiter;
     limiter.SetParam(vocal_wav.GetSampleRate(), vocal_wav.GetChannels());
     for (int i = 0; i < vocal_buf_len; i++)
     {
         float out = vocal_buf[i] * ebur128_rate;
         limiter.Filter(&out, &out, 1);
         vocal_buf[i] = out;
     }
 
     CWaveFile out_wav = CWaveFile(out_vocal_path.c_str(), true);
     out_wav.SetChannels(vocal_wav.GetChannels());
     out_wav.SetSampleRate(vocal_wav.GetSampleRate());
     out_wav.SetSampleFormat(SF_IEEE_FLOAT);
     out_wav.SetupDone();
     out_wav.WriteFrame(vocal_buf, vocal_wav.GetTotalFrames());
 
     delete[] vocal_buf;
     delete[] short_vocal_buf;
     return 0;
 }
\ No newline at end of file