Page MenuHomePhabricator

No OneTemporary

diff --git a/AIMeiSheng/docker_demo/Dockerfile b/AIMeiSheng/docker_demo/Dockerfile
index 8a6fc25..94fb28a 100644
--- a/AIMeiSheng/docker_demo/Dockerfile
+++ b/AIMeiSheng/docker_demo/Dockerfile
@@ -1,28 +1,29 @@
# System version: CUDA Version 11.8.0
# NAME="CentOS Linux" VERSION="7 (Core)"
# FROM starmaker.tencentcloudcr.com/starmaker/av/av:1.1
# Base image: python3.9, cuda118, centos7, plus ffmpeg
#FROM starmaker.tencentcloudcr.com/starmaker/av/av_base:1.0
FROM registry.ushow.media/av/av_base:1.0
#FROM av_base_test:1.0
# Point yum at vault.centos.org instead of the mirror list, then install the native audio libraries.
RUN source /etc/profile && sed -i 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-Base.repo && sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Base.repo && yum clean all && yum install -y unzip && yum install -y libsndfile && yum install -y libsamplerate libsamplerate-devel
RUN source /etc/profile && pip3 install librosa==0.9.1 && pip3 install gradio && pip3 install torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# SECURITY(review): the coscmd access key and secret below are written into an image layer and into
# version control. Rotate them and supply them at build/run time (build secret or environment) instead.
RUN source /etc/profile && pip3 install urllib3==1.26.15 && pip3 install coscmd && coscmd config -a AKIDoQmshFWXGitnQmrfCTYNwEExPaU6RVHm -s F9n9E2ZonWy93f04qMaYFfogHadPt62h -b log-sg-1256122840 -r ap-singapore
RUN source /etc/profile && pip3 install asteroid-filterbanks
RUN source /etc/profile && pip3 install praat-parselmouth==0.4.3
RUN source /etc/profile && pip3 install pyworld
RUN source /etc/profile && pip3 install faiss-cpu
RUN source /etc/profile && pip3 install torchcrepe
RUN source /etc/profile && pip3 install thop
RUN source /etc/profile && pip3 install ffmpeg-python
# The installer's distribution name is "pip" ("pip3" is only the command name), so pin pip==24.0.
# Presumably pinned so that the fairseq 0.12.2 dependency set still resolves - confirm.
-RUN source /etc/profile && pip3 install fairseq
+RUN source /etc/profile && pip3 install pip==24.0
+RUN source /etc/profile && pip3 install fairseq==0.12.2
RUN source /etc/profile && pip3 install redis==4.5.0
RUN source /etc/profile && pip3 install numpy==1.26.4
COPY ./ /data/code/
WORKDIR /data/code
CMD ["/bin/bash", "-c", "source /etc/profile; export PYTHONPATH=/data/code; cd /data/code/AIMeiSheng/docker_demo; python3 offline_server.py"]
#CMD ["/bin/bash", "-c", "source /etc/profile; export PYTHONPATH=/data/code; cd /data/code/AIMeiSheng/docker_demo; python3 tmp.py"]
\ No newline at end of file
diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
index f12143f..3efdb58 100644
--- a/AIMeiSheng/docker_demo/svc_online.py
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -1,194 +1,194 @@
# -*- coding: UTF-8 -*-
"""
SVC的核心处理逻辑
"""
import os
import time
import socket
import shutil
import hashlib
from AIMeiSheng.meisheng_svc_final import load_model, process_svc_online
from AIMeiSheng.cos_similar_ui_zoom import cos_similar
from AIMeiSheng.meisheng_env_preparex import meisheng_env_prepare
from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass, download_volume_balanced
from AIMeiSheng.docker_demo.common import *
import logging
# One log file per host, placed next to this module.
hostname = socket.gethostname()
log_file_name = f"{os.path.dirname(os.path.abspath(__file__))}/av_meisheng_{hostname}.log"

# Set up the logger.
# NOTE(review): only the handler level is set here; the "svc_offline" logger's own level is not.
# Unless it is configured elsewhere, confirm that INFO records actually reach this handler.
svc_offline_logger = logging.getLogger("svc_offline")
file_handler = logging.FileHandler(log_file_name)
file_handler.setLevel(logging.INFO)
# Use %H (24-hour clock): %I without %p makes morning and afternoon timestamps indistinguishable.
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
file_handler.setFormatter(formatter)
# Attach the file handler to the logger when gs_prod is truthy.
# NOTE(review): indentation is lost in this view, so it cannot be determined here whether the
# tmp-dir cleanup below is nested under the gs_prod check - confirm against the real file.
if gs_prod:
svc_offline_logger.addHandler(file_handler)
# Remove gs_tmp_dir if it already exists.
if os.path.exists(gs_tmp_dir):
shutil.rmtree(gs_tmp_dir)
# Create the model and resource-cache directories when they are missing.
os.makedirs(gs_model_dir, exist_ok=True)
os.makedirs(gs_resource_cache_dir, exist_ok=True)
# Preset parameters
gs_gender_models_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/hub/voice_classification/models.zip"
-gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"
+gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool/v1/ebur128_tool"
class GSWorkerAttr:
    """Per-request working state: source URLs, time range, loudness target and local file paths.

    Creating an instance (re)creates an empty scratch directory for the request under
    ``gs_tmp_dir``; ``rm_cache`` removes that directory again.
    """

    def __init__(self, input_data):
        """Derive the request identifier and all local paths from the request payload.

        :param input_data: mapping providing ``record_song_url``, ``target_url``, ``start`` and
            ``end`` (both in ms), ``vocal_loudness``, ``female_recording_url`` and
            ``male_recording_url``.
        :raises KeyError: if one of those keys is missing.
        """
        # Read every input first so a missing key fails before anything is created on disk.
        vocal_url = input_data["record_song_url"]
        target_url = input_data["target_url"]
        start = input_data["start"]  # in ms
        end = input_data["end"]  # in ms
        vocal_loudness = input_data["vocal_loudness"]
        female_recording_url = input_data["female_recording_url"]
        male_recording_url = input_data["male_recording_url"]

        # The scratch directory is keyed by the MD5 of the vocal URL and always starts out empty.
        self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
        self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.makedirs(self.tmp_dir)

        self.vocal_url = vocal_url
        self.target_url = target_url

        ext = self._url_ext(vocal_url)
        self.vocal_path = os.path.join(self.tmp_dir, self.distinct_id + f"_in.{ext}")
        self.target_wav_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.wav")
        self.target_wav_ad_path = os.path.join(self.tmp_dir, self.distinct_id + "_out_ad.wav")
        self.target_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.m4a")

        self.female_svc_source_url = female_recording_url
        self.male_svc_source_url = male_recording_url
        self.female_svc_source_path = self._cached_source_path(female_recording_url)
        self.male_svc_source_path = self._cached_source_path(male_recording_url)

        self.st_tm = start
        self.ed_tm = end
        self.target_loudness = vocal_loudness

    @staticmethod
    def _url_ext(url):
        """Return the file extension (without the dot) of the path component of *url*.

        Only the path is inspected, so a query string or fragment never leaks into the
        extension, and a path without an extension yields ``""`` instead of a string
        containing ``/``.
        """
        from urllib.parse import urlparse

        return os.path.splitext(urlparse(url).path)[1].lstrip(".")

    @classmethod
    def _cached_source_path(cls, url):
        """Return the path in ``gs_resource_cache_dir`` for *url*: ``<md5(url)>.<ext>``."""
        file_name = hashlib.md5(url.encode()).hexdigest() + "." + cls._url_ext(url)
        return os.path.join(gs_resource_cache_dir, file_name)

    def log_info_name(self):
        """Return the identifying prefix used in log lines for this request."""
        return f"d_id={self.distinct_id}, vocal_url={self.vocal_url}"

    def rm_cache(self):
        """Delete the request's scratch directory if it exists."""
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
def init_gender_model():
    """
    Make sure the voice-classification models are on disk, then build the classifier.

    When ``<gs_model_dir>/voice_classification`` is missing, the model archive is downloaded
    and unpacked with a shell command; failures are logged and not raised.

    :return: a VoiceClass built from the four model files in that directory
    """
    model_root = os.path.join(gs_model_dir, "voice_classification")

    if not os.path.exists(model_root):
        archive_path = os.path.join(gs_model_dir, "models.zip")
        downloaded = download2disk(gs_gender_models_url, archive_path)
        if not downloaded:
            svc_offline_logger.fatal(f"download gender_model err={gs_gender_models_url}")
        # Unpack in place, rename the extracted "models" folder and drop the archive.
        os.system(f"cd {gs_model_dir}; unzip {archive_path}; mv models voice_classification; rm -f {archive_path}")
        if not os.path.exists(model_root):
            svc_offline_logger.fatal(f"unzip {archive_path} err")

    # Order matters: it is the positional argument order passed to VoiceClass.
    model_files = (
        "voice_005_rec_v5.pth",
        "voice_10_v5.pth",
        "gender_8k_ratev5_v6_adam.pth",
        "gender_8k_v6_adam.pth",
    )
    model_paths = [os.path.join(model_root, file_name) for file_name in model_files]
    return VoiceClass(*model_paths)
def init_svc_model():
# Prepare the model environment under gs_model_dir (meisheng_env_prepare is handed the
# logging module itself), load the embed and hubert models, and create a cos_similar instance.
# Returns the tuple (embed_model, hubert_model, cs_sim).
meisheng_env_prepare(logging, gs_model_dir)
embed_model, hubert_model = load_model()
cs_sim = cos_similar()
- return embed_model, hubert_model,cs_sim
+ return embed_model, hubert_model, cs_sim
def download_volume_adjustment():
    """
    Fetch the loudness-adjustment binary into ``gs_model_dir`` and make it executable.

    The binary is downloaded only when it is not already present. A failed download or a
    failed permission change is logged and never raised.

    :return: None
    """
    volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
    if not os.path.exists(volume_bin_path):
        if not download2disk(gs_volume_bin_url, volume_bin_path):
            svc_offline_logger.fatal(f"download volume_bin err={gs_volume_bin_url}")

    if not os.path.isfile(volume_bin_path):
        # The download failed, so there is nothing to mark executable.
        return

    try:
        # Add the execute bits directly instead of spawning a shell for "chmod +x".
        current_mode = os.stat(volume_bin_path).st_mode
        os.chmod(volume_bin_path, current_mode | 0o111)
    except OSError as err:
        svc_offline_logger.error(f"chmod +x {volume_bin_path} err={err}")
def volume_adjustment(wav_path, target_loudness, out_path):
    """
    Run the ``ebur128_tool`` binary from ``gs_model_dir`` to adjust the volume of a WAV file.

    The tool is invoked as ``ebur128_tool <wav_path> <target_loudness> <out_path>``. A launch
    failure or non-zero exit status is logged and never raised.

    :param wav_path: input WAV file
    :param target_loudness: loudness target passed to the tool as its second argument
    :param out_path: destination file written by the tool
    :return: None
    """
    import subprocess  # local import so the block does not depend on a file-level import

    volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
    # An argument list (no shell) keeps paths with spaces or shell metacharacters intact.
    cmd = [volume_bin_path, str(wav_path), str(target_loudness), str(out_path)]
    try:
        ret = subprocess.run(cmd, check=False).returncode
    except OSError as err:
        svc_offline_logger.error(f"volume_adjustment launch err={err}, cmd={cmd}")
        return
    if ret != 0:
        svc_offline_logger.error(f"volume_adjustment failed, ret={ret}, cmd={cmd}")
class SVCOnline:
    """Holds the loaded models and runs gender detection followed by voice conversion."""

    def __init__(self):
        """Load the gender and SVC models, fetch the loudness tools and log the time spent."""
        st = time.time()
        self.gender_model = init_gender_model()
        self.embed_model, self.hubert_model, self.cs_sim = init_svc_model()
        download_volume_adjustment()
        download_volume_balanced()
        svc_offline_logger.info(f"svc init finished, sp = {time.time() - st}")

    def gender_process(self, worker_attr):
        """Classify the vocal's gender and map the classifier output to 'female' / 'male'.

        A classifier result of 0 or 1 is used directly. For any other result the decision
        falls back to ``female_rate`` (> 0.5 means female); when no rate is available the
        call fails with ``gs_err_code_gender_classify``.

        :param worker_attr: request state; ``vocal_path`` and ``vocal_url`` are read
        :return: tuple ``(gender, err_code)``
        """
        st = time.time()
        gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
        svc_offline_logger.info(
            f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
            f"gender_process sp = {time.time() - st}")

        if gender == 0:
            gender = 'female'
        elif gender == 1:
            gender = 'male'
        elif female_rate is None:
            # Identity check: "== None" is unreliable for objects that overload equality.
            gender = 'male'
            return gender, gs_err_code_gender_classify
        elif female_rate > 0.5:
            gender = 'female'
        else:
            gender = 'male'

        svc_offline_logger.info(f"{worker_attr.vocal_url}, modified gender={gender}")
        # NOTE: an earlier version returned gs_err_code_target_silence when female_rate == -1;
        # that check is disabled.
        return gender, gs_err_code_success

    def process(self, worker_attr):
        """Detect the gender, pick the matching source recording and run the conversion.

        :param worker_attr: request state providing the vocal path, source paths and time range
        :return: tuple ``(gender, err_code)``; ``err_code`` comes from gender detection when
            that fails, otherwise from ``process_svc_online``
        """
        gender, err = self.gender_process(worker_attr)
        if err != gs_err_code_success:
            return gender, err

        song_path = worker_attr.female_svc_source_path
        if gender == "male":
            song_path = worker_attr.male_svc_source_path

        params = {'gender': gender, 'tst': worker_attr.st_tm, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None}
        st = time.time()
        err_code = process_svc_online(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, self.embed_model,
                                      self.hubert_model, self.cs_sim, params)
        svc_offline_logger.info(f"{worker_attr.vocal_url}, err_code={err_code} process svc sp = {time.time() - st}")
        return gender, err_code
diff --git a/tools/ebur128_tool/ebur128_tool.cpp b/tools/ebur128_tool/ebur128_tool.cpp
index c3d171c..ca42875 100644
--- a/tools/ebur128_tool/ebur128_tool.cpp
+++ b/tools/ebur128_tool/ebur128_tool.cpp
@@ -1,107 +1,118 @@
//
// Created by Administrator on 2024/7/8.
//
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include "alimiter.h"
#include "ebur128.h"
#include "WaveFile.h"
#define PROC_LEN 1024
/**
 * Measure the global loudness of an interleaved buffer of short samples.
 *
 * The buffer is fed to the ebur128 state in chunks of at most PROC_LEN samples and the
 * result of ebur128_loudness_global is written to gated_loudness.
 *
 * @param nChannel        number of interleaved channels in pData
 * @param nSampleRate     sample rate handed to ebur128_init
 * @param pData           interleaved samples
 * @param nLength         total number of samples in pData (frames * channels)
 * @param gated_loudness  [out] measured loudness; preset to -1 before the query
 * @return 0 on success, -1 if the state cannot be created, -2 if adding frames fails
 */
int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness)
{
    ebur128_state *st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
    if (NULL == st)
    {
        return -1;
    }

    int nPos = 0;
    while (nPos < nLength)
    {
        // The last chunk may be shorter than PROC_LEN.
        int nTmpLength = PROC_LEN;
        if (nLength - nPos < PROC_LEN)
        {
            nTmpLength = nLength - nPos;
        }

        // The library counts frames, so convert the sample count.
        int nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
        if (nRet != 0)
        {
            // Release the state on the error path as well; it was leaked here before.
            ebur128_destroy(&st);
            return -2;
        }
        nPos += nTmpLength;
    }

    gated_loudness = -1;
    ebur128_loudness_global(st, &gated_loudness);
    ebur128_destroy(&st);
    return 0;
}
/**
 * Command-line entry point: read a WAV file, scale it toward a target loudness, pass it
 * through the limiter and write the result as an IEEE-float WAV.
 *
 * Usage: ./main input_wav target_loudness dst_wav
 *
 * @return 0 on success, -1 on missing arguments, -2 if the input file cannot be opened
 */
int main(int argc, char* argv[]) {
    if (argc < 4)
    {
        printf("input error! example: ./main input_wav target_loudness dst_wav\n");
        return -1;
    }
    std::string vocal_path = argv[1];
    double target_loudness = atof(argv[2]);
    std::string out_vocal_path = argv[3];

    // Read the input data.
    CWaveFile vocal_wav = CWaveFile(vocal_path.c_str(), false);
    if (!vocal_wav.GetStatus())
    {
        printf("%s not ok!\n", vocal_path.c_str());
        return -2;
    }

    int vocal_buf_len = vocal_wav.GetChannels() * vocal_wav.GetTotalFrames();
    float *vocal_buf = new float[vocal_buf_len];
    short *short_vocal_buf = new short[vocal_buf_len];
    vocal_wav.ReadFrameAsfloat(vocal_buf, vocal_wav.GetTotalFrames());
    for (int i = 0; i < vocal_buf_len; i++)
    {
        // Clamp before narrowing: converting an out-of-range float to short is undefined.
        // NOTE(review): assumes ReadFrameAsfloat yields samples nominally in [-1, 1] - confirm.
        float scaled = vocal_buf[i] * 32767.f;
        if (scaled > 32767.f)
        {
            scaled = 32767.f;
        }
        else if (scaled < -32768.f)
        {
            scaled = -32768.f;
        }
        short_vocal_buf[i] = (short) scaled;
    }

    double vocal_gated_loudness = 0;
    int measure_ret = ebur128_whole(vocal_wav.GetChannels(), vocal_wav.GetSampleRate(), short_vocal_buf,
                                    vocal_buf_len, vocal_gated_loudness);
    if (measure_ret != 0)
    {
        // Without a measurement, fall back to unity gain instead of scaling from a stale value.
        printf("ebur128_whole failed, ret=%d\n", measure_ret);
        vocal_gated_loudness = target_loudness;
    }
+    if (std::isnan(vocal_gated_loudness))
+    {
+        printf("vocal_gated_loudness is nan\n");
+        vocal_gated_loudness = target_loudness;
+    }
    // "db" holds the loudness difference already divided by 20, i.e. the exponent of the gain.
    float db = (target_loudness - vocal_gated_loudness) / 20.f;
    float ebur128_rate = pow(10, db);
-
+    if (ebur128_rate > 100) {
+        printf("ebur128_rate=%f bigger than 100\n", ebur128_rate);
+        ebur128_rate = 100;
+    } else if (ebur128_rate < 0.01) {
+        printf("ebur128_rate=%f less than 0.01\n", ebur128_rate);
+        ebur128_rate = 0.01;
+    }
    printf("vocal_gated_loudness = %f, db = %f, gain = %f\n", vocal_gated_loudness, db, ebur128_rate);

    // Apply the gain sample by sample and run each sample through the limiter.
    SUPERSOUND::Alimiter limiter;
    limiter.SetParam(vocal_wav.GetSampleRate(), vocal_wav.GetChannels());
    for (int i = 0; i < vocal_buf_len; i++)
    {
        float out = vocal_buf[i] * ebur128_rate;
        limiter.Filter(&out, &out, 1);
        vocal_buf[i] = out;
    }

    // Write the result with the input's channel count and sample rate.
    CWaveFile out_wav = CWaveFile(out_vocal_path.c_str(), true);
    out_wav.SetChannels(vocal_wav.GetChannels());
    out_wav.SetSampleRate(vocal_wav.GetSampleRate());
    out_wav.SetSampleFormat(SF_IEEE_FLOAT);
    out_wav.SetupDone();
    out_wav.WriteFrame(vocal_buf, vocal_wav.GetTotalFrames());

    delete[] vocal_buf;
    delete[] short_vocal_buf;
    return 0;
}
\ No newline at end of file

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:35 (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1347221
Default Alt Text
(14 KB)

Event Timeline